Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
sq-weixin-api
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
庄新伟
sq-weixin-api
Commits
984402fc
Commit
984402fc
authored
Jun 11, 2024
by
盖献康
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
按照指定条数爬取文章
parent
e61a7d47
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
88 additions
and
13 deletions
+88
-13
pom.xml
pom.xml
+6
-0
Application.java
src/main/java/top/iszsq/weixin/Application.java
+10
-9
WeiXinApi.java
src/main/java/top/iszsq/weixin/api/WeiXinApi.java
+72
-4
No files found.
pom.xml
View file @
984402fc
...
@@ -68,6 +68,12 @@
...
@@ -68,6 +68,12 @@
<artifactId>
selenium-java
</artifactId>
<artifactId>
selenium-java
</artifactId>
<version>
4.0.0
</version>
<!-- 确保使用最新版本 -->
<version>
4.0.0
</version>
<!-- 确保使用最新版本 -->
</dependency>
</dependency>
<dependency>
<groupId>
org.projectlombok
</groupId>
<artifactId>
lombok
</artifactId>
<version>
1.18.30
</version>
</dependency>
</dependencies>
</dependencies>
</project>
</project>
src/main/java/top/iszsq/weixin/Application.java
View file @
984402fc
...
@@ -152,17 +152,18 @@ public class Application {
...
@@ -152,17 +152,18 @@ public class Application {
BizData
select
=
list
.
get
(
index
-
1
);
BizData
select
=
list
.
get
(
index
-
1
);
System
.
out
.
println
(
String
.
format
(
"--好的,开始搜索【%s】的文章..."
,
select
.
getNickname
()));
System
.
out
.
println
(
String
.
format
(
"--好的,开始搜索【%s】的文章..."
,
select
.
getNickname
()));
WxResultBody
<
List
<
Article
>>
findExList
=
WeiXinApi
.
findExList
(
select
.
getFakeid
());
// WxResultBody<List<Article>> findExList = WeiXinApi.findExList(select.getFakeid());
List
<
Article
>
exList
=
findExList
.
getApp_msg_list
();
// List<Article> exList = findExList.getApp_msg_list();
for
(
Article
article
:
exList
)
{
List
<
Article
>
articleList
=
WeiXinApi
.
getArticleList
(
30
,
select
.
getFakeid
());
for
(
Article
article
:
articleList
)
{
System
.
out
.
println
(
"---"
+
article
.
getTitle
()
+
"-----"
+
article
.
getLink
());
System
.
out
.
println
(
"---"
+
article
.
getTitle
()
+
"-----"
+
article
.
getLink
());
Document
document
=
Jsoup
.
connect
(
article
.
getLink
()).
get
();
//
Document document = Jsoup.connect(article.getLink()).get();
Element
entiryElement
=
document
.
getElementById
(
"img-content"
);
//
Element entiryElement = document.getElementById("img-content");
if
(
entiryElement
!=
null
)
{
//
if (entiryElement != null) {
System
.
out
.
println
(
"--内容h5"
+
entiryElement
.
html
());
//
System.out.println("--内容h5" + entiryElement.html());
}
//
}
}
}
System
.
out
.
println
(
"---条数:"
+
ex
List
.
size
());
System
.
out
.
println
(
"---条数:"
+
article
List
.
size
());
}
}
}
}
...
...
src/main/java/top/iszsq/weixin/api/WeiXinApi.java
View file @
984402fc
package
top
.
iszsq
.
weixin
.
api
;
package
top
.
iszsq
.
weixin
.
api
;
import
com.fasterxml.jackson.core.type.TypeReference
;
import
com.fasterxml.jackson.core.type.TypeReference
;
import
lombok.SneakyThrows
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.commons.lang3.StringUtils
;
import
top.iszsq.weixin.enums.WxResultStatus
;
import
top.iszsq.weixin.enums.WxResultStatus
;
import
top.iszsq.weixin.exceptions.WxApiException
;
import
top.iszsq.weixin.exceptions.WxApiException
;
...
@@ -11,9 +12,8 @@ import top.iszsq.weixin.okhttp.MyCookieStore;
...
@@ -11,9 +12,8 @@ import top.iszsq.weixin.okhttp.MyCookieStore;
import
top.iszsq.weixin.utils.HttpUtils
;
import
top.iszsq.weixin.utils.HttpUtils
;
import
top.iszsq.weixin.utils.JsonUtils
;
import
top.iszsq.weixin.utils.JsonUtils
;
import
java.io.InputStream
;
import
java.io.InputStream
;
import
java.util.HashMap
;
import
java.util.*
;
import
java.util.List
;
import
java.util.stream.Collectors
;
import
java.util.Map
;
/**
/**
* 微信api封装
* 微信api封装
...
@@ -156,7 +156,7 @@ public class WeiXinApi {
...
@@ -156,7 +156,7 @@ public class WeiXinApi {
Map
<
String
,
String
>
params
=
new
HashMap
<>(
10
);
Map
<
String
,
String
>
params
=
new
HashMap
<>(
10
);
params
.
put
(
"action"
,
"list_ex"
);
params
.
put
(
"action"
,
"list_ex"
);
params
.
put
(
"begin"
,
"0"
);
params
.
put
(
"begin"
,
"0"
);
params
.
put
(
"count"
,
"
5
"
);
params
.
put
(
"count"
,
"
20
"
);
params
.
put
(
"fakeid"
,
fakeid
);
params
.
put
(
"fakeid"
,
fakeid
);
params
.
put
(
"token"
,
MyCookieStore
.
getToken
());
params
.
put
(
"token"
,
MyCookieStore
.
getToken
());
params
.
put
(
"type"
,
"9"
);
params
.
put
(
"type"
,
"9"
);
...
@@ -173,6 +173,74 @@ public class WeiXinApi {
...
@@ -173,6 +173,74 @@ public class WeiXinApi {
}
}
/**
 * Searches the articles of an official account (paginated variant).
 *
 * <p>Sends a GET request to the URL registered under {@code "findListEx"} in
 * {@code URL_MAP} with {@code action=list_ex}, the supplied {@code begin} and
 * {@code count}, the account's {@code fakeid}, and the token read from
 * {@link MyCookieStore#getToken()}, then converts the response with
 * {@code parseWxResultBody}.
 *
 * @param begin  value sent as the {@code begin} request parameter
 * @param count  value sent as the {@code count} request parameter
 * @param fakeId value sent as the {@code fakeid} request parameter
 * @return the parsed response body wrapping the article list
 */
public static WxResultBody<List<Article>> findExList(int begin, int count, String fakeId) {
    Map<String, String> query = new HashMap<>(10);

    // Paging and target-account parameters.
    query.put("action", "list_ex");
    query.put("begin", Integer.toString(begin));
    query.put("count", Integer.toString(count));
    query.put("fakeid", fakeId);
    query.put("token", MyCookieStore.getToken());

    // Fixed parameters sent with every list_ex request.
    query.put("type", "9");
    query.put("query", "");
    query.put("lang", "zh_CN");
    query.put("f", "json");
    query.put("ajax", "1");

    return parseWxResultBody(
            HttpUtils.doGet(URL_MAP.get("findListEx"), query),
            new TypeReference<WxResultBody<List<Article>>>() {}
    );
}
/**
 * Fetches up to {@code num} articles of an official account by requesting
 * successive pages through {@link #findExList(int, int, String)}.
 *
 * <p>Every request uses the same page size and advances {@code begin} by that
 * size, so consecutive requests do not overlap. Paging stops as soon as
 * {@code num} articles have been collected or a request yields no articles
 * (null or empty list), which prevents an endless loop when fewer than
 * {@code num} articles are available. A short random pause is inserted
 * between requests via {@link #delayedSleep()}.
 *
 * @param num    maximum number of articles to return; {@code 0} yields an empty list
 * @param fakeId value sent as the {@code fakeid} request parameter
 * @return a list holding at most {@code num} articles, in the order received
 * @throws IllegalArgumentException if {@code num} is negative (raised by {@code Stream.limit})
 */
public static List<Article> getArticleList(int num, String fakeId) {
    // Page size stays constant. NOTE(review): assumes "begin" is an offset and
    // "count" a page size on the remote API - confirm against the endpoint.
    final int pageSize = 5;

    List<Article> articleList = new ArrayList<>();
    int begin = 0;

    while (articleList.size() < num) {
        WxResultBody<List<Article>> page = findExList(begin, pageSize, fakeId);
        List<Article> appMsgList = (page == null) ? null : page.getApp_msg_list();

        // Nothing more to read (or an unusable response): stop instead of looping forever.
        if (appMsgList == null || appMsgList.isEmpty()) {
            break;
        }

        articleList.addAll(appMsgList);
        begin += pageSize;

        // Only pause when another request will actually follow.
        if (articleList.size() < num) {
            delayedSleep();
        }
    }

    // The last page may overshoot the requested amount; trim to exactly num.
    return articleList.stream().limit(num).collect(Collectors.toList());
}
/**
 * Pauses the current thread for a random duration between 200 and 500
 * milliseconds (both bounds inclusive).
 *
 * <p>Any checked exception raised by {@link Thread#sleep(long)} is rethrown
 * without being declared, courtesy of {@code @SneakyThrows}.
 */
@SneakyThrows
public static void delayedSleep() {
    final int lowerBoundMs = 200;
    final int upperBoundMs = 500;

    // +1 makes the upper bound inclusive, since nextInt's bound is exclusive.
    final int spread = upperBoundMs - lowerBoundMs + 1;

    long pauseMs = lowerBoundMs + new Random().nextInt(spread);
    Thread.sleep(pauseMs);
}
/**
/**
* 转成java bean
* 转成java bean
* @param jsonRes json结果字符串
* @param jsonRes json结果字符串
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment