Commit 986f9134 authored by 盖献康

通过文章url爬取内容

parent 26e9a211
...@@ -55,6 +55,13 @@ ...@@ -55,6 +55,13 @@
<version>4.12</version> <version>4.12</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<!-- 爬取页面 -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.14.3</version>
</dependency>
</dependencies> </dependencies>
</project> </project>
\ No newline at end of file
package top.iszsq.weixin; package top.iszsq.weixin;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import top.iszsq.weixin.api.WeiXinApi; import top.iszsq.weixin.api.WeiXinApi;
import top.iszsq.weixin.awt.MyImageShowFrame; import top.iszsq.weixin.awt.MyImageShowFrame;
import top.iszsq.weixin.model.Article; import top.iszsq.weixin.model.Article;
...@@ -14,10 +18,12 @@ import java.io.IOException; ...@@ -14,10 +18,12 @@ import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import java.util.Scanner; import java.util.Scanner;
import java.util.concurrent.Callable; import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutionException;
import java.util.concurrent.FutureTask; import java.util.concurrent.FutureTask;
import java.util.stream.IntStream;
/** /**
* 程序主入口 * 程序主入口
...@@ -38,7 +44,7 @@ public class Application { ...@@ -38,7 +44,7 @@ public class Application {
myImageShowFrame = new MyImageShowFrame(); myImageShowFrame = new MyImageShowFrame();
// 1. POST请求开始登录接口,初始化cookie // 1. POST请求开始登录接口,初始化cookie
String sessionid = "" + System.currentTimeMillis() + (int)(Math.random()*100); String sessionid = "" + System.currentTimeMillis() + (int) (Math.random() * 100);
WxResultBody wxResultBody = WeiXinApi.startLogin(sessionid); WxResultBody wxResultBody = WeiXinApi.startLogin(sessionid);
System.out.println("---请求开始登录接口 返回结果:" + wxResultBody.toString()); System.out.println("---请求开始登录接口 返回结果:" + wxResultBody.toString());
...@@ -125,7 +131,7 @@ public class Application { ...@@ -125,7 +131,7 @@ public class Application {
List<BizData> list = searchBiz.getList(); List<BizData> list = searchBiz.getList();
int ii = 1; int ii = 1;
for (BizData bizData : list) { for (BizData bizData : list) {
System.out.println( ii + ":" + bizData.getNickname()); System.out.println(ii + ":" + bizData.getNickname());
ii++; ii++;
} }
...@@ -138,7 +144,7 @@ public class Application { ...@@ -138,7 +144,7 @@ public class Application {
System.out.println("超出范围了,重选!!"); System.out.println("超出范围了,重选!!");
} }
break; break;
} catch (Exception e){ } catch (Exception e) {
System.out.println("你得输入一个数字!!!!"); System.out.println("你得输入一个数字!!!!");
} }
} }
...@@ -150,10 +156,22 @@ public class Application { ...@@ -150,10 +156,22 @@ public class Application {
List<Article> exList = findExList.getApp_msg_list(); List<Article> exList = findExList.getApp_msg_list();
for (Article article : exList) { for (Article article : exList) {
System.out.println("---" + article.getTitle() + "-----" + article.getLink()); System.out.println("---" + article.getTitle() + "-----" + article.getLink());
Document document = Jsoup.connect(article.getLink()).get();
Element entiryElement = document.getElementById("img-content");
if (entiryElement != null) {
String articleTitle = Objects.requireNonNull(entiryElement.select("#activity-name")).text();
System.out.println("标题---" + articleTitle + "----");
Element mainContent = entiryElement.getElementById("js_content");
assert mainContent != null;
System.out.println("内容---" + mainContent.text() + "----");
Elements imgs = mainContent.getElementsByTag("img");
for (Element img : imgs) {
System.out.println("图片---" + img.attr("data-src"));
}
}
} }
} }
} }
} }
...@@ -133,7 +133,7 @@ public class WeiXinApi { ...@@ -133,7 +133,7 @@ public class WeiXinApi {
Map<String, String> params = new HashMap<>(10); Map<String, String> params = new HashMap<>(10);
params.put("action", "search_biz"); params.put("action", "search_biz");
params.put("begin", "0"); params.put("begin", "0");
params.put("count", "5"); params.put("count", "10");
params.put("query", keyword); params.put("query", keyword);
params.put("token", MyCookieStore.getToken()); params.put("token", MyCookieStore.getToken());
params.put("lang", "zh_CN"); params.put("lang", "zh_CN");
...@@ -155,7 +155,7 @@ public class WeiXinApi { ...@@ -155,7 +155,7 @@ public class WeiXinApi {
Map<String, String> params = new HashMap<>(10); Map<String, String> params = new HashMap<>(10);
params.put("action", "list_ex"); params.put("action", "list_ex");
params.put("begin", "0"); params.put("begin", "0");
params.put("count", "5"); params.put("count", "10");
params.put("fakeid", fakeid); params.put("fakeid", fakeid);
params.put("token", MyCookieStore.getToken()); params.put("token", MyCookieStore.getToken());
params.put("type", "9"); params.put("type", "9");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment