Commit 986f9134 authored by 盖献康

通过文章url爬取内容

parent 26e9a211
......@@ -55,6 +55,13 @@
<version>4.12</version>
<scope>test</scope>
</dependency>
<!-- 爬取页面 -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.14.3</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
</project>
package top.iszsq.weixin;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import top.iszsq.weixin.api.WeiXinApi;
import top.iszsq.weixin.awt.MyImageShowFrame;
import top.iszsq.weixin.model.Article;
......@@ -14,10 +18,12 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Scanner;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.FutureTask;
import java.util.stream.IntStream;
/**
* 程序主入口
......@@ -38,7 +44,7 @@ public class Application {
myImageShowFrame = new MyImageShowFrame();
// 1. POST请求开始登录接口,初始化cookie
String sessionid = "" + System.currentTimeMillis() + (int)(Math.random()*100);
String sessionid = "" + System.currentTimeMillis() + (int) (Math.random() * 100);
WxResultBody wxResultBody = WeiXinApi.startLogin(sessionid);
System.out.println("---请求开始登录接口 返回结果:" + wxResultBody.toString());
......@@ -125,7 +131,7 @@ public class Application {
List<BizData> list = searchBiz.getList();
int ii = 1;
for (BizData bizData : list) {
System.out.println( ii + ":" + bizData.getNickname());
System.out.println(ii + ":" + bizData.getNickname());
ii++;
}
......@@ -138,7 +144,7 @@ public class Application {
System.out.println("超出范围了,重选!!");
}
break;
} catch (Exception e){
} catch (Exception e) {
System.out.println("你得输入一个数字!!!!");
}
}
......@@ -150,10 +156,22 @@ public class Application {
List<Article> exList = findExList.getApp_msg_list();
// For every article in the list: print its title and link, download the page
// with jsoup, then print the page title, the body text and each image URL.
for (Article article : exList) {
    System.out.println("---" + article.getTitle() + "-----" + article.getLink());
    Document document = Jsoup.connect(article.getLink()).get();
    Element articleRoot = document.getElementById("img-content");
    if (articleRoot == null) {
        // The page has no article container; nothing to extract for this link.
        continue;
    }
    // select() yields an empty Elements (never null) when nothing matches,
    // so text() is safe to call directly and returns "" in that case.
    String articleTitle = articleRoot.select("#activity-name").text();
    System.out.println("标题---" + articleTitle + "----");
    Element mainContent = articleRoot.getElementById("js_content");
    if (mainContent == null) {
        // `assert` is disabled unless the JVM is started with -ea, so an explicit
        // check is required to avoid a NullPointerException that would abort the
        // remaining articles.
        continue;
    }
    System.out.println("内容---" + mainContent.text() + "----");
    Elements imgs = mainContent.getElementsByTag("img");
    for (Element img : imgs) {
        // The image address is read from the "data-src" attribute.
        System.out.println("图片---" + img.attr("data-src"));
    }
}
}
}
}
......@@ -133,7 +133,7 @@ public class WeiXinApi {
Map<String, String> params = new HashMap<>(10);
params.put("action", "search_biz");
params.put("begin", "0");
params.put("count", "5");
params.put("count", "10");
params.put("query", keyword);
params.put("token", MyCookieStore.getToken());
params.put("lang", "zh_CN");
......@@ -155,7 +155,7 @@ public class WeiXinApi {
Map<String, String> params = new HashMap<>(10);
params.put("action", "list_ex");
params.put("begin", "0");
params.put("count", "5");
params.put("count", "10");
params.put("fakeid", fakeid);
params.put("token", MyCookieStore.getToken());
params.put("type", "9");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment