Commit c56a641a authored by 刘帅阳

修改

parent 6cf5d5fc
......@@ -46,7 +46,7 @@ public class CrawlerController {
* web端爬取页面数据 + 定时爬取
*/
@GetMapping(value = "/start")
public CyResult start() {
public CyResult start() throws Exception {
CmsTask cmsTask = cmsTaskService.add("web" + new Date());
//将用户拿出来
String authenBusinessId = CyUserUtil.getAuthenBusinessId();
......
......@@ -54,6 +54,7 @@ public class WebsiteCrawlerServiceImpl implements WebsiteCrawlerService {
// Site URL constants. PEOPLEAPP_COM is matched against article URLs with contains() to choose
// the page parser; presumably the other constants are used the same way — confirm against callers.
public static final String CS_COM_CN = "https://www.cs.com.cn/";
public static final String CBIMC_CN = "http://www.cbimc.cn/";
public static final String E_CHINALIFE_COM = "https://www.e-chinalife.com/";
public static final String PEOPLEAPP_COM = "https://www.peopleapp.com/";
/**
* 指定URL
......@@ -366,10 +367,12 @@ public class WebsiteCrawlerServiceImpl implements WebsiteCrawlerService {
map = getFinancePeople(doc);
} else if (articleUrl.contains(FINANCE_CHINA_COM_CN)) {
map = getFinanceChina(doc);
} else if (articleUrl.contains(PEOPLEAPP_COM)) {
map = getPeopleAppCom(doc);
}
//通过 title 判断当前文章是否跟数据库有重复
String title = cmsNewsService.getNewsByTitleByTitle(map.get("title"));
if (title == null) {
if (title == null && map.containsKey(title) && map.containsKey("content")) {
// 图片转换
Document parse = Jsoup.parse(map.get("content"));
replaceImgSrc(parse);
......@@ -395,6 +398,16 @@ public class WebsiteCrawlerServiceImpl implements WebsiteCrawlerService {
}
/**
 * Pulls the title and content out of a parsed page selected for URLs containing PEOPLEAPP_COM.
 *
 * @param document the parsed page
 * @return a map with two entries: "title" holds the inner HTML of the elements matching
 *         {@code div.title}, and "content" holds the inner HTML of {@code body}
 */
private Map<String, String> getPeopleAppCom(Document document) {
    Map<String, String> article = new HashMap<>();
    article.put("title", document.select("div.title").html());
    article.put("content", document.select("body").html());
    return article;
}
/**
* 图片转换,防止盗链
*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment