Commit c56a641a authored by 刘帅阳

修改

parent 6cf5d5fc
...@@ -46,7 +46,7 @@ public class CrawlerController { ...@@ -46,7 +46,7 @@ public class CrawlerController {
* web端爬取页面数据 + 定时爬取 * web端爬取页面数据 + 定时爬取
*/ */
@GetMapping(value = "/start") @GetMapping(value = "/start")
public CyResult start() { public CyResult start() throws Exception {
CmsTask cmsTask = cmsTaskService.add("web" + new Date()); CmsTask cmsTask = cmsTaskService.add("web" + new Date());
//将用户拿出来 //将用户拿出来
String authenBusinessId = CyUserUtil.getAuthenBusinessId(); String authenBusinessId = CyUserUtil.getAuthenBusinessId();
...@@ -111,22 +111,22 @@ public class CrawlerController { ...@@ -111,22 +111,22 @@ public class CrawlerController {
String authenBusinessId = CyUserUtil.getAuthenBusinessId(); String authenBusinessId = CyUserUtil.getAuthenBusinessId();
// new Thread(() -> { // new Thread(() -> {
// 调用接口 // 调用接口
try { try {
Integer count = publicAccountCrawlerService.scanImage(publicAccountNames); Integer count = publicAccountCrawlerService.scanImage(publicAccountNames);
cmsTask.setStatus("1"); cmsTask.setStatus("1");
cmsTask.setUpdateBy(authenBusinessId); cmsTask.setUpdateBy(authenBusinessId);
cmsTask.setNum(count); cmsTask.setNum(count);
cmsTaskService.merge(cmsTask); cmsTaskService.merge(cmsTask);
} catch (Exception e) { } catch (Exception e) {
//抛出异常 //抛出异常
e.printStackTrace(); e.printStackTrace();
System.out.println("------------------" + e); System.out.println("------------------" + e);
//出现异常 修改 数据库 任务表数据 并 返回 //出现异常 修改 数据库 任务表数据 并 返回
cmsTask.setStatus("2"); cmsTask.setStatus("2");
cmsTask.setUpdateBy(authenBusinessId); cmsTask.setUpdateBy(authenBusinessId);
cmsTaskService.merge(cmsTask); cmsTaskService.merge(cmsTask);
} }
// }).start(); // }).start();
return CyResultGenUtil.builder(new CyPersistModel(1), return CyResultGenUtil.builder(new CyPersistModel(1),
CyMessCons.MESSAGE_ALERT_SUCCESS, CyMessCons.MESSAGE_ALERT_SUCCESS,
......
...@@ -54,6 +54,7 @@ public class WebsiteCrawlerServiceImpl implements WebsiteCrawlerService { ...@@ -54,6 +54,7 @@ public class WebsiteCrawlerServiceImpl implements WebsiteCrawlerService {
public static final String CS_COM_CN = "https://www.cs.com.cn/"; public static final String CS_COM_CN = "https://www.cs.com.cn/";
public static final String CBIMC_CN = "http://www.cbimc.cn/"; public static final String CBIMC_CN = "http://www.cbimc.cn/";
public static final String E_CHINALIFE_COM = "https://www.e-chinalife.com/"; public static final String E_CHINALIFE_COM = "https://www.e-chinalife.com/";
public static final String PEOPLEAPP_COM = "https://www.peopleapp.com/";
/** /**
* 指定URL * 指定URL
...@@ -366,10 +367,12 @@ public class WebsiteCrawlerServiceImpl implements WebsiteCrawlerService { ...@@ -366,10 +367,12 @@ public class WebsiteCrawlerServiceImpl implements WebsiteCrawlerService {
map = getFinancePeople(doc); map = getFinancePeople(doc);
} else if (articleUrl.contains(FINANCE_CHINA_COM_CN)) { } else if (articleUrl.contains(FINANCE_CHINA_COM_CN)) {
map = getFinanceChina(doc); map = getFinanceChina(doc);
} else if (articleUrl.contains(PEOPLEAPP_COM)) {
map = getPeopleAppCom(doc);
} }
//通过 title 判断当前文章是否跟数据库有重复 //通过 title 判断当前文章是否跟数据库有重复
String title = cmsNewsService.getNewsByTitleByTitle(map.get("title")); String title = cmsNewsService.getNewsByTitleByTitle(map.get("title"));
if (title == null) { if (title == null && map.containsKey(title) && map.containsKey("content")) {
// 图片转换 // 图片转换
Document parse = Jsoup.parse(map.get("content")); Document parse = Jsoup.parse(map.get("content"));
replaceImgSrc(parse); replaceImgSrc(parse);
...@@ -395,6 +398,16 @@ public class WebsiteCrawlerServiceImpl implements WebsiteCrawlerService { ...@@ -395,6 +398,16 @@ public class WebsiteCrawlerServiceImpl implements WebsiteCrawlerService {
} }
/**
 * Builds the title/content map for a parsed page; invoked by the URL dispatch
 * when the article URL contains {@code PEOPLEAPP_COM}.
 *
 * @param document parsed page to read from
 * @return a map holding two entries: {@code "title"} (inner HTML of the
 *         elements matching {@code div.title}) and {@code "content"} (inner
 *         HTML of {@code body}); both keys are always present
 */
private Map<String, String> getPeopleAppCom(Document document) {
    Map<String, String> article = new HashMap<>();
    // NOTE(review): the title is taken as inner HTML, not plain text, so any
    // nested markup or escaped entities end up in the stored title - confirm
    // this is intended.
    article.put("title", document.select("div.title").html());
    article.put("content", document.select("body").html());
    return article;
}
/** /**
* 图片转换,防止盗链 * 图片转换,防止盗链
* *
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment