|
|
package com.yoho.search.spider.job;
|
|
|
|
|
|
import com.yoho.search.consumer.common.DynamicConfigService;
|
|
|
import com.yoho.search.consumer.suggests.common.RetryBusinessFlowExecutor;
|
|
|
import com.yoho.search.spider.conversation.SuggestConvertorFlow;
|
|
|
import com.yoho.search.spider.service.KeywordCrawlerFlow;
|
...
|
...
|
@@ -23,11 +24,19 @@ public class SpiderJob { |
|
|
/** Flow that is wrapped in a {@code RetryBusinessFlowExecutor} by the job that converts crawled content into keyword conversion relations. */
@Autowired
|
|
|
private SuggestConvertorFlow suggestConvertorFlow;
|
|
|
|
|
|
/** Configuration service; each spider job checks {@code isIncreasementSpiderOpen()} first and, when it returns false, logs a warning and returns without executing its flow. */
@Autowired
|
|
|
private DynamicConfigService dynamicConfigService;
|
|
|
|
|
|
@Scheduled(cron = "0 30 0 * * ?")
|
|
|
public void crawleEmptySearchKeywords() {
|
|
|
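// Analyze the previous day's top 1000 search keywords that returned no results or fewer than 10 results, then crawl Baidu Baike (baidubaike) to fetch content for them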
|
|
|
long begin = System.currentTimeMillis();
|
|
|
LOGGER.info("[func=crawleEmptySearchKeywords.start][begin={}]", begin);
|
|
|
if (!dynamicConfigService.isIncreasementSpiderOpen()) {
|
|
|
LOGGER.warn("[func=crawleEmptySearchKeywords.end][message=inceasement spider is closed]");
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
RetryBusinessFlowExecutor flowExecutor = new RetryBusinessFlowExecutor(keywordCrawlerFlow);
|
|
|
boolean result = flowExecutor.execute();
|
|
|
LOGGER.info("[func=crawleEmptySearchKeywords.end][result={}][cost={}]", result, System.currentTimeMillis() - begin);
|
...
|
...
|
@@ -38,6 +47,11 @@ public class SpiderJob { |
|
|
// Convert the crawled content into keyword conversion relations
|
|
|
long begin = System.currentTimeMillis();
|
|
|
LOGGER.info("[func=convertSpiderContents.start][begin={}]", begin);
|
|
|
if (!dynamicConfigService.isIncreasementSpiderOpen()) {
|
|
|
LOGGER.warn("[func=convertSpiderContents.end][message=inceasement spider is closed]");
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
RetryBusinessFlowExecutor flowExecutor = new RetryBusinessFlowExecutor(suggestConvertorFlow);
|
|
|
boolean result = flowExecutor.execute();
|
|
|
LOGGER.info("[func=convertSpiderContents.end][result={}][cost={}]", result, System.currentTimeMillis() - begin);
|
...
|
...
|
|