Authored by Gino Zhang

增加锁机制，避免并发执行爬虫

... ... @@ -64,7 +64,7 @@ public class SuggestConvertorFlow implements RetryBusinessFlow {
return true;
}
List<SpiderContent> filteredContentList = spiderContentList.stream().filter(spiderContent -> !existSourceSet.contains(spiderContent.getSubject())).collect(Collectors.toList());
List<SpiderContent> filteredContentList = spiderContentList.stream().filter(spiderContent -> spiderContent != null && !existSourceSet.contains(spiderContent.getSubject())).collect(Collectors.toList());
logger.info("[func=SuggestConversionFlow.doBusiness][pageNo={}][spiderContentListSize={}][filteredContentListSize={}]", pageNo, spiderContentList.size(), filteredContentList.size());
if (CollectionUtils.isEmpty(filteredContentList)) {
return true;
... ...
... ... @@ -10,6 +10,8 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import java.util.concurrent.atomic.AtomicBoolean;
/**
* Created by ginozhang on 2017/3/2.
*/
... ... @@ -27,6 +29,9 @@ public class SpiderJob {
@Autowired
private DynamicConfigService dynamicConfigService;
// Run guard for the scheduled jobs in this class: a job claims it with
// compareAndSet(false, true) before doing its work and resets it to false in a
// finally block, so a trigger that fires while the flag is still set does no work.
// The reference is final (never reassigned); AtomicBoolean already provides the
// cross-thread visibility, so marking the field volatile added nothing.
private final AtomicBoolean lockStatus = new AtomicBoolean(false);
@Scheduled(cron = "0 30 0 * * ?")
public void crawleEmptySearchKeywords() {
// 分析前一天前1000个搜索无结果或者小于10个的关键词 到baidubaike爬虫获取内容
... ... @@ -37,9 +42,15 @@ public class SpiderJob {
return;
}
RetryBusinessFlowExecutor flowExecutor = new RetryBusinessFlowExecutor(incrementCrawlerFlow);
boolean result = flowExecutor.execute();
LOGGER.info("[func=crawleEmptySearchKeywords.end][result={}][cost={}]", result, System.currentTimeMillis() - begin);
if (lockStatus.compareAndSet(false, true)) {
try {
RetryBusinessFlowExecutor flowExecutor = new RetryBusinessFlowExecutor(incrementCrawlerFlow);
boolean result = flowExecutor.execute();
LOGGER.info("[func=crawleEmptySearchKeywords.end][result={}][cost={}]", result, System.currentTimeMillis() - begin);
} finally {
lockStatus.set(false);
}
}
}
@Scheduled(cron = "0 40 1 * * ?")
... ... @@ -52,8 +63,14 @@ public class SpiderJob {
return;
}
RetryBusinessFlowExecutor flowExecutor = new RetryBusinessFlowExecutor(suggestConvertorFlow);
boolean result = flowExecutor.execute();
LOGGER.info("[func=convertSpiderContents.end][result={}][cost={}]", result, System.currentTimeMillis() - begin);
if (lockStatus.compareAndSet(false, true)) {
try {
RetryBusinessFlowExecutor flowExecutor = new RetryBusinessFlowExecutor(suggestConvertorFlow);
boolean result = flowExecutor.execute();
LOGGER.info("[func=convertSpiderContents.end][result={}][cost={}]", result, System.currentTimeMillis() - begin);
} finally {
lockStatus.set(false);
}
}
}
}
... ...