Authored by Gino Zhang

先执行全量转换，再执行增量转换，这样速度更快一些。

... ... @@ -70,7 +70,7 @@ public class BaikeSpiderController {
@RequestMapping(value = "/spider/convertor")
@ResponseBody
public Map<String, Object> convert() {
public Map<String, Object> convertor() {
Map<String, Object> result = new HashMap();
try {
... ... @@ -85,6 +85,23 @@ public class BaikeSpiderController {
}
}
/**
 * HTTP entry point that triggers the incremental suggest conversion on demand.
 *
 * <p>Delegates to {@code spiderJob.incrementSuggestConversion()} and reports the outcome as a
 * small status map. A 200 response only means the call returned without throwing.
 * NOTE(review): the job appears to return quietly when its switch is off — confirm whether
 * callers need to distinguish "ran" from "skipped".
 *
 * @return a map with {@code code} 200 and {@code message} "success" when the call returns
 *     normally, or {@code code} 500 and a description of the failure when it throws
 */
@RequestMapping(value = "/spider/incrementConvertor")
@ResponseBody
public Map<String, Object> incrementConvertor() {
    // Explicit type arguments instead of a raw HashMap: no unchecked assignment.
    Map<String, Object> result = new HashMap<String, Object>();
    try {
        spiderJob.incrementSuggestConversion();
        result.put("code", 200);
        result.put("message", "success");
        return result;
    } catch (Exception e) {
        result.put("code", 500);
        // Some exceptions (e.g. NullPointerException) carry no message; fall back to
        // toString() so the response still says what failed instead of returning null.
        result.put("message", e.getMessage() != null ? e.getMessage() : e.toString());
        return result;
    }
}
@RequestMapping(value = "/spider/conversion/update")
@ResponseBody
public Map<String, Object> updateConversion(@RequestParam String source, @RequestParam String dest) {
... ...
... ... @@ -60,26 +60,52 @@ public class SpiderJob {
/**
 * Scheduled entry point (cron {@code 0 15 5 * * ?}) that converts spider contents into keyword
 * conversion relations. Per the original author's note, it must run after SuggestionCounter
 * has executed.
 *
 * <p>When the dynamic switch reports the spider as open, the full refresh runs first and the
 * incremental extraction second; otherwise only a warning is logged.
 */
@Scheduled(cron = "0 15 5 * * ?")
public void convertSpiderContents() {
    boolean spiderOpen = dynamicConfigService.isIncreasementSpiderOpen();
    if (spiderOpen) {
        // Full refresh first, incremental extraction afterwards.
        updateSuggestConversion();
        incrementSuggestConversion();
    } else {
        LOGGER.warn("[func=convertSpiderContents.end][message=inceasement spider is closed]");
    }
}
/**
 * Runs the full refresh of the extracted conversion relations by executing
 * {@code suggestConversionUpdateFlow} through a {@code RetryBusinessFlowExecutor}.
 *
 * <p>The method returns without running the flow when the dynamic switch reports the spider as
 * closed, or when {@code lockStatus} is already set by another run. Both cases are logged.
 * The flag is always cleared in {@code finally}, even if the flow throws.
 */
public void updateSuggestConversion() {
    long begin = System.currentTimeMillis();
    // Log under this method's own name so start/begin/end lines can be correlated.
    LOGGER.info("[func=updateSuggestConversion.start][begin={}]", begin);
    if (!dynamicConfigService.isIncreasementSpiderOpen()) {
        LOGGER.warn("[func=updateSuggestConversion.end][message=inceasement spider is closed]");
        return;
    }
    if (!lockStatus.compareAndSet(false, true)) {
        // Previously this path returned silently, which made skipped runs invisible.
        LOGGER.warn("[func=updateSuggestConversion.end][message=another conversion is running, skipped]");
        return;
    }
    try {
        LOGGER.info("[func=updateSuggestConversion.begin][cost={}]", System.currentTimeMillis() - begin);
        RetryBusinessFlowExecutor flowExecutor = new RetryBusinessFlowExecutor(suggestConversionUpdateFlow);
        boolean result = flowExecutor.execute();
        LOGGER.info("[func=updateSuggestConversion.end][result={}][cost={}]", result, System.currentTimeMillis() - begin);
    } finally {
        lockStatus.set(false);
    }
}
public void incrementSuggestConversion() {
// 执行增量的关系提取 只提取suggest_conversion表不存在的
long begin = System.currentTimeMillis();
LOGGER.info("[func=incrementSuggestConversion.start][begin={}]", begin);
if (!dynamicConfigService.isIncreasementSpiderOpen()) {
LOGGER.warn("[func=incrementSuggestConversion.end][message=inceasement spider is closed]");
return;
}
if (lockStatus.compareAndSet(false, true)) {
try {
// 再执行增量的转换
LOGGER.info("[func=incrementSuggestConversion.begin][cost={}]", System.currentTimeMillis() - begin);
RetryBusinessFlowExecutor flowExecutor = new RetryBusinessFlowExecutor(suggestConvertorFlow);
boolean result = flowExecutor.execute();
LOGGER.info("[func=suggestConvertorFlow.end][result={}][cost={}]", result, System.currentTimeMillis() - begin);
if (!result) {
LOGGER.warn("Increment subjects convertor failed and skip to full convertor refresh.");
return;
}
flowExecutor = new RetryBusinessFlowExecutor(suggestConversionUpdateFlow);
result = flowExecutor.execute();
LOGGER.info("[func=suggestConversionUpdateFlow.end][result={}][cost={}]", result, System.currentTimeMillis() - begin);
LOGGER.info("[func=incrementSuggestConversion.end][result={}][cost={}]", result, System.currentTimeMillis() - begin);
} finally {
lockStatus.set(false);
}
... ...