Authored by Gino Zhang

代码重构

package com.yoho.search.spider.increment;
import com.yoho.core.redis.YHZSetOperations;
import com.yoho.search.base.utils.RedisKeys;
import com.yoho.search.consumer.index.common.AnalyzerHelper;
import com.yoho.search.consumer.service.base.SpiderContentService;
... ... @@ -14,11 +13,9 @@ import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.redis.core.ZSetOperations;
import org.springframework.stereotype.Component;
import org.springframework.util.Assert;
import javax.annotation.Resource;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
... ... @@ -37,11 +34,6 @@ public class IncrementCrawlerFlow implements RetryBusinessFlow {
private static final Logger REPORT_LOGGER = LoggerFactory.getLogger("CONSUMER_REPORTER");
private static final int KEYWORD_COUNT = 100;
@Resource(name = "yhNoSyncZSetOperations")
private YHZSetOperations<String, String> yhNoSyncZSetOperations;
@Autowired
private SpiderContentService spiderContentService;
... ... @@ -70,23 +62,12 @@ public class IncrementCrawlerFlow implements RetryBusinessFlow {
succeedKeywords.clear();
failedKeywords.clear();
Set<String> keywordSet = new HashSet<>(200);
Set<String> topEmptySeachKeywords = new HashSet<>(100);
Set<String> topLessSeachKeywords = new HashSet<>(100);
Set<ZSetOperations.TypedTuple<String>> redisResults = yhNoSyncZSetOperations.reverseRangeWithScores(RedisKeys.getRedisKey4Yesterday(RedisKeys.YOHO_SEARCH_KEYWORDS_EMPTY), 0, KEYWORD_COUNT);
for (ZSetOperations.TypedTuple<String> typedTuple : redisResults) {
topEmptySeachKeywords.add(typedTuple.getValue());
}
// 只是为了输出conversion调用情况
incrementCrawlerService.getTopSeachKeywords(RedisKeys.YOHO_SEARCH_KEYWORDS_TIPS, 1000);
redisResults = yhNoSyncZSetOperations.reverseRangeWithScores(RedisKeys.getRedisKey4Yesterday(RedisKeys.YOHO_SEARCH_KEYWORDS_LESS), 0, KEYWORD_COUNT);
for (ZSetOperations.TypedTuple<String> typedTuple : redisResults) {
topLessSeachKeywords.add(typedTuple.getValue());
}
REPORT_LOGGER.info("[key=TopEmptySeachKeywords][topEmptySeachKeywords={}]", topEmptySeachKeywords);
REPORT_LOGGER.info("[key=TopLessSeachKeywords][topLessSeachKeywords={}]", topLessSeachKeywords);
keywordSet.addAll(topEmptySeachKeywords);
keywordSet.addAll(topLessSeachKeywords);
Set<String> keywordSet = new HashSet<>(200);
keywordSet.addAll(incrementCrawlerService.getTopSeachKeywords(RedisKeys.YOHO_SEARCH_KEYWORDS_EMPTY, 100));
keywordSet.addAll(incrementCrawlerService.getTopSeachKeywords(RedisKeys.YOHO_SEARCH_KEYWORDS_LESS, 100));
logger.info("[func=IncrementCrawlerFlow.init][keywordSetSize={}]", keywordSet.size());
if (keywordSet.isEmpty()) {
return;
... ... @@ -105,20 +86,6 @@ public class IncrementCrawlerFlow implements RetryBusinessFlow {
this.validKeywordList = keywordSet.parallelStream().filter(keyword -> validKeyword(keyword)).collect(Collectors.toList());
logger.info("[func=IncrementCrawlerFlow.init][validKeywordListSize={}]", validKeywordList != null ? validKeywordList.size() : 0);
REPORT_LOGGER.info("[key=ValidKeywordList][validIncrementKeywords={}]", validKeywordList);
reportSuggestConversionRequestKeywords();
}
/**
 * Writes yesterday's suggest-conversion request keywords to the report logger.
 * Each entry is rendered as {@code value|score}. Nothing is read from Redis
 * when the report logger has INFO disabled.
 */
private void reportSuggestConversionRequestKeywords() {
    // Bail out before the Redis call if the report line would be dropped anyway.
    if (!REPORT_LOGGER.isInfoEnabled()) {
        return;
    }
    String yesterdayTipsKey = RedisKeys.getRedisKey4Yesterday(RedisKeys.YOHO_SEARCH_KEYWORDS_TIPS);
    Set<ZSetOperations.TypedTuple<String>> rankedTuples = yhNoSyncZSetOperations.reverseRangeWithScores(yesterdayTipsKey, 0, 10000);
    List<String> reportEntries = new ArrayList<>(10000);
    rankedTuples.forEach(tuple -> reportEntries.add(tuple.getValue() + "|" + tuple.getScore()));
    REPORT_LOGGER.info("[key=SuggestConversionRequestKeywords][suggestConversionRequestKeywords={}]", reportEntries);
}
private boolean validKeyword(String keyword) {
... ...
package com.yoho.search.spider.increment;
import com.yoho.core.redis.YHZSetOperations;
import com.yoho.search.base.utils.RedisKeys;
import com.yoho.search.consumer.service.base.SpiderContentService;
import com.yoho.search.dal.model.SpiderContent;
import com.yoho.search.spider.common.BaikeBO;
... ... @@ -8,11 +10,13 @@ import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.redis.core.ZSetOperations;
import org.springframework.stereotype.Component;
import org.springframework.util.Assert;
import javax.annotation.Resource;
import java.net.URLEncoder;
import java.util.Arrays;
import java.util.*;
/**
* Created by ginozhang on 2017/3/3.
... ... @@ -22,12 +26,31 @@ public class IncrementCrawlerService {
private static final Logger logger = LoggerFactory.getLogger("SEARCH_SPIDER");
private static final Logger REPORT_LOGGER = LoggerFactory.getLogger("CONSUMER_REPORTER");
@Autowired
private SpiderContentService spiderContentService;
@Autowired
private SpiderBasedHttpRequest spiderBasedHttpRequest;
@Resource(name = "yhNoSyncZSetOperations")
private YHZSetOperations<String, String> yhNoSyncZSetOperations;
/**
 * Reads the highest-scored members of yesterday's keyword sorted set, logs them
 * together with their scores to the report logger, and returns the bare keywords.
 *
 * @param redisTemplate key template handed to {@code RedisKeys.getRedisKey4Yesterday}
 *                      to build the sorted-set key (a key template, not a
 *                      {@code RedisTemplate} instance)
 * @param count         maximum number of keywords to fetch; a non-positive value
 *                      yields an empty set without querying Redis
 * @return a mutable set holding at most {@code count} keywords; never {@code null}
 */
public Set<String> getTopSeachKeywords(String redisTemplate, int count) {
    if (count <= 0) {
        // An end index of -1 would select the whole sorted set, so stop here.
        return new HashSet<>();
    }
    // Reverse-range bounds are inclusive on both ends (ZREVRANGE semantics), so the
    // last wanted index is count - 1; using count would fetch one extra member.
    Set<ZSetOperations.TypedTuple<String>> redisResults = yhNoSyncZSetOperations.reverseRangeWithScores(RedisKeys.getRedisKey4Yesterday(redisTemplate), 0, count - 1);
    List<String> resultList = new ArrayList<>(count);
    Set<String> keywordSet = new HashSet<>(count);
    // Treat a null result as "no keywords" instead of failing with a NullPointerException.
    if (redisResults != null) {
        for (ZSetOperations.TypedTuple<String> typedTuple : redisResults) {
            resultList.add(typedTuple.getValue() + "|" + typedTuple.getScore());
            keywordSet.add(typedTuple.getValue());
        }
    }
    REPORT_LOGGER.info("[key=TopSeachKeywords][RedisKeyTemplate={}][topSeachKeywords={}]", redisTemplate, resultList);
    return keywordSet;
}
public boolean crawleKeyword(String keyword) {
Assert.isTrue(StringUtils.isNotEmpty(keyword));
SpiderContent spiderContent = spiderContentService.selectSpiderContentBySubject(keyword);
... ...