Authored by Gino Zhang

代码重构

1 package com.yoho.search.spider.increment; 1 package com.yoho.search.spider.increment;
2 2
3 -import com.yoho.core.redis.YHZSetOperations;  
4 import com.yoho.search.base.utils.RedisKeys; 3 import com.yoho.search.base.utils.RedisKeys;
5 import com.yoho.search.consumer.index.common.AnalyzerHelper; 4 import com.yoho.search.consumer.index.common.AnalyzerHelper;
6 import com.yoho.search.consumer.service.base.SpiderContentService; 5 import com.yoho.search.consumer.service.base.SpiderContentService;
@@ -14,11 +13,9 @@ import org.apache.commons.lang.StringUtils; @@ -14,11 +13,9 @@ import org.apache.commons.lang.StringUtils;
14 import org.slf4j.Logger; 13 import org.slf4j.Logger;
15 import org.slf4j.LoggerFactory; 14 import org.slf4j.LoggerFactory;
16 import org.springframework.beans.factory.annotation.Autowired; 15 import org.springframework.beans.factory.annotation.Autowired;
17 -import org.springframework.data.redis.core.ZSetOperations;  
18 import org.springframework.stereotype.Component; 16 import org.springframework.stereotype.Component;
19 import org.springframework.util.Assert; 17 import org.springframework.util.Assert;
20 18
21 -import javax.annotation.Resource;  
22 import java.io.UnsupportedEncodingException; 19 import java.io.UnsupportedEncodingException;
23 import java.net.URLEncoder; 20 import java.net.URLEncoder;
24 import java.util.ArrayList; 21 import java.util.ArrayList;
@@ -37,11 +34,6 @@ public class IncrementCrawlerFlow implements RetryBusinessFlow { @@ -37,11 +34,6 @@ public class IncrementCrawlerFlow implements RetryBusinessFlow {
37 34
38 private static final Logger REPORT_LOGGER = LoggerFactory.getLogger("CONSUMER_REPORTER"); 35 private static final Logger REPORT_LOGGER = LoggerFactory.getLogger("CONSUMER_REPORTER");
39 36
40 - private static final int KEYWORD_COUNT = 100;  
41 -  
42 - @Resource(name = "yhNoSyncZSetOperations")  
43 - private YHZSetOperations<String, String> yhNoSyncZSetOperations;  
44 -  
45 @Autowired 37 @Autowired
46 private SpiderContentService spiderContentService; 38 private SpiderContentService spiderContentService;
47 39
@@ -70,23 +62,12 @@ public class IncrementCrawlerFlow implements RetryBusinessFlow { @@ -70,23 +62,12 @@ public class IncrementCrawlerFlow implements RetryBusinessFlow {
70 succeedKeywords.clear(); 62 succeedKeywords.clear();
71 failedKeywords.clear(); 63 failedKeywords.clear();
72 64
73 - Set<String> keywordSet = new HashSet<>(200);  
74 - Set<String> topEmptySeachKeywords = new HashSet<>(100);  
75 - Set<String> topLessSeachKeywords = new HashSet<>(100);  
76 - Set<ZSetOperations.TypedTuple<String>> redisResults = yhNoSyncZSetOperations.reverseRangeWithScores(RedisKeys.getRedisKey4Yesterday(RedisKeys.YOHO_SEARCH_KEYWORDS_EMPTY), 0, KEYWORD_COUNT);  
77 - for (ZSetOperations.TypedTuple<String> typedTuple : redisResults) {  
78 - topEmptySeachKeywords.add(typedTuple.getValue());  
79 - } 65 + // 只是为了输出conversion调用情况
  66 + incrementCrawlerService.getTopSeachKeywords(RedisKeys.YOHO_SEARCH_KEYWORDS_TIPS, 1000);
80 67
81 - redisResults = yhNoSyncZSetOperations.reverseRangeWithScores(RedisKeys.getRedisKey4Yesterday(RedisKeys.YOHO_SEARCH_KEYWORDS_LESS), 0, KEYWORD_COUNT);  
82 - for (ZSetOperations.TypedTuple<String> typedTuple : redisResults) {  
83 - topLessSeachKeywords.add(typedTuple.getValue());  
84 - }  
85 -  
86 - REPORT_LOGGER.info("[key=TopEmptySeachKeywords][topEmptySeachKeywords={}]", topEmptySeachKeywords);  
87 - REPORT_LOGGER.info("[key=TopLessSeachKeywords][topLessSeachKeywords={}]", topLessSeachKeywords);  
88 - keywordSet.addAll(topEmptySeachKeywords);  
89 - keywordSet.addAll(topLessSeachKeywords); 68 + Set<String> keywordSet = new HashSet<>(200);
  69 + keywordSet.addAll(incrementCrawlerService.getTopSeachKeywords(RedisKeys.YOHO_SEARCH_KEYWORDS_EMPTY, 100));
  70 + keywordSet.addAll(incrementCrawlerService.getTopSeachKeywords(RedisKeys.YOHO_SEARCH_KEYWORDS_LESS, 100));
90 logger.info("[func=IncrementCrawlerFlow.init][keywordSetSize={}]", keywordSet.size()); 71 logger.info("[func=IncrementCrawlerFlow.init][keywordSetSize={}]", keywordSet.size());
91 if (keywordSet.isEmpty()) { 72 if (keywordSet.isEmpty()) {
92 return; 73 return;
@@ -105,20 +86,6 @@ public class IncrementCrawlerFlow implements RetryBusinessFlow { @@ -105,20 +86,6 @@ public class IncrementCrawlerFlow implements RetryBusinessFlow {
105 this.validKeywordList = keywordSet.parallelStream().filter(keyword -> validKeyword(keyword)).collect(Collectors.toList()); 86 this.validKeywordList = keywordSet.parallelStream().filter(keyword -> validKeyword(keyword)).collect(Collectors.toList());
106 logger.info("[func=IncrementCrawlerFlow.init][validKeywordListSize={}]", validKeywordList != null ? validKeywordList.size() : 0); 87 logger.info("[func=IncrementCrawlerFlow.init][validKeywordListSize={}]", validKeywordList != null ? validKeywordList.size() : 0);
107 REPORT_LOGGER.info("[key=ValidKeywordList][validIncrementKeywords={}]", validKeywordList); 88 REPORT_LOGGER.info("[key=ValidKeywordList][validIncrementKeywords={}]", validKeywordList);
108 -  
109 - reportSuggestConversionRequestKeywords();  
110 - }  
111 -  
112 - private void reportSuggestConversionRequestKeywords() {  
113 - if (REPORT_LOGGER.isInfoEnabled()) {  
114 - List<String> resultList = new ArrayList<>(10000);  
115 - Set<ZSetOperations.TypedTuple<String>> redisResults = yhNoSyncZSetOperations.reverseRangeWithScores(RedisKeys.getRedisKey4Yesterday(RedisKeys.YOHO_SEARCH_KEYWORDS_TIPS), 0, 10000);  
116 - for (ZSetOperations.TypedTuple<String> typedTuple : redisResults) {  
117 - resultList.add(typedTuple.getValue() + "|" + typedTuple.getScore());  
118 - }  
119 -  
120 - REPORT_LOGGER.info("[key=SuggestConversionRequestKeywords][suggestConversionRequestKeywords={}]", resultList);  
121 - }  
122 } 89 }
123 90
124 private boolean validKeyword(String keyword) { 91 private boolean validKeyword(String keyword) {
1 package com.yoho.search.spider.increment; 1 package com.yoho.search.spider.increment;
2 2
  3 +import com.yoho.core.redis.YHZSetOperations;
  4 +import com.yoho.search.base.utils.RedisKeys;
3 import com.yoho.search.consumer.service.base.SpiderContentService; 5 import com.yoho.search.consumer.service.base.SpiderContentService;
4 import com.yoho.search.dal.model.SpiderContent; 6 import com.yoho.search.dal.model.SpiderContent;
5 import com.yoho.search.spider.common.BaikeBO; 7 import com.yoho.search.spider.common.BaikeBO;
@@ -8,11 +10,13 @@ import org.apache.commons.lang.StringUtils; @@ -8,11 +10,13 @@ import org.apache.commons.lang.StringUtils;
8 import org.slf4j.Logger; 10 import org.slf4j.Logger;
9 import org.slf4j.LoggerFactory; 11 import org.slf4j.LoggerFactory;
10 import org.springframework.beans.factory.annotation.Autowired; 12 import org.springframework.beans.factory.annotation.Autowired;
  13 +import org.springframework.data.redis.core.ZSetOperations;
11 import org.springframework.stereotype.Component; 14 import org.springframework.stereotype.Component;
12 import org.springframework.util.Assert; 15 import org.springframework.util.Assert;
13 16
  17 +import javax.annotation.Resource;
14 import java.net.URLEncoder; 18 import java.net.URLEncoder;
15 -import java.util.Arrays; 19 +import java.util.*;
16 20
17 /** 21 /**
18 * Created by ginozhang on 2017/3/3. 22 * Created by ginozhang on 2017/3/3.
@@ -22,12 +26,31 @@ public class IncrementCrawlerService { @@ -22,12 +26,31 @@ public class IncrementCrawlerService {
22 26
23 private static final Logger logger = LoggerFactory.getLogger("SEARCH_SPIDER"); 27 private static final Logger logger = LoggerFactory.getLogger("SEARCH_SPIDER");
24 28
  29 + private static final Logger REPORT_LOGGER = LoggerFactory.getLogger("CONSUMER_REPORTER");
  30 +
25 @Autowired 31 @Autowired
26 private SpiderContentService spiderContentService; 32 private SpiderContentService spiderContentService;
27 33
28 @Autowired 34 @Autowired
29 private SpiderBasedHttpRequest spiderBasedHttpRequest; 35 private SpiderBasedHttpRequest spiderBasedHttpRequest;
30 36
  37 + @Resource(name = "yhNoSyncZSetOperations")
  38 + private YHZSetOperations<String, String> yhNoSyncZSetOperations;
  39 +
  40 + public Set<String> getTopSeachKeywords(String redisTemplate, int count){
  41 + List<String> resultList = new ArrayList<>(count);
  42 + Set<String> keywordSet = new HashSet<>(count);
  43 +
  44 + Set<ZSetOperations.TypedTuple<String>> redisResults = yhNoSyncZSetOperations.reverseRangeWithScores(RedisKeys.getRedisKey4Yesterday(redisTemplate), 0, count);
  45 + for (ZSetOperations.TypedTuple<String> typedTuple : redisResults) {
  46 + resultList.add(typedTuple.getValue() + "|" + typedTuple.getScore());
  47 + keywordSet.add(typedTuple.getValue());
  48 + }
  49 +
  50 + REPORT_LOGGER.info("[key=TopSeachKeywords][RedisKeyTemplate={}][topSeachKeywords={}]", redisTemplate, resultList);
  51 + return keywordSet;
  52 + }
  53 +
31 public boolean crawleKeyword(String keyword) { 54 public boolean crawleKeyword(String keyword) {
32 Assert.isTrue(StringUtils.isNotEmpty(keyword)); 55 Assert.isTrue(StringUtils.isNotEmpty(keyword));
33 SpiderContent spiderContent = spiderContentService.selectSpiderContentBySubject(keyword); 56 SpiderContent spiderContent = spiderContentService.selectSpiderContentBySubject(keyword);