Authored by Gino Zhang

解析时按照顺序处理 且最多支持10个词

@@ -25,6 +25,8 @@ public class SuggestConversionFlow implements RetryBusinessFlow { @@ -25,6 +25,8 @@ public class SuggestConversionFlow implements RetryBusinessFlow {
25 25
26 private static final Logger logger = LoggerFactory.getLogger("FLOW_EXECUTOR"); 26 private static final Logger logger = LoggerFactory.getLogger("FLOW_EXECUTOR");
27 27
  // Upper bound on how many matched keywords are appended to one result:
  // the matching loop over yohoKeywordList breaks once this many have matched.
  28 + private static final int MAX_KEYWORDS = 10;
  29 +
28 private static final String INDEX_NAME = ISearchConstants.INDEX_NAME_PRODUCT_INDEX; 30 private static final String INDEX_NAME = ISearchConstants.INDEX_NAME_PRODUCT_INDEX;
29 31
30 private static final String ANALYZER = "ik_max_word"; 32 private static final String ANALYZER = "ik_max_word";
@@ -35,6 +37,8 @@ public class SuggestConversionFlow implements RetryBusinessFlow { @@ -35,6 +37,8 @@ public class SuggestConversionFlow implements RetryBusinessFlow {
35 37
// Maps each keyword to its analyzed token set; entries are added by initAllYohoKeywords() via put().
// NOTE(review): volatile only makes reassignment of the reference visible; the HashMap itself is
// mutated in place, so confirm that no reader can run concurrently with initAllYohoKeywords().
36 private volatile Map<String, Set<String>> yohoKeywords = new HashMap<>(); 38 private volatile Map<String, Set<String>> yohoKeywords = new HashMap<>();
37 39
  // Keywords in priority order (sort -> style -> maker -> brand). Replaced wholesale by
  // initAllYohoKeywords() and iterated in that order when matching content tokens.
  40 + private volatile List<String> yohoKeywordList = new ArrayList<>();
  41 +
38 @Autowired 42 @Autowired
39 private SuggestConversionService suggestConversionService; 43 private SuggestConversionService suggestConversionService;
40 44
@@ -72,38 +76,66 @@ public class SuggestConversionFlow implements RetryBusinessFlow { @@ -72,38 +76,66 @@ public class SuggestConversionFlow implements RetryBusinessFlow {
72 } 76 }
73 77
// Rebuilds the keyword data used for matching.
// New version: gathers names from the sort, style, maker and brand sources (in that order, skipping
// duplicates), publishes the ordered list to yohoKeywordList, then stores keyword -> tokens in
// yohoKeywords for every keyword that passes the length / ignore-list / non-empty-token checks.
// NOTE(review): yohoKeywordList is assigned before yohoKeywords is populated, and entries are put
// into the existing map instead of a freshly built one, so keywords that disappear from the sources
// appear to remain in the map across re-initialisation - confirm whether that is intended.
74 private void initAllYohoKeywords() { 78 private void initAllYohoKeywords() {
75 - Set<String> resultSet = new HashSet<>(2000);  
76 - List<Brand> brandList = brandService.getBrandPageLists(1, Integer.MAX_VALUE);  
77 - resultSet.addAll(brandList.stream().map(Brand::getBrandName).filter(name -> StringUtils.isNotBlank(name)).collect(Collectors.toSet()));  
78 - resultSet.addAll(brandList.stream().map(Brand::getBrandNameEn).filter(name -> StringUtils.isNotBlank(name)).collect(Collectors.toSet()));  
79 - resultSet.addAll(brandList.stream().map(Brand::getBrandNameCn).filter(name -> StringUtils.isNotBlank(name)).collect(Collectors.toSet()));  
80 - logger.info("[func=SuggestConversionFlow.init][resultSetAfterLoadBrand={}]", resultSet.size()); 79 + // A List is used so that keywords keep the priority order [sort -> style -> maker -> brand] when processed
  80 + List<String> keywordList = new ArrayList<>();
  81 + Set<String> keywordSet = new HashSet<>(3000);
  82 +
  83 + addKeywords(keywordSet, keywordList, getSortNameSet());
  84 + addKeywords(keywordSet, keywordList, getStyleNameSet());
  85 + addKeywords(keywordSet, keywordList, getParameterMakeNameSet());
  86 + addKeywords(keywordSet, keywordList, getBrandNameSet());
81 87
  // The ordered list keeps every collected keyword, including ones filtered out of the map below;
  // for those, yohoKeywords.get(keyword) yields no token set.
  88 + this.yohoKeywordList = keywordList;
  89 + Set<String> tokens;
  // Only keywords longer than one character, not listed in IGNORE_KEYWORDS, and with a non-empty
  // result from getTokens(keyword) are stored.
  90 + for (String keyword : keywordSet) {
  91 + if (keyword.length() > 1 && !IGNORE_KEYWORDS.contains(keyword) && CollectionUtils.isNotEmpty(tokens = getTokens(keyword))) {
  92 + yohoKeywords.put(keyword, tokens);
  93 + }
  94 + }
  95 + }
  96 +
  // Adds every entry of tempSet that is not yet in keywordSet to both keywordSet (for de-duplication)
  // and keywordList (to record first-seen order), then logs the current size of keywordSet.
  // NOTE(review): the order within one source follows tempSet's iteration order; the visible callers
  // pass sets built with HashSet / Collectors.toSet(), whose iteration order is unspecified.
  97 + private void addKeywords(Set<String> keywordSet, List<String> keywordList, Set<String> tempSet) {
  98 + for (String temp : tempSet) {
  99 + if (!keywordSet.contains(temp)) {
  100 + keywordSet.add(temp);
  101 + keywordList.add(temp);
  102 + }
  103 + }
  104 +
  105 + logger.info("[func=SuggestConversionFlow.init][resultSetAfterLoad={}]", keywordSet.size());
  106 + }
  107 +
  // Returns the set of sort names from productSortService.getPageLists(1, Integer.MAX_VALUE),
  // with each sortName split on "/" and every part added separately.
  // NOTE(review): sortName is dereferenced without a null check, and the split parts are added without
  // any blank filtering (the other name sources filter with StringUtils.isNotBlank) - confirm that
  // getSortName() never returns null or blank segments.
  108 + private Set<String> getSortNameSet() {
  109 + Set<String> sortNameSet = new HashSet<>(300);
82 List<ProductSort> productSortList = productSortService.getPageLists(1, Integer.MAX_VALUE); 110 List<ProductSort> productSortList = productSortService.getPageLists(1, Integer.MAX_VALUE);
83 - resultSet.addAll(productSortList.stream().map(ProductSort::getSortName).filter(name -> StringUtils.isNotBlank(name)).collect(Collectors.toSet()));  
84 for (ProductSort productSort : productSortList) { 111 for (ProductSort productSort : productSortList) {
85 // Some sortName values join several words with a slash (e.g. [凉鞋/凉拖], i.e. sandals/slippers), so they are split here 112 // Some sortName values join several words with a slash (e.g. [凉鞋/凉拖], i.e. sandals/slippers), so they are split here
86 String sortName = productSort.getSortName(); 113 String sortName = productSort.getSortName();
87 for (String itemName : sortName.split("\\/")) { 114 for (String itemName : sortName.split("\\/")) {
88 - resultSet.add(itemName); 115 + sortNameSet.add(itemName);
89 } 116 }
90 } 117 }
91 118
92 - logger.info("[func=SuggestConversionFlow.init][resultSetAfterLoadSort={}]", resultSet.size()); 119 + return sortNameSet;
  120 + }
  121 +
  // Returns the distinct non-blank style names of all styles obtained from
  // styleService.getStylePageLists(1, Integer.MAX_VALUE).
  122 + private Set<String> getStyleNameSet() {
93 List<Style> styleList = styleService.getStylePageLists(1, Integer.MAX_VALUE); 123 List<Style> styleList = styleService.getStylePageLists(1, Integer.MAX_VALUE);
94 - resultSet.addAll(styleList.stream().map(Style::getStyleName).filter(name -> StringUtils.isNotBlank(name)).collect(Collectors.toSet()));  
95 - logger.info("[func=SuggestConversionFlow.init][resultSetAfterLoadStyle={}]", resultSet.size()); 124 + return styleList.stream().map(Style::getStyleName).filter(name -> StringUtils.isNotBlank(name)).collect(Collectors.toSet());
  125 + }
96 126
  // Returns the distinct non-blank parameter values of all entries obtained from
  // parameterMakeService.getAll().
  127 + private Set<String> getParameterMakeNameSet() {
97 List<ParameterMake> parameterMakeList = parameterMakeService.getAll(); 128 List<ParameterMake> parameterMakeList = parameterMakeService.getAll();
98 - resultSet.addAll(parameterMakeList.stream().map(ParameterMake::getParameterValue).filter(name -> StringUtils.isNotBlank(name)).collect(Collectors.toSet()));  
99 - logger.info("[func=SuggestConversionFlow.init][resultSetAfterLoadMake={}]", resultSet.size()); 129 + return parameterMakeList.stream().map(ParameterMake::getParameterValue).filter(name -> StringUtils.isNotBlank(name)).collect(Collectors.toSet());
  130 + }
100 131
101 - Set<String> tokens;  
102 - for (String keyword : resultSet) {  
103 - if (keyword.length() > 1 && !IGNORE_KEYWORDS.contains(keyword) && CollectionUtils.isNotEmpty(tokens = getTokens(keyword))) {  
104 - yohoKeywords.put(keyword, tokens);  
105 - }  
// Returns one set holding the non-blank brandName, brandNameEn and brandNameCn values of every brand
// obtained from brandService.getBrandPageLists(1, Integer.MAX_VALUE).
106 - } 132 + private Set<String> getBrandNameSet() {
  133 + Set<String> brandNameSet = new HashSet<>(3000);
  134 + List<Brand> brandList = brandService.getBrandPageLists(1, Integer.MAX_VALUE);
  135 + brandNameSet.addAll(brandList.stream().map(Brand::getBrandName).filter(name -> StringUtils.isNotBlank(name)).collect(Collectors.toSet()));
  136 + brandNameSet.addAll(brandList.stream().map(Brand::getBrandNameEn).filter(name -> StringUtils.isNotBlank(name)).collect(Collectors.toSet()));
  137 + brandNameSet.addAll(brandList.stream().map(Brand::getBrandNameCn).filter(name -> StringUtils.isNotBlank(name)).collect(Collectors.toSet()));
  138 + return brandNameSet;
107 } 139 }
108 140
109 @Override 141 @Override
@@ -154,11 +186,16 @@ public class SuggestConversionFlow implements RetryBusinessFlow { @@ -154,11 +186,16 @@ public class SuggestConversionFlow implements RetryBusinessFlow {
154 } 186 }
155 187
156 StringBuffer sb = new StringBuffer(); 188 StringBuffer sb = new StringBuffer();
157 - yohoKeywords.forEach((keyword, keywordTokens) -> {  
158 - if (tokenMatch(contentTokens, keywordTokens)) { 189 + int count = 0;
  190 + for (String keyword : yohoKeywordList) {
  191 + if (tokenMatch(contentTokens, yohoKeywords.get(keyword))) {
  192 + count++;
159 sb.append(keyword).append(","); 193 sb.append(keyword).append(",");
  194 + if (count == MAX_KEYWORDS) {
  195 + break;
  196 + }
160 } 197 }
161 - }); 198 + }
162 199
163 if (sb.length() > 1) { 200 if (sb.length() > 1) {
164 return sb.substring(0, sb.length() - 1); 201 return sb.substring(0, sb.length() - 1);
@@ -168,6 +205,10 @@ public class SuggestConversionFlow implements RetryBusinessFlow { @@ -168,6 +205,10 @@ public class SuggestConversionFlow implements RetryBusinessFlow {
168 } 205 }
169 206
170 private boolean tokenMatch(Set<String> contentTokens, Set<String> keywordTokens) { 207 private boolean tokenMatch(Set<String> contentTokens, Set<String> keywordTokens) {
  208 + if (CollectionUtils.isEmpty(keywordTokens)) {
  209 + return false;
  210 + }
  211 +
171 // 判断是否匹配yoho关键词 212 // 判断是否匹配yoho关键词
172 // 当前的设计考虑的是严格匹配 即需要匹配所有词 213 // 当前的设计考虑的是严格匹配 即需要匹配所有词
173 return contentTokens.containsAll(keywordTokens); 214 return contentTokens.containsAll(keywordTokens);