Authored by Gino Zhang

解析时按照顺序处理 且最多支持10个词

... ... @@ -25,6 +25,8 @@ public class SuggestConversionFlow implements RetryBusinessFlow {
/** Logger obtained under the shared logger name "FLOW_EXECUTOR" rather than this class's name. */
private static final Logger logger = LoggerFactory.getLogger("FLOW_EXECUTOR");
/** Maximum number of matched keywords collected for a single piece of content; matching stops once this count is reached. */
private static final int MAX_KEYWORDS = 10;
/** Index name, taken from ISearchConstants. NOTE(review): presumably the index whose analyzer is used for tokenizing - confirm against getTokens. */
private static final String INDEX_NAME = ISearchConstants.INDEX_NAME_PRODUCT_INDEX;
/** Analyzer name. NOTE(review): presumably passed to the analyze request issued by getTokens - confirm. */
private static final String ANALYZER = "ik_max_word";
... ... @@ -35,6 +37,8 @@ public class SuggestConversionFlow implements RetryBusinessFlow {
/** Maps each usable keyword to the set of tokens produced for it; populated by initAllYohoKeywords(). */
private volatile Map<String, Set<String>> yohoKeywords = new HashMap<>();
/** Keywords in the order they should be tried when matching (sort, then style, then maker, then brand); set by initAllYohoKeywords(). */
private volatile List<String> yohoKeywordList = new ArrayList<>();
/** Injected by Spring. */
@Autowired
private SuggestConversionService suggestConversionService;
... ... @@ -72,38 +76,66 @@ public class SuggestConversionFlow implements RetryBusinessFlow {
}
/**
 * Rebuilds the keyword dictionary used when matching content.
 *
 * <p>Keywords are gathered from four sources in priority order
 * (sort -> style -> maker -> brand). A keyword is given a token entry only
 * when it is longer than one character, is not listed in
 * {@code IGNORE_KEYWORDS}, and {@code getTokens} yields at least one token.
 *
 * <p>The token map is assembled in a local variable and assigned to the
 * volatile field only once it is complete, so keywords from an earlier
 * load that no longer exist are dropped and the shared map is never
 * modified in place.
 */
private void initAllYohoKeywords() {
    // A List is kept next to the Set so that the priority order
    // sort -> style -> maker -> brand survives de-duplication.
    List<String> keywordList = new ArrayList<>();
    Set<String> keywordSet = new HashSet<>(3000);
    addKeywords(keywordSet, keywordList, getSortNameSet());
    addKeywords(keywordSet, keywordList, getStyleNameSet());
    addKeywords(keywordSet, keywordList, getParameterMakeNameSet());
    addKeywords(keywordSet, keywordList, getBrandNameSet());

    Map<String, Set<String>> keywordTokens = new HashMap<>();
    for (String keyword : keywordSet) {
        if (keyword.length() <= 1 || IGNORE_KEYWORDS.contains(keyword)) {
            continue;
        }
        Set<String> tokens = getTokens(keyword);
        if (CollectionUtils.isNotEmpty(tokens)) {
            keywordTokens.put(keyword, tokens);
        }
    }

    // Publish the map before the list: every keyword a reader takes from
    // the new list can then already be looked up in the new map.
    this.yohoKeywords = keywordTokens;
    this.yohoKeywordList = keywordList;
}
/**
 * Appends every keyword of {@code tempSet} that has not been seen before.
 *
 * @param keywordSet  all keywords accepted so far; used for de-duplication and updated in place
 * @param keywordList accepted keywords in acceptance order; updated in place
 * @param tempSet     candidate keywords to merge in
 */
private void addKeywords(Set<String> keywordSet, List<String> keywordList, Set<String> tempSet) {
    for (String candidate : tempSet) {
        // Set.add returns true only when the element was not already present.
        if (keywordSet.add(candidate)) {
            keywordList.add(candidate);
        }
    }
    logger.info("[func=SuggestConversionFlow.init][resultSetAfterLoad={}]", keywordSet.size());
}
/**
 * Collects the product sort names to use as keywords.
 *
 * <p>Some sort names join several words with a slash (for example
 * "凉鞋/凉拖"); each part becomes a keyword of its own. Null or blank sort
 * names and blank parts are skipped.
 *
 * @return the distinct, non-blank sort name parts
 */
private Set<String> getSortNameSet() {
    Set<String> sortNameSet = new HashSet<>(300);
    List<ProductSort> productSortList = productSortService.getPageLists(1, Integer.MAX_VALUE);
    for (ProductSort productSort : productSortList) {
        String sortName = productSort.getSortName();
        if (StringUtils.isBlank(sortName)) {
            // Nothing to split; also avoids calling split on a null name.
            continue;
        }
        for (String itemName : sortName.split("\\/")) {
            // A leading slash produces an empty first part; do not keep it.
            if (StringUtils.isNotBlank(itemName)) {
                sortNameSet.add(itemName);
            }
        }
    }
    // Logs the size of the set built by this method.
    logger.info("[func=SuggestConversionFlow.init][resultSetAfterLoadSort={}]", sortNameSet.size());
    return sortNameSet;
}
/**
 * Collects the non-blank style names to use as keywords.
 *
 * @return the distinct, non-blank style names
 */
private Set<String> getStyleNameSet() {
    List<Style> styleList = styleService.getStylePageLists(1, Integer.MAX_VALUE);
    // Build the set once and reuse it for both the log line and the result.
    Set<String> styleNameSet = styleList.stream()
            .map(Style::getStyleName)
            .filter(name -> StringUtils.isNotBlank(name))
            .collect(Collectors.toSet());
    // Logs the size of the set built by this method.
    logger.info("[func=SuggestConversionFlow.init][resultSetAfterLoadStyle={}]", styleNameSet.size());
    return styleNameSet;
}
/**
 * Collects the non-blank parameter values of all ParameterMake records to
 * use as keywords.
 *
 * @return the distinct, non-blank parameter values
 */
private Set<String> getParameterMakeNameSet() {
    List<ParameterMake> parameterMakeList = parameterMakeService.getAll();
    // Build the set once and reuse it for both the log line and the result.
    Set<String> makeNameSet = parameterMakeList.stream()
            .map(ParameterMake::getParameterValue)
            .filter(name -> StringUtils.isNotBlank(name))
            .collect(Collectors.toSet());
    // Logs the size of the set built by this method.
    logger.info("[func=SuggestConversionFlow.init][resultSetAfterLoadMake={}]", makeNameSet.size());
    return makeNameSet;
}
Set<String> tokens;
for (String keyword : resultSet) {
if (keyword.length() > 1 && !IGNORE_KEYWORDS.contains(keyword) && CollectionUtils.isNotEmpty(tokens = getTokens(keyword))) {
yohoKeywords.put(keyword, tokens);
}
}
/**
 * Collects every non-blank brand name to use as keywords: the default
 * name, the English name and the Chinese name of each brand.
 *
 * @return the distinct, non-blank brand names across all three name fields
 */
private Set<String> getBrandNameSet() {
    Set<String> allNames = new HashSet<>(3000);
    List<Brand> brands = brandService.getBrandPageLists(1, Integer.MAX_VALUE);

    Set<String> defaultNames = brands.stream()
            .map(Brand::getBrandName)
            .filter(StringUtils::isNotBlank)
            .collect(Collectors.toSet());
    allNames.addAll(defaultNames);

    Set<String> englishNames = brands.stream()
            .map(Brand::getBrandNameEn)
            .filter(StringUtils::isNotBlank)
            .collect(Collectors.toSet());
    allNames.addAll(englishNames);

    Set<String> chineseNames = brands.stream()
            .map(Brand::getBrandNameCn)
            .filter(StringUtils::isNotBlank)
            .collect(Collectors.toSet());
    allNames.addAll(chineseNames);

    return allNames;
}
@Override
... ... @@ -154,11 +186,16 @@ public class SuggestConversionFlow implements RetryBusinessFlow {
}
StringBuffer sb = new StringBuffer();
yohoKeywords.forEach((keyword, keywordTokens) -> {
if (tokenMatch(contentTokens, keywordTokens)) {
int count = 0;
for (String keyword : yohoKeywordList) {
if (tokenMatch(contentTokens, yohoKeywords.get(keyword))) {
count++;
sb.append(keyword).append(",");
if (count == MAX_KEYWORDS) {
break;
}
}
});
}
if (sb.length() > 1) {
return sb.substring(0, sb.length() - 1);
... ... @@ -168,6 +205,10 @@ public class SuggestConversionFlow implements RetryBusinessFlow {
}
private boolean tokenMatch(Set<String> contentTokens, Set<String> keywordTokens) {
if (CollectionUtils.isEmpty(keywordTokens)) {
return false;
}
// 判断是否匹配yoho关键词
// 当前的设计考虑的是严格匹配 即需要匹配所有词
return contentTokens.containsAll(keywordTokens);
... ...