Authored by Gino Zhang

走conversion的情况不再出现组合词,直接走精确匹配,效果更好

... ... @@ -113,7 +113,7 @@ public class SearchRecommendServiceImpl implements ISearchRecommendService {
Double count = searchKeyWordService.getKeywordCount(RedisKeys.YOHO_SEARCH_KEYWORDS_EMPTY, queryWord);
if (count == null || queryWord.length() >= 5) {
// 1.3) 如果该关键词一次都没有出现在空结果列表或者长度大于5 则该词很有可能是可以搜索出结果的 因此优先取suggest去搜索一把 减少后面的查询动作
JSONObject recommendResult = recommendBySuggestIndex(paramMap, keywordsToSearch);
JSONObject recommendResult = recommendBySuggestIndex(paramMap, keywordsToSearch, false);
if (recommendResult == null) {
return defaultSuggestRecommendation();
} else if (CollectionUtils.isNotEmpty((List) recommendResult.get("terms_suggestion"))) {
... ... @@ -135,7 +135,7 @@ public class SearchRecommendServiceImpl implements ISearchRecommendService {
return defaultSuggestRecommendation();
}
JSONObject recommendResult = recommendBySuggestIndex(paramMap, keywordsToSearch);
JSONObject recommendResult = recommendBySuggestIndex(paramMap, keywordsToSearch, dest != null);
if (recommendResult == null || CollectionUtils.isEmpty((List) recommendResult.get("terms_suggestion"))) {
recommendResult = defaultSuggestRecommendation();
}
... ... @@ -182,9 +182,10 @@ public class SearchRecommendServiceImpl implements ISearchRecommendService {
*
* @param paramMap 搜索参数,用于判断取哪个count
* @param keywordsToSearch 计算获取的推荐词列表
* @param isLimitKeywords 是否限制只能是推荐词列表里面的词
* @return 搜推荐结果
*/
private JSONObject recommendBySuggestIndex(Map<String, String> paramMap, String keywordsToSearch) {
private JSONObject recommendBySuggestIndex(Map<String, String> paramMap, String keywordsToSearch, boolean isLimitKeywords) {
String srcQueryWord = paramMap.get(SearchRequestParams.PARAM_SEARCH_KEYWORD);
long begin = System.currentTimeMillis();
logger.info("[func=recommendBySuggestIndex][srcQueryWord={}][keywordsToSearch={}][begin={}]", srcQueryWord, keywordsToSearch, begin);
... ... @@ -195,7 +196,13 @@ public class SearchRecommendServiceImpl implements ISearchRecommendService {
}
// 2) 先对keywordsToSearch进行分词
List<String> terms = searchKeyWordService.getAnalyzeTerms(keywordsToSearch, "ik_smart", true);
List<String> terms = null;
if (isLimitKeywords) {
terms = Arrays.stream(keywordsToSearch.split(",")).filter(term -> term != null && term.length() > 1).distinct().collect(Collectors.toList());
} else {
terms = searchKeyWordService.getAnalyzeTerms(keywordsToSearch, "ik_smart", true);
}
if (CollectionUtils.isEmpty(terms)) {
return new JSONObject();
}
... ... @@ -207,39 +214,8 @@ public class SearchRecommendServiceImpl implements ISearchRecommendService {
final String countField = suggestService.getCountField(paramMap);
SearchParam searchParam = new SearchParam();
// 3.1) 对于suggest的multi-fields至少要有一个字段匹配到 匹配打分为常量1
MultiMatchQueryBuilder queryBuilder = QueryBuilders.multiMatchQuery(keywordsToSearch.trim().toLowerCase(),
"keyword", "keyword.keyword_ik", "keyword.keyword_pinyin", "keyword.keyword_jianpin", "keyword.keyword_lowercase")
.analyzer("ik_smart")
.type(MultiMatchQueryBuilder.Type.BEST_FIELDS)
.operator(MatchQueryBuilder.Operator.OR)
.minimumShouldMatch("1");
FunctionScoreQueryBuilder functionScoreQueryBuilder = new FunctionScoreQueryBuilder(QueryBuilders.constantScoreQuery(queryBuilder));
for (String term : termSet) {
// 3.2) 对于完全匹配Term的加1分
functionScoreQueryBuilder.add(QueryBuilders.termQuery("standardKeyword", CharUtils.standardized(term)),
ScoreFunctionBuilders.weightFactorFunction(1));
// 3.3) 对于匹配到一个Term的加2分
functionScoreQueryBuilder.add(QueryBuilders.termQuery("keyword.keyword_ik", term),
ScoreFunctionBuilders.weightFactorFunction(2));
}
// 3.4) 处理性别相关的关键词
if (terms.contains("男") && !terms.contains("女")) {
// 给女生相关减分
functionScoreQueryBuilder.add(QueryBuilders.termQuery("keyword.keyword_ik", "女"),
ScoreFunctionBuilders.weightFactorFunction(-5));
} else if (!terms.contains("男") && terms.contains("女")) {
// 给男生相关减分
functionScoreQueryBuilder.add(QueryBuilders.termQuery("keyword.keyword_ik", "男"),
ScoreFunctionBuilders.weightFactorFunction(-5));
}
functionScoreQueryBuilder.boostMode(CombineFunction.SUM);
searchParam.setQuery(functionScoreQueryBuilder);
QueryBuilder queryBuilder = isLimitKeywords ? buildQueryBuilderByLimit(terms) : buildQueryBuilder(keywordsToSearch, termSet);
searchParam.setQuery(queryBuilder);
searchParam.setPage(1);
searchParam.setSize(SMART_SUGGESTION_TERM_COUNT);
... ... @@ -248,6 +224,9 @@ public class SearchRecommendServiceImpl implements ISearchRecommendService {
boolFilter.must(QueryBuilders.termQuery("status", VALID_STATUS));
boolFilter.must(QueryBuilders.rangeQuery(countField).gte(SMART_SUGGESTION_COUNT_LIMIT));
boolFilter.mustNot(QueryBuilders.termQuery("standardKeyword", CharUtils.standardized(srcQueryWord)));
if (isLimitKeywords) {
boolFilter.must(QueryBuilders.termsQuery("standardKeyword", termSet.stream().map(term -> CharUtils.standardized(term)).collect(Collectors.toList())));
}
searchParam.setFiter(boolFilter);
// 3.6) 按照得分、权重、数量的规则降序排序
... ... @@ -281,6 +260,57 @@ public class SearchRecommendServiceImpl implements ISearchRecommendService {
return suggestResult;
}
/**
 * Builds the scoring query used when suggestions are limited to a given keyword list.
 *
 * <p>The base query matches every document, so ranking comes from the weight functions added
 * here: one on documents whose {@code type} equals 2, and one per entry of {@code terms}
 * matched against {@code standardKeyword}, where earlier entries receive larger weights.
 *
 * @param terms candidate keywords in priority order; the first entry is weighted
 *              {@code terms.size()} and the last entry is weighted 1
 * @return the function-score query, configured with boost mode {@code SUM}
 */
private QueryBuilder buildQueryBuilderByLimit(List<String> terms) {
    FunctionScoreQueryBuilder scoredQuery = new FunctionScoreQueryBuilder(QueryBuilders.matchAllQuery());

    // Boost keywords of type 2 (described by the original author as category-type keywords).
    scoredQuery.add(QueryBuilders.termQuery("type", Integer.valueOf(2)),
            ScoreFunctionBuilders.weightFactorFunction(3));

    // Boost by order of appearance: the weight starts at the list size and drops by one per term.
    // NOTE(review): how the weights of several matching functions are combined depends on the
    // score mode, which is not set here - confirm the default is the intended one.
    int positionWeight = terms.size();
    for (String term : terms) {
        scoredQuery.add(QueryBuilders.termQuery("standardKeyword", CharUtils.standardized(term)),
                ScoreFunctionBuilders.weightFactorFunction(positionWeight));
        positionWeight--;
    }

    scoredQuery.boostMode(CombineFunction.SUM);
    return scoredQuery;
}
/**
 * Builds the scoring query used when suggestions are searched by analyzed text rather than
 * being limited to a fixed keyword list.
 *
 * @param keywordsToSearch raw keyword text; it is trimmed and lower-cased before being matched
 *                         against the {@code keyword} field and its sub-fields
 * @param termSet          terms that each contribute two weight functions (exact match on
 *                         {@code standardKeyword}, term match on {@code keyword.keyword_ik})
 * @return the function-score query, configured with boost mode {@code SUM}
 */
private QueryBuilder buildQueryBuilder(String keywordsToSearch, Set<String> termSet) {
    // 3.1) At least one of the suggest multi-fields has to match; the match itself is wrapped
    //      in a constant-score query so it contributes a fixed score.
    String normalizedKeywords = keywordsToSearch.trim().toLowerCase();
    MultiMatchQueryBuilder multiFieldMatch = QueryBuilders.multiMatchQuery(normalizedKeywords,
            "keyword", "keyword.keyword_ik", "keyword.keyword_pinyin", "keyword.keyword_jianpin", "keyword.keyword_lowercase")
            .analyzer("ik_smart")
            .type(MultiMatchQueryBuilder.Type.BEST_FIELDS)
            .operator(MatchQueryBuilder.Operator.OR)
            .minimumShouldMatch("1");
    FunctionScoreQueryBuilder scoredQuery =
            new FunctionScoreQueryBuilder(QueryBuilders.constantScoreQuery(multiFieldMatch));

    for (String candidate : termSet) {
        // 3.2) Weight 1 when the standardized keyword equals the term exactly.
        scoredQuery.add(QueryBuilders.termQuery("standardKeyword", CharUtils.standardized(candidate)),
                ScoreFunctionBuilders.weightFactorFunction(1));
        // 3.3) Weight 2 when the ik-analyzed keyword contains the term.
        scoredQuery.add(QueryBuilders.termQuery("keyword.keyword_ik", candidate),
                ScoreFunctionBuilders.weightFactorFunction(2));
    }

    // 3.4) Gender handling: when exactly one of "male"/"female" is among the terms, apply a
    //      weight of -5 to keywords containing the opposite gender.
    boolean mentionsMale = termSet.contains("男");
    boolean mentionsFemale = termSet.contains("女");
    if (mentionsMale != mentionsFemale) {
        String oppositeGender = mentionsMale ? "女" : "男";
        scoredQuery.add(QueryBuilders.termQuery("keyword.keyword_ik", oppositeGender),
                ScoreFunctionBuilders.weightFactorFunction(-5));
    }

    scoredQuery.boostMode(CombineFunction.SUM);
    return scoredQuery;
}
private boolean containsProductInSearchResult(SearchApiResult searchResult) {
JSONObject dataMap = ((JSONObject) searchResult.getData());
return CollectionUtils.isNotEmpty((List) dataMap.get("product_list"));
... ...