Authored by Gino Zhang

改进搜索提示suggest索引的搜索算法 优先推荐关联term更多的suggest的词

package com.yoho.search.service.service;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.yoho.core.redis.YHRedisTemplate;
import com.yoho.core.redis.YHZSetOperations;
import com.yoho.search.base.utils.DateStyle;
import com.yoho.search.base.utils.DateUtil;
import com.yoho.search.base.utils.ISearchConstants;
import com.yoho.search.base.utils.RedisKeys;
import com.yoho.search.core.es.IElasticsearchClient;
import com.yoho.search.core.es.impl.YohoIndexHelper;
import com.yoho.search.base.utils.RedisKeys;
import com.yoho.search.service.vo.KeyWordWithCount;
import org.apache.commons.lang.StringUtils;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse.AnalyzeToken;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
... ... @@ -16,10 +19,12 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.redis.core.ZSetOperations;
import org.springframework.data.redis.core.ZSetOperations.TypedTuple;
import org.springframework.stereotype.Service;
import org.springframework.util.Assert;
import javax.annotation.Resource;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
... ... @@ -72,8 +77,35 @@ public class SearchKeyWordService {
* @param keyWord
* @return
*/
public List<String> getAnalyzeTerms(String keyWord, String analyzer) {
public List<String> getAnalyzeTerms(final String keyWord, final String analyzer, boolean useCache) {
if (!useCache) {
return getAnalyzeTermsDirect(keyWord, analyzer);
}
try {
return CacheBuilder.newBuilder()
.maximumSize(10000)
.expireAfterWrite(10, TimeUnit.MINUTES)
.build(new CacheLoader<String, List<String>>() {
@Override
public List<String> load(String cacheKey) throws Exception {
String[] arrays = cacheKey.split("@", 2);
Assert.isTrue(arrays != null && arrays.length == 2);
return getAnalyzeTermsDirect(arrays[1], arrays[0]);
}
}).get(analyzer + "@" + keyWord);
} catch (ExecutionException e) {
logger.error(keyWord, e);
return new ArrayList<>();
}
}
public List<String> getAnalyzeTermsDirect(String keyWord, String analyzer) {
try {
if (StringUtils.isEmpty(keyWord)) {
return new ArrayList<>();
}
List<AnalyzeToken> tokens = getAnalyzeTokens(keyWord, analyzer);
List<String> results = new ArrayList<String>();
for (AnalyzeToken analyzeToken : tokens) {
... ... @@ -86,7 +118,7 @@ public class SearchKeyWordService {
}
}
public void recordSuggestTip(String queryWord){
public void recordSuggestTip(String queryWord) {
recordKeyWord(RedisKeys.YOHO_SEARCH_KEYWORDS_TIPS, queryWord);
}
... ... @@ -216,12 +248,11 @@ public class SearchKeyWordService {
}
}
public String deleteRedisKey(String redisKey){
if(yhNoSyncRedisTemplate.hasKey(redisKey)){
public String deleteRedisKey(String redisKey) {
if (yhNoSyncRedisTemplate.hasKey(redisKey)) {
yhNoSyncRedisTemplate.delete(redisKey);
return "The key has been deleted succede!";
}
else{
} else {
return "The key doesn't exist.";
}
}
... ...
... ... @@ -18,6 +18,7 @@ import com.yoho.search.service.vo.SearchApiResult;
import com.yoho.search.service.vo.SuggestApiResult;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;
import org.elasticsearch.common.lucene.search.function.CombineFunction;
import org.elasticsearch.index.query.*;
import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
... ... @@ -256,22 +257,44 @@ public class SuggestServiceImpl implements ISuggestService, ApplicationEventPubl
}
public JSONObject suggestTipsBySuggestIndex(Map<String, String> paramMap) {
// 1) 对query进行判断 为空时不处理
String queryWord = paramMap.get(SearchRequestParams.PARAM_SEARCH_KEYWORD);
long begin = System.currentTimeMillis();
logger.info("[func=suggestTipsBySuggestIndex][query={}][begin={}]", queryWord, begin);
// 1) 先对query进行分词
List<String> terms = searchKeyWordService.getAnalyzeTerms(queryWord, "ik_smart", true);
if (CollectionUtils.isEmpty(terms)) {
return null;
}
Set<String> termSet = terms.stream().collect(Collectors.toSet());
logger.info("[func=suggestTipsBySuggestIndex][termSet={}]", termSet);
// 2) 根据terms搜索构造搜索请求
final String countField = getCountField(paramMap);
SearchParam searchParam = new SearchParam();
MultiMatchQueryBuilder queryBuilder = QueryBuilders.multiMatchQuery(queryWord);
queryBuilder.field("keyword").field("keyword.keyword_ik", 10F).field("keyword.keyword_pinyin").field("keyword.keyword_jianpin")
// 2.1) 对于suggest的multi-fields至少要有一个字段匹配到20% 匹配打分为常量1
MultiMatchQueryBuilder queryBuilder = QueryBuilders.multiMatchQuery(queryWord.trim().toLowerCase(),
"keyword", "keyword.keyword_ik", "keyword.keyword_pinyin", "keyword.keyword_jianpin", "keyword.keyword_lowercase")
.analyzer("ik_smart")
.type(MultiMatchQueryBuilder.Type.BEST_FIELDS)
.operator(MatchQueryBuilder.Operator.OR)
.minimumShouldMatch("20%");
searchParam.setQuery(queryBuilder);
FunctionScoreQueryBuilder functionScoreQueryBuilder = new FunctionScoreQueryBuilder(QueryBuilders.constantScoreQuery(queryBuilder));
for (String term : termSet) {
// 2.2) 对于完全匹配Term的加1分
functionScoreQueryBuilder.add(QueryBuilders.termQuery("keyword.keyword_lowercase", term.trim().toLowerCase()),
ScoreFunctionBuilders.weightFactorFunction(1));
// 2.3) 对于匹配到一个Term的加2分
functionScoreQueryBuilder.add(QueryBuilders.termQuery("keyword.keyword_ik", term),
ScoreFunctionBuilders.weightFactorFunction(2));
}
functionScoreQueryBuilder.boostMode(CombineFunction.SUM);
searchParam.setQuery(functionScoreQueryBuilder);
searchParam.setPage(1);
searchParam.setSize(SMART_SUGGESTION_TERM_COUNT);
... ... @@ -280,6 +303,13 @@ public class SuggestServiceImpl implements ISuggestService, ApplicationEventPubl
boolFilter.mustNot(QueryBuilders.termQuery("keyword.keyword_lowercase", queryWord.trim().toLowerCase()));
searchParam.setFiter(boolFilter);
// 2.4) 按照得分、权重、数量的规则降序排序
List<SortBuilder> sortBuilders = new ArrayList<>(3);
sortBuilders.add(SortBuilders.fieldSort("_score").order(SortOrder.DESC));
sortBuilders.add(SortBuilders.fieldSort("weight").order(SortOrder.DESC));
sortBuilders.add(SortBuilders.fieldSort(countField).order(SortOrder.DESC));
searchParam.setSortBuilders(sortBuilders);
// 3) 先从缓存中获取
final String indexName = ISearchConstants.INDEX_NAME_SUGGEST;
JSONObject suggestResult = searchCacheService.getJSONObjectFromCache(indexName, searchParam);
... ...