...
|
...
|
@@ -3,79 +3,162 @@ package com.yoho.search.service.personalized; |
|
|
import java.util.Collections;
|
|
|
import java.util.Comparator;
|
|
|
import java.util.List;
|
|
|
import java.util.Map;
|
|
|
|
|
|
import org.apache.commons.lang.StringUtils;
|
|
|
import org.elasticsearch.common.lucene.search.function.CombineFunction;
|
|
|
import org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction;
|
|
|
import org.elasticsearch.index.query.BoolQueryBuilder;
|
|
|
import org.elasticsearch.index.query.QueryBuilder;
|
|
|
import org.elasticsearch.index.query.QueryBuilders;
|
|
|
import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
|
|
|
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilder;
|
|
|
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
|
|
|
import org.slf4j.Logger;
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
|
|
|
import com.yoho.search.base.utils.ISearchConstants;
|
|
|
import com.yoho.search.core.personalized.PConsts;
|
|
|
import com.yoho.search.service.personalized.model.SearchFeature;
|
|
|
import com.yoho.search.service.service.SearchDynamicConfigService;
|
|
|
import com.yoho.search.service.utils.HttpServletRequestUtils;
|
|
|
|
|
|
@Service
|
|
|
public final class PersonalizedSearch {
|
|
|
|
|
|
private static final float MaxUserFeatureBoost = 50;
|
|
|
|
|
|
@Autowired
|
|
|
private UserFeaturesRedis userFeatures;
|
|
|
@Autowired
|
|
|
private SearchDynamicConfigService dynamicConfig;
|
|
|
|
|
|
private static final float MaxUserFeatureBoost = 50;
|
|
|
private static final Logger PERSONALIZED = LoggerFactory.getLogger("PERSONALIZED");
|
|
|
|
|
|
// public QueryBuilder builder(QueryBuilder queryBuilder, String uid, String
|
|
|
// pageId) {
|
|
|
// QueryBuilder qBuilder = null;
|
|
|
// List<SearchFeature> sfRedis = userFeatures.getUserFeaturesFromRedis(uid,
|
|
|
// pageId);
|
|
|
// if (sfRedis == null || sfRedis.isEmpty()) {
|
|
|
// qBuilder = queryBuilder;
|
|
|
// } else {
|
|
|
// BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
|
|
|
// boolQueryBuilder.must(queryBuilder);
|
|
|
//
|
|
|
// List<SearchFeature> searchFeatures = null;
|
|
|
// // should个数超过一定数量, 按权重值desc排序, 截取数据
|
|
|
// if (sfRedis.size() > ISearchConstants.PERSONALIZED_SEARCH_SHOULD_MAX) {
|
|
|
// // 按照boost进行排序
|
|
|
// Collections.sort(sfRedis, new Comparator<SearchFeature>() {
|
|
|
// @Override
|
|
|
// public int compare(SearchFeature left, SearchFeature right) {
|
|
|
// Float fleft = Float.valueOf(left.getBoost());
|
|
|
// Float fright = Float.valueOf(right.getBoost());
|
|
|
// return fright.compareTo(fleft);
|
|
|
// }
|
|
|
// });
|
|
|
// searchFeatures = sfRedis.subList(0,
|
|
|
// ISearchConstants.PERSONALIZED_SEARCH_SHOULD_MAX);
|
|
|
// } else {
|
|
|
// searchFeatures = sfRedis;
|
|
|
// }
|
|
|
// // 获取用户最大的boost
|
|
|
// float maxBoost = this.getMaxBoost(searchFeatures);
|
|
|
// boolean isFuzzySearch = this.isFuzzySearch(pageId);
|
|
|
// for (SearchFeature searchFeature : searchFeatures) {
|
|
|
// float boost = searchFeature.getBoost();
|
|
|
// if (isFuzzySearch) {
|
|
|
// boost = getAdaptoredBoost(maxBoost, boost);
|
|
|
// }
|
|
|
// boolQueryBuilder.should(QueryBuilders.termQuery(searchFeature.getTargetParam(),
|
|
|
// searchFeature.getParamValues()).boost(boost));
|
|
|
// }
|
|
|
// qBuilder = boolQueryBuilder;
|
|
|
// }
|
|
|
//
|
|
|
// float factor = getFunctionScoreFactor(pageId);
|
|
|
// // new_score = old_score * log(2 + factor * page_boosts)
|
|
|
// String fieldName = getFuncScoreField(pageId);
|
|
|
// FunctionScoreQueryBuilder fsQueryBuilder = new
|
|
|
// FunctionScoreQueryBuilder(qBuilder);
|
|
|
// fsQueryBuilder.add(
|
|
|
// ScoreFunctionBuilders.fieldValueFactorFunction(fieldName).factor(factor).modifier(FieldValueFactorFunction.Modifier.LOG2P)
|
|
|
// .missing(PConsts.PRODUCT_FUNCTION_MISSING_VALUE)).boostMode(CombineFunction.MULT);
|
|
|
//
|
|
|
// return fsQueryBuilder;
|
|
|
// }
|
|
|
|
|
|
private String getUidFromParamMap(Map<String, String> paramMap) {
|
|
|
return paramMap.get("uid");
|
|
|
}
|
|
|
|
|
|
public QueryBuilder builder(QueryBuilder queryBuilder, String uid, String pageId) {
|
|
|
QueryBuilder qBuilder = null;
|
|
|
private String getPageIdFromParamMap(Map<String, String> paramMap) {
|
|
|
String pageId = paramMap.get("pageId");
|
|
|
if (StringUtils.isBlank(pageId)) {
|
|
|
pageId = PConsts.PAGE_ID_NEW;
|
|
|
}
|
|
|
return pageId;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* 针对用户特征,对用户进行加分
|
|
|
*
|
|
|
* @param queryBuilder
|
|
|
* @param uid
|
|
|
* @param pageId
|
|
|
* @return
|
|
|
*/
|
|
|
public QueryBuilder buildPersonalizedQueryBuilder(QueryBuilder queryBuilder, Map<String, String> paramMap) {
|
|
|
PERSONALIZED.info("do personal search , paramString is [{}]", HttpServletRequestUtils.genParamString(paramMap));
|
|
|
|
|
|
// 2、获取用户信息和页面信息
|
|
|
String uid = this.getUidFromParamMap(paramMap);
|
|
|
String pageId = this.getPageIdFromParamMap(paramMap);
|
|
|
List<SearchFeature> sfRedis = userFeatures.getUserFeaturesFromRedis(uid, pageId);
|
|
|
if (sfRedis == null || sfRedis.isEmpty()) {
|
|
|
qBuilder = queryBuilder;
|
|
|
} else {
|
|
|
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
|
|
|
boolQueryBuilder.must(queryBuilder);
|
|
|
|
|
|
List<SearchFeature> searchFeatures = null;
|
|
|
// should个数超过一定数量, 按权重值desc排序, 截取数据
|
|
|
if (sfRedis.size() > ISearchConstants.PERSONALIZED_SEARCH_SHOULD_MAX) {
|
|
|
// 按照boost进行排序
|
|
|
Collections.sort(sfRedis, new Comparator<SearchFeature>() {
|
|
|
@Override
|
|
|
public int compare(SearchFeature left, SearchFeature right) {
|
|
|
Float fleft = Float.valueOf(left.getBoost());
|
|
|
Float fright = Float.valueOf(right.getBoost());
|
|
|
return fright.compareTo(fleft);
|
|
|
}
|
|
|
});
|
|
|
searchFeatures = sfRedis.subList(0, ISearchConstants.PERSONALIZED_SEARCH_SHOULD_MAX);
|
|
|
} else {
|
|
|
searchFeatures = sfRedis;
|
|
|
}
|
|
|
// 获取用户最大的boost
|
|
|
float maxBoost = this.getMaxBoost(searchFeatures);
|
|
|
boolean isFuzzySearch = this.isFuzzySearch(pageId);
|
|
|
for (SearchFeature searchFeature : searchFeatures) {
|
|
|
float boost = searchFeature.getBoost();
|
|
|
if (isFuzzySearch) {
|
|
|
boost = getAdaptoredBoost(maxBoost, boost);
|
|
|
return queryBuilder;
|
|
|
}
|
|
|
|
|
|
// 3、对 用户权重进行排序,should个数超过一定数量, 按权重值desc排序, 截取数据
|
|
|
List<SearchFeature> searchFeatures = null;
|
|
|
if (sfRedis.size() > ISearchConstants.PERSONALIZED_SEARCH_SHOULD_MAX) {
|
|
|
// 按照boost进行排序
|
|
|
Collections.sort(sfRedis, new Comparator<SearchFeature>() {
|
|
|
@Override
|
|
|
public int compare(SearchFeature left, SearchFeature right) {
|
|
|
Float fleft = Float.valueOf(left.getBoost());
|
|
|
Float fright = Float.valueOf(right.getBoost());
|
|
|
return fright.compareTo(fleft);
|
|
|
}
|
|
|
boolQueryBuilder.should(QueryBuilders.termQuery(searchFeature.getTargetParam(), searchFeature.getParamValues()).boost(boost));
|
|
|
});
|
|
|
searchFeatures = sfRedis.subList(0, ISearchConstants.PERSONALIZED_SEARCH_SHOULD_MAX);
|
|
|
} else {
|
|
|
searchFeatures = sfRedis;
|
|
|
}
|
|
|
|
|
|
// 4、对用户权重 进行加分
|
|
|
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
|
|
|
boolQueryBuilder.must(queryBuilder);
|
|
|
float maxBoost = this.getMaxBoost(searchFeatures);// 获取用户最大的boost,已调整boost值
|
|
|
boolean isFuzzySearch = this.isFuzzySearch(pageId);
|
|
|
for (SearchFeature searchFeature : searchFeatures) {
|
|
|
float boost = searchFeature.getBoost();
|
|
|
if (isFuzzySearch) {
|
|
|
boost = getAdaptoredBoost(maxBoost, boost);
|
|
|
}
|
|
|
qBuilder = boolQueryBuilder;
|
|
|
boolQueryBuilder.should(QueryBuilders.termQuery(searchFeature.getTargetParam(), searchFeature.getParamValues()).boost(boost));
|
|
|
}
|
|
|
return boolQueryBuilder;
|
|
|
}
|
|
|
|
|
|
public ScoreFunctionBuilder getPersonalizedScoreFunctionBuilder(Map<String, String> paramMap) {
|
|
|
String pageId = this.getPageIdFromParamMap(paramMap);
|
|
|
float factor = getFunctionScoreFactor(pageId);
|
|
|
// new_score = old_score * log(2 + factor * page_boosts)
|
|
|
String fieldName = getFuncScoreField(pageId);
|
|
|
FunctionScoreQueryBuilder fsQueryBuilder = new FunctionScoreQueryBuilder(qBuilder);
|
|
|
fsQueryBuilder.add(
|
|
|
ScoreFunctionBuilders.fieldValueFactorFunction(fieldName).factor(factor).modifier(FieldValueFactorFunction.Modifier.LOG2P)
|
|
|
.missing(PConsts.PRODUCT_FUNCTION_MISSING_VALUE)).boostMode(CombineFunction.MULT);
|
|
|
|
|
|
return fsQueryBuilder;
|
|
|
return ScoreFunctionBuilders.fieldValueFactorFunction(fieldName).factor(factor).modifier(FieldValueFactorFunction.Modifier.LOG2P)
|
|
|
.missing(PConsts.PRODUCT_FUNCTION_MISSING_VALUE);
|
|
|
}
|
|
|
|
|
|
|
|
|
private float getMaxBoost(List<SearchFeature> searchFeatures) {
|
|
|
float boost = 0f;
|
|
|
for (SearchFeature searchFeature : searchFeatures) {
|
...
|
...
|
@@ -114,7 +197,7 @@ public final class PersonalizedSearch { |
|
|
public static void main(String[] args) {
|
|
|
System.out.println(getAdaptoredBoost(459.90198f, 109.67697f));
|
|
|
}
|
|
|
|
|
|
|
|
|
private float getFunctionScoreFactor(String pageId) {
|
|
|
float factor = 0.0f;
|
|
|
// 对于模糊搜索, 需要把商品特征的权重得分降低, 其余的按其排序
|
...
|
...
|
|