Authored by Gino Zhang

删除DynSegWord相关的废弃代码

package com.yoho.search.dal;
import java.util.List;
import com.yoho.search.dal.model.DynSegWord;
import org.apache.ibatis.annotations.Param;
/**
 * MyBatis mapper contract for {@link DynSegWord} records.
 *
 * <p>Each method corresponds to the statement with the same id in the mapper XML whose
 * namespace is this interface's fully qualified name.
 */
public interface DynSegWordMapper {

    /**
     * Inserts the given words in one batch statement.
     *
     * @param list the words to insert
     */
    void insertBatch(List<DynSegWord> list);

    /**
     * Loads every word of the given type.
     *
     * @param wordType the word type to filter on
     * @return the matching words
     */
    List<DynSegWord> selectByWordType(@Param("wordType") Integer wordType);

    /**
     * Counts the words whose last update time is at or after the given time.
     *
     * @param updateSinceThisTime lower bound (inclusive) for the last update time
     * @return the number of matching words
     */
    int selectWordsCount(@Param("updateSinceThisTime") Integer updateSinceThisTime);
}
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd" >
<mapper namespace="com.yoho.search.dal.DynSegWordMapper">
<!-- SQL statements for the dyn_seg_word table; statement ids match the methods of the namespace interface. -->
<!-- Maps the columns id, word_type, word and last_update_time onto the DynSegWord model. -->
<resultMap id="DynSegWordResultMap" type="com.yoho.search.dal.model.DynSegWord">
<id column="id" property="id" jdbcType="INTEGER" />
<result column="word_type" property="wordType" jdbcType="INTEGER" />
<result column="word" property="word" jdbcType="VARCHAR" />
<result column="last_update_time" property="lastUpdateTime"
jdbcType="INTEGER" />
</resultMap>
<!-- Shared column list, pulled into selectByWordType through an include. -->
<sql id="DynSegWord_Column_List">
id, word_type, word, last_update_time
</sql>
<!-- Multi-row "insert ignore": one value tuple per list element; the id column is not supplied. -->
<!-- NOTE(review): MyBatis documents the statement timeout attribute in seconds, so 20000 may have been meant as milliseconds; confirm. The same value is used on every statement in this file. -->
<insert id="insertBatch" parameterType="java.util.List" timeout="20000">
insert ignore into dyn_seg_word (word_type, word, last_update_time)
values
<foreach collection="list" item="item" index="index"
separator=",">
(#{item.wordType,jdbcType=INTEGER},
#{item.word,jdbcType=VARCHAR},
#{item.lastUpdateTime,jdbcType=INTEGER})
</foreach>
</insert>
<!-- Returns every row whose word_type equals the wordType parameter. -->
<select id="selectByWordType" resultMap="DynSegWordResultMap"
timeout="20000">
select
<include refid="DynSegWord_Column_List" />
from dyn_seg_word where word_type = #{wordType,jdbcType=INTEGER}
</select>
<!-- Counts rows with last_update_time at or after updateSinceThisTime; the query does not filter on word_type. -->
<!-- NOTE(review): count(1) without GROUP BY already yields a single row, so the trailing LIMIT 1 looks redundant; confirm before removing. -->
<select id="selectWordsCount" resultType="java.lang.Integer"
timeout="20000">
SELECT count(1) FROM dyn_seg_word where
last_update_time >=
#{updateSinceThisTime,jdbcType=INTEGER} LIMIT 1
</select>
</mapper>
\ No newline at end of file
... ... @@ -12,15 +12,9 @@ import javax.servlet.http.HttpServletRequest;
import java.util.*;
@Controller
public class DynSegWordsToolsController {
public class DictToolsController {
private static Logger logger = LoggerFactory.getLogger(DynSegWordsToolsController.class);
@Autowired
private UpgradeDynSegWordsFlagService upgradeDynSegWordsFlagService;
@Autowired
private DynSegWordsService dynSegWordsService;
private static Logger logger = LoggerFactory.getLogger(DictToolsController.class);
@Autowired
private EsWordDefLogicService esWordDefLogicService;
... ... @@ -45,31 +39,6 @@ public class DynSegWordsToolsController {
allDictFiles.put("synonyms.dic", Integer.valueOf(7)); // 同义词规则
}
/**
 * Bulk-inserts the words of the dictionary file into the database table.
 *
 * <p>Answers with code "400" without doing any work while a force upgrade is flagged;
 * otherwise delegates to {@code dynSegWordsService.addMainWordsInFile()} and answers with
 * code "200" plus the number of words that call returned.
 *
 * @return the processing result, with the keys "code", "msg" and, on success, "count"
 */
@RequestMapping("/dictTools/loadDictFileToDB")
@ResponseBody
public Map<String, Object> loadDictFileToDB() {
    final Map<String, Object> result = new HashMap<String, Object>();

    // Refuse to touch the word table while a force upgrade is being served.
    final boolean upgradeInProgress = upgradeDynSegWordsFlagService.isForceUpgrade();
    if (upgradeInProgress) {
        result.put("code", "400");
        result.put("msg", "Force upgrade now, please try later...");
        return result;
    }

    final long startedAt = System.currentTimeMillis();
    logger.info("[DynSegWords]DynSegWordsToolsController.loadDictFileToDB start");

    final List<String> addedWords = dynSegWordsService.addMainWordsInFile();
    int addedCount = 0;
    if (addedWords != null) {
        addedCount = addedWords.size();
    }

    result.put("code", "200");
    // NOTE(review): this text matches the success message of the force-upgrade endpoint;
    // it is kept verbatim here. Confirm whether a dedicated message was intended.
    result.put("msg", "Update force upgrade seg words succeed.");
    result.put("count", addedCount);

    final long elapsedMillis = System.currentTimeMillis() - startedAt;
    logger.info("[DynSegWords]DynSegWordsToolsController.loadDictFileToDB end. cost: {}", elapsedMillis);
    return result;
}
/**
* 批量插入字典文件中的词到数据库表
... ...
package com.yoho.search.consumer.dynwords;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import com.yoho.search.dal.DynSegWordMapper;
import com.yoho.search.dal.model.DynSegWord;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.ResponseBody;
@Controller
public class DynSegWordsController {
private static Logger logger = LoggerFactory.getLogger(DynSegWordsController.class);
private static final int NO_RECORDS_CHANGED_STATUS_CODE = 304;
private static final int WORD_TYPE_MAIN = 1;
private static final int WORD_TYPE_STOP = 2;
private static final int DEFAULT_STARTTIME = 0;
@Autowired
private DynSegWordMapper dynSegWordMapper;
@Autowired
private UpgradeDynSegWordsFlagService upgradeDynSegWordsFlagService;
@RequestMapping(value = "/dictService/forceUpgrade")
@ResponseBody
public Map<String, Object> forceUpgradeDynWords() {
upgradeDynSegWordsFlagService.updateForceUpgradeTrue();
Map<String, Object> rtnMap = new HashMap<String, Object>();
rtnMap.put("code", "200");
rtnMap.put("msg", "Update force upgrade seg words succeed.");
return rtnMap;
}
@RequestMapping(method = RequestMethod.HEAD, value = "/dictService/getRemoteMainDict")
public void checkMainWords(HttpServletRequest request, HttpServletResponse response) {
// ES的ik分词插件发送HEAD请求 判断主词典是否有变更
logger.info("[DynSegWords]enter DynSegWordsController.checkMainWords");
checkNeedUpdateWordsDict(request, response, WORD_TYPE_MAIN);
logger.info("[DynSegWords]end DynSegWordsController.checkMainWords.");
}
@RequestMapping(method = RequestMethod.HEAD, value = "/dictService/getRemoteStopDict")
public void checkStopWords(HttpServletRequest request, HttpServletResponse response) {
// ES的ik分词插件发送HEAD请求 判断停用词是否有变更
logger.info("[DynSegWords]enter DynSegWordsController.checkStopWords");
checkNeedUpdateWordsDict(request, response, WORD_TYPE_STOP);
logger.info("[DynSegWords]end DynSegWordsController.checkStopWords.");
}
private void checkNeedUpdateWordsDict(HttpServletRequest request, HttpServletResponse response, Integer wordType) {
String esIp = getIpAddress(request);
logger.info("[DynSegWords]Begin to check need update seg words. wordType: {}, esIP: {}", wordType, esIp);
if (upgradeDynSegWordsFlagService.canUpgrade()
&& dynSegWordMapper.selectWordsCount(getLastModifyTime(request)) > 0) {
// 只有在开始全量建索引的时候才开始增加动态词库
String currentTime = String.valueOf((new Date()).getTime() / 1000L);
response.setHeader("Last-Modified", currentTime);
logger.info("[DynSegWords]Need to update seg words. wordType: {}", wordType);
} else {
response.setStatus(NO_RECORDS_CHANGED_STATUS_CODE);
logger.info("[DynSegWords]No need to update seg words. wordType: {}", wordType);
}
}
private Integer getLastModifyTime(HttpServletRequest request) {
String lastModifyTime = request.getHeader("If-Modified-Since");
logger.info("[DynSegWords]Get last modify time {} from request header.", lastModifyTime);
if (lastModifyTime != null && lastModifyTime.trim().length() > 0) {
try {
return Integer.valueOf(lastModifyTime.trim());
} catch (Exception e) {
// Ignore the exception
}
}
return DEFAULT_STARTTIME;
}
@RequestMapping(method = RequestMethod.GET, value = "/dictService/getRemoteMainDict")
public void getSegMainWords(HttpServletRequest request, HttpServletResponse response) {
logger.info("[DynSegWords]enter DynSegWordsController.getSegMainWords");
addDynWordsByType(request, response, WORD_TYPE_MAIN);
logger.info("[DynSegWords]end DynSegWordsController.getSegMainWords.");
}
@RequestMapping(method = RequestMethod.GET, value = "/dictService/getRemoteStopDict")
public void getSegStopWords(HttpServletRequest request, HttpServletResponse response) {
logger.info("[DynSegWords]enter DynSegWordsController.getSegStopWords");
addDynWordsByType(request, response, WORD_TYPE_STOP);
logger.info("[DynSegWords]end DynSegWordsController.getSegStopWords.");
}
private void addDynWordsByType(HttpServletRequest request, HttpServletResponse response, Integer wordType) {
String esIp = getIpAddress(request);
logger.info("[DynSegWords]Begin to add dyn seg words. wordType: {}, esIP: {}", wordType, esIp);
List<DynSegWord> words = dynSegWordMapper.selectByWordType(wordType);
int count = words != null ? words.size() : 0;
logger.info("[DynSegWords]Find {} words from DB for the word type {}.", count, wordType);
response.setContentType("text/html;charset=UTF-8");
if (count > 0) {
try {
for (DynSegWord word : words) {
response.getWriter().println(word.getWord());
}
} catch (IOException e) {
logger.warn("[DynSegWords]Add dyn words occur exception.", e);
}
logger.info("[DynSegWords]Send {} words to es {}.", count, esIp);
}
}
private String getIpAddress(HttpServletRequest request) {
String ip = request.getHeader("x-forwarded-for");
if (ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) {
ip = request.getHeader("Proxy-Client-IP");
}
if (ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) {
ip = request.getHeader("WL-Proxy-Client-IP");
}
if (ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) {
ip = request.getHeader("HTTP_CLIENT_IP");
}
if (ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) {
ip = request.getHeader("HTTP_X_FORWARDED_FOR");
}
if (ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) {
ip = request.getRemoteAddr();
}
return ip;
}
public static void main(String[] args) {
List<String> buffer = new ArrayList<String>();
RequestConfig rc = RequestConfig.custom().setConnectionRequestTimeout(10000).setConnectTimeout(10000)
.setSocketTimeout(60000).build();
CloseableHttpClient httpclient = HttpClients.createDefault();
String location = "http://localhost:8080/yoho-search-consumer-web/dictService/getRemoteMainDict";
HttpGet get = new HttpGet(location);
get.setConfig(rc);
try {
CloseableHttpResponse response = httpclient.execute(get);
if (response.getStatusLine().getStatusCode() == 200) {
String charset = "UTF-8";
if (response.getEntity().getContentType().getValue().contains("charset=")) {
String contentType = response.getEntity().getContentType().getValue();
charset = contentType.substring(contentType.lastIndexOf("=") + 1);
}
BufferedReader in = new BufferedReader(
new InputStreamReader(response.getEntity().getContent(), charset));
String line;
while ((line = in.readLine()) != null) {
buffer.add(line);
}
in.close();
response.close();
}
response.close();
} catch (ClientProtocolException e) {
logger.error("getRemoteWords {} error", e, new Object[] { location });
} catch (IllegalStateException e) {
logger.error("getRemoteWords {} error", e, new Object[] { location });
} catch (IOException e) {
logger.error("getRemoteWords {} error", e, new Object[] { location });
}
System.out.println(buffer);
}
}
package com.yoho.search.consumer.dynwords;
import com.yoho.search.base.utils.FileUtils;
import com.yoho.search.dal.DynSegWordMapper;
import com.yoho.search.dal.model.DynSegWord;
import org.apache.commons.collections.CollectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Loads the main-word dictionary file from the classpath root and inserts, in batches,
 * the words that are not yet returned by {@link DynSegWordMapper} for the main word type.
 */
@Service
public class DynSegWordsService {

    private static final Logger logger = LoggerFactory.getLogger(DynSegWordsService.class);

    private static final int WORD_TYPE_MAIN = 1;

    /** Maximum number of words passed to one batch insert. */
    private static final int BATCH_NUMBER = 5000;

    @Autowired
    private DynSegWordMapper dynSegWordMapper;

    /**
     * Reads {@code dicts/words-main.dic}, drops every word already stored for the main word
     * type and inserts the remaining words in batches of at most {@link #BATCH_NUMBER}.
     *
     * @return the list read from the file after the stored words were removed from it, i.e. the
     *         words that were handed to the batch insert; whatever {@code FileUtils.readFile}
     *         returned (possibly {@code null} or empty) when there was nothing to process
     */
    public List<String> addMainWordsInFile() {
        logger.info("[DynSegWords]DynSegWordsService.addMainWordsInFile start");
        String fileName = this.getClass().getResource("/").getPath();
        // NOTE(review): the list is modified in place below, so readFile is assumed to return a
        // mutable list; confirm against FileUtils.
        List<String> words = FileUtils.readFile(fileName + "/dicts/words-main.dic");
        int size = words != null ? words.size() : 0;
        logger.info("[DynSegWords]Size of words in the file is {}.", size);
        if (size > 0) {
            // 1. Load every word that is already stored in the database.
            List<String> wordsInDB = getSegWordsFromDBByType(WORD_TYPE_MAIN);
            logger.info("[DynSegWords]Size of words in the DB is {}.", wordsInDB.size());
            if (logger.isDebugEnabled()) {
                logger.debug("[DynSegWords]The words list in the file: " + System.getProperty("line.separator") + words);
                logger.debug("[DynSegWords]The words list in the DB: " + System.getProperty("line.separator")
                        + wordsInDB);
            }

            // 2. Keep only the words that are new. Removing against a hash set avoids scanning
            //    the whole stored list once per file word.
            Set<String> storedWords = new HashSet<String>(wordsInDB);
            words.removeAll(storedWords);
            logger.info("[DynSegWords]Size of words need to add is {}.", words.size());
            if (logger.isDebugEnabled()) {
                logger.debug("[DynSegWords]The words need to add: " + System.getProperty("line.separator") + words);
            }

            // 3. Insert batch by batch; the last batch may be shorter than BATCH_NUMBER.
            for (int startIndex = 0; startIndex < words.size(); startIndex += BATCH_NUMBER) {
                int endIndex = Math.min(startIndex + BATCH_NUMBER, words.size());
                logger.info("[DynSegWords]Begin to get batch words. begin: {}, end: {}", startIndex, endIndex);
                batchAddWords(words.subList(startIndex, endIndex));
            }
        }

        logger.info("[DynSegWords]DynSegWordsService.addMainWordsInFile finished.");
        return words;
    }

    /**
     * Wraps each word in a {@link DynSegWord} of the main word type and inserts the batch.
     */
    private void batchAddWords(List<String> batch) {
        logger.info("[DynSegWords]Begin to batch insert dyn words to DB.");
        if (logger.isDebugEnabled()) {
            logger.debug("[DynSegWords]The words batch: " + System.getProperty("line.separator") + batch);
        }

        List<DynSegWord> wordList = new ArrayList<DynSegWord>(batch.size());
        for (String word : batch) {
            wordList.add(new DynSegWord(word, WORD_TYPE_MAIN));
        }

        dynSegWordMapper.insertBatch(wordList);
        logger.info("[DynSegWords]End to batch insert dyn words to DB.");
    }

    /**
     * Returns the word text of every stored word of the given type; never {@code null}.
     */
    private List<String> getSegWordsFromDBByType(int wordType) {
        List<String> result = new ArrayList<String>();
        List<DynSegWord> wordList = dynSegWordMapper.selectByWordType(wordType);
        if (CollectionUtils.isNotEmpty(wordList)) {
            for (DynSegWord word : wordList) {
                result.add(word.getWord());
            }
        }
        return result;
    }
}
... ... @@ -14,7 +14,7 @@ public class EsWordDefLogicService {
private static final String LINE_SEPARATOR = System.getProperty("line.separator");
private static Logger logger = LoggerFactory.getLogger(DynSegWordsToolsController.class);
private static Logger logger = LoggerFactory.getLogger(DictToolsController.class);
private static final int BATCH_NUMBER = 5000;
... ...
package com.yoho.search.consumer.dynwords;
import com.yoho.search.base.utils.Configuration;
import com.yoho.search.base.utils.DateUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import java.util.Date;
/**
 * Holds the process-wide "force upgrade" flag and decides whether the segmentation words
 * may be upgraded right now.
 */
@Service
public class UpgradeDynSegWordsFlagService {

    private static final Logger logger = LoggerFactory.getLogger(UpgradeDynSegWordsFlagService.class);

    /** How long, in milliseconds, the flag stays raised after a force upgrade request. */
    private static final int WAIT_IK_UPGRADE_WORDS = 120000;

    private static volatile boolean forceUpgrade = false;

    /**
     * Raises the force-upgrade flag, sleeps on the calling thread for
     * {@link #WAIT_IK_UPGRADE_WORDS} milliseconds and then lowers the flag again. If the sleep is
     * interrupted, the interrupt status is restored and the flag is still lowered.
     */
    public void updateForceUpgradeTrue() {
        forceUpgrade = true;
        logger.info("[DynSegWords]Update force upgrade flag to true.");
        try {
            Thread.sleep(WAIT_IK_UPGRADE_WORDS);
        } catch (InterruptedException interrupted) {
            Thread.currentThread().interrupt();
        } finally {
            forceUpgrade = false;
            logger.info("[DynSegWords]Update force upgrade flag to false.");
        }
    }

    /**
     * @return the current value of the force-upgrade flag
     */
    public boolean isForceUpgrade() {
        return forceUpgrade;
    }

    /**
     * @return {@code true} while the force-upgrade flag is raised, otherwise whether the current
     *         time lies strictly between the configured lower and upper time of today
     */
    public boolean canUpgrade() {
        if (!forceUpgrade) {
            return isInsideUpgradeWindow(new Date());
        }
        logger.info("[DynSegWords]Return true for force upgrade.");
        return true;
    }

    /**
     * Checks whether the given instant falls inside the time range in which upgrading the
     * word dictionary is allowed.
     */
    private boolean isInsideUpgradeWindow(Date now) {
        final String lowerTime = Configuration.getString("upgrade.segwords.lowertime", "02:50:00");
        final String upperTime = Configuration.getString("upgrade.segwords.uppertime", "02:58:00");
        logger.info("[DynSegWords]Current Time: {}, Lower Time: {}, Upper Time: {}", now, lowerTime, upperTime);

        // Both bounds are the configured clock times combined with today's date.
        final String today = DateUtil.DateToString(now, "yyyy-MM-dd");
        final Date windowStart = DateUtil.StringToDate(today + " " + lowerTime, "yyyy-MM-dd HH:mm:ss");
        final Date windowEnd = DateUtil.StringToDate(today + " " + upperTime, "yyyy-MM-dd HH:mm:ss");

        final boolean insideWindow = now.after(windowStart) && now.before(windowEnd);
        logger.info("[DynSegWords]Return {} for time range limit.", insideWindow);
        return insideWindow;
    }
}