Authored by Gino Zhang

删除DynSegWord相关的废弃代码

1 -package com.yoho.search.dal;  
2 -  
3 -import java.util.List;  
4 -  
5 -import com.yoho.search.dal.model.DynSegWord;  
6 -import org.apache.ibatis.annotations.Param;  
7 -  
8 -public interface DynSegWordMapper {  
9 -  
10 - void insertBatch(List<DynSegWord> list);  
11 -  
12 - List<DynSegWord> selectByWordType(@Param(value = "wordType") Integer wordType);  
13 -  
14 - int selectWordsCount(@Param(value = "updateSinceThisTime") Integer updateSinceThisTime);  
15 -}  
1 -<?xml version="1.0" encoding="UTF-8" ?>  
2 -<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd" >  
3 -<mapper namespace="com.yoho.search.dal.DynSegWordMapper">  
4 - <resultMap id="DynSegWordResultMap" type="com.yoho.search.dal.model.DynSegWord">  
5 - <id column="id" property="id" jdbcType="INTEGER" />  
6 - <result column="word_type" property="wordType" jdbcType="INTEGER" />  
7 - <result column="word" property="word" jdbcType="VARCHAR" />  
8 - <result column="last_update_time" property="lastUpdateTime"  
9 - jdbcType="INTEGER" />  
10 - </resultMap>  
11 - <sql id="DynSegWord_Column_List">  
12 - id, word_type, word, last_update_time  
13 - </sql>  
14 -  
15 - <insert id="insertBatch" parameterType="java.util.List" timeout="20000">  
16 - insert ignore into dyn_seg_word (word_type, word, last_update_time)  
17 - values  
18 - <foreach collection="list" item="item" index="index"  
19 - separator=",">  
20 - (#{item.wordType,jdbcType=INTEGER},  
21 - #{item.word,jdbcType=VARCHAR},  
22 - #{item.lastUpdateTime,jdbcType=INTEGER})  
23 - </foreach>  
24 - </insert>  
25 -  
26 - <select id="selectByWordType" resultMap="DynSegWordResultMap"  
27 - timeout="20000">  
28 - select  
29 - <include refid="DynSegWord_Column_List" />  
30 - from dyn_seg_word where word_type = #{wordType,jdbcType=INTEGER}  
31 - </select>  
32 -  
33 - <select id="selectWordsCount" resultType="java.lang.Integer"  
34 - timeout="20000">  
35 - SELECT count(1) FROM dyn_seg_word where  
36 - last_update_time >=  
37 - #{updateSinceThisTime,jdbcType=INTEGER} LIMIT 1  
38 - </select>  
39 -</mapper>  
@@ -12,15 +12,9 @@ import javax.servlet.http.HttpServletRequest;
12 12 import java.util.*;
13 13
14 14 @Controller
15 -public class DynSegWordsToolsController {
15 +public class DictToolsController {
16 16
17 - private static Logger logger = LoggerFactory.getLogger(DynSegWordsToolsController.class);
18 -
19 - @Autowired
20 - private UpgradeDynSegWordsFlagService upgradeDynSegWordsFlagService;
21 -
22 - @Autowired
23 - private DynSegWordsService dynSegWordsService;
17 + private static Logger logger = LoggerFactory.getLogger(DictToolsController.class);
24 18
25 19 @Autowired
26 20 private EsWordDefLogicService esWordDefLogicService;
@@ -45,31 +39,6 @@ public class DynSegWordsToolsController {
45 39 allDictFiles.put("synonyms.dic", Integer.valueOf(7)); // 同义词规则
46 40 }
47 41
48 - /**  
49 - * 批量插入字典文件中的词到数据库表  
50 - *  
51 - * @return 处理结果  
52 - */  
53 - @RequestMapping(value = "/dictTools/loadDictFileToDB")  
54 - @ResponseBody  
55 - public Map<String, Object> loadDictFileToDB() {  
56 - Map<String, Object> rtnMap = new HashMap<String, Object>();  
57 - if (upgradeDynSegWordsFlagService.isForceUpgrade()) {  
58 - rtnMap.put("code", "400");  
59 - rtnMap.put("msg", "Force upgrade now, please try later...");  
60 - return rtnMap;  
61 - }  
62 -  
63 - long begin = System.currentTimeMillis();  
64 - logger.info("[DynSegWords]DynSegWordsToolsController.loadDictFileToDB start");  
65 - List<String> wordsToAdd = dynSegWordsService.addMainWordsInFile();  
66 - rtnMap.put("code", "200");  
67 - rtnMap.put("msg", "Update force upgrade seg words succeed.");  
68 - rtnMap.put("count", wordsToAdd == null ? 0 : wordsToAdd.size());  
69 - logger.info("[DynSegWords]DynSegWordsToolsController.loadDictFileToDB end. cost: {}",  
70 - System.currentTimeMillis() - begin);  
71 - return rtnMap;  
72 - }  
73 42
74 43 /**
75 44 * 批量插入字典文件中的词到数据库表
1 -package com.yoho.search.consumer.dynwords;  
2 -  
3 -import java.io.BufferedReader;  
4 -import java.io.IOException;  
5 -import java.io.InputStreamReader;  
6 -import java.util.ArrayList;  
7 -import java.util.Date;  
8 -import java.util.HashMap;  
9 -import java.util.List;  
10 -import java.util.Map;  
11 -  
12 -import javax.servlet.http.HttpServletRequest;  
13 -import javax.servlet.http.HttpServletResponse;  
14 -  
15 -import com.yoho.search.dal.DynSegWordMapper;  
16 -import com.yoho.search.dal.model.DynSegWord;  
17 -import org.apache.http.client.ClientProtocolException;  
18 -import org.apache.http.client.config.RequestConfig;  
19 -import org.apache.http.client.methods.CloseableHttpResponse;  
20 -import org.apache.http.client.methods.HttpGet;  
21 -import org.apache.http.impl.client.CloseableHttpClient;  
22 -import org.apache.http.impl.client.HttpClients;  
23 -import org.slf4j.Logger;  
24 -import org.slf4j.LoggerFactory;  
25 -import org.springframework.beans.factory.annotation.Autowired;  
26 -import org.springframework.stereotype.Controller;  
27 -import org.springframework.web.bind.annotation.RequestMapping;  
28 -import org.springframework.web.bind.annotation.RequestMethod;  
29 -import org.springframework.web.bind.annotation.ResponseBody;  
30 -  
31 -@Controller  
32 -public class DynSegWordsController {  
33 -  
34 - private static Logger logger = LoggerFactory.getLogger(DynSegWordsController.class);  
35 -  
36 - private static final int NO_RECORDS_CHANGED_STATUS_CODE = 304;  
37 -  
38 - private static final int WORD_TYPE_MAIN = 1;  
39 -  
40 - private static final int WORD_TYPE_STOP = 2;  
41 -  
42 - private static final int DEFAULT_STARTTIME = 0;  
43 -  
44 - @Autowired  
45 - private DynSegWordMapper dynSegWordMapper;  
46 -  
47 - @Autowired  
48 - private UpgradeDynSegWordsFlagService upgradeDynSegWordsFlagService;  
49 -  
50 - @RequestMapping(value = "/dictService/forceUpgrade")  
51 - @ResponseBody  
52 - public Map<String, Object> forceUpgradeDynWords() {  
53 - upgradeDynSegWordsFlagService.updateForceUpgradeTrue();  
54 - Map<String, Object> rtnMap = new HashMap<String, Object>();  
55 - rtnMap.put("code", "200");  
56 - rtnMap.put("msg", "Update force upgrade seg words succeed.");  
57 - return rtnMap;  
58 - }  
59 -  
60 - @RequestMapping(method = RequestMethod.HEAD, value = "/dictService/getRemoteMainDict")  
61 - public void checkMainWords(HttpServletRequest request, HttpServletResponse response) {  
62 - // ES的ik分词插件发送HEAD请求 判断主词典是否有变更  
63 - logger.info("[DynSegWords]enter DynSegWordsController.checkMainWords");  
64 - checkNeedUpdateWordsDict(request, response, WORD_TYPE_MAIN);  
65 - logger.info("[DynSegWords]end DynSegWordsController.checkMainWords.");  
66 - }  
67 -  
68 - @RequestMapping(method = RequestMethod.HEAD, value = "/dictService/getRemoteStopDict")  
69 - public void checkStopWords(HttpServletRequest request, HttpServletResponse response) {  
70 - // ES的ik分词插件发送HEAD请求 判断停用词是否有变更  
71 - logger.info("[DynSegWords]enter DynSegWordsController.checkStopWords");  
72 - checkNeedUpdateWordsDict(request, response, WORD_TYPE_STOP);  
73 - logger.info("[DynSegWords]end DynSegWordsController.checkStopWords.");  
74 - }  
75 -  
76 - private void checkNeedUpdateWordsDict(HttpServletRequest request, HttpServletResponse response, Integer wordType) {  
77 - String esIp = getIpAddress(request);  
78 - logger.info("[DynSegWords]Begin to check need update seg words. wordType: {}, esIP: {}", wordType, esIp);  
79 - if (upgradeDynSegWordsFlagService.canUpgrade()  
80 - && dynSegWordMapper.selectWordsCount(getLastModifyTime(request)) > 0) {  
81 - // 只有在开始全量建索引的时候才开始增加动态词库  
82 - String currentTime = String.valueOf((new Date()).getTime() / 1000L);  
83 - response.setHeader("Last-Modified", currentTime);  
84 - logger.info("[DynSegWords]Need to update seg words. wordType: {}", wordType);  
85 - } else {  
86 - response.setStatus(NO_RECORDS_CHANGED_STATUS_CODE);  
87 - logger.info("[DynSegWords]No need to update seg words. wordType: {}", wordType);  
88 - }  
89 - }  
90 -  
91 - private Integer getLastModifyTime(HttpServletRequest request) {  
92 - String lastModifyTime = request.getHeader("If-Modified-Since");  
93 - logger.info("[DynSegWords]Get last modify time {} from request header.", lastModifyTime);  
94 - if (lastModifyTime != null && lastModifyTime.trim().length() > 0) {  
95 - try {  
96 - return Integer.valueOf(lastModifyTime.trim());  
97 - } catch (Exception e) {  
98 - // Ignore the exception  
99 - }  
100 - }  
101 -  
102 - return DEFAULT_STARTTIME;  
103 - }  
104 -  
105 - @RequestMapping(method = RequestMethod.GET, value = "/dictService/getRemoteMainDict")  
106 - public void getSegMainWords(HttpServletRequest request, HttpServletResponse response) {  
107 - logger.info("[DynSegWords]enter DynSegWordsController.getSegMainWords");  
108 - addDynWordsByType(request, response, WORD_TYPE_MAIN);  
109 - logger.info("[DynSegWords]end DynSegWordsController.getSegMainWords.");  
110 - }  
111 -  
112 - @RequestMapping(method = RequestMethod.GET, value = "/dictService/getRemoteStopDict")  
113 - public void getSegStopWords(HttpServletRequest request, HttpServletResponse response) {  
114 - logger.info("[DynSegWords]enter DynSegWordsController.getSegStopWords");  
115 - addDynWordsByType(request, response, WORD_TYPE_STOP);  
116 - logger.info("[DynSegWords]end DynSegWordsController.getSegStopWords.");  
117 - }  
118 -  
119 - private void addDynWordsByType(HttpServletRequest request, HttpServletResponse response, Integer wordType) {  
120 - String esIp = getIpAddress(request);  
121 - logger.info("[DynSegWords]Begin to add dyn seg words. wordType: {}, esIP: {}", wordType, esIp);  
122 - List<DynSegWord> words = dynSegWordMapper.selectByWordType(wordType);  
123 - int count = words != null ? words.size() : 0;  
124 - logger.info("[DynSegWords]Find {} words from DB for the word type {}.", count, wordType);  
125 - response.setContentType("text/html;charset=UTF-8");  
126 - if (count > 0) {  
127 - try {  
128 - for (DynSegWord word : words) {  
129 - response.getWriter().println(word.getWord());  
130 - }  
131 - } catch (IOException e) {  
132 - logger.warn("[DynSegWords]Add dyn words occur exception.", e);  
133 - }  
134 -  
135 - logger.info("[DynSegWords]Send {} words to es {}.", count, esIp);  
136 - }  
137 - }  
138 -  
139 - private String getIpAddress(HttpServletRequest request) {  
140 - String ip = request.getHeader("x-forwarded-for");  
141 - if (ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) {  
142 - ip = request.getHeader("Proxy-Client-IP");  
143 - }  
144 - if (ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) {  
145 - ip = request.getHeader("WL-Proxy-Client-IP");  
146 - }  
147 - if (ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) {  
148 - ip = request.getHeader("HTTP_CLIENT_IP");  
149 - }  
150 - if (ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) {  
151 - ip = request.getHeader("HTTP_X_FORWARDED_FOR");  
152 - }  
153 - if (ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) {  
154 - ip = request.getRemoteAddr();  
155 - }  
156 - return ip;  
157 - }  
158 -  
159 - public static void main(String[] args) {  
160 - List<String> buffer = new ArrayList<String>();  
161 -  
162 - RequestConfig rc = RequestConfig.custom().setConnectionRequestTimeout(10000).setConnectTimeout(10000)  
163 - .setSocketTimeout(60000).build();  
164 - CloseableHttpClient httpclient = HttpClients.createDefault();  
165 -  
166 - String location = "http://localhost:8080/yoho-search-consumer-web/dictService/getRemoteMainDict";  
167 - HttpGet get = new HttpGet(location);  
168 - get.setConfig(rc);  
169 - try {  
170 - CloseableHttpResponse response = httpclient.execute(get);  
171 - if (response.getStatusLine().getStatusCode() == 200) {  
172 - String charset = "UTF-8";  
173 - if (response.getEntity().getContentType().getValue().contains("charset=")) {  
174 - String contentType = response.getEntity().getContentType().getValue();  
175 - charset = contentType.substring(contentType.lastIndexOf("=") + 1);  
176 - }  
177 - BufferedReader in = new BufferedReader(  
178 - new InputStreamReader(response.getEntity().getContent(), charset));  
179 - String line;  
180 - while ((line = in.readLine()) != null) {  
181 - buffer.add(line);  
182 - }  
183 - in.close();  
184 - response.close();  
185 - }  
186 - response.close();  
187 - } catch (ClientProtocolException e) {  
188 - logger.error("getRemoteWords {} error", e, new Object[] { location });  
189 - } catch (IllegalStateException e) {  
190 - logger.error("getRemoteWords {} error", e, new Object[] { location });  
191 - } catch (IOException e) {  
192 - logger.error("getRemoteWords {} error", e, new Object[] { location });  
193 - }  
194 - System.out.println(buffer);  
195 - }  
196 -}  
1 -package com.yoho.search.consumer.dynwords;  
2 -  
3 -import com.yoho.search.base.utils.FileUtils;  
4 -import com.yoho.search.dal.DynSegWordMapper;  
5 -import com.yoho.search.dal.model.DynSegWord;  
6 -import org.apache.commons.collections.CollectionUtils;  
7 -import org.slf4j.Logger;  
8 -import org.slf4j.LoggerFactory;  
9 -import org.springframework.beans.factory.annotation.Autowired;  
10 -import org.springframework.stereotype.Service;  
11 -  
12 -import java.util.ArrayList;  
13 -import java.util.List;  
14 -  
15 -@Service  
16 -public class DynSegWordsService {  
17 -  
18 - private static Logger logger = LoggerFactory.getLogger(DynSegWordsToolsController.class);  
19 -  
20 - private static final int WORD_TYPE_MAIN = 1;  
21 -  
22 - private static final int BATCH_NUMBER = 5000;  
23 -  
24 - @Autowired  
25 - private DynSegWordMapper dynSegWordMapper;  
26 -  
27 - public List<String> addMainWordsInFile() {  
28 - logger.info("[DynSegWords]DynSegWordsService.addMainWordsInFile start");  
29 - String fileName = this.getClass().getResource("/").getPath();  
30 - List<String> words = FileUtils.readFile(fileName + "/dicts/words-main.dic");  
31 - int size = words != null ? words.size() : 0;  
32 - logger.info("[DynSegWords]Size of words in the file is {}.", size);  
33 - if (size > 0) {  
34 - // 1. 先从数据库里查询出所有的  
35 - List<String> wordsInDB = getSegWordsFromDBByType(WORD_TYPE_MAIN);  
36 - logger.info("[DynSegWords]Size of words in the DB is {}.", wordsInDB.size());  
37 - if (logger.isDebugEnabled()) {  
38 - logger.debug("[DynSegWords]The words list in the file: " + System.getProperty("line.separator") + words);  
39 - logger.debug("[DynSegWords]The words list in the DB: " + System.getProperty("line.separator")  
40 - + wordsInDB);  
41 - }  
42 -  
43 - // 2. 获取所有增量的词  
44 - words.removeAll(wordsInDB);  
45 - logger.info("[DynSegWords]Size of words need to add is {}.", words.size());  
46 - if (logger.isDebugEnabled()) {  
47 - logger.debug("[DynSegWords]The words need to add: " + System.getProperty("line.separator") + words);  
48 - }  
49 -  
50 - // 3. 分批次插入  
51 - int startIndex = 0;  
52 - List<String> batch;  
53 - while (startIndex < words.size()) {  
54 - if (startIndex + BATCH_NUMBER <= words.size()) {  
55 - logger.info("[DynSegWords]Begin to get batch words. begin: {}, end: {}", startIndex, startIndex  
56 - + BATCH_NUMBER);  
57 - batch = words.subList(startIndex, startIndex + BATCH_NUMBER);  
58 - batchAddWords(batch);  
59 - startIndex = startIndex + BATCH_NUMBER;  
60 - } else {  
61 - logger.info("[DynSegWords]Begin to get batch words. begin: {}, end: {}", startIndex, words.size());  
62 - batch = words.subList(startIndex, words.size());  
63 - batchAddWords(batch);  
64 - break;  
65 - }  
66 - }  
67 - }  
68 -  
69 - logger.info("[DynSegWords]DynSegWordsService.addMainWordsInFile finished.");  
70 - return words;  
71 - }  
72 -  
73 - private void batchAddWords(List<String> batch) {  
74 - logger.info("[DynSegWords]Begin to batch insert dyn words to DB.");  
75 - if (logger.isDebugEnabled()) {  
76 - logger.debug("[DynSegWords]The words batch: " + System.getProperty("line.separator") + batch);  
77 - }  
78 - List<DynSegWord> wordList = new ArrayList<DynSegWord>();  
79 - for (String word : batch) {  
80 - wordList.add(new DynSegWord(word, WORD_TYPE_MAIN));  
81 - }  
82 -  
83 - dynSegWordMapper.insertBatch(wordList);  
84 - logger.info("[DynSegWords]End to batch insert dyn words to DB.");  
85 - }  
86 -  
87 - private List<String> getSegWordsFromDBByType(int wordType) {  
88 - List<String> result = new ArrayList<String>();  
89 - List<DynSegWord> wordList = dynSegWordMapper.selectByWordType(wordType);  
90 - if (CollectionUtils.isNotEmpty(wordList)) {  
91 - for (DynSegWord word : wordList) {  
92 - result.add(word.getWord());  
93 - }  
94 - }  
95 -  
96 - return result;  
97 - }  
98 -  
99 -}  
@@ -14,7 +14,7 @@ public class EsWordDefLogicService {
14 14
15 15 private static final String LINE_SEPARATOR = System.getProperty("line.separator");
16 16
17 - private static Logger logger = LoggerFactory.getLogger(DynSegWordsToolsController.class);
17 + private static Logger logger = LoggerFactory.getLogger(DictToolsController.class);
18 18
19 19 private static final int BATCH_NUMBER = 5000;
20 20
1 -package com.yoho.search.consumer.dynwords;  
2 -  
3 -import com.yoho.search.base.utils.Configuration;  
4 -import com.yoho.search.base.utils.DateUtil;  
5 -import org.slf4j.Logger;  
6 -import org.slf4j.LoggerFactory;  
7 -import org.springframework.stereotype.Service;  
8 -  
9 -import java.util.Date;  
10 -  
11 -  
12 -@Service  
13 -public class UpgradeDynSegWordsFlagService {  
14 -  
15 - private static final Logger logger = LoggerFactory.getLogger(UpgradeDynSegWordsFlagService.class);  
16 -  
17 - private static final int WAIT_IK_UPGRADE_WORDS = 120000;  
18 -  
19 - private static volatile boolean forceUpgrade = false;  
20 -  
21 - public void updateForceUpgradeTrue() {  
22 - forceUpgrade = true;  
23 - logger.info("[DynSegWords]Update force upgrade flag to true.");  
24 - try {  
25 - Thread.sleep(WAIT_IK_UPGRADE_WORDS);  
26 - } catch (InterruptedException e) {  
27 - Thread.currentThread().interrupt();  
28 - } finally {  
29 - forceUpgrade = false;  
30 - logger.info("[DynSegWords]Update force upgrade flag to false.");  
31 - }  
32 - }  
33 -  
34 - public boolean isForceUpgrade() {  
35 - return forceUpgrade;  
36 - }  
37 -  
38 - public boolean canUpgrade() {  
39 - if (forceUpgrade) {  
40 - logger.info("[DynSegWords]Return true for force upgrade.");  
41 - return true;  
42 - }  
43 -  
44 - // 判断当前时间是否在允许更新词库的时间范围内  
45 - Date currentDate = new Date();  
46 - String lowerTime = Configuration.getString("upgrade.segwords.lowertime", "02:50:00");  
47 - String upperTime = Configuration.getString("upgrade.segwords.uppertime", "02:58:00");  
48 - logger.info("[DynSegWords]Current Time: {}, Lower Time: {}, Upper Time: {}", currentDate, lowerTime, upperTime);  
49 -  
50 - String currentDateString = DateUtil.DateToString(currentDate, "yyyy-MM-dd");  
51 - Date lowerDate = DateUtil.StringToDate(currentDateString + " " + lowerTime, "yyyy-MM-dd HH:mm:ss");  
52 - Date upperDate = DateUtil.StringToDate(currentDateString + " " + upperTime, "yyyy-MM-dd HH:mm:ss");  
53 - boolean result = currentDate.after(lowerDate) && currentDate.before(upperDate);  
54 - logger.info("[DynSegWords]Return {} for time range limit.", result);  
55 - return result;  
56 - }  
57 -  
58 -}