Showing
7 changed files
with
208 additions
and
24 deletions
@@ -11,6 +11,8 @@ public interface SuggestConversionMapper { | @@ -11,6 +11,8 @@ public interface SuggestConversionMapper { | ||
11 | 11 | ||
12 | List<SuggestConversion> selectPageList(@Param(value = "offset") Integer offset, @Param(value = "pageSize") Integer pageSize); | 12 | List<SuggestConversion> selectPageList(@Param(value = "offset") Integer offset, @Param(value = "pageSize") Integer pageSize); |
13 | 13 | ||
/**
 * Looks up the conversion record whose {@code source} column equals the given value
 * (backed by the {@code selectBySource} statement of the mapper XML).
 *
 * @param source the source keyword, matched exactly
 * @return the matching record; callers in this change treat {@code null} as "not found"
 */
14 | + SuggestConversion selectBySource(@Param(value = "source") String source); | |
15 | + | ||
14 | List<SuggestConversion> selectAll(); | 16 | List<SuggestConversion> selectAll(); |
15 | 17 | ||
16 | void insertBatch(List<SuggestConversion> list); | 18 | void insertBatch(List<SuggestConversion> list); |
@@ -22,6 +22,12 @@ | @@ -22,6 +22,12 @@ | ||
22 | from suggest_conversion limit #{offset},#{pageSize} | 22 | from suggest_conversion limit #{offset},#{pageSize} |
23 | </select> | 23 | </select> |
24 | 24 | ||
<!--
  Fetches one suggest_conversion row by exact source match.
  "limit 1" protects the single-object mapper method from TooManyResultsException
  when the same source has been inserted more than once.
  NOTE(review): MyBatis reads "timeout" in seconds, so 20000 is roughly 5.5 hours;
  the value is kept to stay consistent with the sibling statements - confirm the intent.
-->
<select id="selectBySource" resultMap="BaseResultMap" timeout="20000">
    select
    <include refid="Base_Column_List" />
    from suggest_conversion where source = #{source,jdbcType=VARCHAR} limit 1
</select>
30 | + | ||
25 | <select id="selectAll" resultMap="BaseResultMap" timeout="20000"> | 31 | <select id="selectAll" resultMap="BaseResultMap" timeout="20000"> |
26 | select * from suggest_conversion | 32 | select * from suggest_conversion |
27 | </select> | 33 | </select> |
@@ -27,6 +27,10 @@ public class SuggestConversionService { | @@ -27,6 +27,10 @@ public class SuggestConversionService { | ||
27 | return suggestConversionMapper.selectPageList(offset, pageSize); | 27 | return suggestConversionMapper.selectPageList(offset, pageSize); |
28 | } | 28 | } |
29 | 29 | ||
30 | + public SuggestConversion selectBySource(String source){ | ||
31 | + return suggestConversionMapper.selectBySource(source); | ||
32 | + } | ||
33 | + | ||
30 | public Set<String> getAllSources() { | 34 | public Set<String> getAllSources() { |
31 | List<SuggestConversion> list = suggestConversionMapper.selectAll(); | 35 | List<SuggestConversion> list = suggestConversionMapper.selectAll(); |
32 | return list == null ? new HashSet<>() : list.stream().map(SuggestConversion::getSource) | 36 | return list == null ? new HashSet<>() : list.stream().map(SuggestConversion::getSource) |
1 | package com.yoho.search.spider.controller; | 1 | package com.yoho.search.spider.controller; |
2 | 2 | ||
3 | import com.yoho.search.spider.conversation.SuggestConvertorService; | 3 | import com.yoho.search.spider.conversation.SuggestConvertorService; |
4 | -import com.yoho.search.spider.job.SpiderJob; | ||
5 | import com.yoho.search.spider.full.BaikeURLSpiderService; | 4 | import com.yoho.search.spider.full.BaikeURLSpiderService; |
6 | import com.yoho.search.spider.increment.BlackKeywordsMgr; | 5 | import com.yoho.search.spider.increment.BlackKeywordsMgr; |
6 | +import com.yoho.search.spider.increment.IncrementCrawlerService; | ||
7 | +import com.yoho.search.spider.job.SpiderJob; | ||
7 | import org.springframework.beans.factory.annotation.Autowired; | 8 | import org.springframework.beans.factory.annotation.Autowired; |
8 | import org.springframework.stereotype.Controller; | 9 | import org.springframework.stereotype.Controller; |
9 | import org.springframework.web.bind.annotation.RequestMapping; | 10 | import org.springframework.web.bind.annotation.RequestMapping; |
@@ -31,6 +32,9 @@ public class BaikeSpiderController { | @@ -31,6 +32,9 @@ public class BaikeSpiderController { | ||
31 | @Autowired | 32 | @Autowired |
32 | private BlackKeywordsMgr blackKeywordsMgr; | 33 | private BlackKeywordsMgr blackKeywordsMgr; |
33 | 34 | ||
35 | + @Autowired | ||
36 | + private IncrementCrawlerService incrementCrawlerService; | ||
37 | + | ||
34 | @RequestMapping(value = "/spider/baike") | 38 | @RequestMapping(value = "/spider/baike") |
35 | @ResponseBody | 39 | @ResponseBody |
36 | public Map<String, Object> baikeSpider() { | 40 | public Map<String, Object> baikeSpider() { |
@@ -41,13 +45,13 @@ public class BaikeSpiderController { | @@ -41,13 +45,13 @@ public class BaikeSpiderController { | ||
41 | return result; | 45 | return result; |
42 | } catch (Exception e) { | 46 | } catch (Exception e) { |
43 | Map<String, Object> result = new HashMap(); | 47 | Map<String, Object> result = new HashMap(); |
44 | - result.put("code", 503); | 48 | + result.put("code", 500); |
45 | result.put("message", e.getMessage()); | 49 | result.put("message", e.getMessage()); |
46 | return result; | 50 | return result; |
47 | } | 51 | } |
48 | } | 52 | } |
49 | 53 | ||
50 | - @RequestMapping(value = "/spider/crawerEmptyKeywords") | 54 | + @RequestMapping(value = "/spider/crawlerEmptyKeywords") |
51 | @ResponseBody | 55 | @ResponseBody |
52 | public Map<String, Object> crawerEmptyKeywords() { | 56 | public Map<String, Object> crawerEmptyKeywords() { |
53 | Map<String, Object> result = new HashMap(); | 57 | Map<String, Object> result = new HashMap(); |
@@ -58,7 +62,7 @@ public class BaikeSpiderController { | @@ -58,7 +62,7 @@ public class BaikeSpiderController { | ||
58 | result.put("message", "success"); | 62 | result.put("message", "success"); |
59 | return result; | 63 | return result; |
60 | } catch (Exception e) { | 64 | } catch (Exception e) { |
61 | - result.put("code", 503); | 65 | + result.put("code", 500); |
62 | result.put("message", e.getMessage()); | 66 | result.put("message", e.getMessage()); |
63 | return result; | 67 | return result; |
64 | } | 68 | } |
@@ -75,7 +79,7 @@ public class BaikeSpiderController { | @@ -75,7 +79,7 @@ public class BaikeSpiderController { | ||
75 | result.put("message", "success"); | 79 | result.put("message", "success"); |
76 | return result; | 80 | return result; |
77 | } catch (Exception e) { | 81 | } catch (Exception e) { |
78 | - result.put("code", 503); | 82 | + result.put("code", 500); |
79 | result.put("message", e.getMessage()); | 83 | result.put("message", e.getMessage()); |
80 | return result; | 84 | return result; |
81 | } | 85 | } |
@@ -93,7 +97,7 @@ public class BaikeSpiderController { | @@ -93,7 +97,7 @@ public class BaikeSpiderController { | ||
93 | result.put("message", "success"); | 97 | result.put("message", "success"); |
94 | return result; | 98 | return result; |
95 | } catch (Exception e) { | 99 | } catch (Exception e) { |
96 | - result.put("code", 503); | 100 | + result.put("code", 500); |
97 | result.put("message", e.getMessage()); | 101 | result.put("message", e.getMessage()); |
98 | return result; | 102 | return result; |
99 | } | 103 | } |
@@ -110,7 +114,78 @@ public class BaikeSpiderController { | @@ -110,7 +114,78 @@ public class BaikeSpiderController { | ||
110 | result.put("message", "success"); | 114 | result.put("message", "success"); |
111 | return result; | 115 | return result; |
112 | } catch (Exception e) { | 116 | } catch (Exception e) { |
113 | - result.put("code", 503); | 117 | + result.put("code", 500); |
118 | + result.put("message", e.getMessage()); | ||
119 | + return result; | ||
120 | + } | ||
121 | + } | ||
122 | + | ||
123 | + @RequestMapping(value = "/spider/single/crawle") | ||
124 | + @ResponseBody | ||
125 | + public Map<String, Object> singleCrawle(@RequestParam String keyword) { | ||
126 | + Map<String, Object> result = new HashMap(); | ||
127 | + | ||
128 | + try { | ||
129 | + result.put("result", incrementCrawlerService.crawleKeyword(keyword)); | ||
130 | + result.put("code", 200); | ||
131 | + result.put("message", "success"); | ||
132 | + return result; | ||
133 | + } catch (Exception e) { | ||
134 | + result.put("code", 500); | ||
135 | + result.put("message", e.getMessage()); | ||
136 | + return result; | ||
137 | + } | ||
138 | + } | ||
139 | + | ||
140 | + @RequestMapping(value = "/spider/single/convertor") | ||
141 | + @ResponseBody | ||
142 | + public Map<String, Object> singleConvertor(@RequestParam String keyword) { | ||
143 | + Map<String, Object> result = new HashMap(); | ||
144 | + | ||
145 | + try { | ||
146 | + result.put("result", suggestConvertorService.convertSingleKeyword(keyword)); | ||
147 | + result.put("code", 200); | ||
148 | + result.put("message", "success"); | ||
149 | + return result; | ||
150 | + } catch (Exception e) { | ||
151 | + result.put("code", 500); | ||
152 | + result.put("message", e.getMessage()); | ||
153 | + return result; | ||
154 | + } | ||
155 | + } | ||
156 | + | ||
157 | + @RequestMapping(value = "/spider/single/updateES") | ||
158 | + @ResponseBody | ||
159 | + public Map<String, Object> singleUpdateES(@RequestParam String keyword) { | ||
160 | + Map<String, Object> result = new HashMap(); | ||
161 | + | ||
162 | + try { | ||
163 | + result.put("result", suggestConvertorService.addOrUpdateIndex(keyword)); | ||
164 | + result.put("code", 200); | ||
165 | + result.put("message", "success"); | ||
166 | + return result; | ||
167 | + } catch (Exception e) { | ||
168 | + result.put("code", 500); | ||
169 | + result.put("message", e.getMessage()); | ||
170 | + return result; | ||
171 | + } | ||
172 | + } | ||
173 | + | ||
174 | + @RequestMapping(value = "/spider/single/increment") | ||
175 | + @ResponseBody | ||
176 | + public Map<String, Object> singleIncrement(@RequestParam String keyword) { | ||
177 | + Map<String, Object> result = new HashMap(); | ||
178 | + | ||
179 | + try { | ||
180 | + boolean incmentResult = incrementCrawlerService.crawleKeyword(keyword) | ||
181 | + && suggestConvertorService.convertSingleKeyword(keyword) | ||
182 | + && suggestConvertorService.addOrUpdateIndex(keyword); | ||
183 | + result.put("result", incmentResult); | ||
184 | + result.put("code", 200); | ||
185 | + result.put("message", "success"); | ||
186 | + return result; | ||
187 | + } catch (Exception e) { | ||
188 | + result.put("code", 500); | ||
114 | result.put("message", e.getMessage()); | 189 | result.put("message", e.getMessage()); |
115 | return result; | 190 | return result; |
116 | } | 191 | } |
1 | package com.yoho.search.spider.conversation; | 1 | package com.yoho.search.spider.conversation; |
2 | 2 | ||
3 | +import com.yoho.search.base.utils.DateUtil; | ||
4 | +import com.yoho.search.base.utils.ISearchConstants; | ||
3 | import com.yoho.search.consumer.index.common.AnalyzerHelper; | 5 | import com.yoho.search.consumer.index.common.AnalyzerHelper; |
6 | +import com.yoho.search.consumer.index.common.IYohoIndexService; | ||
4 | import com.yoho.search.consumer.service.base.*; | 7 | import com.yoho.search.consumer.service.base.*; |
5 | import com.yoho.search.consumer.suggests.common.SuggestionConstants; | 8 | import com.yoho.search.consumer.suggests.common.SuggestionConstants; |
6 | import com.yoho.search.dal.model.*; | 9 | import com.yoho.search.dal.model.*; |
7 | import org.apache.commons.collections.CollectionUtils; | 10 | import org.apache.commons.collections.CollectionUtils; |
8 | -import org.apache.commons.collections.map.HashedMap; | ||
9 | import org.apache.commons.lang.StringUtils; | 11 | import org.apache.commons.lang.StringUtils; |
10 | import org.slf4j.Logger; | 12 | import org.slf4j.Logger; |
11 | import org.slf4j.LoggerFactory; | 13 | import org.slf4j.LoggerFactory; |
12 | import org.springframework.beans.factory.annotation.Autowired; | 14 | import org.springframework.beans.factory.annotation.Autowired; |
13 | import org.springframework.stereotype.Component; | 15 | import org.springframework.stereotype.Component; |
16 | +import org.springframework.util.Assert; | ||
14 | 17 | ||
15 | import java.util.*; | 18 | import java.util.*; |
16 | import java.util.stream.Collectors; | 19 | import java.util.stream.Collectors; |
@@ -43,6 +46,12 @@ public class SuggestConvertorService { | @@ -43,6 +46,12 @@ public class SuggestConvertorService { | ||
43 | @Autowired | 46 | @Autowired |
44 | private SpiderContentService spiderContentService; | 47 | private SpiderContentService spiderContentService; |
45 | 48 | ||
49 | + @Autowired | ||
50 | + private SuggestConversionService suggestConversionService; | ||
51 | + | ||
52 | + @Autowired | ||
53 | + private IYohoIndexService indexService; | ||
54 | + | ||
46 | public YohoKeywordsBO buildYohoKeywordBO() { | 55 | public YohoKeywordsBO buildYohoKeywordBO() { |
47 | Set<String> keywordSet = new HashSet<>(3000); | 56 | Set<String> keywordSet = new HashSet<>(3000); |
48 | Set<String> filterSortNameSet = new HashSet<>(300); | 57 | Set<String> filterSortNameSet = new HashSet<>(300); |
@@ -188,8 +197,48 @@ public class SuggestConvertorService { | @@ -188,8 +197,48 @@ public class SuggestConvertorService { | ||
188 | return score / keywordTokens.size(); | 197 | return score / keywordTokens.size(); |
189 | } | 198 | } |
190 | 199 | ||
200 | + public boolean convertSingleKeyword(String keyword) { | ||
201 | + Assert.notNull(keyword); | ||
202 | + SpiderContent spiderContent = spiderContentService.selectSpiderContentBySubject(keyword); | ||
203 | + if (spiderContent == null) { | ||
204 | + return false; | ||
205 | + } | ||
206 | + | ||
207 | + YohoKeywordsBO yohoKeywordsBO = buildYohoKeywordBO(); | ||
208 | + String dest = convert(spiderContent, yohoKeywordsBO, new HashMap()); | ||
209 | + if (StringUtils.isNotEmpty(dest)) { | ||
210 | + SuggestConversion suggestConversion = new SuggestConversion(); | ||
211 | + suggestConversion.setSource(spiderContent.getSubject()); | ||
212 | + suggestConversion.setDest(dest); | ||
213 | + suggestConversion.setCreateTime(DateUtil.getCurrentTimeSecond()); | ||
214 | + suggestConversionService.insertBatch(Arrays.asList(suggestConversion)); | ||
215 | + return true; | ||
216 | + } | ||
217 | + | ||
218 | + return false; | ||
219 | + } | ||
220 | + | ||
221 | + public boolean addOrUpdateIndex(String keyword) { | ||
222 | + Assert.notNull(keyword); | ||
223 | + SuggestConversion suggestConversion = suggestConversionService.selectBySource(keyword); | ||
224 | + if (suggestConversion == null) { | ||
225 | + return false; | ||
226 | + } | ||
227 | + | ||
228 | + try { | ||
229 | + Map<String, String> dataMap = new HashMap<>(); | ||
230 | + dataMap.put("source", suggestConversion.getSource()); | ||
231 | + dataMap.put("dest", suggestConversion.getDest()); | ||
232 | + indexService.updateIndexData(ISearchConstants.INDEX_NAME_CONVERSION, suggestConversion.getId().toString(), dataMap); | ||
233 | + return true; | ||
234 | + } catch (Exception e) { | ||
235 | + logger.error(e.getMessage(), e); | ||
236 | + return false; | ||
237 | + } | ||
238 | + } | ||
239 | + | ||
191 | public Map<String, Object> explainConversion(String keyword) { | 240 | public Map<String, Object> explainConversion(String keyword) { |
192 | - Map<String, Object> resultMap = new HashedMap(); | 241 | + Map<String, Object> resultMap = new HashMap(); |
193 | SpiderContent spiderContent = spiderContentService.selectSpiderContentBySubject(keyword); | 242 | SpiderContent spiderContent = spiderContentService.selectSpiderContentBySubject(keyword); |
194 | if (spiderContent == null) { | 243 | if (spiderContent == null) { |
195 | throw new RuntimeException("The spider content doesn't exist"); | 244 | throw new RuntimeException("The spider content doesn't exist"); |
@@ -47,10 +47,10 @@ public class IncrementCrawlerFlow implements RetryBusinessFlow { | @@ -47,10 +47,10 @@ public class IncrementCrawlerFlow implements RetryBusinessFlow { | ||
47 | private AnalyzerHelper analyzerHelper; | 47 | private AnalyzerHelper analyzerHelper; |
48 | 48 | ||
49 | @Autowired | 49 | @Autowired |
50 | - private SpiderBasedHttpRequest spiderBasedHttpRequest; | 50 | + private BlackKeywordsMgr blackKeywordsMgr; |
51 | 51 | ||
52 | @Autowired | 52 | @Autowired |
53 | - private BlackKeywordsMgr blackKeywordsMgr; | 53 | + private IncrementCrawlerService incrementCrawlerService; |
54 | 54 | ||
55 | private List<String> validKeywordList = null; | 55 | private List<String> validKeywordList = null; |
56 | 56 | ||
@@ -133,7 +133,7 @@ public class IncrementCrawlerFlow implements RetryBusinessFlow { | @@ -133,7 +133,7 @@ public class IncrementCrawlerFlow implements RetryBusinessFlow { | ||
133 | List<BaikeBO> baikeBOList = new ArrayList<>(); | 133 | List<BaikeBO> baikeBOList = new ArrayList<>(); |
134 | BaikeBO tempBaikeBO; | 134 | BaikeBO tempBaikeBO; |
135 | for (String keyword : subListKeywords) { | 135 | for (String keyword : subListKeywords) { |
136 | - if ((tempBaikeBO = crawle(keyword)) != null) { | 136 | + if ((tempBaikeBO = incrementCrawlerService.doCrawle(keyword)) != null) { |
137 | tempBaikeBO.setTitle(keyword); | 137 | tempBaikeBO.setTitle(keyword); |
138 | baikeBOList.add(tempBaikeBO); | 138 | baikeBOList.add(tempBaikeBO); |
139 | } else { | 139 | } else { |
@@ -151,18 +151,6 @@ public class IncrementCrawlerFlow implements RetryBusinessFlow { | @@ -151,18 +151,6 @@ public class IncrementCrawlerFlow implements RetryBusinessFlow { | ||
151 | return true; | 151 | return true; |
152 | } | 152 | } |
153 | 153 | ||
154 | - private BaikeBO crawle(String keyword) { | ||
155 | - try { | ||
156 | - String url = "http://baike.baidu.com/item/" + URLEncoder.encode(keyword, "UTF-8"); | ||
157 | - logger.info("[func=IncrementCrawlerFlow][keyword={}][url={}]", keyword, url); | ||
158 | - return spiderBasedHttpRequest.get(url); | ||
159 | - } catch (Exception e) { | ||
160 | - logger.error("crawle keyword [" + keyword + "] failed!", e); | ||
161 | - } | ||
162 | - | ||
163 | - return null; | ||
164 | - } | ||
165 | - | ||
166 | @Override | 154 | @Override |
167 | public void finish(boolean doBusinessResult, Exception exception) { | 155 | public void finish(boolean doBusinessResult, Exception exception) { |
168 | this.validKeywordList = null; | 156 | this.validKeywordList = null; |
1 | +package com.yoho.search.spider.increment; | ||
2 | + | ||
3 | +import com.yoho.search.consumer.service.base.SpiderContentService; | ||
4 | +import com.yoho.search.dal.model.SpiderContent; | ||
5 | +import com.yoho.search.spider.common.BaikeBO; | ||
6 | +import com.yoho.search.spider.common.SpiderBasedHttpRequest; | ||
7 | +import org.apache.commons.lang.StringUtils; | ||
8 | +import org.slf4j.Logger; | ||
9 | +import org.slf4j.LoggerFactory; | ||
10 | +import org.springframework.beans.factory.annotation.Autowired; | ||
11 | +import org.springframework.stereotype.Component; | ||
12 | +import org.springframework.util.Assert; | ||
13 | + | ||
14 | +import java.net.URLEncoder; | ||
15 | +import java.util.Arrays; | ||
16 | + | ||
17 | +/** | ||
18 | + * Created by ginozhang on 2017/3/3. | ||
19 | + */ | ||
20 | +@Component | ||
21 | +public class IncrementCrawlerService { | ||
22 | + | ||
23 | + private static final Logger logger = LoggerFactory.getLogger("SEARCH_SPIDER"); | ||
24 | + | ||
25 | + @Autowired | ||
26 | + private SpiderContentService spiderContentService; | ||
27 | + | ||
28 | + @Autowired | ||
29 | + private SpiderBasedHttpRequest spiderBasedHttpRequest; | ||
30 | + | ||
31 | + public boolean crawleKeyword(String keyword) { | ||
32 | + Assert.isTrue(StringUtils.isNotEmpty(keyword)); | ||
33 | + SpiderContent spiderContent = spiderContentService.selectSpiderContentBySubject(keyword); | ||
34 | + if (spiderContent != null) { | ||
35 | + return true; | ||
36 | + } | ||
37 | + | ||
38 | + BaikeBO baikeBO = doCrawle(keyword); | ||
39 | + if (baikeBO != null) { | ||
40 | + spiderContentService.insertBatch(Arrays.asList(baikeBO.toSpiderContent())); | ||
41 | + return true; | ||
42 | + } | ||
43 | + | ||
44 | + return false; | ||
45 | + } | ||
46 | + | ||
47 | + | ||
48 | + public BaikeBO doCrawle(String keyword) { | ||
49 | + try { | ||
50 | + String url = "http://baike.baidu.com/item/" + URLEncoder.encode(keyword, "UTF-8"); | ||
51 | + logger.info("[func=IncrementCrawlerFlow][keyword={}][url={}]", keyword, url); | ||
52 | + return spiderBasedHttpRequest.get(url); | ||
53 | + } catch (Exception e) { | ||
54 | + logger.error("crawle keyword [" + keyword + "] failed!", e); | ||
55 | + } | ||
56 | + | ||
57 | + return null; | ||
58 | + } | ||
59 | + | ||
60 | +} |
-
Please register or login to post a comment