|
@@ -11,6 +11,13 @@ const helper = global.yoho.helpers; |
|
@@ -11,6 +11,13 @@ const helper = global.yoho.helpers; |
11
|
const config = require('../config/config');
|
11
|
const config = require('../config/config');
|
12
|
const qs = require('querystring');
|
12
|
const qs = require('querystring');
|
13
|
const fs = require('fs');
|
13
|
const fs = require('fs');
|
|
|
14
|
+const util = require('../libs/util');
|
|
|
15
|
+
|
|
|
16
|
/**
 * Default response envelope used by the helpers in this file.
 *
 * It is never returned directly: callers build their result with
 * Object.assign({}, ret, overrides). That copy is shallow, so the nested
 * `data` object is shared by every result that does not override it.
 * Both levels are frozen so the shared template cannot be mutated by accident;
 * the copies produced by Object.assign remain ordinary, writable objects.
 */
const ret = Object.freeze({
    code: 200,
    message: '',
    data: Object.freeze({})
});
|
14
|
|
21
|
|
15
|
/**
|
22
|
/**
|
16
|
* redis multi command
|
23
|
* redis multi command
|
|
@@ -217,6 +224,19 @@ const writeFile = (file, str) => { |
|
@@ -217,6 +224,19 @@ const writeFile = (file, str) => { |
217
|
* @type {{getKeyWordsUrl}}
|
224
|
* @type {{getKeyWordsUrl}}
|
218
|
*/
|
225
|
*/
|
219
|
|
226
|
|
|
|
227
|
/**
 * Read a slice of the redis keyword list and strip the storage prefix
 * from each entry.
 *
 * @param {number} start - first list index handed to lrangeAsync
 * @param {number} end - last list index handed to lrangeAsync
 * @returns {Promise<Array<string>>} the entries with the first occurrence of
 *     'keywords_mana:' removed
 */
const getRedisKeywords2 = (start, end) => {
    const stripPrefix = entry => entry.replace('keywords_mana:', '');

    return redis
        .lrangeAsync(redisKey.keywordsList, start, end)
        .then(entries => _.map(entries, stripPrefix));
};
|
|
|
234
|
+
|
|
|
235
|
+/**
|
|
|
236
|
+ * 查询 redis中 关键词
|
|
|
237
|
+ * @type {{getKeyWordsUrl}}
|
|
|
238
|
+ */
|
|
|
239
|
+
|
220
|
const getRedisKeywords = (start, end) => {
|
240
|
const getRedisKeywords = (start, end) => {
|
221
|
return redis.lrangeAsync(redisKey.keywordsList, start, end).then(res => {
|
241
|
return redis.lrangeAsync(redisKey.keywordsList, start, end).then(res => {
|
222
|
let urls = {pc: [], wap: []};
|
242
|
let urls = {pc: [], wap: []};
|
|
@@ -274,8 +294,90 @@ const sendKeywordsUrls = () => { |
|
@@ -274,8 +294,90 @@ const sendKeywordsUrls = () => { |
274
|
});
|
294
|
});
|
275
|
};
|
295
|
};
|
276
|
|
296
|
|
|
|
297
|
/**
 * Request a keyword page and report the cache status the server answered with.
 *
 * The returned promise never rejects; it always resolves with a copy of `ret`:
 *  - empty url: code 400, message 'url is empty', data {}
 *  - success:   data {url, cache}, cache being the 'x-cache-status' response header
 *  - failure:   data {url}; code is the error's HTTP status when it has one,
 *               otherwise 500 with the error text in `message` (timeouts and
 *               connection errors carry no statusCode, which previously left
 *               `code` undefined)
 *
 * @param {string} url - absolute URL to request
 * @returns {Promise<{code: number, message: string, data: Object}>} result envelope
 */
const rpKeyWordsUrl = (url) => {
    if (!url) {
        return Promise.resolve(Object.assign({}, ret, {
            code: 400,
            message: 'url is empty',
            data: {} // fresh object so the result does not alias the shared ret.data
        }));
    }

    return rp({
        uri: url,
        method: 'GET',
        headers: {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 Chrome/55.0.2883.75 Safari/537.36'
        },
        resolveWithFullResponse: true, // need the full response (headers, statusCode), not only the body
        timeout: 10 * 1000,
        gzip: true
    }).then(response => {
        return Object.assign({}, ret, {
            data: {
                url: url,
                cache: response.headers['x-cache-status']
            }
        });
    }).catch(err => {
        const status = err && err.statusCode;

        return Object.assign({}, ret, {
            code: status || 500,

            // an HTTP status already says what went wrong; transport errors need their text
            message: status ? ret.message : String((err && err.message) || err),
            data: {
                url: url
            }
        });
    });
};
|
|
|
330
|
+
|
|
|
331
|
/**
 * Walk the redis keyword list page by page and request each keyword's search
 * page on the www1, www2, m1 and m2 hosts through rpKeyWordsUrl.
 * NOTE(review): presumably this exists to warm the page caches - confirm.
 *
 * Progress lives in the redis hash 'yohoSeo', field 'rpPage': every 300ms a
 * tick stores the page it is about to process, waits 100ms, loads `size`
 * keywords and fires four requests per keyword. When a page comes back empty
 * the timer is stopped and 'rpPage' is reset to 1.
 *
 * The returned promise resolves as soon as the timer is scheduled, not when
 * the walk has finished.
 *
 * @returns {Promise<number|string>} the page the walk starts from: the stored
 *     'rpPage' value as redis returned it, or 1 when nothing is stored
 */
const visitKeyWordsUrl = () => {
    const size = 5; // keywords per page
    const hosts = ['www1', 'www2', 'm1', 'm2'];

    return redis.hgetAsync('yohoSeo', 'rpPage').then(page => {
        const startPage = page || 1;
        const parsedPage = parseInt(startPage, 10);

        // a stored value that is not a positive integer would produce negative or NaN list ranges
        let nextPage = parsedPage >= 1 ? parsedPage : 1;

        const intval = setInterval(() => {
            // ticks may overlap (300ms period, 10s request timeout), so each tick
            // keeps its own page number instead of sharing one mutable variable
            const currentPage = nextPage++;

            return redis.hsetAsync('yohoSeo', 'rpPage', currentPage).then(() => {
                return util.sleep(100);
            }).then(() => {
                return getRedisKeywords2((currentPage - 1) * size, currentPage * size - 1);
            }).then(keywords => {
                if (!keywords || keywords.length <= 0) {
                    // end of the list: stop ticking and start from page 1 next time
                    clearInterval(intval);
                    return redis.hsetAsync('yohoSeo', 'rpPage', 1).then(() => {
                        return [];
                    });
                }

                const requests = [];

                _.forEach(keywords, keyword => {
                    const hex = Buffer.from(keyword).toString('hex').toUpperCase();

                    // TODO: record the keywords that have already been pushed
                    _.forEach(hosts, host => {
                        requests.push(rpKeyWordsUrl(`https://${host}.yohobuy.com/so/${hex}.html`));
                    });
                });

                return Promise.all(requests);
            }).then(results => {
                console.log(`rpKeyWordsUrl => page: ${currentPage}, result: ${JSON.stringify(results)}`);
                return results;
            }).catch(err => {
                // a failed tick must not surface as an unhandled rejection; later ticks keep running
                console.error(`rpKeyWordsUrl => page: ${currentPage}, error: ${(err && err.stack) || err}`);
            });
        }, 300);

        return startPage;
    });
};
|
|
|
377
|
+
|
277
|
module.exports = {
|
378
|
module.exports = {
|
278
|
sendUrls,
|
379
|
sendUrls,
|
279
|
synchronousKeywords,
|
380
|
synchronousKeywords,
|
280
|
- sendKeywordsUrls
|
381
|
+ sendKeywordsUrls,
|
|
|
382
|
+ visitKeyWordsUrl
|
281
|
}; |
383
|
}; |