// spider-community.js
const rp = require('request-promise');
const lockup = require('node-lockup');
const {logger} = require('./libs/logger');
const config = require('./spider-buyers.json');
const MysqlAdapter = require('./libs/mysql');
// Fallback starting id, used by getMaxId when `trend_log` yields no usable max trendId.
const DEFAULT_MAX_ID = 21913490;
// Base URL of the trend share page; requestPage appends the trend id to it.
const REQUEST_URL = `https://m.poizon.com/trend/share?trendId=`;
// Shared MySQL adapter, built from the `connect` section of spider-buyers.json.
const mysql = new MysqlAdapter(config.connect, config.connect.database);
/**
 * Fetch the share page of one trend and extract the JSON payload embedded in it.
 *
 * The payload is the text that follows the `that.data = ` marker, up to and
 * including the closing brace of the first `};` after it.
 *
 * @param {number} trendId - Id appended to REQUEST_URL.
 * @param {number} tid - Lane index; nothing is requested when `skip[tid]` is set.
 * @param {number} [errTick=0] - Failed attempts so far (used by the retry recursion).
 * @returns {Promise<object|string|undefined>} The parsed payload; a string (page
 *   text) when the page carries no parsable payload; `undefined` when the lane is
 *   skipped or when three attempts in a row failed.
 */
const requestPage = async (trendId, tid, errTick = 0) => {
  const DATA_MARKER = 'that.data = ';
  const DATA_TERMINATOR = '};';

  if (skip[tid]) {
    return undefined;
  }
  try {
    const html = await rp.get({
      url: `${REQUEST_URL}${trendId}`,
      headers: {
        Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        appVersion: '3.5.0',
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
      },
      timeout: 2000,
    });

    // Without this guard a page lacking the marker would be sliced from an
    // arbitrary offset (-1 + marker length) and unrelated text could be parsed.
    const markerIndex = html.indexOf(DATA_MARKER);
    if (markerIndex === -1) {
      return html;
    }

    const remainder = html.substring(markerIndex + DATA_MARKER.length);
    const terminatorIndex = remainder.indexOf(DATA_TERMINATOR);
    if (terminatorIndex === -1) {
      return remainder;
    }

    try {
      // +1 keeps the closing brace and drops the semicolon.
      return JSON.parse(remainder.substring(0, terminatorIndex + 1));
    } catch (error) {
      // Not valid JSON: hand back the text so the caller sees a non-object result.
      return remainder;
    }
  } catch (error) {
    const failures = errTick + 1;
    logger.error(`request error: ${error.message}, trendId: ${trendId} networkErrTick: ${failures}`);
    if (failures < 3) {
      return requestPage(trendId, tid, failures);
    }
    return undefined;
  }
};
/**
 * Store one crawled trend as a row of `trend_log`.
 *
 * @param {object} params - Values for the named placeholders of the statement
 *   (trendId, userId, createAt, userName, icon, sex, readCount, reply, fav, city, images).
 * @returns {Promise<*>} Whatever `mysql.insert` resolves with, or `undefined`
 *   when the insert fails (the failure is logged, not rethrown).
 */
const insertTable = async (params) => {
  const statement = 'insert `trend_log` (`trendId`, `userId`, `createAt`, `userName`, `icon`, `sex`, `readCount`, `reply`, `fav`, `city`, `images`) values (:trendId, :userId, :createAt, :userName, :icon, :sex, :readCount, :reply, :fav, :city, :images)';
  try {
    return await mysql.insert(statement, params);
  } catch (failure) {
    // Best effort: a row that cannot be written must not stop the crawl.
    logger.error(`error: ${failure.message}, data: ${JSON.stringify(params)}`);
    return undefined;
  }
};
/**
 * Look up the highest `trendId` already present in `trend_log`.
 *
 * @returns {Promise<number>} The stored maximum, or DEFAULT_MAX_ID when the
 *   query returns no row or a falsy maximum.
 */
const getMaxId = async () => {
  const rows = await mysql.query('select max(`trendId`) as maxId from trend_log');
  if (!rows.length) {
    return DEFAULT_MAX_ID;
  }
  const [first] = [rows[0]];
  return first.maxId ? first.maxId : DEFAULT_MAX_ID;
};
// Per-lane stop flags keyed by lane index (tid). doWorker resets a lane's flag to
// false when it starts and sets it to true once the lane's error counter reaches 20;
// requestPage returns without requesting anything for a lane whose flag is set.
let skip = {};
/**
 * Crawl the 10000 trend ids that follow the highest id already stored.
 *
 * The id offsets are dealt round-robin to 5 lanes. Every lane wraps requestPage
 * with `lockup` (presumably so its requests run one at a time - confirm against
 * the node-lockup documentation), stores each usable payload through insertTable
 * and stops itself via `skip[tid]` once 20 results in a row carried no usable
 * payload.
 *
 * @returns {Promise<void>} Resolves after every id has been processed or skipped.
 *   Failures of individual ids are logged and never reject the returned promise.
 */
const doWorker = async () => {
  const length = 10000;
  const thread = 5;

  // Lane `tid` owns the offsets tid, tid + thread, tid + 2 * thread, ...
  const threadIds = Array.from({length: thread}, (unused, tid) => {
    const tids = [];
    for (let i = tid; i < length; i += thread) {
      tids.push(i);
    }
    return tids;
  });

  // Number() keeps the id arithmetic numeric even if the id arrives as a string.
  const maxId = Number(await getMaxId());
  const pending = [];

  threadIds.forEach((trendIds, tid) => {
    const locktask = lockup(requestPage);
    let errTick = 0;
    skip[tid] = false;

    const crawlOne = async (i) => {
      const trendId = i + maxId + 1;
      try {
        const json = await locktask(trendId, tid);
        if (skip[tid]) {
          return;
        }

        // typeof null === 'object', so null has to be excluded explicitly.
        const detail = json !== null && typeof json === 'object' ? json.detail : undefined;
        if (detail !== null && typeof detail === 'object') {
          const userInfo = detail.userInfo || {};
          const inserting = insertTable({
            trendId: detail.trendId,
            userId: userInfo.userId,
            createAt: detail.createAt,
            userName: userInfo.userName,
            icon: userInfo.icon,
            sex: userInfo.sex,
            readCount: detail.readCount,
            reply: detail.reply,
            fav: detail.fav,
            city: detail.city ? detail.city.city : null,
            images: detail.images ? detail.images.length : 0,
          });
          logger.info(`success: trendId: ${trendId}, reply: ${detail.reply}`);
          // Reset before waiting for the insert so the consecutive-miss count
          // is not disturbed by results arriving in the meantime.
          errTick = 0;
          await inserting;
          return;
        }

        errTick++;
        logger.error(`request not content trendId: ${trendId}, errTick: ${errTick}`);
        if (errTick >= 20) {
          skip[tid] = true;
          logger.info('over!!!');
        }
      } catch (error) {
        // One bad id must neither crash the process nor abort the other lanes.
        logger.error(`process error: ${error.message}, trendId: ${trendId}`);
      }
    };

    trendIds.forEach((i) => {
      pending.push(crawlOne(i));
    });
  });

  await Promise.all(pending);
};
module.exports = doWorker;