// request-job-ex.js
const ThreadJob = require('./request-job');
const spider = require('../libs/spider');
const fs = require('fs');
// Captures the `src` value of an <img> tag whose later attributes mention "isSizeImg".
const REG_IMG = /<img src="([^"]+)"[^>]+isSizeImg/;

// CSV output; no stream options are set, so Node's defaults apply.
const fw = fs.createWriteStream('/Users/chenfeng/Downloads/sizeimg.csv', {});

// Failure bookkeeping keyed by thread id; entries look like { err: <number>, skip: <boolean> }.
const skip = {};
// Latched to true the first time every known thread is found to be skipped.
let allDone = false;

/**
 * Log 'done' exactly once, as soon as every entry currently present in `skip`
 * has its `skip` flag set. Later calls are no-ops.
 */
const checkAllDone = () => {
  if (allDone) {
    return;
  }
  for (const threadKey of Object.keys(skip)) {
    if (!skip[threadKey].skip) {
      // At least one thread is still active.
      return;
    }
  }
  allDone = true;
  console.log('done');
};
/**
 * Fetch one item and, if its markup contains a size image, append a CSV row
 * (`id,articleNumber,imageUrl`) to the output stream.
 *
 * Failures are counted per bucket (`id % 5`; presumably one bucket per
 * ThreadJob worker — verify against ./request-job). The counter starts at 1
 * and resets to 0 on any 200 response; once it exceeds 20 the bucket is
 * flagged and every later job in it returns immediately.
 *
 * @param {number} id - Item id passed to `spider.spiderFetch`.
 * @returns {Promise<object|undefined>} The fetch result, or `undefined` when
 *   the bucket has already been flagged as skipped.
 */
const job = async id => {
  const threadId = id % 5;
  if (!skip[threadId]) {
    skip[threadId] = {
      err: 1,
      skip: false
    };
  }
  const state = skip[threadId];
  if (state.skip) {
    checkAllDone();
    return;
  }

  const result = await spider.spiderFetch(id);

  if (result.status !== 200) {
    state.err++;
    if (state.err > 20) {
      state.skip = true;
    }
    console.log(`threadId: ${threadId}, id: ${id}, errTick: ${state.err}`);
    return result;
  }

  state.err = 0;
  // A 200 response may still lack a payload or the markup field; treat that as
  // "no image" rather than letting `.match` throw a TypeError.
  const data = result.data || {};
  const markup = data.imageAndText;
  const match = typeof markup === 'string' ? markup.match(REG_IMG) : null;
  if (match && match[1]) {
    const detail = data.detail || {};
    console.log(`threadId: ${threadId}, id: ${id}, ${match[1]}`);
    fw.write(`${id},${detail.articleNumber},${match[1]}\n`);
  } else {
    console.log(`threadId: ${threadId}, id: ${id}, no image`);
  }
  return result;
};
// One pre-bound job per item id, covering ids 1 through 40000.
const jobs = Array.from({ length: 40000 }, (_, index) => job.bind(null, index + 1));

// Hand the job list to ThreadJob with `thread: 5` and start it.
// NOTE(review): 5 matches the `id % 5` bucketing inside `job`; keep the two in sync.
const threadJob = new ThreadJob({ jobs, thread: 5 });
threadJob.start();