// request-job-ex.js
const ThreadJob = require('./request-job'); // local concurrent-job runner (module not shown here)
const spider = require('../libs/spider');   // local fetch helper (module not shown here)
const fs = require('fs');

// Per-thread error tracking: skip[threadId] = { err: consecutive error count, skip: whether that thread has given up }.
const skip = {};

// Matches an <img> tag carrying the isSizeImg marker and captures its src attribute.
const REG_IMG = /<img src="([^"]+)"[^>]+isSizeImg/;

// CSV output: one line per id that has a matching image.
const fw = fs.createWriteStream('/Users/chenfeng/Downloads/sizeimg.csv', {
  // encoding: 'gb2312',
});
let allDone = false;

// Once every thread has flagged itself as skipped, log completion and close the output stream.
const checkAllDone = () => {
  if (!allDone && Object.keys(skip).every(k => skip[k].skip)) {
    allDone = true;
    fw.end();
    console.log('done');
  }
};

// Fetch one id, extract the image URL matched by REG_IMG, and append it to the CSV.
// ids are spread across 5 logical threads; a thread stops once its error counter exceeds 20.
const job = async id => {
  const threadId = id % 5;
  if (!skip[threadId]) {
    skip[threadId] = {
      err: 1,
      skip: false,
    };
  }
  // This thread has already given up: just check whether every thread is done.
  if (skip[threadId].skip) {
    checkAllDone();
    return;
  }
  // spiderFetch is assumed to resolve to { status, data: { imageAndText, detail: { articleNumber } } }.
  const result = await spider.spiderFetch(id);

  if (result.status === 200) {
    // Successful fetch: reset the error counter and look for the image.
    skip[threadId].err = 0;
    const imageAndText = result.data.imageAndText;
    const match = imageAndText.match(REG_IMG);

    if (match && match[1]) {
      console.log(`threadId: ${threadId}, id: ${id}, ${match[1]}`);
      fw.write(`${id},${result.data.detail.articleNumber},${match[1]}\n`);
    } else {
      console.log(`threadId: ${threadId}, id: ${id}, no image`);
    }
  } else {
    // Failed fetch: bump the error counter; give up on this thread once its counter exceeds 20.
    skip[threadId].err++;
    if (skip[threadId].err > 20) {
      skip[threadId].skip = true;
      checkAllDone();
    }
    console.log(`threadId: ${threadId}, id: ${id}, errTick: ${skip[threadId].err}`);
  }
  return result;
};

// Build 40,000 jobs, one per id (1..40000); each is a zero-argument function the runner can invoke.
const jobs = Array.from({ length: 40000 }, (_, i) => job.bind(null, i + 1));

// Run the jobs with 5 concurrent workers.
const threadJob = new ThreadJob({
  jobs,
  thread: 5,
});

threadJob.start();
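
/*
 * The ThreadJob module (./request-job) is not shown in this file. A minimal
 * runner compatible with the usage above (a constructor taking { jobs, thread }
 * and a start() that drains the job queue with `thread` concurrent workers)
 * might look like the sketch below. This is an assumption for illustration,
 * not the actual module.
 *
 *   class ThreadJob {
 *     constructor({ jobs, thread }) {
 *       this.jobs = jobs.slice(); // queue of zero-argument async functions
 *       this.thread = thread;     // number of concurrent workers
 *     }
 *     async start() {
 *       const worker = async () => {
 *         while (this.jobs.length) {
 *           const next = this.jobs.shift();
 *           await next().catch(() => {}); // keep the worker alive if a job rejects
 *         }
 *       };
 *       await Promise.all(Array.from({ length: this.thread }, () => worker()));
 *     }
 *   }
 *   module.exports = ThreadJob;
 */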