// spider-buyers.js 5.09 KB
const dayjs = require('dayjs');
const lockup = require('node-lockup');
const isBetween = require('dayjs/plugin/isBetween');
const _ = require('lodash');
const MysqlAdapter = require('./libs/mysql');
const spider = require('./libs/spider');
const config = require('./spider-buyers.json');
const {logger} = require('./libs/logger');

// Register the isBetween plugin on the shared dayjs factory.
dayjs.extend(isBetween);

// Single MySQL adapter for this module, built from the `connect` section of
// spider-buyers.json; the database name is passed as the second argument.
const {connect: connectOptions} = config;
const mysql = new MysqlAdapter(connectOptions, connectOptions.database);

// Per-product details (count, price, title, articleNumber) keyed by productId.
// Filled by `start` and read when buyer rows are inserted.
const products = {};

// Patterns for the time labels attached to sold-list entries.
// "刚刚" = just now.
const RegNow = /刚刚/;
// "N分钟前" = N minutes ago.
const RegMinute = /(\d+)分钟前/;
// "N小时前" = N hours ago.
const RegHour = /(\d+)小时前/;
// "N天前" = N days ago.
const RegDay = /(\d+)天前/;
// "M月D日" = month M, day D (no year). The literal "月" between the two groups
// is required: without it "3月5日" never matches and "10月12日" would be
// captured as month 1 / day 2.
const RegDate = /(\d+)月(\d+)日/;
// Dotted full date such as "2019.01.05".
const RegDateFull = /\d+\.\d+\.\d+/;

// Name of the per-day buyers table; assigned by createDayTable().
let tableName;


/**
 * Turn the time label attached to a sold-list entry into a dayjs instance.
 *
 * Labels are tried in this order:
 *   1. RegMinute ("N分钟前", N minutes ago) -> now minus N minutes
 *   2. RegHour   ("N小时前", N hours ago)   -> now minus N hours
 *   3. RegDay    ("N天前", N days ago)      -> now minus N days
 *   4. RegDate   (two numeric groups used as month and day, no year)
 *                -> that month/day in the current year, or in the previous
 *                   year when the current-year date would lie in the future
 *   5. RegDateFull (dotted full date)       -> the label handed to dayjs as-is
 *   6. RegNow    ("刚刚", just now)          -> now
 *
 * @param {string} relativeTime - Time label to interpret.
 * @returns {dayjs.Dayjs} The parsed time, or 1990-01-01 when the label is not
 *   a string or matches none of the patterns.
 */
const parseTime = (relativeTime) => {
  // Sentinel far in the past: callers treat it as "older than any window".
  const unknown = () => dayjs('1990-01-01');

  if (typeof relativeTime !== 'string') {
    return unknown();
  }

  const matchMinute = relativeTime.match(RegMinute);

  if (matchMinute) {
    return dayjs().subtract(Number(matchMinute[1]), 'minute');
  }
  const matchHour = relativeTime.match(RegHour);

  if (matchHour) {
    return dayjs().subtract(Number(matchHour[1]), 'hour');
  }
  const matchDay = relativeTime.match(RegDay);

  if (matchDay) {
    return dayjs().subtract(Number(matchDay[1]), 'day');
  }
  const matchDate = relativeTime.match(RegDate);

  if (matchDate) {
    // The label carries no year. A sale cannot happen in the future, so a
    // current-year date that is still ahead of us must belong to last year.
    const now = dayjs();
    const candidate = dayjs(`${now.year()}/${matchDate[1]}/${matchDate[2]}`);

    return candidate.isAfter(now) ? candidate.subtract(1, 'year') : candidate;
  }
  if (RegDateFull.test(relativeTime)) {
    return dayjs(relativeTime);
  }
  if (RegNow.test(relativeTime)) {
    return dayjs();
  }
  return unknown();
};


/**
 * Fetch the "last sold" list of one product page by page and store every
 * order from the last day in the current day table.
 *
 * A page is requested with `lsId` as its `lastId` cursor. Each entry whose
 * parsed time is not before (now - 1 day + 1 second) is inserted; as soon as a
 * page contains an older entry, crawling of this product stops after that
 * page. Otherwise the function recurses with the `lastId` returned by the
 * response until no `lastId` is returned.
 *
 * A response whose status is not 200 is retried while `trys < 3`; after that
 * the product is given up.
 *
 * @param {number} productId - Product whose buyers are fetched; must already
 *   have an entry in `products`.
 * @param {number|string} [lsId=0] - Cursor sent as `lastId` in the request.
 * @param {number} [page=0] - Page counter, used for logging only.
 * @param {number} [trys=0] - Number of retries already made for this page.
 * @returns {Promise<{productId: number}>} Resolves once all pages are handled
 *   and the inserts issued for them have settled successfully.
 */
const spiderBuyers = async(productId, lsId = 0, page = 0, trys = 0) => {
  const result = await spider.spiderFetch(productId, 'https://du.hupu.com/mapi/product/lastSoldList', {
    lastId: lsId
  });

  if (result.status !== 200) {
    if (trys < 3) {
      logger.info(`tryLogic ${trys}`);
      return spiderBuyers(productId, lsId, page, trys + 1);
    }
    return {productId};
  }

  const lastId = result.data.lastId;
  // A response without a list is treated as an empty page instead of throwing.
  const list = Array.isArray(result.data.list) ? result.data.list : [];
  // The window start does not depend on the entry, so compute it once per page.
  const startTime = dayjs().subtract(1, 'day').add(1, 'second');
  const pendingInserts = [];
  let skip = false;

  for (const info of list) {
    const orderTime = parseTime(info.formatTime);

    if (!orderTime.isBefore(startTime)) {
      pendingInserts.push(mysql.insert('INSERT INTO `' + tableName + '` (`productId`, `nickName`, `icon`, `time`, `size`, `soldNum`, `price`, `productName`, `model`) VALUES (:productId, :nickName, :icon, :time, :size, :soldNum, :price, :productName, :model)', {
        productId: productId,
        nickName: info.buyer.userName,
        icon: info.buyer.icon,
        size: info.item.size,
        time: orderTime.format('YYYY-MM-DD HH:mm:ss'),
        soldNum: products[productId].count,
        price: +products[productId].price / 100,
        productName: products[productId].title,
        model: products[productId].articleNumber
      }));
    } else {
      // Log only the first entry that falls outside the window.
      if (!skip) {
        logger.info(`productid: ${productId}, skip time:${orderTime.format('YYYY-MM-DD HH:mm:ss')}`);
      }
      skip = true;
    }
  }

  // All inserts of the page were started above; wait for them so a failure
  // rejects this call instead of surfacing as an unhandled rejection.
  // Promise.all also accepts plain values, so this holds even if insert()
  // turns out to be synchronous.
  await Promise.all(pendingInserts);

  if (skip) {
    return {productId};
  }

  if (lastId) {
    logger.info(`productId: ${productId}, nextPage: ${page}, lastId: ${lastId}`);
    return spiderBuyers(productId, lastId, page + 1);
  }
  return {productId};
};

// 2721219

// Exclusive upper bound for the product ids that are crawled.
let max = 50000;
// Consecutive failed product fetches that came back with a truthy status.
let failNum = 0;
// Set once the crawl is finished; later calls to `start` return immediately.
let over = false;

/**
 * Crawl a single product: fetch its details, remember them in `products`,
 * then fetch and store its recent buyers.
 *
 * The crawl is flagged as finished when `productId` reaches `max` or when
 * `failNum` reaches 20.
 *
 * @param {number} productId - Id of the product to crawl.
 * @returns {Promise<*>} `undefined` when the crawl is already (or just became)
 *   finished through the `max` bound, the logger's return value when the
 *   failure limit is hit, `true` otherwise.
 */
const start = async(productId) => {
  if (over) {
    return;
  }

  const reachedEnd = productId >= max;

  if (reachedEnd) {
    over = true;
    logger.info('over!!!!!!');
    return;
  }

  const response = await spider.spiderFetch(productId);

  if (response.status !== 200) {
    logger.info(productId, response.status);
    // Only responses that carry a status count towards the failure limit.
    if (response.status) {
      failNum += 1;
    }
  } else {
    failNum = 0;

    const readField = (path, fallback) => _.get(response, path, fallback);

    products[productId] = {
      count: readField('data.detail.soldNum', 0),
      price: readField('data.item.price', 0),
      title: readField('data.detail.title', ''),
      articleNumber: readField('data.detail.articleNumber', ''),
    };

    // A failure while crawling buyers is logged and must not stop the crawl.
    try {
      await spiderBuyers(productId);
    } catch (buyersError) {
      logger.info('error', buyersError);
    }
  }

  if (failNum < 20) {
    return true;
  }

  over = true;
  return logger.info('over!!!!!!', JSON.stringify(response));
};

/**
 * Make sure today's buyers table exists and remember its name in `tableName`.
 *
 * The table is named `buyers_YYYY_MM_DD` after the current date. Creation uses
 * `CREATE TABLE IF NOT EXISTS`, so the statement is a no-op when the table is
 * already present in the connection's database. A separate look-up in
 * information_schema is deliberately avoided: without a schema filter it also
 * matches same-named tables in other databases, and check-then-create is racy.
 *
 * @returns {Promise<*>} Whatever `mysql.execute` resolves with.
 */
const createDayTable = async() => {
  tableName = `buyers_${dayjs().format('YYYY_MM_DD')}`;
  const sql = `CREATE TABLE IF NOT EXISTS \`${tableName}\` (\`productId\` int(11) DEFAULT NULL,\`nickName\` varchar(50) DEFAULT NULL,\`icon\` varchar(200) DEFAULT NULL,\`time\` datetime DEFAULT NULL,\`size\` varchar(50) DEFAULT NULL,\`id\` int(11) NOT NULL AUTO_INCREMENT,\`soldNum\` int(11) DEFAULT NULL,\`price\` decimal(10,0) DEFAULT NULL,\`productName\` varchar(200) DEFAULT NULL,\`model\` varchar(100) DEFAULT NULL,PRIMARY KEY (\`id\`),KEY \`index_name_icon\` (\`nickName\`,\`icon\`), KEY \`productid\` (\`productId\`) USING BTREE, KEY \`time\` (\`time\`) USING BTREE) ENGINE=InnoDB DEFAULT CHARSET=utf8;`;

  return mysql.execute(sql);
};

const locktask = lockup(start);

// 4211292
module.exports = async() => {
  await createDayTable();
  Array.from({length: max}).forEach((v, i) => {
    locktask(i + 1);
  });
};