...
|
...
|
@@ -66,22 +66,28 @@ public class BaikeSpiderService { |
|
|
Set<String> existSubjects = spiderContentService.getAllSubjects();
|
|
|
//Set<String> yohoKeywords = getAllYohoKeywords();
|
|
|
LOGGER.info("[func=BaikeSpiderService.init][cost={}]", System.currentTimeMillis() - begin);
|
|
|
int newCount = 0;
|
|
|
|
|
|
ExecutorService pool = Executors.newFixedThreadPool(POOL_SIZE, thread -> new Thread(thread, "BaikeSpider-" + atomicInteger.getAndIncrement()));
|
|
|
List<Future<Integer>> futures = new ArrayList<>(baikeUrlFiles.length);
|
|
|
for (File baikeUrlFile : baikeUrlFiles) {
|
|
|
futures.add(pool.submit(new WebCrawler(baikeUrlFile, existSubjects)));
|
|
|
}
|
|
|
try {
|
|
|
BaikeBOBulkService.start();
|
|
|
ExecutorService pool = Executors.newFixedThreadPool(POOL_SIZE, thread -> new Thread(thread, "BaikeSpider-" + atomicInteger.getAndIncrement()));
|
|
|
List<Future<Integer>> futures = new ArrayList<>(baikeUrlFiles.length);
|
|
|
for (File baikeUrlFile : baikeUrlFiles) {
|
|
|
futures.add(pool.submit(new WebCrawler(baikeUrlFile, existSubjects)));
|
|
|
}
|
|
|
|
|
|
int newCount = 0;
|
|
|
for (Future<Integer> future : futures) {
|
|
|
try {
|
|
|
newCount += future.get();
|
|
|
} catch (InterruptedException e) {
|
|
|
Thread.currentThread().interrupt();
|
|
|
} catch (ExecutionException e) {
|
|
|
throw new RuntimeException(e);
|
|
|
for (Future<Integer> future : futures) {
|
|
|
try {
|
|
|
newCount += future.get();
|
|
|
} catch (InterruptedException e) {
|
|
|
Thread.currentThread().interrupt();
|
|
|
} catch (ExecutionException e) {
|
|
|
throw new RuntimeException(e);
|
|
|
}
|
|
|
}
|
|
|
} catch (Exception e) {
|
|
|
LOGGER.error(e.getMessage(), e);
|
|
|
BaikeBOBulkService.destroy();
|
|
|
}
|
|
|
|
|
|
result.put("existSubjects", existSubjects);
|
...
|
...
|
|