第一次提交
This commit is contained in:
46
src/crawler/services/bid-crawler.service.ts
Normal file
46
src/crawler/services/bid-crawler.service.ts
Normal file
@@ -0,0 +1,46 @@
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import * as puppeteer from 'puppeteer';
|
||||
import { BidsService } from '../../bids/services/bid.service';
|
||||
import { ChdtpCrawler } from './chdtp_target';
|
||||
|
||||
/**
 * Runs the bid crawlers inside a headless Puppeteer browser and persists
 * every extracted item through {@link BidsService}.
 */
@Injectable()
export class BidCrawlerService {
  private readonly logger = new Logger(BidCrawlerService.name);

  constructor(private readonly bidsService: BidsService) {}

  /**
   * Launches a browser, runs the crawler, and upserts each extracted item.
   *
   * Errors raised while crawling or persisting are logged and not rethrown.
   * The browser is closed whether or not the crawl succeeded. A failure to
   * launch the browser happens outside the guarded section and therefore
   * rejects the returned promise.
   */
  async crawlAll(): Promise<void> {
    this.logger.log('Starting crawl task with Puppeteer...');

    const browser = await puppeteer.launch({
      headless: true,
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });

    try {
      // Currently only supports ChdtpCrawler, but can be extended to a list of crawlers
      const crawler = ChdtpCrawler;
      this.logger.log(`Crawling: ${crawler.name}`);

      const results = await crawler.crawl(browser);
      this.logger.log(`Extracted ${results.length} items from ${crawler.name}`);

      // Persist sequentially so a failure stops at the first bad item.
      for (const item of results) {
        // NOTE(review): the previous code referenced undeclared identifiers
        // (title, itemUrl, publishDate, type) instead of reading from `item`.
        // This assumes each crawl result exposes `title`, `url` and
        // `publishDate` — confirm against ChdtpCrawler.crawl's return type.
        await this.bidsService.createOrUpdate({
          title: item.title,
          url: item.url,
          publishDate: item.publishDate,
          // The crawler's name is what the log lines above use to identify
          // the source, so it is reused here as the source label.
          source: crawler.name || 'Unknown',
        });
      }
    } catch (error: unknown) {
      // A thrown value is not guaranteed to be an Error; narrow before use.
      if (error instanceof Error) {
        this.logger.error(`Crawl task failed: ${error.message}`, error.stack);
      } else {
        this.logger.error(`Crawl task failed: ${String(error)}`);
      }
    } finally {
      await browser.close();
      this.logger.log('Crawl task finished.');
    }
  }
}
|
||||
Reference in New Issue
Block a user