47 lines
1.3 KiB
TypeScript
47 lines
1.3 KiB
TypeScript
|
|
import { Injectable, Logger } from '@nestjs/common';
|
||
|
|
import * as puppeteer from 'puppeteer';
|
||
|
|
import { BidsService } from '../../bids/services/bid.service';
|
||
|
|
import { ChdtpCrawler } from './chdtp_target';
|
||
|
|
|
||
|
|
/**
 * Runs the bid crawlers inside a headless Puppeteer browser and hands every
 * extracted item to {@link BidsService.createOrUpdate}.
 */
@Injectable()
export class BidCrawlerService {
  private readonly logger = new Logger(BidCrawlerService.name);

  constructor(private readonly bidsService: BidsService) {}

  /**
   * Launches a browser, runs the configured crawler and stores its results.
   *
   * Failures raised while crawling or storing are logged and not rethrown.
   * A failure to launch the browser happens before the `try` block and
   * therefore still rejects the returned promise. The browser is closed in
   * all cases once it has been launched.
   */
  async crawlAll(): Promise<void> {
    this.logger.log('Starting crawl task with Puppeteer...');

    const browser = await puppeteer.launch({
      headless: true,
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });

    try {
      // Currently only supports ChdtpCrawler, but can be extended to a list of crawlers
      const crawler = ChdtpCrawler;
      this.logger.log(`Crawling: ${crawler.name}`);

      const results = await crawler.crawl(browser);
      this.logger.log(`Extracted ${results.length} items from ${crawler.name}`);

      // Items are stored one after another; the first failing write aborts
      // the remaining ones and is reported by the catch block below.
      for (const item of results) {
        // NOTE(review): assumes each crawled item exposes `title`, `url` and
        // `publishDate` — confirm against the result type of ChdtpCrawler.
        await this.bidsService.createOrUpdate({
          title: item.title,
          url: item.url,
          publishDate: item.publishDate,
          // The crawler's own name identifies where the bid came from.
          source: crawler.name || 'Unknown',
        });
      }
    } catch (error: unknown) {
      // Anything can be thrown, so narrow before reading `.message`.
      const message = error instanceof Error ? error.message : String(error);
      this.logger.error(`Crawl task failed: ${message}`);
    } finally {
      await browser.close();
      this.logger.log('Crawl task finished.');
    }
  }
}
|