refactor(crawler): 优化爬虫类型定义和接口导出
将 Crawler 接口替换为 AnyCrawler 类型,统一爬虫类型定义;导出 CrawlInfoAddStats 接口,以便其他模块使用;简化爬虫调用方式,使用类型断言替代接口方法调用。
This commit is contained in:
@@ -24,10 +24,7 @@ interface CrawlResult {
|
||||
url: string;
|
||||
}
|
||||
|
||||
interface Crawler {
|
||||
name: string;
|
||||
crawl(browser: puppeteer.Browser): Promise<CrawlResult[]>;
|
||||
}
|
||||
type AnyCrawler = typeof ChdtpCrawler | typeof ChngCrawler | typeof SzecpCrawler | typeof CdtCrawler | typeof EpsCrawler | typeof CnncecpCrawler | typeof CgnpcCrawler | typeof CeicCrawler | typeof EspicCrawler | typeof PowerbeijingCrawler | typeof SdiccCrawler | typeof CnoocCrawler;
|
||||
|
||||
@Injectable()
|
||||
export class BidCrawlerService {
|
||||
@@ -50,7 +47,7 @@ export class BidCrawlerService {
|
||||
const crawlResults: Record<string, { success: number; error?: string }> =
|
||||
{};
|
||||
// 记录数据为0的爬虫,用于重试
|
||||
const zeroDataCrawlers: Crawler[] = [];
|
||||
const zeroDataCrawlers: AnyCrawler[] = [];
|
||||
// 从环境变量读取代理配置
|
||||
const proxyHost = this.configService.get<string>('PROXY_HOST');
|
||||
const proxyPort = this.configService.get<string>('PROXY_PORT');
|
||||
@@ -114,7 +111,7 @@ export class BidCrawlerService {
|
||||
}
|
||||
|
||||
try {
|
||||
const results = await crawler.crawl(browser);
|
||||
const results = await (crawler as any).crawl(browser);
|
||||
this.logger.log(
|
||||
`Extracted ${results.length} items from ${crawler.name}`,
|
||||
);
|
||||
@@ -184,7 +181,7 @@ export class BidCrawlerService {
|
||||
}
|
||||
|
||||
try {
|
||||
const results = await crawler.crawl(browser);
|
||||
const results = await (crawler as any).crawl(browser);
|
||||
this.logger.log(
|
||||
`Retry extracted ${results.length} items from ${crawler.name}`,
|
||||
);
|
||||
@@ -337,7 +334,7 @@ export class BidCrawlerService {
|
||||
try {
|
||||
this.logger.log(`Crawling: ${targetCrawler.name}`);
|
||||
|
||||
const results = await targetCrawler.crawl(browser);
|
||||
const results = await (targetCrawler as any).crawl(browser);
|
||||
this.logger.log(
|
||||
`Extracted ${results.length} items from ${targetCrawler.name}`,
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user