refactor(crawler): 优化爬虫类型定义和接口导出
将 Crawler 接口替换为 AnyCrawler 类型,统一爬虫类型定义;导出 CrawlInfoAddStats 接口以便其他模块使用;简化爬虫调用方式,使用类型断言替代接口方法调用。
This commit is contained in:
@@ -15,7 +15,7 @@ interface SourceResult {
|
|||||||
source: string;
|
source: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface CrawlInfoAddStats {
|
export interface CrawlInfoAddStats {
|
||||||
source: string;
|
source: string;
|
||||||
count: number;
|
count: number;
|
||||||
latestUpdate: Date | string;
|
latestUpdate: Date | string;
|
||||||
|
|||||||
@@ -24,10 +24,7 @@ interface CrawlResult {
|
|||||||
url: string;
|
url: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface Crawler {
|
type AnyCrawler = typeof ChdtpCrawler | typeof ChngCrawler | typeof SzecpCrawler | typeof CdtCrawler | typeof EpsCrawler | typeof CnncecpCrawler | typeof CgnpcCrawler | typeof CeicCrawler | typeof EspicCrawler | typeof PowerbeijingCrawler | typeof SdiccCrawler | typeof CnoocCrawler;
|
||||||
name: string;
|
|
||||||
crawl(browser: puppeteer.Browser): Promise<CrawlResult[]>;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class BidCrawlerService {
|
export class BidCrawlerService {
|
||||||
@@ -50,7 +47,7 @@ export class BidCrawlerService {
|
|||||||
const crawlResults: Record<string, { success: number; error?: string }> =
|
const crawlResults: Record<string, { success: number; error?: string }> =
|
||||||
{};
|
{};
|
||||||
// 记录数据为0的爬虫,用于重试
|
// 记录数据为0的爬虫,用于重试
|
||||||
const zeroDataCrawlers: Crawler[] = [];
|
const zeroDataCrawlers: AnyCrawler[] = [];
|
||||||
// 从环境变量读取代理配置
|
// 从环境变量读取代理配置
|
||||||
const proxyHost = this.configService.get<string>('PROXY_HOST');
|
const proxyHost = this.configService.get<string>('PROXY_HOST');
|
||||||
const proxyPort = this.configService.get<string>('PROXY_PORT');
|
const proxyPort = this.configService.get<string>('PROXY_PORT');
|
||||||
@@ -114,7 +111,7 @@ export class BidCrawlerService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const results = await crawler.crawl(browser);
|
const results = await (crawler as any).crawl(browser);
|
||||||
this.logger.log(
|
this.logger.log(
|
||||||
`Extracted ${results.length} items from ${crawler.name}`,
|
`Extracted ${results.length} items from ${crawler.name}`,
|
||||||
);
|
);
|
||||||
@@ -184,7 +181,7 @@ export class BidCrawlerService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const results = await crawler.crawl(browser);
|
const results = await (crawler as any).crawl(browser);
|
||||||
this.logger.log(
|
this.logger.log(
|
||||||
`Retry extracted ${results.length} items from ${crawler.name}`,
|
`Retry extracted ${results.length} items from ${crawler.name}`,
|
||||||
);
|
);
|
||||||
@@ -337,7 +334,7 @@ export class BidCrawlerService {
|
|||||||
try {
|
try {
|
||||||
this.logger.log(`Crawling: ${targetCrawler.name}`);
|
this.logger.log(`Crawling: ${targetCrawler.name}`);
|
||||||
|
|
||||||
const results = await targetCrawler.crawl(browser);
|
const results = await (targetCrawler as any).crawl(browser);
|
||||||
this.logger.log(
|
this.logger.log(
|
||||||
`Extracted ${results.length} items from ${targetCrawler.name}`,
|
`Extracted ${results.length} items from ${targetCrawler.name}`,
|
||||||
);
|
);
|
||||||
|
|||||||
Reference in New Issue
Block a user