feat: overhaul system logging and crawler anti-detection capabilities

- Add a dedicated logging system: integrate the Winston logging framework with daily log rotation and per-level storage (see the sketch below)
- Strengthen crawler anti-detection: integrate the puppeteer-extra-plugin-stealth plugin to make the headless browser harder to fingerprint (see the sketch after the diff)
- Add a standalone crawler script: crawl tasks can be run on their own via npm run crawl (see the sketch below)
- Improve frontend date filtering: add a date-range picker with 3-day/7-day quick filters
- Improve crawl statistics: record each platform's success/failure counts and execution time in detail
- Remove default keyword initialization to avoid re-creating preset keywords
- Extend environment configuration: add a LOG_LEVEL option for the log level
- Expand .gitignore: add ignore rules for log directories, build artifacts, and similar files
- Raise the execution time limit: extend the maximum execution time from 1 hour to 3 hours
- Improve error handling: better exception capture and logging
Author: dmy
Date:   2026-01-12 10:46:10 +08:00
parent 66f535ed0c
commit 3e6456e120
14 changed files with 495 additions and 119 deletions
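
The logging overhaul itself is not part of the hunks below. As a rough sketch of the kind of setup the first bullet describes — assuming the winston and winston-daily-rotate-file packages, with file names, retention, and format chosen for illustration rather than taken from the commit — a logger honoring the new LOG_LEVEL variable could look like this:

import * as winston from 'winston';
import DailyRotateFile from 'winston-daily-rotate-file';

// Log level is driven by the new LOG_LEVEL environment variable (default: info).
const logger = winston.createLogger({
  level: process.env.LOG_LEVEL ?? 'info',
  format: winston.format.combine(
    winston.format.timestamp(),
    winston.format.printf(({ timestamp, level, message }) => `${timestamp} [${level}] ${message}`),
  ),
  transports: [
    // Console output for local development.
    new winston.transports.Console(),
    // One combined log file per day; the '14d' retention is an illustrative choice.
    new DailyRotateFile({
      filename: 'logs/app-%DATE%.log',
      datePattern: 'YYYY-MM-DD',
      maxFiles: '14d',
    }),
    // Errors additionally go to their own rotated file (per-level storage).
    new DailyRotateFile({
      level: 'error',
      filename: 'logs/error-%DATE%.log',
      datePattern: 'YYYY-MM-DD',
      maxFiles: '14d',
    }),
  ],
});

export default logger;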
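
Similarly, the npm run crawl entry point is outside the diff shown here. One common pattern is a NestJS standalone application context; the script path, the AppModule import, and the package.json wiring below are assumptions — only BidCrawlerService and crawlAll() appear in the diff:

// scripts/crawl.ts (hypothetical path) — run a one-off crawl without starting the HTTP server.
import { NestFactory } from '@nestjs/core';
import { AppModule } from '../src/app.module'; // assumed module path
import { BidCrawlerService } from '../src/crawler/bid-crawler.service'; // assumed file path

async function main() {
  // Standalone context: providers are wired up, but no HTTP listener is started.
  const app = await NestFactory.createApplicationContext(AppModule);
  try {
    await app.get(BidCrawlerService).crawlAll();
  } finally {
    await app.close();
  }
}

main().catch((err) => {
  console.error('Crawl script failed:', err);
  process.exit(1);
});

A package.json entry along the lines of "crawl": "ts-node scripts/crawl.ts" (again illustrative) would then make npm run crawl execute the task on demand.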


@@ -25,10 +25,13 @@ export class BidCrawlerService {
   async crawlAll() {
     this.logger.log('Starting crawl task with Puppeteer...');
-    // Set the maximum execution time to 1 hour
-    const maxExecutionTime = 60 * 60 * 1000; // 1 hour, in milliseconds
+    // Set the maximum execution time to 3 hours
+    const maxExecutionTime = 3 * 60 * 60 * 1000; // 3 hours, in milliseconds
     const startTime = Date.now();
+    // Per-source crawl statistics
+    const crawlResults: Record<string, { success: number; error?: string }> = {};
     // Read proxy configuration from environment variables
     const proxyHost = this.configService.get<string>('PROXY_HOST');
     const proxyPort = this.configService.get<string>('PROXY_PORT');
@@ -68,7 +71,7 @@ export class BidCrawlerService {
         // Check whether the task has timed out
         const elapsedTime = Date.now() - startTime;
         if (elapsedTime > maxExecutionTime) {
-          this.logger.warn(`⚠️ Crawl task exceeded maximum execution time of 1 hour. Stopping...`);
+          this.logger.warn(`⚠️ Crawl task exceeded maximum execution time of 3 hours. Stopping...`);
           this.logger.warn(`⚠️ Total elapsed time: ${Math.floor(elapsedTime / 1000 / 60)} minutes`);
           break;
         }
@@ -76,18 +79,23 @@ export class BidCrawlerService {
         try {
           const results = await crawler.crawl(browser);
           this.logger.log(`Extracted ${results.length} items from ${crawler.name}`);
+          // Record the number of items fetched successfully
+          crawlResults[crawler.name] = { success: results.length };
           for (const item of results) {
             await this.bidsService.createOrUpdate({
               title: item.title,
               url: item.url,
               publishDate: item.publishDate,
-              source: crawler.name,
-              unit: '',
+              source: crawler.name,
+              unit: '',
             });
           }
         } catch (err) {
           this.logger.error(`Error crawling ${crawler.name}: ${err.message}`);
+          // Record the error message for the summary
+          crawlResults[crawler.name] = { success: 0, error: err.message };
         }
       }
     } catch (error) {
@@ -100,8 +108,31 @@ export class BidCrawlerService {
       this.logger.log(`Crawl task finished. Total time: ${minutes} minutes`);
       if (totalTime > maxExecutionTime) {
-        this.logger.warn(`⚠️ Crawl task exceeded maximum execution time of 1 hour.`);
+        this.logger.warn(`⚠️ Crawl task exceeded maximum execution time of 3 hours.`);
       }
+      // Print the crawl statistics summary
+      this.logger.log('='.repeat(50));
+      this.logger.log('爬虫执行总结 / Crawl Summary');
+      this.logger.log('='.repeat(50));
+      let totalSuccess = 0;
+      let errorCount = 0;
+      for (const [source, result] of Object.entries(crawlResults)) {
+        if (result.error) {
+          this.logger.error(`${source}: 出错 - ${result.error}`);
+          errorCount++;
+        } else {
+          this.logger.log(`${source}: 成功获取 ${result.success} 条工程信息`);
+          totalSuccess += result.success;
+        }
+      }
+      this.logger.log('='.repeat(50));
+      this.logger.log(`总计: ${totalSuccess} 条工程信息, ${errorCount} 个来源出错`);
+      this.logger.log(`Total: ${totalSuccess} items, ${errorCount} sources failed`);
+      this.logger.log('='.repeat(50));
     }
   }
 }
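
The stealth integration mentioned in the commit message is also outside the hunks above; the diff only shows the PROXY_HOST / PROXY_PORT reads. A minimal sketch of how puppeteer-extra-plugin-stealth and those proxy settings could be combined at browser launch — launchBrowser and its arguments are illustrative, not the commit's actual code:

import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';

// Register the stealth plugin once; it patches common headless-Chrome fingerprints
// (navigator.webdriver, missing plugins, WebGL vendor strings, and so on).
puppeteer.use(StealthPlugin());

// Hypothetical helper mirroring the PROXY_HOST / PROXY_PORT values read in BidCrawlerService.
async function launchBrowser(proxyHost?: string, proxyPort?: string) {
  const args = ['--no-sandbox'];
  if (proxyHost && proxyPort) {
    args.push(`--proxy-server=${proxyHost}:${proxyPort}`);
  }
  return puppeteer.launch({ headless: true, args });
}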