feat: overhaul system logging and anti-bot evasion
- Add a dedicated logging system: integrate the Winston logging framework, with daily rotation and per-level log files (a setup sketch follows below)
- Strengthen anti-bot evasion: integrate the puppeteer-extra-plugin-stealth plugin for better stealth (a launch sketch follows below)
- Add a standalone crawler script: crawl tasks can be run on their own via npm run crawl (an entry-point sketch follows below)
- Improve frontend date filtering: add a date-range picker with 3-day/7-day quick filters
- Improve crawl statistics: record each platform's success/failure counts and execution time in detail
- Remove default keyword initialization: avoid re-creating preset keywords
- Extend environment configuration: add a LOG_LEVEL option for configuring the log level
- Extend .gitignore: add ignore rules for log directories, build artifacts, and the like
- Raise the execution time limit: extend the maximum execution time from 1 hour to 3 hours
- Improve error handling: better exception capture and logging
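The logger wiring itself is not part of this diff. A minimal sketch of what a Winston setup with daily rotation, per-level files, and a LOG_LEVEL-driven level could look like — file names, retention periods, and formats are assumptions, not taken from this commit:

// Hypothetical logger setup; assumes the winston and
// winston-daily-rotate-file packages are installed.
import * as winston from 'winston';
import DailyRotateFile from 'winston-daily-rotate-file';

export const logger = winston.createLogger({
  // LOG_LEVEL comes from the new environment option; default to 'info'.
  level: process.env.LOG_LEVEL ?? 'info',
  format: winston.format.combine(
    winston.format.timestamp(),
    winston.format.json(),
  ),
  transports: [
    // One combined file per day (retention of 14d is an assumption).
    new DailyRotateFile({
      filename: 'logs/app-%DATE%.log',
      datePattern: 'YYYY-MM-DD',
      maxFiles: '14d',
    }),
    // Errors also land in their own daily file (per-level storage).
    new DailyRotateFile({
      filename: 'logs/error-%DATE%.log',
      datePattern: 'YYYY-MM-DD',
      level: 'error',
      maxFiles: '30d',
    }),
    new winston.transports.Console(),
  ],
});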
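Likewise, the stealth integration is not shown in this diff; puppeteer-extra-plugin-stealth is typically applied once, at browser launch. A minimal sketch, with the launch flags as assumptions:

// Hypothetical browser bootstrap; assumes puppeteer-extra and
// puppeteer-extra-plugin-stealth are installed.
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';

// The stealth plugin patches common headless-detection vectors
// (navigator.webdriver, UA hints, etc.) before any page loads.
puppeteer.use(StealthPlugin());

export async function launchBrowser() {
  return puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
}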
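The standalone npm run crawl entry point is also outside this diff. One plausible shape, using Nest's standalone application context — the script path and module imports are assumptions:

// scripts/crawl.ts — hypothetical entry point behind `npm run crawl`.
import { NestFactory } from '@nestjs/core';
import { AppModule } from '../src/app.module';
import { BidCrawlerService } from '../src/crawler/bid-crawler.service';

async function main() {
  // Boot the app without an HTTP listener, run one crawl, then exit.
  const app = await NestFactory.createApplicationContext(AppModule);
  try {
    await app.get(BidCrawlerService).crawlAll();
  } finally {
    await app.close();
  }
}

main().catch((err) => {
  console.error('Crawl failed:', err);
  process.exit(1);
});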
@@ -25,10 +25,13 @@ export class BidCrawlerService {
   async crawlAll() {
     this.logger.log('Starting crawl task with Puppeteer...');
 
-    // Set the maximum execution time to 1 hour
-    const maxExecutionTime = 60 * 60 * 1000; // 1 hour in milliseconds
+    // Set the maximum execution time to 3 hours
+    const maxExecutionTime = 3 * 60 * 60 * 1000; // 3 hours in milliseconds
     const startTime = Date.now();
 
+    // Per-source crawl statistics
+    const crawlResults: Record<string, { success: number; error?: string }> = {};
+
     // Read proxy configuration from environment variables
     const proxyHost = this.configService.get<string>('PROXY_HOST');
     const proxyPort = this.configService.get<string>('PROXY_PORT');
@@ -68,7 +71,7 @@ export class BidCrawlerService {
         // Check whether the task has timed out
         const elapsedTime = Date.now() - startTime;
         if (elapsedTime > maxExecutionTime) {
-          this.logger.warn(`⚠️ Crawl task exceeded maximum execution time of 1 hour. Stopping...`);
+          this.logger.warn(`⚠️ Crawl task exceeded maximum execution time of 3 hours. Stopping...`);
           this.logger.warn(`⚠️ Total elapsed time: ${Math.floor(elapsedTime / 1000 / 60)} minutes`);
           break;
         }
@@ -76,18 +79,23 @@ export class BidCrawlerService {
         try {
           const results = await crawler.crawl(browser);
           this.logger.log(`Extracted ${results.length} items from ${crawler.name}`);
 
+          // Record the success count for this source
+          crawlResults[crawler.name] = { success: results.length };
+
           for (const item of results) {
             await this.bidsService.createOrUpdate({
               title: item.title,
               url: item.url,
               publishDate: item.publishDate,
               source: crawler.name,
               unit: '',
             });
           }
         } catch (err) {
           this.logger.error(`Error crawling ${crawler.name}: ${err.message}`);
+          // Record the error for this source
+          crawlResults[crawler.name] = { success: 0, error: err.message };
         }
       }
     } catch (error) {
@@ -100,8 +108,31 @@ export class BidCrawlerService {
       this.logger.log(`Crawl task finished. Total time: ${minutes} minutes`);
 
       if (totalTime > maxExecutionTime) {
-        this.logger.warn(`⚠️ Crawl task exceeded maximum execution time of 1 hour.`);
+        this.logger.warn(`⚠️ Crawl task exceeded maximum execution time of 3 hours.`);
       }
+
+      // Print a summary of the crawl results
+      this.logger.log('='.repeat(50));
+      this.logger.log('爬虫执行总结 / Crawl Summary');
+      this.logger.log('='.repeat(50));
+
+      let totalSuccess = 0;
+      let errorCount = 0;
+
+      for (const [source, result] of Object.entries(crawlResults)) {
+        if (result.error) {
+          this.logger.error(`❌ ${source}: 出错 - ${result.error}`);
+          errorCount++;
+        } else {
+          this.logger.log(`✅ ${source}: 成功获取 ${result.success} 条工程信息`);
+          totalSuccess += result.success;
+        }
+      }
+
+      this.logger.log('='.repeat(50));
+      this.logger.log(`总计: ${totalSuccess} 条工程信息, ${errorCount} 个来源出错`);
+      this.logger.log(`Total: ${totalSuccess} items, ${errorCount} sources failed`);
+      this.logger.log('='.repeat(50));
     }
   }
 }