chore: 更新.gitignore并添加新文件

在.gitignore中添加对*.png、*.log、*-lock.json、*.woff2文件的忽略规则，并新增OFL.txt文件。同时，添加vue.svg图标文件以支持前端展示。更新多个TypeScript文件以优化代码格式和增强可读性。
2026-01-14 22:26:32 +08:00
parent 10565af001
commit 82f5a81887
47 changed files with 1513 additions and 814 deletions
--- a/src/crawler/services/chdtp_target.ts
+++ b/src/crawler/services/chdtp_target.ts
@@ -7,22 +7,34 @@ export interface ChdtpResult {
  url: string; // Necessary for system uniqueness
 }

+interface ChdtpCrawlerType { // Shape of `this` declared on ChdtpCrawler's crawl() and extract() methods
+  name: string; // Display name; crawl() includes it in its failure log message
+  url: string;
+  baseUrl: string; // Prefix that extract() prepends to each matched URL suffix
+  extract(html: string): ChdtpResult[]; // Parses result entries out of a page's HTML string
+}
+
 export const ChdtpCrawler = {
  name: '华电集团电子商务平台 ',
  url: 'https://www.chdtp.com/webs/queryWebZbgg.action?zbggType=1',
  baseUrl: 'https://www.chdtp.com/webs/',

-  async crawl(browser: puppeteer.Browser): Promise<ChdtpResult[]> {
+  async crawl(
+    this: ChdtpCrawlerType,
+    browser: puppeteer.Browser,
+  ): Promise<ChdtpResult[]> {
    const logger = new Logger('ChdtpCrawler');
    const page = await browser.newPage();
-    
+
    const username = process.env.PROXY_USERNAME;
    const password = process.env.PROXY_PASSWORD;
    if (username && password) {
      await page.authenticate({ username, password });
    }

-    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36');
+    await page.setUserAgent(
+      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
+    );

    const allResults: ChdtpResult[] = [];
    let currentPage = 1;
@@ -35,14 +47,16 @@ export const ChdtpCrawler = {
      while (currentPage <= maxPages) {
        const content = await page.content();
        const pageResults = this.extract(content);
-        
+
        if (pageResults.length === 0) {
          logger.warn(`No results found on page ${currentPage}, stopping.`);
          break;
        }

        allResults.push(...pageResults);
-        logger.log(`Extracted ${pageResults.length} items from page ${currentPage}`);
+        logger.log(
+          `Extracted ${pageResults.length} items from page ${currentPage}`,
+        );

        // Find the "Next Page" button
        // Using partial match for src to be robust against path variations
@@ -58,35 +72,43 @@ export const ChdtpCrawler = {
        // For this specific site, we'll try to click.

        logger.log(`Navigating to page ${currentPage + 1}...`);
-        
+
        try {
          await Promise.all([
-            page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 60000 }),
+            page.waitForNavigation({
+              waitUntil: 'networkidle2',
+              timeout: 60000,
+            }),
            nextButton.click(),
          ]);
        } catch (navError) {
-          logger.error(`Navigation to page ${currentPage + 1} failed: ${navError.message}`);
+          const navErrorMessage =
+            navError instanceof Error ? navError.message : String(navError);
+          logger.error(
+            `Navigation to page ${currentPage + 1} failed: ${navErrorMessage}`,
+          );
          break;
        }

        currentPage++;
-        
+
        // Random delay between pages
        const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000;
-        await new Promise(resolve => setTimeout(resolve, delay));
+        await new Promise((resolve) => setTimeout(resolve, delay));
      }

      return allResults;
-
    } catch (error) {
-      logger.error(`Failed to crawl ${this.name}: ${error.message}`);
+      const errorMessage =
+        error instanceof Error ? error.message : String(error);
+      logger.error(`Failed to crawl ${this.name}: ${errorMessage}`);
      return allResults; // Return what we have so far
    } finally {
      await page.close();
    }
  },

-  extract(html: string): ChdtpResult[] {
+  extract(this: ChdtpCrawlerType, html: string): ChdtpResult[] {
    const results: ChdtpResult[] = [];
    /**
     * Regex groups for chdtp.com:
@@ -96,23 +118,24 @@ export const ChdtpCrawler = {
     * 4: Business Type
     * 5: Date
     */
-    const regex = /<tr[^>]*>\s*<td class="td_1">.*?<span[^>]*>\s*(.*?)\s*<\/span>.*?<\/td>\s*<td class="td_2">\s*<a[^>]*href="javascript:toGetContent\('(.*?)'\)" title="(.*?)">.*?<\/a><\/td>\s*<td class="td_3">\s*<a[^>]*>\s*(.*?)\s*<\/a>\s*<\/td>\s*<td class="td_4"><span>\[(.*?)\]<\/span><\/td>/gs;
+    const regex =
+      /<tr[^>]*>\s*<td class="td_1">.*?<span[^>]*>\s*(.*?)\s*<\/span>.*?<\/td>\s*<td class="td_2">\s*<a[^>]*href="javascript:toGetContent\('(.*?)'\)" title="(.*?)">.*?<\/a><\/td>\s*<td class="td_3">\s*<a[^>]*>\s*(.*?)\s*<\/a>\s*<\/td>\s*<td class="td_4"><span>\[(.*?)\]<\/span><\/td>/gs;

-    let match;
+    let match: RegExpExecArray | null;
    while ((match = regex.exec(html)) !== null) {
-      const urlSuffix = match[2]?.trim();
-      const title = match[3]?.trim();
-      const dateStr = match[5]?.trim();
+      const urlSuffix = match[2]?.trim() ?? '';
+      const title = match[3]?.trim() ?? '';
+      const dateStr = match[5]?.trim() ?? '';

      if (title && urlSuffix) {
        const fullUrl = this.baseUrl + urlSuffix;
        results.push({
          title,
          publishDate: dateStr ? new Date(dateStr) : new Date(),
-          url: fullUrl.replace(/\/\//g, '/')
+          url: fullUrl.replace(/\/\//g, '/'),
        });
      }
    }
    return results;
-  }
-};
+  },
+};