import { ChdtpCrawler } from './chdtp_target';
import * as puppeteer from 'puppeteer';

// Increase timeout to 60 seconds for network operations
jest.setTimeout(60000);

// Get the proxy configuration from environment variables
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;
  const proxyUsername = process.env.PROXY_USERNAME;
  const proxyPassword = process.env.PROXY_PASSWORD;

  if (proxyHost && proxyPort) {
    const args = [`--proxy-server=${proxyHost}:${proxyPort}`];
    if (proxyUsername && proxyPassword) {
      args.push(`--proxy-auth=${proxyUsername}:${proxyPassword}`);
    }
    return args;
  }
  return [];
};
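// Note: `--proxy-auth` does not appear to be a documented Chromium switch, so the
// credentials pushed above may be ignored by the browser. With Puppeteer, authenticated
// proxies are normally handled per page via page.authenticate(). The helper below is
// only an illustrative sketch (it is not called by this test, and assumes the crawler
// could be given access to the pages it opens).
const authenticateProxyPage = async (page: puppeteer.Page): Promise<void> => {
  const username = process.env.PROXY_USERNAME;
  const password = process.env.PROXY_PASSWORD;
  if (username && password) {
    // Answers the proxy's HTTP 407 challenge with the configured credentials.
    await page.authenticate({ username, password });
  }
};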
describe('ChdtpCrawler Real Site Test', () => {
  let browser: puppeteer.Browser;

  beforeAll(async () => {
    const proxyArgs = getProxyArgs();
    if (proxyArgs.length > 0) {
      console.log('Using proxy:', proxyArgs.join(' '));
    }

    browser = await puppeteer.launch({
      headless: true, // Change to false to see the browser UI
      args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
    });
  });

  afterAll(async () => {
    if (browser) {
      await browser.close();
    }
  });

  it('should visit the website and list all found bid information', async () => {
    console.log(`\nStarting crawl for: ${ChdtpCrawler.name}`);
    console.log(`Target URL: ${ChdtpCrawler.url}`);

    const results = await ChdtpCrawler.crawl(browser);

    console.log(`\nSuccessfully found ${results.length} items:\n`);
    console.log('----------------------------------------');
    results.forEach((item, index) => {
      console.log(`${index + 1}. [${item.publishDate.toLocaleDateString()}] ${item.title}`);
      console.log(`   Link: ${item.url}`);
      console.log('----------------------------------------');
    });

    // Basic assertions to ensure the crawler is working
    expect(results).toBeDefined();
    expect(Array.isArray(results)).toBeTruthy();

    // Warn but don't fail if site returns 0 items (could be empty or changed structure)
    if (results.length === 0) {
      console.warn('Warning: No items found. Check if the website structure has changed or if the list is currently empty.');
    } else {
      // Check data integrity of the first item
      const firstItem = results[0];
      expect(firstItem.title).toBeTruthy();
      expect(firstItem.url).toMatch(/^https?:\/\//);
      expect(firstItem.publishDate).toBeInstanceOf(Date);
    }
  });
});
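// For reference, the fields exercised above imply a result item shape roughly like the
// interface below. This is an assumption inferred from usage in this test; the actual
// type is whatever ./chdtp_target declares for the return value of ChdtpCrawler.crawl().
interface AssumedBidItem {
  title: string;       // human-readable announcement title
  url: string;         // absolute link to the detail page (matched against /^https?:\/\//)
  publishDate: Date;   // publication date parsed by the crawler
}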