import { CeicCrawler } from './ceic_target';
import * as puppeteer from 'puppeteer';

// Increase timeout to 120 seconds for manual inspection and slow sites
jest.setTimeout(120000);

/**
 * How long (in milliseconds) the browser window is left open after the suite
 * finishes, so the final page state can be inspected by hand. Keep this below
 * the Jest timeout configured above, otherwise the teardown hook times out.
 */
const KEEP_BROWSER_OPEN_MS = 50000;

/** Prefix of the launch flag that carries the proxy credentials. */
const PROXY_AUTH_FLAG = '--proxy-auth=';

/**
 * Get proxy configuration.
 *
 * Reads PROXY_HOST, PROXY_PORT, PROXY_USERNAME and PROXY_PASSWORD from the
 * environment and turns them into browser launch flags.
 *
 * @returns `--proxy-server=host:port`, followed by `--proxy-auth=user:pass`
 *   when both credentials are set; an empty array when host or port is missing.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;
  const proxyUsername = process.env.PROXY_USERNAME;
  const proxyPassword = process.env.PROXY_PASSWORD;

  if (!proxyHost || !proxyPort) {
    return [];
  }

  const args = [`--proxy-server=${proxyHost}:${proxyPort}`];
  if (proxyUsername && proxyPassword) {
    // NOTE(review): `--proxy-auth` does not appear to be a switch that stock
    // Chromium recognises, so these credentials are probably ignored. The
    // documented Puppeteer route is `page.authenticate()`. Pages are created
    // inside the crawler, so the flag is left in place here - confirm and
    // migrate.
    args.push(`${PROXY_AUTH_FLAG}${proxyUsername}:${proxyPassword}`);
  }
  return args;
};

/**
 * Renders launch flags for logging with the proxy credentials masked, so the
 * username and password never end up in test output or CI logs.
 *
 * @param args Launch flags as produced by `getProxyArgs`.
 * @returns The flags joined by a single space, with any `--proxy-auth=` value
 *   replaced by `***`.
 */
const describeProxyArgs = (args: readonly string[]): string =>
  args
    .map((arg) => (arg.startsWith(PROXY_AUTH_FLAG) ? `${PROXY_AUTH_FLAG}***` : arg))
    .join(' ');

/**
 * Manual smoke test: drives a visible browser against the real target site
 * and prints every item the crawler returns.
 */
describe('CeicCrawler Real Site Test', () => {
  let browser: puppeteer.Browser;

  beforeAll(async () => {
    const proxyArgs = getProxyArgs();
    if (proxyArgs.length > 0) {
      // Log the redacted form only; the raw flags contain the proxy password.
      console.log('Using proxy:', describeProxyArgs(proxyArgs));
    }

    browser = await puppeteer.launch({
      headless: false, // Run in non-headless mode
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-blink-features=AutomationControlled',
        '--window-size=1920,1080',
        '--disable-infobars',
        ...proxyArgs,
      ],
      defaultViewport: null,
    });
  });

  afterAll(async () => {
    // `browser` stays unassigned when the launch in beforeAll failed.
    if (browser) {
      // Keep the window open after the test so the result can be inspected
      await new Promise((resolve) => setTimeout(resolve, KEEP_BROWSER_OPEN_MS));
      await browser.close();
    }
  });

  it('should visit website and list all found bid information', async () => {
    console.log(` Starting crawl for: ${CeicCrawler.name}`);
    console.log(`Target URL: ${CeicCrawler.url}`);

    const results = await CeicCrawler.crawl(browser);

    // Check the shape before touching `results`, so a bad return value fails
    // with an assertion message instead of a TypeError from the logging below.
    expect(results).toBeDefined();
    expect(Array.isArray(results)).toBeTruthy();

    console.log(` Successfully found ${results.length} items: `);
    console.log('----------------------------------------');
    results.forEach((item, index) => {
      console.log(
        `${index + 1}. \n[${item.publishDate.toLocaleDateString()}] ${item.title}`,
      );
      console.log(` Link: ${item.url}`);
      console.log('----------------------------------------');
    });

    if (results.length === 0) {
      // An empty result is reported but does not fail the test: the live site
      // may be showing a verification challenge rather than content.
      console.warn(
        'Warning: No items found. Observe browser window to see if content is loading or if there is a verification challenge.',
      );
    } else {
      const firstItem = results[0];
      expect(firstItem.title).toBeTruthy();
      expect(firstItem.url).toMatch(/^https?:\/\//);
      expect(firstItem.publishDate).toBeInstanceOf(Date);
    }
  });
});