Files
bidding_watcher/src/crawler/services/chng_target.spec.ts
dmy b1435523e8 feat: 为爬虫测试添加代理支持并通过环境变量配置
添加dotenv依赖,创建jest配置文件和setup文件
修改所有爬虫测试文件以支持通过环境变量配置代理
将jest配置从package.json移动到独立文件
2026-01-12 15:19:54 +08:00

153 lines
4.6 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { ChngCrawler } from './chng_target';
import * as puppeteer from 'puppeteer';
// Increase timeout to 120 seconds for manual inspection and slow sites
jest.setTimeout(120000);
// 获取代理配置
const getProxyArgs = (): string[] => {
const proxyHost = process.env.PROXY_HOST;
const proxyPort = process.env.PROXY_PORT;
const proxyUsername = process.env.PROXY_USERNAME;
const proxyPassword = process.env.PROXY_PASSWORD;
if (proxyHost && proxyPort) {
const args = [`--proxy-server=${proxyHost}:${proxyPort}`];
if (proxyUsername && proxyPassword) {
args.push(`--proxy-auth=${proxyUsername}:${proxyPassword}`);
}
return args;
}
return [];
};
// 模拟人类鼠标移动
async function simulateHumanMouseMovement(page: puppeteer.Page) {
const viewport = page.viewport();
if (!viewport) return;
const movements = 5 + Math.floor(Math.random() * 5); // 5-10次随机移动
for (let i = 0; i < movements; i++) {
const x = Math.floor(Math.random() * viewport.width);
const y = Math.floor(Math.random() * viewport.height);
await page.mouse.move(x, y, {
steps: 10 + Math.floor(Math.random() * 20) // 10-30步使移动更平滑
});
// 随机停顿 100-500ms
await new Promise(r => setTimeout(r, 100 + Math.random() * 400));
}
}
// 模拟人类滚动
async function simulateHumanScrolling(page: puppeteer.Page) {
const scrollCount = 3 + Math.floor(Math.random() * 5); // 3-7次滚动
for (let i = 0; i < scrollCount; i++) {
const scrollDistance = 100 + Math.floor(Math.random() * 400); // 100-500px
await page.evaluate((distance) => {
window.scrollBy({
top: distance,
behavior: 'smooth'
});
}, scrollDistance);
// 随机停顿 500-1500ms
await new Promise(r => setTimeout(r, 500 + Math.random() * 1000));
}
// 滚动回顶部
await page.evaluate(() => {
window.scrollTo({ top: 0, behavior: 'smooth' });
});
await new Promise(r => setTimeout(r, 1000));
}
describe('ChngCrawler Real Site Test', () => {
let browser: puppeteer.Browser;
beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({
headless: false, // Run in non-headless mode
args: [
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-blink-features=AutomationControlled',
'--window-size=1920,1080',
"--disable-infobars",
...proxyArgs,
// "--headless=new",
// '--disable-dev-shm-usage',
// '--disable-accelerated-2d-canvas',
// '--no-first-run',
// '--no-zygote',
// '--disable-gpu',
// '--disable-features=VizDisplayCompositor',
// '--disable-webgl',
// '--disable-javascript',
],
defaultViewport: null
});
});
afterAll(async () => {
if (browser) {
// Keep open for a few seconds after test to see result
await new Promise(r => setTimeout(r, 50000));
await browser.close();
}
});
it('should visit the website and list all found bid information', async () => {
console.log(`
Starting crawl for: ${ChngCrawler.name}`);
console.log(`Target URL: ${ChngCrawler.url}`);
// 创建一个临时页面用于模拟人类行为
const tempPage = await browser.newPage();
await tempPage.setViewport({ width: 1920, height: 1080, deviceScaleFactor: 1 });
// 模拟人类鼠标移动
console.log('Simulating human mouse movements...');
await simulateHumanMouseMovement(tempPage);
// 模拟人类滚动
console.log('Simulating human scrolling...');
await simulateHumanScrolling(tempPage);
await tempPage.close();
const results = await ChngCrawler.crawl(browser);
console.log(`
Successfully found ${results.length} items:
`);
console.log('----------------------------------------');
results.forEach((item, index) => {
console.log(`${index + 1}. [${item.publishDate.toLocaleDateString()}] ${item.title}`);
console.log(` Link: ${item.url}`);
console.log('----------------------------------------');
});
expect(results).toBeDefined();
expect(Array.isArray(results)).toBeTruthy();
if (results.length === 0) {
console.warn('Warning: No items found. Observe the browser window to see if content is loading or if there is a verification challenge.');
} else {
const firstItem = results[0];
expect(firstItem.title).toBeTruthy();
expect(firstItem.url).toMatch(/^https?:\/\//);
expect(firstItem.publishDate).toBeInstanceOf(Date);
}
});
});