// File-viewer metadata (not part of the program): 153 lines, 4.6 KiB, TypeScript
import { ChngCrawler } from './chng_target';
|
||
import * as puppeteer from 'puppeteer';
|
||
|
||
// Raise Jest's timeout for tests and hooks in this file to 120 seconds: the
// suite drives a real, non-headless browser against a live (possibly slow)
// site and leaves time for manual inspection of the browser window.
jest.setTimeout(120000);
|
||
|
||
/**
 * Builds the browser command-line switches for an optional proxy.
 *
 * Reads `PROXY_HOST`, `PROXY_PORT`, `PROXY_USERNAME` and `PROXY_PASSWORD`
 * from the environment.
 *
 * @returns An empty array unless both host and port are set; otherwise
 * `--proxy-server=<host>:<port>`, followed by
 * `--proxy-auth=<username>:<password>` when both credentials are set.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;
  const proxyUsername = process.env.PROXY_USERNAME;
  const proxyPassword = process.env.PROXY_PASSWORD;

  // A proxy is only configured when both the host and the port are present.
  if (proxyHost && proxyPort) {
    const args = [`--proxy-server=${proxyHost}:${proxyPort}`];
    if (proxyUsername && proxyPassword) {
      // NOTE(review): `--proxy-auth` does not appear among Chromium's
      // documented switches; proxy credentials are normally supplied through
      // Puppeteer's `page.authenticate()`. Confirm this flag has any effect.
      args.push(`--proxy-auth=${proxyUsername}:${proxyPassword}`);
    }
    return args;
  }
  return [];
};
|
||
|
||
/**
 * Simulates human-like mouse activity by moving the pointer to a series of
 * random points inside the page's viewport, pausing briefly after each move.
 *
 * Returns immediately when the page reports no viewport.
 *
 * @param page - Puppeteer page whose mouse is driven.
 */
async function simulateHumanMouseMovement(page: puppeteer.Page): Promise<void> {
  const viewport = page.viewport();
  if (!viewport) return;

  // 5-9 random moves.
  const movements = 5 + Math.floor(Math.random() * 5);

  for (let i = 0; i < movements; i++) {
    const x = Math.floor(Math.random() * viewport.width);
    const y = Math.floor(Math.random() * viewport.height);

    await page.mouse.move(x, y, {
      // 10-29 intermediate steps so the pointer glides rather than jumps.
      steps: 10 + Math.floor(Math.random() * 20),
    });

    // Random pause of 100-500 ms before the next move.
    await new Promise((r) => setTimeout(r, 100 + Math.random() * 400));
  }
}
|
||
|
||
/**
 * Simulates human-like scrolling: scrolls the page down a few times by random
 * distances with random pauses, then scrolls back to the top.
 *
 * @param page - Puppeteer page in which the scrolling is performed.
 */
async function simulateHumanScrolling(page: puppeteer.Page): Promise<void> {
  // 3-7 scroll gestures.
  const scrollCount = 3 + Math.floor(Math.random() * 5);

  for (let i = 0; i < scrollCount; i++) {
    // 100-499 px per gesture.
    const scrollDistance = 100 + Math.floor(Math.random() * 400);

    // The callback runs inside the page, so the distance is passed as an argument.
    await page.evaluate((distance) => {
      window.scrollBy({
        top: distance,
        behavior: 'smooth',
      });
    }, scrollDistance);

    // Random pause of 500-1500 ms between gestures.
    await new Promise((r) => setTimeout(r, 500 + Math.random() * 1000));
  }

  // Scroll back to the top of the page, then wait one second.
  await page.evaluate(() => {
    window.scrollTo({ top: 0, behavior: 'smooth' });
  });
  await new Promise((r) => setTimeout(r, 1000));
}
|
||
|
||
// Live-site test: launches a visible browser, runs ChngCrawler against its
// real target URL and prints every item it finds.
describe('ChngCrawler Real Site Test', () => {
  let browser: puppeteer.Browser;

  beforeAll(async () => {
    const proxyArgs = getProxyArgs();
    if (proxyArgs.length > 0) {
      // Never print proxy credentials: mask the value of the auth switch.
      const printableArgs = proxyArgs.map((arg) =>
        arg.startsWith('--proxy-auth=') ? '--proxy-auth=<redacted>' : arg,
      );
      console.log('Using proxy:', printableArgs.join(' '));
    }

    browser = await puppeteer.launch({
      headless: false, // Run in non-headless mode
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-blink-features=AutomationControlled',
        '--window-size=1920,1080',
        '--disable-infobars',
        ...proxyArgs,
        // "--headless=new",
        // '--disable-dev-shm-usage',
        // '--disable-accelerated-2d-canvas',
        // '--no-first-run',
        // '--no-zygote',
        // '--disable-gpu',
        // '--disable-features=VizDisplayCompositor',
        // '--disable-webgl',
        // '--disable-javascript',
      ],
      defaultViewport: null,
    });
  });

  afterAll(async () => {
    // `browser` stays unset when the launch in beforeAll failed.
    if (browser) {
      // Keep the window open for 50 seconds after the test so the result can
      // be inspected manually.
      await new Promise((r) => setTimeout(r, 50000));
      await browser.close();
    }
  });

  it('should visit the website and list all found bid information', async () => {
    console.log(`\nStarting crawl for: ${ChngCrawler.name}`);
    console.log(`Target URL: ${ChngCrawler.url}`);

    // Use a temporary page to simulate human behaviour before crawling.
    const tempPage = await browser.newPage();
    try {
      await tempPage.setViewport({ width: 1920, height: 1080, deviceScaleFactor: 1 });

      // Simulate human mouse movement.
      console.log('Simulating human mouse movements...');
      await simulateHumanMouseMovement(tempPage);

      // Simulate human scrolling.
      console.log('Simulating human scrolling...');
      await simulateHumanScrolling(tempPage);
    } finally {
      // Close the temporary page even if a simulation step throws.
      await tempPage.close();
    }

    const results = await ChngCrawler.crawl(browser);

    console.log(`\nSuccessfully found ${results.length} items:\n`);
    console.log('----------------------------------------');
    results.forEach((item, index) => {
      console.log(`${index + 1}. [${item.publishDate.toLocaleDateString()}] ${item.title}`);
      console.log(` Link: ${item.url}`);
      console.log('----------------------------------------');
    });

    expect(results).toBeDefined();
    expect(Array.isArray(results)).toBeTruthy();

    if (results.length === 0) {
      // An empty result only warns; it does not fail the test.
      console.warn('Warning: No items found. Observe the browser window to see if content is loading or if there is a verification challenge.');
    } else {
      // Spot-check the shape of the first item.
      const firstItem = results[0];
      expect(firstItem.title).toBeTruthy();
      expect(firstItem.url).toMatch(/^https?:\/\//);
      expect(firstItem.publishDate).toBeInstanceOf(Date);
    }
  });
});