2026-01-11 18:20:43 +08:00
|
|
|
|
import { ChngCrawler } from './chng_target';
|
|
|
|
|
|
import * as puppeteer from 'puppeteer';
|
|
|
|
|
|
|
|
|
|
|
|
// Increase timeout to 120 seconds for manual inspection and slow sites
|
|
|
|
|
|
// The limit applies to every test and before/after hook in this file, not just
// the crawl itself: the afterAll hook below deliberately sleeps for 50 s before
// closing the browser, so it needs the same generous budget.
jest.setTimeout(120000);
|
|
|
|
|
|
|
2026-01-11 22:34:38 +08:00
|
|
|
|
/**
 * Simulates human-like mouse activity by moving the pointer to a series of
 * random positions inside the page's viewport, pausing briefly after each move.
 *
 * Does nothing when the page reports no viewport (`page.viewport()` is null).
 *
 * All ranges are inclusive of their lower bound. Integer ranges (movement and
 * step counts) are also inclusive of their upper bound, matching the documented
 * intent of "5-10 movements" and "10-30 steps".
 *
 * @param page - Puppeteer page whose mouse is driven.
 * @param options - Optional tuning knobs; every field falls back to the
 *   original hard-coded value, so existing one-argument callers are unaffected.
 *   - `minMovements` / `maxMovements`: number of moves to perform (default 5-10).
 *   - `minSteps` / `maxSteps`: interpolation steps per move; more steps give a
 *     smoother path (default 10-30).
 *   - `minPauseMs` / `maxPauseMs`: pause after each move (default 100-500 ms).
 * @returns A promise that resolves once every move and pause has completed.
 */
async function simulateHumanMouseMovement(
  page: puppeteer.Page,
  options: {
    minMovements?: number;
    maxMovements?: number;
    minSteps?: number;
    maxSteps?: number;
    minPauseMs?: number;
    maxPauseMs?: number;
  } = {}
): Promise<void> {
  const {
    minMovements = 5,
    maxMovements = 10,
    minSteps = 10,
    maxSteps = 30,
    minPauseMs = 100,
    maxPauseMs = 500,
  } = options;

  const viewport = page.viewport();
  if (!viewport) return;

  // Uniform integer in [min, max], both ends inclusive. A max below min
  // collapses the range to `min` instead of producing values under it.
  const randomInt = (min: number, max: number): number =>
    min + Math.floor(Math.random() * (Math.max(min, max) - min + 1));

  const sleep = (ms: number): Promise<void> =>
    new Promise((resolve) => setTimeout(resolve, ms));

  const movements = randomInt(minMovements, maxMovements);

  for (let i = 0; i < movements; i++) {
    // Target a random pixel strictly inside the viewport.
    const x = Math.floor(Math.random() * viewport.width);
    const y = Math.floor(Math.random() * viewport.height);

    await page.mouse.move(x, y, { steps: randomInt(minSteps, maxSteps) });

    // Random pause between moves.
    await sleep(minPauseMs + Math.random() * Math.max(0, maxPauseMs - minPauseMs));
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Simulates human-like scrolling: scrolls the page down a random number of
 * times by random distances with a pause after each scroll, then smoothly
 * scrolls back to the top and waits for that final scroll to settle.
 *
 * Integer ranges (scroll count and distance) are inclusive of both ends,
 * matching the documented intent of "3-7 scrolls" and "100-500px".
 *
 * @param page - Puppeteer page to scroll; scrolling runs inside the page via
 *   `page.evaluate` using `window.scrollBy` / `window.scrollTo`.
 * @param options - Optional tuning knobs; every field falls back to the
 *   original hard-coded value, so existing one-argument callers are unaffected.
 *   - `minScrolls` / `maxScrolls`: number of downward scrolls (default 3-7).
 *   - `minDistancePx` / `maxDistancePx`: distance per scroll (default 100-500 px).
 *   - `minPauseMs` / `maxPauseMs`: pause after each scroll (default 500-1500 ms).
 *   - `settleMs`: wait after scrolling back to the top (default 1000 ms).
 * @returns A promise that resolves once the final settle wait has elapsed.
 */
async function simulateHumanScrolling(
  page: puppeteer.Page,
  options: {
    minScrolls?: number;
    maxScrolls?: number;
    minDistancePx?: number;
    maxDistancePx?: number;
    minPauseMs?: number;
    maxPauseMs?: number;
    settleMs?: number;
  } = {}
): Promise<void> {
  const {
    minScrolls = 3,
    maxScrolls = 7,
    minDistancePx = 100,
    maxDistancePx = 500,
    minPauseMs = 500,
    maxPauseMs = 1500,
    settleMs = 1000,
  } = options;

  // Uniform integer in [min, max], both ends inclusive. A max below min
  // collapses the range to `min` instead of producing values under it.
  const randomInt = (min: number, max: number): number =>
    min + Math.floor(Math.random() * (Math.max(min, max) - min + 1));

  const sleep = (ms: number): Promise<void> =>
    new Promise((resolve) => setTimeout(resolve, ms));

  const scrollCount = randomInt(minScrolls, maxScrolls);

  for (let i = 0; i < scrollCount; i++) {
    const scrollDistance = randomInt(minDistancePx, maxDistancePx);

    // Runs in the browser context; the distance is passed in as an argument.
    await page.evaluate((distance: number) => {
      window.scrollBy({
        top: distance,
        behavior: 'smooth',
      });
    }, scrollDistance);

    // Random pause between scrolls.
    await sleep(minPauseMs + Math.random() * Math.max(0, maxPauseMs - minPauseMs));
  }

  // Scroll back to the top of the page.
  await page.evaluate(() => {
    window.scrollTo({ top: 0, behavior: 'smooth' });
  });

  // Give the smooth scroll time to finish before the caller continues.
  await sleep(settleMs);
}
|
|
|
|
|
|
|
2026-01-11 18:20:43 +08:00
|
|
|
|
/**
 * Live end-to-end check of ChngCrawler against the real target site.
 *
 * The suite launches a visible (non-headless) browser, performs some
 * human-like mouse and scroll activity on a throwaway page, runs the crawler,
 * prints every item it found, and sanity-checks the shape of the first item.
 * An empty result only produces a warning, so the test still passes when the
 * site serves no items (for example behind a verification challenge).
 */
describe('ChngCrawler Real Site Test', () => {
  // How long the browser window is kept open after the run so the final page
  // state can be inspected by hand before it is closed.
  const INSPECTION_DELAY_MS = 50000;

  let browser: puppeteer.Browser;

  beforeAll(async () => {
    browser = await puppeteer.launch({
      headless: false, // Run in non-headless mode
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-blink-features=AutomationControlled',
        '--window-size=1920,1080',
        "--disable-infobars",
        // Flags below were tried and are intentionally left disabled:
        // "--headless=new",
        // '--disable-dev-shm-usage',
        // '--disable-accelerated-2d-canvas',
        // '--no-first-run',
        // '--no-zygote',
        // '--disable-gpu',
        // '--disable-features=VizDisplayCompositor',
        // '--disable-webgl',
        // '--disable-javascript',
      ],
      // null disables Puppeteer's viewport emulation so pages use the window size.
      defaultViewport: null
    });
  });

  afterAll(async () => {
    // The launch in beforeAll may have failed, leaving nothing to close.
    if (!browser) return;

    // Keep the window open for manual inspection before shutting down.
    await new Promise(r => setTimeout(r, INSPECTION_DELAY_MS));
    await browser.close();
  });

  it('should visit the website and list all found bid information', async () => {
    console.log(`\nStarting crawl for: ${ChngCrawler.name}`);
    console.log(`Target URL: ${ChngCrawler.url}`);

    // Create a temporary page used only for simulating human behavior.
    // NOTE(review): this page is never navigated anywhere, so the gestures run
    // on a blank page - confirm that this warm-up has the intended effect.
    const tempPage = await browser.newPage();
    try {
      await tempPage.setViewport({ width: 1920, height: 1080, deviceScaleFactor: 1 });

      // Simulate human mouse movement.
      console.log('Simulating human mouse movements...');
      await simulateHumanMouseMovement(tempPage);

      // Simulate human scrolling.
      console.log('Simulating human scrolling...');
      await simulateHumanScrolling(tempPage);
    } finally {
      // Always release the temporary page, even if a simulation step throws.
      await tempPage.close();
    }

    const results = await ChngCrawler.crawl(browser);

    console.log(`\nSuccessfully found ${results.length} items:\n`);
    console.log('----------------------------------------');
    results.forEach((item, index) => {
      console.log(`${index + 1}. [${item.publishDate.toLocaleDateString()}] ${item.title}`);
      console.log(` Link: ${item.url}`);
      console.log('----------------------------------------');
    });

    expect(results).toBeDefined();
    expect(Array.isArray(results)).toBeTruthy();

    if (results.length === 0) {
      console.warn('Warning: No items found. Observe the browser window to see if content is loading or if there is a verification challenge.');
    } else {
      // Spot-check the first item only: non-empty title, absolute http(s) URL,
      // and a real Date instance for the publish date.
      const firstItem = results[0];
      expect(firstItem.title).toBeTruthy();
      expect(firstItem.url).toMatch(/^https?:\/\//);
      expect(firstItem.publishDate).toBeInstanceOf(Date);
    }
  });
});
|