在.gitignore中添加对*.png、*.log、*-lock.json、*.woff2文件的忽略规则,并新增OFL.txt文件。同时,添加vue.svg图标文件以支持前端展示。更新多个TypeScript文件以优化代码格式和增强可读性。
89 lines
2.6 KiB
TypeScript
89 lines
2.6 KiB
TypeScript
import { CeicCrawler } from './ceic_target';
|
|
import * as puppeteer from 'puppeteer';
|
|
|
|
// Raise Jest's timeout to 120 seconds: the crawl hits a real, possibly slow
// site and the run is meant to be watched by hand in a visible browser window.
jest.setTimeout(120000);
|
|
|
|
/**
 * Builds the browser command-line switches for the proxy configured through
 * environment variables.
 *
 * Reads `PROXY_HOST`, `PROXY_PORT`, `PROXY_USERNAME` and `PROXY_PASSWORD`.
 *
 * @returns An empty array when host or port is missing or empty; otherwise
 *   `--proxy-server=<host>:<port>`, followed by
 *   `--proxy-auth=<username>:<password>` when both credentials are set.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;
  const proxyUsername = process.env.PROXY_USERNAME;
  const proxyPassword = process.env.PROXY_PASSWORD;

  // A proxy is only usable when both the host and the port are known.
  if (!proxyHost || !proxyPort) {
    return [];
  }

  const args = [`--proxy-server=${proxyHost}:${proxyPort}`];

  // NOTE(review): confirm that the launched browser actually honours
  // `--proxy-auth`; Puppeteer documents `page.authenticate()` as the way to
  // supply proxy credentials. The switch is kept here to preserve behaviour.
  if (proxyUsername && proxyPassword) {
    args.push(`--proxy-auth=${proxyUsername}:${proxyPassword}`);
  }

  return args;
};
|
|
|
|
/**
 * Live test against the real site, intended for manual inspection: it opens a
 * visible browser window, runs `CeicCrawler.crawl` and prints every returned
 * item. An empty result only produces a warning, not a failure.
 */
describe('CeicCrawler Real Site Test', () => {
  let browser: puppeteer.Browser;

  beforeAll(async () => {
    const proxyArgs = getProxyArgs();
    if (proxyArgs.length > 0) {
      // Mask the credential switch so the proxy username/password never end
      // up in plain text in the test output.
      const printableArgs = proxyArgs.map((arg) =>
        arg.startsWith('--proxy-auth=') ? '--proxy-auth=***' : arg,
      );
      console.log('Using proxy:', printableArgs.join(' '));
    }

    browser = await puppeteer.launch({
      headless: false, // Run in non-headless mode so the crawl can be watched
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-blink-features=AutomationControlled',
        '--window-size=1920,1080',
        '--disable-infobars',
        ...proxyArgs,
      ],
      defaultViewport: null,
    });
  });

  afterAll(async () => {
    if (browser) {
      // Keep the window open for 50 seconds after the test so the final page
      // state can be inspected before the browser is closed.
      await new Promise((r) => setTimeout(r, 50000));
      await browser.close();
    }
  });

  it('should visit website and list all found bid information', async () => {
    console.log(`\nStarting crawl for: ${CeicCrawler.name}`);
    console.log(`Target URL: ${CeicCrawler.url}`);

    const results = await CeicCrawler.crawl(browser);

    console.log(`\nSuccessfully found ${results.length} items:\n`);
    console.log('----------------------------------------');
    results.forEach((item, index) => {
      console.log(
        `${index + 1}. [${item.publishDate.toLocaleDateString()}] ${item.title}`,
      );
      console.log(` Link: ${item.url}`);
      console.log('----------------------------------------');
    });

    expect(results).toBeDefined();
    expect(Array.isArray(results)).toBeTruthy();

    if (results.length === 0) {
      // Deliberately a warning rather than a failure: an empty list may mean
      // the page is still loading or is showing a verification challenge.
      console.warn(
        'Warning: No items found. Observe browser window to see if content is loading or if there is a verification challenge.',
      );
    } else {
      // Spot-check the shape of the first item only.
      const firstItem = results[0];
      expect(firstItem.title).toBeTruthy();
      expect(firstItem.url).toMatch(/^https?:\/\//);
      expect(firstItem.publishDate).toBeInstanceOf(Date);
    }
  });
});
|