第一次提交
This commit is contained in:
51
src/crawler/services/chdtp_target.spec.ts
Normal file
51
src/crawler/services/chdtp_target.spec.ts
Normal file
@@ -0,0 +1,51 @@
|
||||
import { ChdtpCrawler } from './chdtp_target';
import * as puppeteer from 'puppeteer';

// Increase timeout to 60 seconds for network operations
jest.setTimeout(60000);

/**
 * Live integration test for ChdtpCrawler.
 *
 * Launches a headless browser, runs the crawler against the real target
 * site, prints every item that was found, and performs sanity checks on the
 * result. An empty result only produces a warning, because the remote list
 * may legitimately be empty or the page structure may have changed.
 */
describe('ChdtpCrawler Real Site Test', () => {
  let browser: puppeteer.Browser;

  beforeAll(async () => {
    browser = await puppeteer.launch({
      headless: true, // Change to false to see the browser UI
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });
  });

  afterAll(async () => {
    // `browser` is only assigned once launch() resolves, so it can still be
    // unset here if beforeAll failed.
    if (browser) {
      await browser.close();
    }
  });

  it('should visit the website and list all found bid information', async () => {
    console.log(`\nStarting crawl for: ${ChdtpCrawler.name}`);
    console.log(`Target URL: ${ChdtpCrawler.url}`);

    const results = await ChdtpCrawler.crawl(browser);

    // Validate the shape of the result BEFORE dereferencing it; otherwise a
    // bad return value surfaces as a TypeError from `.length` / `.forEach`
    // instead of a readable assertion failure.
    expect(results).toBeDefined();
    expect(Array.isArray(results)).toBe(true);

    console.log(`\nSuccessfully found ${results.length} items:\n`);
    console.log('----------------------------------------');
    results.forEach((item, index) => {
      console.log(`${index + 1}. [${item.publishDate.toLocaleDateString()}] ${item.title}`);
      console.log(` Link: ${item.url}`);
      console.log('----------------------------------------');
    });

    // Warn but don't fail if site returns 0 items (could be empty or changed structure)
    if (results.length === 0) {
      console.warn('Warning: No items found. Check if the website structure has changed or if the list is currently empty.');
    } else {
      // Check data integrity of the first item
      const firstItem = results[0];
      expect(firstItem.title).toBeTruthy();
      expect(firstItem.url).toMatch(/^https?:\/\//);
      expect(firstItem.publishDate).toBeInstanceOf(Date);
      // An Invalid Date is still a Date instance, so also require a real timestamp.
      expect(Number.isNaN(firstItem.publishDate.getTime())).toBe(false);
    }
  });
});
|
||||
Reference in New Issue
Block a user