refactor: 重构前端代码,拆分组件并优化README文档
This commit is contained in:
@@ -11,11 +11,11 @@ async function simulateHumanMouseMovement(page: puppeteer.Page) {
|
||||
for (let i = 0; i < movements; i++) {
|
||||
const x = Math.floor(Math.random() * viewport.width);
|
||||
const y = Math.floor(Math.random() * viewport.height);
|
||||
|
||||
|
||||
await page.mouse.move(x, y, {
|
||||
steps: 10 + Math.floor(Math.random() * 20) // 10-30步,使移动更平滑
|
||||
});
|
||||
|
||||
|
||||
// 随机停顿 100-500ms
|
||||
await new Promise(r => setTimeout(r, 100 + Math.random() * 400));
|
||||
}
|
||||
@@ -27,7 +27,7 @@ async function simulateHumanScrolling(page: puppeteer.Page) {
|
||||
|
||||
for (let i = 0; i < scrollCount; i++) {
|
||||
const scrollDistance = 100 + Math.floor(Math.random() * 400); // 100-500px
|
||||
|
||||
|
||||
await page.evaluate((distance) => {
|
||||
window.scrollBy({
|
||||
top: distance,
|
||||
@@ -80,7 +80,7 @@ export const CdtCrawler = {
|
||||
// 模拟人类行为
|
||||
logger.log('Simulating human mouse movements...');
|
||||
await simulateHumanMouseMovement(page);
|
||||
|
||||
|
||||
logger.log('Simulating human scrolling...');
|
||||
await simulateHumanScrolling(page);
|
||||
|
||||
@@ -103,7 +103,7 @@ export const CdtCrawler = {
|
||||
// 模拟人类行为
|
||||
logger.log('Simulating human mouse movements...');
|
||||
await simulateHumanMouseMovement(page);
|
||||
|
||||
|
||||
logger.log('Simulating human scrolling...');
|
||||
await simulateHumanScrolling(page);
|
||||
|
||||
@@ -113,7 +113,7 @@ export const CdtCrawler = {
|
||||
const titles = Array.from(document.querySelectorAll('span.h-notice-title'));
|
||||
return titles.some(title => title.textContent && title.textContent.includes('招标公告'));
|
||||
}, { timeout: 30000 });
|
||||
|
||||
|
||||
await page.evaluate(() => {
|
||||
const titles = Array.from(document.querySelectorAll('span.h-notice-title'));
|
||||
const targetTitle = titles.find(title => title.textContent && title.textContent.includes('招标公告'));
|
||||
@@ -133,13 +133,22 @@ export const CdtCrawler = {
|
||||
// 模拟人类行为
|
||||
logger.log('Simulating human mouse movements...');
|
||||
await simulateHumanMouseMovement(page);
|
||||
|
||||
|
||||
logger.log('Simulating human scrolling...');
|
||||
await simulateHumanScrolling(page);
|
||||
|
||||
// 等待表格加载完成
|
||||
logger.log('Waiting for table to load...');
|
||||
await page.waitForSelector('table.layui-table', { timeout: 30000 });
|
||||
|
||||
while (currentPage <= maxPages) {
|
||||
// 等待表格数据加载
|
||||
await page.waitForSelector('tbody tr', { timeout: 10000 });
|
||||
|
||||
// 获取当前页面的 HTML 内容
|
||||
const content = await page.content();
|
||||
const pageResults = this.extract(content);
|
||||
|
||||
if (pageResults.length === 0) {
|
||||
logger.warn(`No results found on page ${currentPage}, stopping.`);
|
||||
break;
|
||||
@@ -151,43 +160,69 @@ export const CdtCrawler = {
|
||||
// 模拟人类行为 - 翻页前
|
||||
logger.log('Simulating human mouse movements before pagination...');
|
||||
await simulateHumanMouseMovement(page);
|
||||
|
||||
|
||||
logger.log('Simulating human scrolling before pagination...');
|
||||
await simulateHumanScrolling(page);
|
||||
|
||||
// Find the "Next Page" button - layui pagination
|
||||
// 查找下一页按钮
|
||||
const nextButtonSelector = 'a.layui-laypage-next:not(.layui-disabled)';
|
||||
const nextButton = await page.$(nextButtonSelector);
|
||||
const nextButtonExists = await page.evaluate((selector) => {
|
||||
const btn = document.querySelector(selector);
|
||||
return btn !== null && !btn.classList.contains('layui-disabled');
|
||||
}, nextButtonSelector);
|
||||
|
||||
if (!nextButton) {
|
||||
logger.log('Next page button not found. Reached end of list.');
|
||||
if (!nextButtonExists) {
|
||||
logger.log('Next page button not found or disabled. Reached end of list.');
|
||||
break;
|
||||
}
|
||||
|
||||
logger.log(`Navigating to page ${currentPage + 1}...`);
|
||||
|
||||
try {
|
||||
await Promise.all([
|
||||
page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 60000 }),
|
||||
nextButton.click(),
|
||||
]);
|
||||
// 点击下一页按钮
|
||||
await page.evaluate((selector) => {
|
||||
const btn = document.querySelector(selector) as HTMLElement;
|
||||
if (btn) btn.click();
|
||||
}, nextButtonSelector);
|
||||
|
||||
// 等待 AJAX 请求完成(通过监听网络请求)
|
||||
await page.waitForFunction(() => {
|
||||
// 检查表格是否正在加载
|
||||
const loading = document.querySelector('.layui-table-loading');
|
||||
return !loading;
|
||||
}, { timeout: 30000 }).catch(() => {});
|
||||
|
||||
// 额外等待确保数据加载完成
|
||||
await new Promise(r => setTimeout(r, 2000));
|
||||
|
||||
// 检查是否真的翻页了(通过检查当前页码)
|
||||
const currentActivePage = await page.evaluate(() => {
|
||||
const activeSpan = document.querySelector('.layui-laypage-curr em:last-child');
|
||||
return activeSpan ? parseInt(activeSpan.textContent || '1') : 1;
|
||||
});
|
||||
|
||||
if (currentActivePage <= currentPage) {
|
||||
logger.log('Page did not change, stopping.');
|
||||
break;
|
||||
}
|
||||
|
||||
currentPage++;
|
||||
|
||||
// 模拟人类行为 - 翻页后
|
||||
logger.log('Simulating human mouse movements after pagination...');
|
||||
await simulateHumanMouseMovement(page);
|
||||
|
||||
logger.log('Simulating human scrolling after pagination...');
|
||||
await simulateHumanScrolling(page);
|
||||
|
||||
// Random delay between pages
|
||||
const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000;
|
||||
await new Promise(resolve => setTimeout(resolve, delay));
|
||||
|
||||
} catch (navError) {
|
||||
logger.error(`Navigation to page ${currentPage + 1} failed: ${navError.message}`);
|
||||
break;
|
||||
}
|
||||
|
||||
currentPage++;
|
||||
|
||||
// 模拟人类行为 - 翻页后
|
||||
logger.log('Simulating human mouse movements after pagination...');
|
||||
await simulateHumanMouseMovement(page);
|
||||
|
||||
logger.log('Simulating human scrolling after pagination...');
|
||||
await simulateHumanScrolling(page);
|
||||
|
||||
// Random delay between pages
|
||||
const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000;
|
||||
await new Promise(resolve => setTimeout(resolve, delay));
|
||||
}
|
||||
|
||||
return allResults;
|
||||
|
||||
@@ -53,7 +53,7 @@ export interface EspicResult {
|
||||
}
|
||||
|
||||
export const EspicCrawler = {
|
||||
name: '电能e招采平台',
|
||||
name: '电能e招采平台(国电投)',
|
||||
baseUrl: 'https://ebid.espic.com.cn',
|
||||
|
||||
// 生成动态 URL,使用当前日期
|
||||
|
||||
@@ -14,8 +14,8 @@ export class BidCrawlTask {
|
||||
|
||||
@Cron(CronExpression.EVERY_DAY_AT_MIDNIGHT)
|
||||
async handleCron() {
|
||||
this.logger.debug('Scheduled crawl task started');
|
||||
await this.crawlerService.crawlAll();
|
||||
// this.logger.debug('Scheduled crawl task started');
|
||||
// await this.crawlerService.crawlAll();
|
||||
}
|
||||
|
||||
@Cron(CronExpression.EVERY_DAY_AT_MIDNIGHT)
|
||||
|
||||
Reference in New Issue
Block a user