refactor: 重构前端代码,拆分组件并优化 README 文档

This commit is contained in:
dmy
2026-01-12 14:37:18 +08:00
parent 8b2f328981
commit 4f37b0fb61
9 changed files with 660 additions and 477 deletions

View File

@@ -11,11 +11,11 @@ async function simulateHumanMouseMovement(page: puppeteer.Page) {
for (let i = 0; i < movements; i++) {
const x = Math.floor(Math.random() * viewport.width);
const y = Math.floor(Math.random() * viewport.height);
await page.mouse.move(x, y, {
steps: 10 + Math.floor(Math.random() * 20) // 10-30步使移动更平滑
});
// 随机停顿 100-500ms
await new Promise(r => setTimeout(r, 100 + Math.random() * 400));
}
@@ -27,7 +27,7 @@ async function simulateHumanScrolling(page: puppeteer.Page) {
for (let i = 0; i < scrollCount; i++) {
const scrollDistance = 100 + Math.floor(Math.random() * 400); // 100-500px
await page.evaluate((distance) => {
window.scrollBy({
top: distance,
@@ -80,7 +80,7 @@ export const CdtCrawler = {
// 模拟人类行为
logger.log('Simulating human mouse movements...');
await simulateHumanMouseMovement(page);
logger.log('Simulating human scrolling...');
await simulateHumanScrolling(page);
@@ -103,7 +103,7 @@ export const CdtCrawler = {
// 模拟人类行为
logger.log('Simulating human mouse movements...');
await simulateHumanMouseMovement(page);
logger.log('Simulating human scrolling...');
await simulateHumanScrolling(page);
@@ -113,7 +113,7 @@ export const CdtCrawler = {
const titles = Array.from(document.querySelectorAll('span.h-notice-title'));
return titles.some(title => title.textContent && title.textContent.includes('招标公告'));
}, { timeout: 30000 });
await page.evaluate(() => {
const titles = Array.from(document.querySelectorAll('span.h-notice-title'));
const targetTitle = titles.find(title => title.textContent && title.textContent.includes('招标公告'));
@@ -133,13 +133,22 @@ export const CdtCrawler = {
// 模拟人类行为
logger.log('Simulating human mouse movements...');
await simulateHumanMouseMovement(page);
logger.log('Simulating human scrolling...');
await simulateHumanScrolling(page);
// 等待表格加载完成
logger.log('Waiting for table to load...');
await page.waitForSelector('table.layui-table', { timeout: 30000 });
while (currentPage <= maxPages) {
// 等待表格数据加载
await page.waitForSelector('tbody tr', { timeout: 10000 });
// 获取当前页面的 HTML 内容
const content = await page.content();
const pageResults = this.extract(content);
if (pageResults.length === 0) {
logger.warn(`No results found on page ${currentPage}, stopping.`);
break;
@@ -151,43 +160,69 @@ export const CdtCrawler = {
// 模拟人类行为 - 翻页前
logger.log('Simulating human mouse movements before pagination...');
await simulateHumanMouseMovement(page);
logger.log('Simulating human scrolling before pagination...');
await simulateHumanScrolling(page);
// Find the "Next Page" button - layui pagination
// 查找下一页按钮
const nextButtonSelector = 'a.layui-laypage-next:not(.layui-disabled)';
const nextButton = await page.$(nextButtonSelector);
const nextButtonExists = await page.evaluate((selector) => {
const btn = document.querySelector(selector);
return btn !== null && !btn.classList.contains('layui-disabled');
}, nextButtonSelector);
if (!nextButton) {
logger.log('Next page button not found. Reached end of list.');
if (!nextButtonExists) {
logger.log('Next page button not found or disabled. Reached end of list.');
break;
}
logger.log(`Navigating to page ${currentPage + 1}...`);
try {
await Promise.all([
page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 60000 }),
nextButton.click(),
]);
// 点击下一页按钮
await page.evaluate((selector) => {
const btn = document.querySelector(selector) as HTMLElement;
if (btn) btn.click();
}, nextButtonSelector);
// 等待 AJAX 请求完成(通过监听网络请求)
await page.waitForFunction(() => {
// 检查表格是否正在加载
const loading = document.querySelector('.layui-table-loading');
return !loading;
}, { timeout: 30000 }).catch(() => {});
// 额外等待确保数据加载完成
await new Promise(r => setTimeout(r, 2000));
// 检查是否真的翻页了(通过检查当前页码)
const currentActivePage = await page.evaluate(() => {
const activeSpan = document.querySelector('.layui-laypage-curr em:last-child');
return activeSpan ? parseInt(activeSpan.textContent || '1') : 1;
});
if (currentActivePage <= currentPage) {
logger.log('Page did not change, stopping.');
break;
}
currentPage++;
// 模拟人类行为 - 翻页后
logger.log('Simulating human mouse movements after pagination...');
await simulateHumanMouseMovement(page);
logger.log('Simulating human scrolling after pagination...');
await simulateHumanScrolling(page);
// Random delay between pages
const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000;
await new Promise(resolve => setTimeout(resolve, delay));
} catch (navError) {
logger.error(`Navigation to page ${currentPage + 1} failed: ${navError.message}`);
break;
}
currentPage++;
// 模拟人类行为 - 翻页后
logger.log('Simulating human mouse movements after pagination...');
await simulateHumanMouseMovement(page);
logger.log('Simulating human scrolling after pagination...');
await simulateHumanScrolling(page);
// Random delay between pages
const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000;
await new Promise(resolve => setTimeout(resolve, delay));
}
return allResults;

View File

@@ -53,7 +53,7 @@ export interface EspicResult {
}
export const EspicCrawler = {
name: '电能e招采平台',
name: '电能e招采平台(国电投)',
baseUrl: 'https://ebid.espic.com.cn',
// 生成动态 URL,使用当前日期