feat: 添加代理隧道连接失败的重试机制
refactor(crawler): 在各爬虫服务中实现代理错误重试逻辑
feat(uni-app): 新增投标项目查看器的uni-app版本
This commit is contained in:
@@ -46,6 +46,56 @@ async function simulateHumanScrolling(page: puppeteer.Page) {
|
||||
await new Promise((r) => setTimeout(r, 1000));
|
||||
}
|
||||
|
||||
/**
 * Reports whether `error` is a proxy tunnel connection failure.
 *
 * Only `Error` instances are inspected. A match is any message containing
 * `ERR_TUNNEL_CONNECTION_FAILED`, which also covers the `net::`-prefixed form.
 */
function isTunnelConnectionFailedError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }
  return error.message.includes('ERR_TUNNEL_CONNECTION_FAILED');
}
|
||||
|
||||
/**
 * Runs `operation`, retrying only when it fails with a proxy tunnel
 * connection error (as decided by `isTunnelConnectionFailedError`).
 *
 * Any other error is rethrown immediately. The wait before the next attempt
 * grows linearly: `delayMs * attempt` milliseconds after the attempt-th failure.
 *
 * @param operation - Async action to run.
 * @param maxRetries - Total number of attempts, including the first one.
 *   Fractions are rounded down; values below 1 (or NaN) are treated as 1 so
 *   the operation always runs at least once.
 * @param delayMs - Base delay in milliseconds.
 * @param logger - Optional logger; receives a warning before each retry and
 *   an error once the attempts are exhausted.
 * @throws The error raised by `operation`: immediately when it is not a
 *   tunnel failure, otherwise after the final attempt.
 */
async function delayRetry(
  operation: () => Promise<void>,
  maxRetries: number = 3,
  delayMs: number = 5000,
  logger?: Logger,
): Promise<void> {
  // Guarantee one attempt; previously maxRetries < 1 skipped the operation
  // entirely and ended up throwing `undefined`.
  const totalAttempts = maxRetries >= 1 ? Math.floor(maxRetries) : 1;

  for (let attempt = 1; ; attempt++) {
    try {
      await operation();
      return;
    } catch (error) {
      // Non-proxy errors are not retried.
      if (!isTunnelConnectionFailedError(error)) {
        throw error;
      }

      if (attempt >= totalAttempts) {
        logger?.error(
          `代理隧道连接失败,已达到最大重试次数 ${totalAttempts} 次`,
        );
        throw error;
      }

      // Linear back-off: wait longer after each failed attempt.
      const delay = delayMs * attempt;
      logger?.warn(
        `代理隧道连接失败,第 ${attempt} 次尝试失败,${delay / 1000} 秒后重试...`,
      );
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
}
|
||||
|
||||
export interface CdtResult {
|
||||
title: string;
|
||||
publishDate: Date;
|
||||
@@ -87,7 +137,14 @@ export const CdtCrawler = {
|
||||
|
||||
try {
|
||||
logger.log(`Navigating to ${this.url}...`);
|
||||
await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
await delayRetry(
|
||||
async () => {
|
||||
await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
},
|
||||
3,
|
||||
5000,
|
||||
logger,
|
||||
);
|
||||
|
||||
// 模拟人类行为
|
||||
logger.log('Simulating human mouse movements...');
|
||||
|
||||
@@ -47,6 +47,56 @@ async function simulateHumanScrolling(page: puppeteer.Page) {
|
||||
await new Promise((r) => setTimeout(r, 1000));
|
||||
}
|
||||
|
||||
/**
 * Reports whether `error` is a proxy tunnel connection failure.
 *
 * Only `Error` instances are inspected. A match is any message containing
 * `ERR_TUNNEL_CONNECTION_FAILED`, which also covers the `net::`-prefixed form.
 */
function isTunnelConnectionFailedError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }
  return error.message.includes('ERR_TUNNEL_CONNECTION_FAILED');
}
|
||||
|
||||
/**
 * Runs `operation`, retrying only when it fails with a proxy tunnel
 * connection error (as decided by `isTunnelConnectionFailedError`).
 *
 * Any other error is rethrown immediately. The wait before the next attempt
 * grows linearly: `delayMs * attempt` milliseconds after the attempt-th failure.
 *
 * @param operation - Async action to run.
 * @param maxRetries - Total number of attempts, including the first one.
 *   Fractions are rounded down; values below 1 (or NaN) are treated as 1 so
 *   the operation always runs at least once.
 * @param delayMs - Base delay in milliseconds.
 * @param logger - Optional logger; receives a warning before each retry and
 *   an error once the attempts are exhausted.
 * @throws The error raised by `operation`: immediately when it is not a
 *   tunnel failure, otherwise after the final attempt.
 */
async function delayRetry(
  operation: () => Promise<void>,
  maxRetries: number = 3,
  delayMs: number = 5000,
  logger?: Logger,
): Promise<void> {
  // Guarantee one attempt; previously maxRetries < 1 skipped the operation
  // entirely and ended up throwing `undefined`.
  const totalAttempts = maxRetries >= 1 ? Math.floor(maxRetries) : 1;

  for (let attempt = 1; ; attempt++) {
    try {
      await operation();
      return;
    } catch (error) {
      // Non-proxy errors are not retried.
      if (!isTunnelConnectionFailedError(error)) {
        throw error;
      }

      if (attempt >= totalAttempts) {
        logger?.error(
          `代理隧道连接失败,已达到最大重试次数 ${totalAttempts} 次`,
        );
        throw error;
      }

      // Linear back-off: wait longer after each failed attempt.
      const delay = delayMs * attempt;
      logger?.warn(
        `代理隧道连接失败,第 ${attempt} 次尝试失败,${delay / 1000} 秒后重试...`,
      );
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
}
|
||||
|
||||
interface CeicCrawlerType {
|
||||
name: string;
|
||||
url: string;
|
||||
@@ -90,7 +140,14 @@ export const CeicCrawler = {
|
||||
|
||||
try {
|
||||
logger.log(`Navigating to ${this.url}...`);
|
||||
await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
await delayRetry(
|
||||
async () => {
|
||||
await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
},
|
||||
3,
|
||||
5000,
|
||||
logger,
|
||||
);
|
||||
|
||||
// 模拟人类行为
|
||||
logger.log('Simulating human mouse movements...');
|
||||
|
||||
@@ -46,6 +46,56 @@ async function simulateHumanScrolling(page: puppeteer.Page) {
|
||||
await new Promise((r) => setTimeout(r, 1000));
|
||||
}
|
||||
|
||||
/**
 * Reports whether `error` is a proxy tunnel connection failure.
 *
 * Only `Error` instances are inspected. A match is any message containing
 * `ERR_TUNNEL_CONNECTION_FAILED`, which also covers the `net::`-prefixed form.
 */
function isTunnelConnectionFailedError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }
  return error.message.includes('ERR_TUNNEL_CONNECTION_FAILED');
}
|
||||
|
||||
/**
 * Runs `operation`, retrying only when it fails with a proxy tunnel
 * connection error (as decided by `isTunnelConnectionFailedError`).
 *
 * Any other error is rethrown immediately. The wait before the next attempt
 * grows linearly: `delayMs * attempt` milliseconds after the attempt-th failure.
 *
 * @param operation - Async action to run.
 * @param maxRetries - Total number of attempts, including the first one.
 *   Fractions are rounded down; values below 1 (or NaN) are treated as 1 so
 *   the operation always runs at least once.
 * @param delayMs - Base delay in milliseconds.
 * @param logger - Optional logger; receives a warning before each retry and
 *   an error once the attempts are exhausted.
 * @throws The error raised by `operation`: immediately when it is not a
 *   tunnel failure, otherwise after the final attempt.
 */
async function delayRetry(
  operation: () => Promise<void>,
  maxRetries: number = 3,
  delayMs: number = 5000,
  logger?: Logger,
): Promise<void> {
  // Guarantee one attempt; previously maxRetries < 1 skipped the operation
  // entirely and ended up throwing `undefined`.
  const totalAttempts = maxRetries >= 1 ? Math.floor(maxRetries) : 1;

  for (let attempt = 1; ; attempt++) {
    try {
      await operation();
      return;
    } catch (error) {
      // Non-proxy errors are not retried.
      if (!isTunnelConnectionFailedError(error)) {
        throw error;
      }

      if (attempt >= totalAttempts) {
        logger?.error(
          `代理隧道连接失败,已达到最大重试次数 ${totalAttempts} 次`,
        );
        throw error;
      }

      // Linear back-off: wait longer after each failed attempt.
      const delay = delayMs * attempt;
      logger?.warn(
        `代理隧道连接失败,第 ${attempt} 次尝试失败,${delay / 1000} 秒后重试...`,
      );
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
}
|
||||
|
||||
export interface CgnpcResult {
|
||||
title: string;
|
||||
publishDate: Date;
|
||||
@@ -96,7 +146,14 @@ export const CgnpcCrawler = {
|
||||
|
||||
try {
|
||||
logger.log(`Navigating to ${this.url}...`);
|
||||
await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
await delayRetry(
|
||||
async () => {
|
||||
await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
},
|
||||
3,
|
||||
5000,
|
||||
logger,
|
||||
);
|
||||
|
||||
// 模拟人类行为
|
||||
logger.log('Simulating human mouse movements...');
|
||||
|
||||
@@ -14,6 +14,56 @@ interface ChdtpCrawlerType {
|
||||
extract(html: string): ChdtpResult[];
|
||||
}
|
||||
|
||||
/**
 * Reports whether `error` is a proxy tunnel connection failure.
 *
 * Only `Error` instances are inspected. A match is any message containing
 * `ERR_TUNNEL_CONNECTION_FAILED`, which also covers the `net::`-prefixed form.
 */
function isTunnelConnectionFailedError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }
  return error.message.includes('ERR_TUNNEL_CONNECTION_FAILED');
}
|
||||
|
||||
/**
 * Runs `operation`, retrying only when it fails with a proxy tunnel
 * connection error (as decided by `isTunnelConnectionFailedError`).
 *
 * Any other error is rethrown immediately. The wait before the next attempt
 * grows linearly: `delayMs * attempt` milliseconds after the attempt-th failure.
 *
 * @param operation - Async action to run.
 * @param maxRetries - Total number of attempts, including the first one.
 *   Fractions are rounded down; values below 1 (or NaN) are treated as 1 so
 *   the operation always runs at least once.
 * @param delayMs - Base delay in milliseconds.
 * @param logger - Optional logger; receives a warning before each retry and
 *   an error once the attempts are exhausted.
 * @throws The error raised by `operation`: immediately when it is not a
 *   tunnel failure, otherwise after the final attempt.
 */
async function delayRetry(
  operation: () => Promise<void>,
  maxRetries: number = 3,
  delayMs: number = 5000,
  logger?: Logger,
): Promise<void> {
  // Guarantee one attempt; previously maxRetries < 1 skipped the operation
  // entirely and ended up throwing `undefined`.
  const totalAttempts = maxRetries >= 1 ? Math.floor(maxRetries) : 1;

  for (let attempt = 1; ; attempt++) {
    try {
      await operation();
      return;
    } catch (error) {
      // Non-proxy errors are not retried.
      if (!isTunnelConnectionFailedError(error)) {
        throw error;
      }

      if (attempt >= totalAttempts) {
        logger?.error(
          `代理隧道连接失败,已达到最大重试次数 ${totalAttempts} 次`,
        );
        throw error;
      }

      // Linear back-off: wait longer after each failed attempt.
      const delay = delayMs * attempt;
      logger?.warn(
        `代理隧道连接失败,第 ${attempt} 次尝试失败,${delay / 1000} 秒后重试...`,
      );
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
}
|
||||
|
||||
export const ChdtpCrawler = {
|
||||
name: '华电集团电子商务平台 ',
|
||||
url: 'https://www.chdtp.com/webs/queryWebZbgg.action?zbggType=1',
|
||||
@@ -42,7 +92,14 @@ export const ChdtpCrawler = {
|
||||
|
||||
try {
|
||||
logger.log(`Navigating to ${this.url}...`);
|
||||
await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
await delayRetry(
|
||||
async () => {
|
||||
await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
},
|
||||
3,
|
||||
5000,
|
||||
logger,
|
||||
);
|
||||
|
||||
while (currentPage <= maxPages) {
|
||||
const content = await page.content();
|
||||
|
||||
@@ -71,6 +71,56 @@ async function simulateHumanScrolling(page: puppeteer.Page) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Reports whether `error` is a proxy tunnel connection failure.
 *
 * Only `Error` instances are inspected. A match is any message containing
 * `ERR_TUNNEL_CONNECTION_FAILED`, which also covers the `net::`-prefixed form.
 */
function isTunnelConnectionFailedError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }
  return error.message.includes('ERR_TUNNEL_CONNECTION_FAILED');
}
|
||||
|
||||
/**
 * Runs `operation`, retrying only when it fails with a proxy tunnel
 * connection error (as decided by `isTunnelConnectionFailedError`).
 *
 * Any other error is rethrown immediately. The wait before the next attempt
 * grows linearly: `delayMs * attempt` milliseconds after the attempt-th failure.
 *
 * @param operation - Async action to run.
 * @param maxRetries - Total number of attempts, including the first one.
 *   Fractions are rounded down; values below 1 (or NaN) are treated as 1 so
 *   the operation always runs at least once.
 * @param delayMs - Base delay in milliseconds.
 * @param logger - Optional logger; receives a warning before each retry and
 *   an error once the attempts are exhausted.
 * @throws The error raised by `operation`: immediately when it is not a
 *   tunnel failure, otherwise after the final attempt.
 */
async function delayRetry(
  operation: () => Promise<void>,
  maxRetries: number = 3,
  delayMs: number = 5000,
  logger?: Logger,
): Promise<void> {
  // Guarantee one attempt; previously maxRetries < 1 skipped the operation
  // entirely and ended up throwing `undefined`.
  const totalAttempts = maxRetries >= 1 ? Math.floor(maxRetries) : 1;

  for (let attempt = 1; ; attempt++) {
    try {
      await operation();
      return;
    } catch (error) {
      // Non-proxy errors are not retried.
      if (!isTunnelConnectionFailedError(error)) {
        throw error;
      }

      if (attempt >= totalAttempts) {
        logger?.error(
          `代理隧道连接失败,已达到最大重试次数 ${totalAttempts} 次`,
        );
        throw error;
      }

      // Linear back-off: wait longer after each failed attempt.
      const delay = delayMs * attempt;
      logger?.warn(
        `代理隧道连接失败,第 ${attempt} 次尝试失败,${delay / 1000} 秒后重试...`,
      );
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
}
|
||||
|
||||
interface ChngCrawlerType {
|
||||
name: string;
|
||||
url: string;
|
||||
@@ -115,7 +165,14 @@ export const ChngCrawler = {
|
||||
|
||||
try {
|
||||
logger.log('Navigating to Bing...');
|
||||
await page.goto('https://cn.bing.com', { waitUntil: 'networkidle2' });
|
||||
await delayRetry(
|
||||
async () => {
|
||||
await page.goto('https://cn.bing.com', { waitUntil: 'networkidle2' });
|
||||
},
|
||||
3,
|
||||
5000,
|
||||
logger,
|
||||
);
|
||||
|
||||
logger.log('Searching for target site...');
|
||||
const searchBoxSelector = 'input[name="q"]';
|
||||
|
||||
@@ -46,6 +46,56 @@ async function simulateHumanScrolling(page: puppeteer.Page) {
|
||||
await new Promise((r) => setTimeout(r, 1000));
|
||||
}
|
||||
|
||||
/**
 * Reports whether `error` is a proxy tunnel connection failure.
 *
 * Only `Error` instances are inspected. A match is any message containing
 * `ERR_TUNNEL_CONNECTION_FAILED`, which also covers the `net::`-prefixed form.
 */
function isTunnelConnectionFailedError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }
  return error.message.includes('ERR_TUNNEL_CONNECTION_FAILED');
}
|
||||
|
||||
/**
 * Runs `operation`, retrying only when it fails with a proxy tunnel
 * connection error (as decided by `isTunnelConnectionFailedError`).
 *
 * Any other error is rethrown immediately. The wait before the next attempt
 * grows linearly: `delayMs * attempt` milliseconds after the attempt-th failure.
 *
 * @param operation - Async action to run.
 * @param maxRetries - Total number of attempts, including the first one.
 *   Fractions are rounded down; values below 1 (or NaN) are treated as 1 so
 *   the operation always runs at least once.
 * @param delayMs - Base delay in milliseconds.
 * @param logger - Optional logger; receives a warning before each retry and
 *   an error once the attempts are exhausted.
 * @throws The error raised by `operation`: immediately when it is not a
 *   tunnel failure, otherwise after the final attempt.
 */
async function delayRetry(
  operation: () => Promise<void>,
  maxRetries: number = 3,
  delayMs: number = 5000,
  logger?: Logger,
): Promise<void> {
  // Guarantee one attempt; previously maxRetries < 1 skipped the operation
  // entirely and ended up throwing `undefined`.
  const totalAttempts = maxRetries >= 1 ? Math.floor(maxRetries) : 1;

  for (let attempt = 1; ; attempt++) {
    try {
      await operation();
      return;
    } catch (error) {
      // Non-proxy errors are not retried.
      if (!isTunnelConnectionFailedError(error)) {
        throw error;
      }

      if (attempt >= totalAttempts) {
        logger?.error(
          `代理隧道连接失败,已达到最大重试次数 ${totalAttempts} 次`,
        );
        throw error;
      }

      // Linear back-off: wait longer after each failed attempt.
      const delay = delayMs * attempt;
      logger?.warn(
        `代理隧道连接失败,第 ${attempt} 次尝试失败,${delay / 1000} 秒后重试...`,
      );
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
}
|
||||
|
||||
export interface CnncecpResult {
|
||||
title: string;
|
||||
publishDate: Date;
|
||||
@@ -96,7 +146,14 @@ export const CnncecpCrawler = {
|
||||
|
||||
try {
|
||||
logger.log(`Navigating to ${this.url}...`);
|
||||
await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
await delayRetry(
|
||||
async () => {
|
||||
await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
},
|
||||
3,
|
||||
5000,
|
||||
logger,
|
||||
);
|
||||
|
||||
// 模拟人类行为
|
||||
logger.log('Simulating human mouse movements...');
|
||||
|
||||
@@ -46,6 +46,56 @@ async function simulateHumanScrolling(page: puppeteer.Page) {
|
||||
await new Promise((r) => setTimeout(r, 1000));
|
||||
}
|
||||
|
||||
/**
 * Reports whether `error` is a proxy tunnel connection failure.
 *
 * Only `Error` instances are inspected. A match is any message containing
 * `ERR_TUNNEL_CONNECTION_FAILED`, which also covers the `net::`-prefixed form.
 */
function isTunnelConnectionFailedError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }
  return error.message.includes('ERR_TUNNEL_CONNECTION_FAILED');
}
|
||||
|
||||
/**
 * Runs `operation`, retrying only when it fails with a proxy tunnel
 * connection error (as decided by `isTunnelConnectionFailedError`).
 *
 * Any other error is rethrown immediately. The wait before the next attempt
 * grows linearly: `delayMs * attempt` milliseconds after the attempt-th failure.
 *
 * @param operation - Async action to run.
 * @param maxRetries - Total number of attempts, including the first one.
 *   Fractions are rounded down; values below 1 (or NaN) are treated as 1 so
 *   the operation always runs at least once.
 * @param delayMs - Base delay in milliseconds.
 * @param logger - Optional logger; receives a warning before each retry and
 *   an error once the attempts are exhausted.
 * @throws The error raised by `operation`: immediately when it is not a
 *   tunnel failure, otherwise after the final attempt.
 */
async function delayRetry(
  operation: () => Promise<void>,
  maxRetries: number = 3,
  delayMs: number = 5000,
  logger?: Logger,
): Promise<void> {
  // Guarantee one attempt; previously maxRetries < 1 skipped the operation
  // entirely and ended up throwing `undefined`.
  const totalAttempts = maxRetries >= 1 ? Math.floor(maxRetries) : 1;

  for (let attempt = 1; ; attempt++) {
    try {
      await operation();
      return;
    } catch (error) {
      // Non-proxy errors are not retried.
      if (!isTunnelConnectionFailedError(error)) {
        throw error;
      }

      if (attempt >= totalAttempts) {
        logger?.error(
          `代理隧道连接失败,已达到最大重试次数 ${totalAttempts} 次`,
        );
        throw error;
      }

      // Linear back-off: wait longer after each failed attempt.
      const delay = delayMs * attempt;
      logger?.warn(
        `代理隧道连接失败,第 ${attempt} 次尝试失败,${delay / 1000} 秒后重试...`,
      );
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
}
|
||||
|
||||
export interface CnoocResult {
|
||||
title: string;
|
||||
publishDate: Date;
|
||||
@@ -96,7 +146,14 @@ export const CnoocCrawler = {
|
||||
|
||||
try {
|
||||
logger.log(`Navigating to ${this.url}...`);
|
||||
await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
await delayRetry(
|
||||
async () => {
|
||||
await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
},
|
||||
3,
|
||||
5000,
|
||||
logger,
|
||||
);
|
||||
|
||||
// 模拟人类行为
|
||||
logger.log('Simulating human mouse movements...');
|
||||
|
||||
@@ -46,6 +46,56 @@ async function simulateHumanScrolling(page: puppeteer.Page) {
|
||||
await new Promise((r) => setTimeout(r, 1000));
|
||||
}
|
||||
|
||||
/**
 * Reports whether `error` is a proxy tunnel connection failure.
 *
 * Only `Error` instances are inspected. A match is any message containing
 * `ERR_TUNNEL_CONNECTION_FAILED`, which also covers the `net::`-prefixed form.
 */
function isTunnelConnectionFailedError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }
  return error.message.includes('ERR_TUNNEL_CONNECTION_FAILED');
}
|
||||
|
||||
/**
 * Runs `operation`, retrying only when it fails with a proxy tunnel
 * connection error (as decided by `isTunnelConnectionFailedError`).
 *
 * Any other error is rethrown immediately. The wait before the next attempt
 * grows linearly: `delayMs * attempt` milliseconds after the attempt-th failure.
 *
 * @param operation - Async action to run.
 * @param maxRetries - Total number of attempts, including the first one.
 *   Fractions are rounded down; values below 1 (or NaN) are treated as 1 so
 *   the operation always runs at least once.
 * @param delayMs - Base delay in milliseconds.
 * @param logger - Optional logger; receives a warning before each retry and
 *   an error once the attempts are exhausted.
 * @throws The error raised by `operation`: immediately when it is not a
 *   tunnel failure, otherwise after the final attempt.
 */
async function delayRetry(
  operation: () => Promise<void>,
  maxRetries: number = 3,
  delayMs: number = 5000,
  logger?: Logger,
): Promise<void> {
  // Guarantee one attempt; previously maxRetries < 1 skipped the operation
  // entirely and ended up throwing `undefined`.
  const totalAttempts = maxRetries >= 1 ? Math.floor(maxRetries) : 1;

  for (let attempt = 1; ; attempt++) {
    try {
      await operation();
      return;
    } catch (error) {
      // Non-proxy errors are not retried.
      if (!isTunnelConnectionFailedError(error)) {
        throw error;
      }

      if (attempt >= totalAttempts) {
        logger?.error(
          `代理隧道连接失败,已达到最大重试次数 ${totalAttempts} 次`,
        );
        throw error;
      }

      // Linear back-off: wait longer after each failed attempt.
      const delay = delayMs * attempt;
      logger?.warn(
        `代理隧道连接失败,第 ${attempt} 次尝试失败,${delay / 1000} 秒后重试...`,
      );
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
}
|
||||
|
||||
export interface EpsResult {
|
||||
title: string;
|
||||
publishDate: Date;
|
||||
@@ -96,7 +146,14 @@ export const EpsCrawler = {
|
||||
|
||||
try {
|
||||
logger.log(`Navigating to ${this.url}...`);
|
||||
await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
await delayRetry(
|
||||
async () => {
|
||||
await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
},
|
||||
3,
|
||||
5000,
|
||||
logger,
|
||||
);
|
||||
|
||||
// 模拟人类行为
|
||||
logger.log('Simulating human mouse movements...');
|
||||
|
||||
@@ -46,6 +46,56 @@ async function simulateHumanScrolling(page: puppeteer.Page) {
|
||||
await new Promise((r) => setTimeout(r, 1000));
|
||||
}
|
||||
|
||||
/**
 * Reports whether `error` is a proxy tunnel connection failure.
 *
 * Only `Error` instances are inspected. A match is any message containing
 * `ERR_TUNNEL_CONNECTION_FAILED`, which also covers the `net::`-prefixed form.
 */
function isTunnelConnectionFailedError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }
  return error.message.includes('ERR_TUNNEL_CONNECTION_FAILED');
}
|
||||
|
||||
/**
 * Runs `operation`, retrying only when it fails with a proxy tunnel
 * connection error (as decided by `isTunnelConnectionFailedError`).
 *
 * Any other error is rethrown immediately. The wait before the next attempt
 * grows linearly: `delayMs * attempt` milliseconds after the attempt-th failure.
 *
 * @param operation - Async action to run.
 * @param maxRetries - Total number of attempts, including the first one.
 *   Fractions are rounded down; values below 1 (or NaN) are treated as 1 so
 *   the operation always runs at least once.
 * @param delayMs - Base delay in milliseconds.
 * @param logger - Optional logger; receives a warning before each retry and
 *   an error once the attempts are exhausted.
 * @throws The error raised by `operation`: immediately when it is not a
 *   tunnel failure, otherwise after the final attempt.
 */
async function delayRetry(
  operation: () => Promise<void>,
  maxRetries: number = 3,
  delayMs: number = 5000,
  logger?: Logger,
): Promise<void> {
  // Guarantee one attempt; previously maxRetries < 1 skipped the operation
  // entirely and ended up throwing `undefined`.
  const totalAttempts = maxRetries >= 1 ? Math.floor(maxRetries) : 1;

  for (let attempt = 1; ; attempt++) {
    try {
      await operation();
      return;
    } catch (error) {
      // Non-proxy errors are not retried.
      if (!isTunnelConnectionFailedError(error)) {
        throw error;
      }

      if (attempt >= totalAttempts) {
        logger?.error(
          `代理隧道连接失败,已达到最大重试次数 ${totalAttempts} 次`,
        );
        throw error;
      }

      // Linear back-off: wait longer after each failed attempt.
      const delay = delayMs * attempt;
      logger?.warn(
        `代理隧道连接失败,第 ${attempt} 次尝试失败,${delay / 1000} 秒后重试...`,
      );
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
}
|
||||
|
||||
export interface EspicResult {
|
||||
title: string;
|
||||
publishDate: Date;
|
||||
@@ -106,7 +156,14 @@ export const EspicCrawler = {
|
||||
try {
|
||||
const url = this.getUrl(currentPage);
|
||||
logger.log(`Navigating to ${url}...`);
|
||||
await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
await delayRetry(
|
||||
async () => {
|
||||
await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
},
|
||||
3,
|
||||
5000,
|
||||
logger,
|
||||
);
|
||||
|
||||
// 等待 WAF 验证通过
|
||||
logger.log('Waiting for WAF verification...');
|
||||
|
||||
@@ -46,6 +46,56 @@ async function simulateHumanScrolling(page: puppeteer.Page) {
|
||||
await new Promise((r) => setTimeout(r, 1000));
|
||||
}
|
||||
|
||||
/**
 * Reports whether `error` is a proxy tunnel connection failure.
 *
 * Only `Error` instances are inspected. A match is any message containing
 * `ERR_TUNNEL_CONNECTION_FAILED`, which also covers the `net::`-prefixed form.
 */
function isTunnelConnectionFailedError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }
  return error.message.includes('ERR_TUNNEL_CONNECTION_FAILED');
}
|
||||
|
||||
/**
 * Runs `operation`, retrying only when it fails with a proxy tunnel
 * connection error (as decided by `isTunnelConnectionFailedError`).
 *
 * Any other error is rethrown immediately. The wait before the next attempt
 * grows linearly: `delayMs * attempt` milliseconds after the attempt-th failure.
 *
 * @param operation - Async action to run.
 * @param maxRetries - Total number of attempts, including the first one.
 *   Fractions are rounded down; values below 1 (or NaN) are treated as 1 so
 *   the operation always runs at least once.
 * @param delayMs - Base delay in milliseconds.
 * @param logger - Optional logger; receives a warning before each retry and
 *   an error once the attempts are exhausted.
 * @throws The error raised by `operation`: immediately when it is not a
 *   tunnel failure, otherwise after the final attempt.
 */
async function delayRetry(
  operation: () => Promise<void>,
  maxRetries: number = 3,
  delayMs: number = 5000,
  logger?: Logger,
): Promise<void> {
  // Guarantee one attempt; previously maxRetries < 1 skipped the operation
  // entirely and ended up throwing `undefined`.
  const totalAttempts = maxRetries >= 1 ? Math.floor(maxRetries) : 1;

  for (let attempt = 1; ; attempt++) {
    try {
      await operation();
      return;
    } catch (error) {
      // Non-proxy errors are not retried.
      if (!isTunnelConnectionFailedError(error)) {
        throw error;
      }

      if (attempt >= totalAttempts) {
        logger?.error(
          `代理隧道连接失败,已达到最大重试次数 ${totalAttempts} 次`,
        );
        throw error;
      }

      // Linear back-off: wait longer after each failed attempt.
      const delay = delayMs * attempt;
      logger?.warn(
        `代理隧道连接失败,第 ${attempt} 次尝试失败,${delay / 1000} 秒后重试...`,
      );
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
}
|
||||
|
||||
export interface PowerbeijingResult {
|
||||
title: string;
|
||||
publishDate: Date;
|
||||
@@ -96,7 +146,14 @@ export const PowerbeijingCrawler = {
|
||||
|
||||
try {
|
||||
logger.log(`Navigating to ${this.url}...`);
|
||||
await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
await delayRetry(
|
||||
async () => {
|
||||
await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
},
|
||||
3,
|
||||
5000,
|
||||
logger,
|
||||
);
|
||||
|
||||
// 模拟人类行为
|
||||
logger.log('Simulating human mouse movements...');
|
||||
|
||||
@@ -46,6 +46,56 @@ async function simulateHumanScrolling(page: puppeteer.Page) {
|
||||
await new Promise((r) => setTimeout(r, 1000));
|
||||
}
|
||||
|
||||
/**
 * Reports whether `error` is a proxy tunnel connection failure.
 *
 * Only `Error` instances are inspected. A match is any message containing
 * `ERR_TUNNEL_CONNECTION_FAILED`, which also covers the `net::`-prefixed form.
 */
function isTunnelConnectionFailedError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }
  return error.message.includes('ERR_TUNNEL_CONNECTION_FAILED');
}
|
||||
|
||||
/**
 * Runs `operation`, retrying only when it fails with a proxy tunnel
 * connection error (as decided by `isTunnelConnectionFailedError`).
 *
 * Any other error is rethrown immediately. The wait before the next attempt
 * grows linearly: `delayMs * attempt` milliseconds after the attempt-th failure.
 *
 * @param operation - Async action to run.
 * @param maxRetries - Total number of attempts, including the first one.
 *   Fractions are rounded down; values below 1 (or NaN) are treated as 1 so
 *   the operation always runs at least once.
 * @param delayMs - Base delay in milliseconds.
 * @param logger - Optional logger; receives a warning before each retry and
 *   an error once the attempts are exhausted.
 * @throws The error raised by `operation`: immediately when it is not a
 *   tunnel failure, otherwise after the final attempt.
 */
async function delayRetry(
  operation: () => Promise<void>,
  maxRetries: number = 3,
  delayMs: number = 5000,
  logger?: Logger,
): Promise<void> {
  // Guarantee one attempt; previously maxRetries < 1 skipped the operation
  // entirely and ended up throwing `undefined`.
  const totalAttempts = maxRetries >= 1 ? Math.floor(maxRetries) : 1;

  for (let attempt = 1; ; attempt++) {
    try {
      await operation();
      return;
    } catch (error) {
      // Non-proxy errors are not retried.
      if (!isTunnelConnectionFailedError(error)) {
        throw error;
      }

      if (attempt >= totalAttempts) {
        logger?.error(
          `代理隧道连接失败,已达到最大重试次数 ${totalAttempts} 次`,
        );
        throw error;
      }

      // Linear back-off: wait longer after each failed attempt.
      const delay = delayMs * attempt;
      logger?.warn(
        `代理隧道连接失败,第 ${attempt} 次尝试失败,${delay / 1000} 秒后重试...`,
      );
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
}
|
||||
|
||||
export interface SdiccResult {
|
||||
title: string;
|
||||
publishDate: Date;
|
||||
@@ -96,7 +146,14 @@ export const SdiccCrawler = {
|
||||
|
||||
try {
|
||||
logger.log(`Navigating to ${this.url}...`);
|
||||
await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
await delayRetry(
|
||||
async () => {
|
||||
await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
},
|
||||
3,
|
||||
5000,
|
||||
logger,
|
||||
);
|
||||
|
||||
// 模拟人类行为
|
||||
logger.log('Simulating human mouse movements...');
|
||||
|
||||
@@ -47,6 +47,56 @@ async function simulateHumanScrolling(page: puppeteer.Page) {
|
||||
await new Promise((r) => setTimeout(r, 1000));
|
||||
}
|
||||
|
||||
/**
 * Reports whether `error` is a proxy tunnel connection failure.
 *
 * Only `Error` instances are inspected. A match is any message containing
 * `ERR_TUNNEL_CONNECTION_FAILED`, which also covers the `net::`-prefixed form.
 */
function isTunnelConnectionFailedError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }
  return error.message.includes('ERR_TUNNEL_CONNECTION_FAILED');
}
|
||||
|
||||
/**
 * Runs `operation`, retrying only when it fails with a proxy tunnel
 * connection error (as decided by `isTunnelConnectionFailedError`).
 *
 * Any other error is rethrown immediately. The wait before the next attempt
 * grows linearly: `delayMs * attempt` milliseconds after the attempt-th failure.
 *
 * @param operation - Async action to run.
 * @param maxRetries - Total number of attempts, including the first one.
 *   Fractions are rounded down; values below 1 (or NaN) are treated as 1 so
 *   the operation always runs at least once.
 * @param delayMs - Base delay in milliseconds.
 * @param logger - Optional logger; receives a warning before each retry and
 *   an error once the attempts are exhausted.
 * @throws The error raised by `operation`: immediately when it is not a
 *   tunnel failure, otherwise after the final attempt.
 */
async function delayRetry(
  operation: () => Promise<void>,
  maxRetries: number = 3,
  delayMs: number = 5000,
  logger?: Logger,
): Promise<void> {
  // Guarantee one attempt; previously maxRetries < 1 skipped the operation
  // entirely and ended up throwing `undefined`.
  const totalAttempts = maxRetries >= 1 ? Math.floor(maxRetries) : 1;

  for (let attempt = 1; ; attempt++) {
    try {
      await operation();
      return;
    } catch (error) {
      // Non-proxy errors are not retried.
      if (!isTunnelConnectionFailedError(error)) {
        throw error;
      }

      if (attempt >= totalAttempts) {
        logger?.error(
          `代理隧道连接失败,已达到最大重试次数 ${totalAttempts} 次`,
        );
        throw error;
      }

      // Linear back-off: wait longer after each failed attempt.
      const delay = delayMs * attempt;
      logger?.warn(
        `代理隧道连接失败,第 ${attempt} 次尝试失败,${delay / 1000} 秒后重试...`,
      );
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
}
|
||||
|
||||
interface SzecpCrawlerType {
|
||||
name: string;
|
||||
url: string;
|
||||
@@ -90,7 +140,14 @@ export const SzecpCrawler = {
|
||||
|
||||
try {
|
||||
logger.log(`Navigating to ${this.url}...`);
|
||||
await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
await delayRetry(
|
||||
async () => {
|
||||
await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
},
|
||||
3,
|
||||
5000,
|
||||
logger,
|
||||
);
|
||||
|
||||
// 模拟人类行为
|
||||
logger.log('Simulating human mouse movements...');
|
||||
|
||||
Reference in New Issue
Block a user