feat: 为爬虫测试添加代理支持并通过环境变量配置

添加dotenv依赖，创建jest配置文件和setup文件
修改所有爬虫测试文件以支持通过环境变量配置代理
将jest配置从package.json移动到独立文件
This commit is contained in:
dmy
2026-01-12 15:19:54 +08:00
parent f1ec37143c
commit b1435523e8
17 changed files with 325 additions and 27 deletions

View File

@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 300 seconds (5 minutes) for network operations
jest.setTimeout(60000*5);
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty the result is a
 * single `--proxy-server=<host>:<port>` argument, otherwise an empty array.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * command-line argument: Chromium has no `--proxy-auth` switch, so such a flag
 * would be ignored while exposing the plain-text password in the process
 * arguments and in any log that prints them. Authenticated proxies need
 * `page.authenticate({ username, password })` on each page instead.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Both parts are required; a lone host or port means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('CdtCrawler Real Site Test', () => {
let browser: puppeteer.Browser;
beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({
headless: false, // Change to false to see browser UI
args: ['--no-sandbox', '--disable-setuid-sandbox'],
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
});
});

View File

@@ -4,10 +4,32 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 120 seconds for manual inspection and slow sites
jest.setTimeout(120000);
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty the result is a
 * single `--proxy-server=<host>:<port>` argument, otherwise an empty array.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * command-line argument: Chromium has no `--proxy-auth` switch, so such a flag
 * would be ignored while exposing the plain-text password in the process
 * arguments and in any log that prints them. Authenticated proxies need
 * `page.authenticate({ username, password })` on each page instead.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Both parts are required; a lone host or port means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('CeicCrawler Real Site Test', () => {
let browser: puppeteer.Browser;
beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({
headless: false, // Run in non-headless mode
args: [
@@ -16,6 +38,7 @@ describe('CeicCrawler Real Site Test', () => {
'--disable-blink-features=AutomationControlled',
'--window-size=1920,1080',
'--disable-infobars',
...proxyArgs,
],
defaultViewport: null
});

View File

@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 300 seconds (5 minutes) for network operations
jest.setTimeout(60000*5);
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty the result is a
 * single `--proxy-server=<host>:<port>` argument, otherwise an empty array.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * command-line argument: Chromium has no `--proxy-auth` switch, so such a flag
 * would be ignored while exposing the plain-text password in the process
 * arguments and in any log that prints them. Authenticated proxies need
 * `page.authenticate({ username, password })` on each page instead.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Both parts are required; a lone host or port means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('CgnpcCrawler Real Site Test', () => {
let browser: puppeteer.Browser;
beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({
headless: false, // Change to false to see browser UI
args: ['--no-sandbox', '--disable-setuid-sandbox'],
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
});
});

View File

@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 60 seconds for network operations
jest.setTimeout(60000);
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty the result is a
 * single `--proxy-server=<host>:<port>` argument, otherwise an empty array.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * command-line argument: Chromium has no `--proxy-auth` switch, so such a flag
 * would be ignored while exposing the plain-text password in the process
 * arguments and in any log that prints them. Authenticated proxies need
 * `page.authenticate({ username, password })` on each page instead.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Both parts are required; a lone host or port means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('ChdtpCrawler Real Site Test', () => {
let browser: puppeteer.Browser;
beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({
headless: true, // Change to false to see the browser UI
args: ['--no-sandbox', '--disable-setuid-sandbox'],
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
});
});

View File

@@ -4,6 +4,23 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 120 seconds for manual inspection and slow sites
jest.setTimeout(120000);
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty the result is a
 * single `--proxy-server=<host>:<port>` argument, otherwise an empty array.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * command-line argument: Chromium has no `--proxy-auth` switch, so such a flag
 * would be ignored while exposing the plain-text password in the process
 * arguments and in any log that prints them. Authenticated proxies need
 * `page.authenticate({ username, password })` on each page instead.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Both parts are required; a lone host or port means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
// 模拟人类鼠标移动
async function simulateHumanMouseMovement(page: puppeteer.Page) {
const viewport = page.viewport();
@@ -53,6 +70,11 @@ describe('ChngCrawler Real Site Test', () => {
let browser: puppeteer.Browser;
beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({
headless: false, // Run in non-headless mode
args: [
@@ -61,6 +83,7 @@ describe('ChngCrawler Real Site Test', () => {
'--disable-blink-features=AutomationControlled',
'--window-size=1920,1080',
"--disable-infobars",
...proxyArgs,
// "--headless=new",
// '--disable-dev-shm-usage',
// '--disable-accelerated-2d-canvas',
@@ -69,7 +92,7 @@ describe('ChngCrawler Real Site Test', () => {
// '--disable-gpu',
// '--disable-features=VizDisplayCompositor',
// '--disable-webgl',
// '--disable-javascript',
// '--disable-javascript',
],
defaultViewport: null

View File

@@ -9,6 +9,23 @@ puppeteer.use(StealthPlugin());
// Increase timeout to 180 seconds for slow sites and stealth mode
jest.setTimeout(180000);
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty the result is a
 * single `--proxy-server=<host>:<port>` argument, otherwise an empty array.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * command-line argument: Chromium has no `--proxy-auth` switch, so such a flag
 * would be ignored while exposing the plain-text password in the process
 * arguments and in any log that prints them. Authenticated proxies need
 * `page.authenticate({ username, password })` on each page instead.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Both parts are required; a lone host or port means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
// 模拟人类鼠标移动
async function simulateHumanMouseMovement(page: Page) {
const viewport = page.viewport();
@@ -58,6 +75,11 @@ describe('ChngCrawler Stealth Test (Headless Mode with Stealth Plugin)', () => {
let browser: Browser;
beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({
headless: true, // 使用 headless 模式
args: [
@@ -73,6 +95,7 @@ describe('ChngCrawler Stealth Test (Headless Mode with Stealth Plugin)', () => {
'--disable-gpu',
'--disable-features=VizDisplayCompositor',
'--disable-webgl',
...proxyArgs,
],
defaultViewport: null
});

View File

@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 300 seconds (5 minutes) for network operations
jest.setTimeout(60000*5);
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty the result is a
 * single `--proxy-server=<host>:<port>` argument, otherwise an empty array.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * command-line argument: Chromium has no `--proxy-auth` switch, so such a flag
 * would be ignored while exposing the plain-text password in the process
 * arguments and in any log that prints them. Authenticated proxies need
 * `page.authenticate({ username, password })` on each page instead.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Both parts are required; a lone host or port means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('CnncecpCrawler Real Site Test', () => {
let browser: puppeteer.Browser;
beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({
headless: false, // Change to false to see browser UI
args: ['--no-sandbox', '--disable-setuid-sandbox'],
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
});
});

View File

@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 300 seconds (5 minutes) for network operations
jest.setTimeout(60000*5);
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty the result is a
 * single `--proxy-server=<host>:<port>` argument, otherwise an empty array.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * command-line argument: Chromium has no `--proxy-auth` switch, so such a flag
 * would be ignored while exposing the plain-text password in the process
 * arguments and in any log that prints them. Authenticated proxies need
 * `page.authenticate({ username, password })` on each page instead.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Both parts are required; a lone host or port means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('CnoocCrawler Real Site Test', () => {
let browser: puppeteer.Browser;
beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({
headless: false, // Change to false to see browser UI
args: ['--no-sandbox', '--disable-setuid-sandbox'],
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
});
});

View File

@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 300 seconds (5 minutes) for network operations
jest.setTimeout(60000*5);
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty the result is a
 * single `--proxy-server=<host>:<port>` argument, otherwise an empty array.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * command-line argument: Chromium has no `--proxy-auth` switch, so such a flag
 * would be ignored while exposing the plain-text password in the process
 * arguments and in any log that prints them. Authenticated proxies need
 * `page.authenticate({ username, password })` on each page instead.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Both parts are required; a lone host or port means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('EpsCrawler Real Site Test', () => {
let browser: puppeteer.Browser;
beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({
headless: false, // Change to false to see browser UI
args: ['--no-sandbox', '--disable-setuid-sandbox'],
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
});
});

View File

@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 300 seconds (5 minutes) for network operations
jest.setTimeout(60000*5);
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty the result is a
 * single `--proxy-server=<host>:<port>` argument, otherwise an empty array.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * command-line argument: Chromium has no `--proxy-auth` switch, so such a flag
 * would be ignored while exposing the plain-text password in the process
 * arguments and in any log that prints them. Authenticated proxies need
 * `page.authenticate({ username, password })` on each page instead.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Both parts are required; a lone host or port means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('EspicCrawler Real Site Test', () => {
let browser: puppeteer.Browser;
beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({
headless: false, // Change to false to see browser UI
args: ['--no-sandbox', '--disable-setuid-sandbox'],
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
});
});

View File

@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 300 seconds (5 minutes) for network operations
jest.setTimeout(60000*5);
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty the result is a
 * single `--proxy-server=<host>:<port>` argument, otherwise an empty array.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * command-line argument: Chromium has no `--proxy-auth` switch, so such a flag
 * would be ignored while exposing the plain-text password in the process
 * arguments and in any log that prints them. Authenticated proxies need
 * `page.authenticate({ username, password })` on each page instead.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Both parts are required; a lone host or port means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('PowerbeijingCrawler Real Site Test', () => {
let browser: puppeteer.Browser;
beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({
headless: false, // Change to false to see browser UI
args: ['--no-sandbox', '--disable-setuid-sandbox'],
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
});
});

View File

@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 300 seconds (5 minutes) for network operations
jest.setTimeout(60000*5);
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty the result is a
 * single `--proxy-server=<host>:<port>` argument, otherwise an empty array.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * command-line argument: Chromium has no `--proxy-auth` switch, so such a flag
 * would be ignored while exposing the plain-text password in the process
 * arguments and in any log that prints them. Authenticated proxies need
 * `page.authenticate({ username, password })` on each page instead.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Both parts are required; a lone host or port means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('SdiccCrawler Real Site Test', () => {
let browser: puppeteer.Browser;
beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({
headless: false, // Change to false to see browser UI
args: ['--no-sandbox', '--disable-setuid-sandbox'],
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
});
});

View File

@@ -4,10 +4,32 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 120 seconds for manual inspection and slow sites
jest.setTimeout(120000);
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty the result is a
 * single `--proxy-server=<host>:<port>` argument, otherwise an empty array.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * command-line argument: Chromium has no `--proxy-auth` switch, so such a flag
 * would be ignored while exposing the plain-text password in the process
 * arguments and in any log that prints them. Authenticated proxies need
 * `page.authenticate({ username, password })` on each page instead.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Both parts are required; a lone host or port means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('SzecpCrawler Real Site Test', () => {
let browser: puppeteer.Browser;
beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({
headless: false, // Run in non-headless mode
args: [
@@ -16,6 +38,7 @@ describe('SzecpCrawler Real Site Test', () => {
'--disable-blink-features=AutomationControlled',
'--window-size=1920,1080',
'--disable-infobars',
...proxyArgs,
],
defaultViewport: null
});