feat: 为爬虫测试添加代理支持并通过环境变量配置

添加dotenv依赖,创建jest配置文件和setup文件
修改所有爬虫测试文件以支持通过环境变量配置代理
将jest配置从package.json移动到独立文件
This commit is contained in:
dmy
2026-01-12 15:19:54 +08:00
parent f1ec37143c
commit b1435523e8
17 changed files with 325 additions and 27 deletions

13
jest.config.js Normal file
View File

@@ -0,0 +1,13 @@
/**
 * Jest configuration for the `*.spec.ts` tests located under `src`.
 */
const jestConfig = {
  // Every relative path and glob below is resolved against this directory.
  rootDir: 'src',
  testEnvironment: 'node',

  // Only files whose name ends in `.spec.ts` are treated as tests.
  testRegex: '.*\\.spec\\.ts$',
  moduleFileExtensions: ['js', 'json', 'ts'],

  // Both .ts and .js sources are compiled through ts-jest.
  transform: {
    '^.+\\.(t|j)s$': 'ts-jest',
  },

  // Coverage is collected from all .ts/.js files and written one level
  // above rootDir.
  collectCoverageFrom: ['**/*.(t|j)s'],
  coverageDirectory: '../coverage',

  // Load environment variables; the setup file sits one level above rootDir.
  setupFiles: ['<rootDir>/../jest.setup.js'],
};

module.exports = jestConfig;

2
jest.setup.js Normal file
View File

@@ -0,0 +1,2 @@
// Load environment variables from the `.env` file into process.env.
const dotenv = require('dotenv');

dotenv.config({ path: '.env' });

View File

@@ -32,6 +32,7 @@
"axios": "^1.13.2", "axios": "^1.13.2",
"class-transformer": "^0.5.1", "class-transformer": "^0.5.1",
"class-validator": "^0.14.3", "class-validator": "^0.14.3",
"dotenv": "^16.4.7",
"mysql2": "^3.16.0", "mysql2": "^3.16.0",
"puppeteer": "^24.34.0", "puppeteer": "^24.34.0",
"puppeteer-extra": "^3.3.6", "puppeteer-extra": "^3.3.6",
@@ -66,22 +67,5 @@
"tsconfig-paths": "^4.2.0", "tsconfig-paths": "^4.2.0",
"typescript": "^5.7.3", "typescript": "^5.7.3",
"typescript-eslint": "^8.20.0" "typescript-eslint": "^8.20.0"
},
"jest": {
"moduleFileExtensions": [
"js",
"json",
"ts"
],
"rootDir": "src",
"testRegex": ".*\\.spec\\.ts$",
"transform": {
"^.+\\.(t|j)s$": "ts-jest"
},
"collectCoverageFrom": [
"**/*.(t|j)s"
],
"coverageDirectory": "../coverage",
"testEnvironment": "node"
} }
} }

View File

@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 60 seconds for network operations // Increase timeout to 60 seconds for network operations
jest.setTimeout(60000*5); jest.setTimeout(60000*5);
/**
 * Builds the Chromium launch arguments for an optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * returns a single `--proxy-server=<host>:<port>` argument; otherwise returns
 * an empty array so the browser connects directly.
 *
 * PROXY_USERNAME / PROXY_PASSWORD are intentionally NOT turned into a launch
 * argument: Chromium has no `--proxy-auth` switch, so such a flag is ignored
 * by the browser while still exposing the password on the process command
 * line and in any log of the returned arguments. Proxy credentials have to be
 * supplied per page instead, e.g. with puppeteer's `page.authenticate()`.
 *
 * @returns Extra arguments to append to `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  // Both parts are required; a host without a port (or vice versa) means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('CdtCrawler Real Site Test', () => { describe('CdtCrawler Real Site Test', () => {
let browser: puppeteer.Browser; let browser: puppeteer.Browser;
beforeAll(async () => { beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({ browser = await puppeteer.launch({
headless: false, // Change to false to see browser UI headless: false, // Change to false to see browser UI
args: ['--no-sandbox', '--disable-setuid-sandbox'], args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
}); });
}); });

View File

@@ -4,10 +4,32 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 120 seconds for manual inspection and slow sites // Increase timeout to 120 seconds for manual inspection and slow sites
jest.setTimeout(120000); jest.setTimeout(120000);
/**
 * Builds the Chromium launch arguments for an optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * returns a single `--proxy-server=<host>:<port>` argument; otherwise returns
 * an empty array so the browser connects directly.
 *
 * PROXY_USERNAME / PROXY_PASSWORD are intentionally NOT turned into a launch
 * argument: Chromium has no `--proxy-auth` switch, so such a flag is ignored
 * by the browser while still exposing the password on the process command
 * line and in any log of the returned arguments. Proxy credentials have to be
 * supplied per page instead, e.g. with puppeteer's `page.authenticate()`.
 *
 * @returns Extra arguments to append to `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  // Both parts are required; a host without a port (or vice versa) means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('CeicCrawler Real Site Test', () => { describe('CeicCrawler Real Site Test', () => {
let browser: puppeteer.Browser; let browser: puppeteer.Browser;
beforeAll(async () => { beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({ browser = await puppeteer.launch({
headless: false, // Run in non-headless mode headless: false, // Run in non-headless mode
args: [ args: [
@@ -16,6 +38,7 @@ describe('CeicCrawler Real Site Test', () => {
'--disable-blink-features=AutomationControlled', '--disable-blink-features=AutomationControlled',
'--window-size=1920,1080', '--window-size=1920,1080',
'--disable-infobars', '--disable-infobars',
...proxyArgs,
], ],
defaultViewport: null defaultViewport: null
}); });

View File

@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 60 seconds for network operations // Increase timeout to 60 seconds for network operations
jest.setTimeout(60000*5); jest.setTimeout(60000*5);
/**
 * Builds the Chromium launch arguments for an optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * returns a single `--proxy-server=<host>:<port>` argument; otherwise returns
 * an empty array so the browser connects directly.
 *
 * PROXY_USERNAME / PROXY_PASSWORD are intentionally NOT turned into a launch
 * argument: Chromium has no `--proxy-auth` switch, so such a flag is ignored
 * by the browser while still exposing the password on the process command
 * line and in any log of the returned arguments. Proxy credentials have to be
 * supplied per page instead, e.g. with puppeteer's `page.authenticate()`.
 *
 * @returns Extra arguments to append to `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  // Both parts are required; a host without a port (or vice versa) means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('CgnpcCrawler Real Site Test', () => { describe('CgnpcCrawler Real Site Test', () => {
let browser: puppeteer.Browser; let browser: puppeteer.Browser;
beforeAll(async () => { beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({ browser = await puppeteer.launch({
headless: false, // Change to false to see browser UI headless: false, // Change to false to see browser UI
args: ['--no-sandbox', '--disable-setuid-sandbox'], args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
}); });
}); });

View File

@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 60 seconds for network operations // Increase timeout to 60 seconds for network operations
jest.setTimeout(60000); jest.setTimeout(60000);
/**
 * Builds the Chromium launch arguments for an optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * returns a single `--proxy-server=<host>:<port>` argument; otherwise returns
 * an empty array so the browser connects directly.
 *
 * PROXY_USERNAME / PROXY_PASSWORD are intentionally NOT turned into a launch
 * argument: Chromium has no `--proxy-auth` switch, so such a flag is ignored
 * by the browser while still exposing the password on the process command
 * line and in any log of the returned arguments. Proxy credentials have to be
 * supplied per page instead, e.g. with puppeteer's `page.authenticate()`.
 *
 * @returns Extra arguments to append to `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  // Both parts are required; a host without a port (or vice versa) means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('ChdtpCrawler Real Site Test', () => { describe('ChdtpCrawler Real Site Test', () => {
let browser: puppeteer.Browser; let browser: puppeteer.Browser;
beforeAll(async () => { beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({ browser = await puppeteer.launch({
headless: true, // Change to false to see the browser UI headless: true, // Change to false to see the browser UI
args: ['--no-sandbox', '--disable-setuid-sandbox'], args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
}); });
}); });

View File

@@ -4,6 +4,23 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 120 seconds for manual inspection and slow sites // Increase timeout to 120 seconds for manual inspection and slow sites
jest.setTimeout(120000); jest.setTimeout(120000);
/**
 * Builds the Chromium launch arguments for an optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * returns a single `--proxy-server=<host>:<port>` argument; otherwise returns
 * an empty array so the browser connects directly.
 *
 * PROXY_USERNAME / PROXY_PASSWORD are intentionally NOT turned into a launch
 * argument: Chromium has no `--proxy-auth` switch, so such a flag is ignored
 * by the browser while still exposing the password on the process command
 * line and in any log of the returned arguments. Proxy credentials have to be
 * supplied per page instead, e.g. with puppeteer's `page.authenticate()`.
 *
 * @returns Extra arguments to append to `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  // Both parts are required; a host without a port (or vice versa) means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
// 模拟人类鼠标移动 // 模拟人类鼠标移动
async function simulateHumanMouseMovement(page: puppeteer.Page) { async function simulateHumanMouseMovement(page: puppeteer.Page) {
const viewport = page.viewport(); const viewport = page.viewport();
@@ -53,6 +70,11 @@ describe('ChngCrawler Real Site Test', () => {
let browser: puppeteer.Browser; let browser: puppeteer.Browser;
beforeAll(async () => { beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({ browser = await puppeteer.launch({
headless: false, // Run in non-headless mode headless: false, // Run in non-headless mode
args: [ args: [
@@ -61,6 +83,7 @@ describe('ChngCrawler Real Site Test', () => {
'--disable-blink-features=AutomationControlled', '--disable-blink-features=AutomationControlled',
'--window-size=1920,1080', '--window-size=1920,1080',
"--disable-infobars", "--disable-infobars",
...proxyArgs,
// "--headless=new", // "--headless=new",
// '--disable-dev-shm-usage', // '--disable-dev-shm-usage',
// '--disable-accelerated-2d-canvas', // '--disable-accelerated-2d-canvas',

View File

@@ -9,6 +9,23 @@ puppeteer.use(StealthPlugin());
// Increase timeout to 180 seconds for slow sites and stealth mode // Increase timeout to 180 seconds for slow sites and stealth mode
jest.setTimeout(180000); jest.setTimeout(180000);
/**
 * Builds the Chromium launch arguments for an optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * returns a single `--proxy-server=<host>:<port>` argument; otherwise returns
 * an empty array so the browser connects directly.
 *
 * PROXY_USERNAME / PROXY_PASSWORD are intentionally NOT turned into a launch
 * argument: Chromium has no `--proxy-auth` switch, so such a flag is ignored
 * by the browser while still exposing the password on the process command
 * line and in any log of the returned arguments. Proxy credentials have to be
 * supplied per page instead, e.g. with puppeteer's `page.authenticate()`.
 *
 * @returns Extra arguments to append to `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  // Both parts are required; a host without a port (or vice versa) means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
// 模拟人类鼠标移动 // 模拟人类鼠标移动
async function simulateHumanMouseMovement(page: Page) { async function simulateHumanMouseMovement(page: Page) {
const viewport = page.viewport(); const viewport = page.viewport();
@@ -58,6 +75,11 @@ describe('ChngCrawler Stealth Test (Headless Mode with Stealth Plugin)', () => {
let browser: Browser; let browser: Browser;
beforeAll(async () => { beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({ browser = await puppeteer.launch({
headless: true, // 使用 headless 模式 headless: true, // 使用 headless 模式
args: [ args: [
@@ -73,6 +95,7 @@ describe('ChngCrawler Stealth Test (Headless Mode with Stealth Plugin)', () => {
'--disable-gpu', '--disable-gpu',
'--disable-features=VizDisplayCompositor', '--disable-features=VizDisplayCompositor',
'--disable-webgl', '--disable-webgl',
...proxyArgs,
], ],
defaultViewport: null defaultViewport: null
}); });

View File

@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 60 seconds for network operations // Increase timeout to 60 seconds for network operations
jest.setTimeout(60000*5); jest.setTimeout(60000*5);
/**
 * Builds the Chromium launch arguments for an optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * returns a single `--proxy-server=<host>:<port>` argument; otherwise returns
 * an empty array so the browser connects directly.
 *
 * PROXY_USERNAME / PROXY_PASSWORD are intentionally NOT turned into a launch
 * argument: Chromium has no `--proxy-auth` switch, so such a flag is ignored
 * by the browser while still exposing the password on the process command
 * line and in any log of the returned arguments. Proxy credentials have to be
 * supplied per page instead, e.g. with puppeteer's `page.authenticate()`.
 *
 * @returns Extra arguments to append to `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  // Both parts are required; a host without a port (or vice versa) means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('CnncecpCrawler Real Site Test', () => { describe('CnncecpCrawler Real Site Test', () => {
let browser: puppeteer.Browser; let browser: puppeteer.Browser;
beforeAll(async () => { beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({ browser = await puppeteer.launch({
headless: false, // Change to false to see browser UI headless: false, // Change to false to see browser UI
args: ['--no-sandbox', '--disable-setuid-sandbox'], args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
}); });
}); });

View File

@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 60 seconds for network operations // Increase timeout to 60 seconds for network operations
jest.setTimeout(60000*5); jest.setTimeout(60000*5);
/**
 * Builds the Chromium launch arguments for an optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * returns a single `--proxy-server=<host>:<port>` argument; otherwise returns
 * an empty array so the browser connects directly.
 *
 * PROXY_USERNAME / PROXY_PASSWORD are intentionally NOT turned into a launch
 * argument: Chromium has no `--proxy-auth` switch, so such a flag is ignored
 * by the browser while still exposing the password on the process command
 * line and in any log of the returned arguments. Proxy credentials have to be
 * supplied per page instead, e.g. with puppeteer's `page.authenticate()`.
 *
 * @returns Extra arguments to append to `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  // Both parts are required; a host without a port (or vice versa) means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('CnoocCrawler Real Site Test', () => { describe('CnoocCrawler Real Site Test', () => {
let browser: puppeteer.Browser; let browser: puppeteer.Browser;
beforeAll(async () => { beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({ browser = await puppeteer.launch({
headless: false, // Change to false to see browser UI headless: false, // Change to false to see browser UI
args: ['--no-sandbox', '--disable-setuid-sandbox'], args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
}); });
}); });

View File

@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 60 seconds for network operations // Increase timeout to 60 seconds for network operations
jest.setTimeout(60000*5); jest.setTimeout(60000*5);
/**
 * Builds the Chromium launch arguments for an optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * returns a single `--proxy-server=<host>:<port>` argument; otherwise returns
 * an empty array so the browser connects directly.
 *
 * PROXY_USERNAME / PROXY_PASSWORD are intentionally NOT turned into a launch
 * argument: Chromium has no `--proxy-auth` switch, so such a flag is ignored
 * by the browser while still exposing the password on the process command
 * line and in any log of the returned arguments. Proxy credentials have to be
 * supplied per page instead, e.g. with puppeteer's `page.authenticate()`.
 *
 * @returns Extra arguments to append to `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  // Both parts are required; a host without a port (or vice versa) means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('EpsCrawler Real Site Test', () => { describe('EpsCrawler Real Site Test', () => {
let browser: puppeteer.Browser; let browser: puppeteer.Browser;
beforeAll(async () => { beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({ browser = await puppeteer.launch({
headless: false, // Change to false to see browser UI headless: false, // Change to false to see browser UI
args: ['--no-sandbox', '--disable-setuid-sandbox'], args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
}); });
}); });

View File

@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 60 seconds for network operations // Increase timeout to 60 seconds for network operations
jest.setTimeout(60000*5); jest.setTimeout(60000*5);
/**
 * Builds the Chromium launch arguments for an optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * returns a single `--proxy-server=<host>:<port>` argument; otherwise returns
 * an empty array so the browser connects directly.
 *
 * PROXY_USERNAME / PROXY_PASSWORD are intentionally NOT turned into a launch
 * argument: Chromium has no `--proxy-auth` switch, so such a flag is ignored
 * by the browser while still exposing the password on the process command
 * line and in any log of the returned arguments. Proxy credentials have to be
 * supplied per page instead, e.g. with puppeteer's `page.authenticate()`.
 *
 * @returns Extra arguments to append to `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  // Both parts are required; a host without a port (or vice versa) means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('EspicCrawler Real Site Test', () => { describe('EspicCrawler Real Site Test', () => {
let browser: puppeteer.Browser; let browser: puppeteer.Browser;
beforeAll(async () => { beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({ browser = await puppeteer.launch({
headless: false, // Change to false to see browser UI headless: false, // Change to false to see browser UI
args: ['--no-sandbox', '--disable-setuid-sandbox'], args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
}); });
}); });

View File

@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 60 seconds for network operations // Increase timeout to 60 seconds for network operations
jest.setTimeout(60000*5); jest.setTimeout(60000*5);
/**
 * Builds the Chromium launch arguments for an optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * returns a single `--proxy-server=<host>:<port>` argument; otherwise returns
 * an empty array so the browser connects directly.
 *
 * PROXY_USERNAME / PROXY_PASSWORD are intentionally NOT turned into a launch
 * argument: Chromium has no `--proxy-auth` switch, so such a flag is ignored
 * by the browser while still exposing the password on the process command
 * line and in any log of the returned arguments. Proxy credentials have to be
 * supplied per page instead, e.g. with puppeteer's `page.authenticate()`.
 *
 * @returns Extra arguments to append to `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  // Both parts are required; a host without a port (or vice versa) means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('PowerbeijingCrawler Real Site Test', () => { describe('PowerbeijingCrawler Real Site Test', () => {
let browser: puppeteer.Browser; let browser: puppeteer.Browser;
beforeAll(async () => { beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({ browser = await puppeteer.launch({
headless: false, // Change to false to see browser UI headless: false, // Change to false to see browser UI
args: ['--no-sandbox', '--disable-setuid-sandbox'], args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
}); });
}); });

View File

@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 60 seconds for network operations // Increase timeout to 60 seconds for network operations
jest.setTimeout(60000*5); jest.setTimeout(60000*5);
/**
 * Builds the Chromium launch arguments for an optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * returns a single `--proxy-server=<host>:<port>` argument; otherwise returns
 * an empty array so the browser connects directly.
 *
 * PROXY_USERNAME / PROXY_PASSWORD are intentionally NOT turned into a launch
 * argument: Chromium has no `--proxy-auth` switch, so such a flag is ignored
 * by the browser while still exposing the password on the process command
 * line and in any log of the returned arguments. Proxy credentials have to be
 * supplied per page instead, e.g. with puppeteer's `page.authenticate()`.
 *
 * @returns Extra arguments to append to `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  // Both parts are required; a host without a port (or vice versa) means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('SdiccCrawler Real Site Test', () => { describe('SdiccCrawler Real Site Test', () => {
let browser: puppeteer.Browser; let browser: puppeteer.Browser;
beforeAll(async () => { beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({ browser = await puppeteer.launch({
headless: false, // Change to false to see browser UI headless: false, // Change to false to see browser UI
args: ['--no-sandbox', '--disable-setuid-sandbox'], args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
}); });
}); });

View File

@@ -4,10 +4,32 @@ import * as puppeteer from 'puppeteer';
// Increase timeout to 120 seconds for manual inspection and slow sites // Increase timeout to 120 seconds for manual inspection and slow sites
jest.setTimeout(120000); jest.setTimeout(120000);
/**
 * Builds the Chromium launch arguments for an optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * returns a single `--proxy-server=<host>:<port>` argument; otherwise returns
 * an empty array so the browser connects directly.
 *
 * PROXY_USERNAME / PROXY_PASSWORD are intentionally NOT turned into a launch
 * argument: Chromium has no `--proxy-auth` switch, so such a flag is ignored
 * by the browser while still exposing the password on the process command
 * line and in any log of the returned arguments. Proxy credentials have to be
 * supplied per page instead, e.g. with puppeteer's `page.authenticate()`.
 *
 * @returns Extra arguments to append to `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  // Both parts are required; a host without a port (or vice versa) means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
describe('SzecpCrawler Real Site Test', () => { describe('SzecpCrawler Real Site Test', () => {
let browser: puppeteer.Browser; let browser: puppeteer.Browser;
beforeAll(async () => { beforeAll(async () => {
const proxyArgs = getProxyArgs();
if (proxyArgs.length > 0) {
console.log('Using proxy:', proxyArgs.join(' '));
}
browser = await puppeteer.launch({ browser = await puppeteer.launch({
headless: false, // Run in non-headless mode headless: false, // Run in non-headless mode
args: [ args: [
@@ -16,6 +38,7 @@ describe('SzecpCrawler Real Site Test', () => {
'--disable-blink-features=AutomationControlled', '--disable-blink-features=AutomationControlled',
'--window-size=1920,1080', '--window-size=1920,1080',
'--disable-infobars', '--disable-infobars',
...proxyArgs,
], ],
defaultViewport: null defaultViewport: null
}); });

9
test/jest-e2e.json Normal file
View File

@@ -0,0 +1,9 @@
{
"moduleFileExtensions": ["js", "json", "ts"],
"rootDir": ".",
"testEnvironment": "node",
"testRegex": ".e2e-spec.ts$",
"transform": {
"^.+\\.(t|j)s$": "ts-jest"
}
}