feat: 为爬虫测试添加代理支持并通过环境变量配置
添加 dotenv 依赖，创建 jest 配置文件和 setup 文件；修改所有爬虫测试文件以支持通过环境变量配置代理；将 jest 配置从 package.json 移动到独立文件。
This commit is contained in:
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 300 seconds (5 minutes) for network operations
|
||||
jest.setTimeout(60000*5);
|
||||
|
||||
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty a single
 * `--proxy-server=<host>:<port>` argument is returned, otherwise the list is
 * empty and the browser connects directly.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * launch argument: Chromium has no `--proxy-auth` switch, so that flag never
 * authenticated anything and only exposed the password on the process command
 * line and in the "Using proxy:" log output. Authenticated proxies must be
 * handled per page with `page.authenticate({ username, password })`.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Host and port are both required; a partial configuration means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
describe('CdtCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: false, // Change to false to see browser UI
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -4,10 +4,32 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 120 seconds for manual inspection and slow sites
|
||||
jest.setTimeout(120000);
|
||||
|
||||
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty a single
 * `--proxy-server=<host>:<port>` argument is returned, otherwise the list is
 * empty and the browser connects directly.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * launch argument: Chromium has no `--proxy-auth` switch, so that flag never
 * authenticated anything and only exposed the password on the process command
 * line and in the "Using proxy:" log output. Authenticated proxies must be
 * handled per page with `page.authenticate({ username, password })`.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Host and port are both required; a partial configuration means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
describe('CeicCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: false, // Run in non-headless mode
|
||||
args: [
|
||||
@@ -16,6 +38,7 @@ describe('CeicCrawler Real Site Test', () => {
|
||||
'--disable-blink-features=AutomationControlled',
|
||||
'--window-size=1920,1080',
|
||||
'--disable-infobars',
|
||||
...proxyArgs,
|
||||
],
|
||||
defaultViewport: null
|
||||
});
|
||||
|
||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 300 seconds (5 minutes) for network operations
|
||||
jest.setTimeout(60000*5);
|
||||
|
||||
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty a single
 * `--proxy-server=<host>:<port>` argument is returned, otherwise the list is
 * empty and the browser connects directly.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * launch argument: Chromium has no `--proxy-auth` switch, so that flag never
 * authenticated anything and only exposed the password on the process command
 * line and in the "Using proxy:" log output. Authenticated proxies must be
 * handled per page with `page.authenticate({ username, password })`.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Host and port are both required; a partial configuration means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
describe('CgnpcCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: false, // Change to false to see browser UI
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 60 seconds for network operations
|
||||
jest.setTimeout(60000);
|
||||
|
||||
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty a single
 * `--proxy-server=<host>:<port>` argument is returned, otherwise the list is
 * empty and the browser connects directly.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * launch argument: Chromium has no `--proxy-auth` switch, so that flag never
 * authenticated anything and only exposed the password on the process command
 * line and in the "Using proxy:" log output. Authenticated proxies must be
 * handled per page with `page.authenticate({ username, password })`.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Host and port are both required; a partial configuration means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
describe('ChdtpCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: true, // Change to false to see the browser UI
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -4,6 +4,23 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 120 seconds for manual inspection and slow sites
|
||||
jest.setTimeout(120000);
|
||||
|
||||
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty a single
 * `--proxy-server=<host>:<port>` argument is returned, otherwise the list is
 * empty and the browser connects directly.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * launch argument: Chromium has no `--proxy-auth` switch, so that flag never
 * authenticated anything and only exposed the password on the process command
 * line and in the "Using proxy:" log output. Authenticated proxies must be
 * handled per page with `page.authenticate({ username, password })`.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Host and port are both required; a partial configuration means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
// 模拟人类鼠标移动
|
||||
async function simulateHumanMouseMovement(page: puppeteer.Page) {
|
||||
const viewport = page.viewport();
|
||||
@@ -53,6 +70,11 @@ describe('ChngCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: false, // Run in non-headless mode
|
||||
args: [
|
||||
@@ -61,6 +83,7 @@ describe('ChngCrawler Real Site Test', () => {
|
||||
'--disable-blink-features=AutomationControlled',
|
||||
'--window-size=1920,1080',
|
||||
"--disable-infobars",
|
||||
...proxyArgs,
|
||||
// "--headless=new",
|
||||
// '--disable-dev-shm-usage',
|
||||
// '--disable-accelerated-2d-canvas',
|
||||
@@ -69,7 +92,7 @@ describe('ChngCrawler Real Site Test', () => {
|
||||
// '--disable-gpu',
|
||||
// '--disable-features=VizDisplayCompositor',
|
||||
// '--disable-webgl',
|
||||
// '--disable-javascript',
|
||||
// '--disable-javascript',
|
||||
],
|
||||
defaultViewport: null
|
||||
|
||||
|
||||
@@ -9,6 +9,23 @@ puppeteer.use(StealthPlugin());
|
||||
// Increase timeout to 180 seconds for slow sites and stealth mode
|
||||
jest.setTimeout(180000);
|
||||
|
||||
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty a single
 * `--proxy-server=<host>:<port>` argument is returned, otherwise the list is
 * empty and the browser connects directly.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * launch argument: Chromium has no `--proxy-auth` switch, so that flag never
 * authenticated anything and only exposed the password on the process command
 * line and in the "Using proxy:" log output. Authenticated proxies must be
 * handled per page with `page.authenticate({ username, password })`.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Host and port are both required; a partial configuration means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
// 模拟人类鼠标移动
|
||||
async function simulateHumanMouseMovement(page: Page) {
|
||||
const viewport = page.viewport();
|
||||
@@ -58,6 +75,11 @@ describe('ChngCrawler Stealth Test (Headless Mode with Stealth Plugin)', () => {
|
||||
let browser: Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: true, // 使用 headless 模式
|
||||
args: [
|
||||
@@ -73,6 +95,7 @@ describe('ChngCrawler Stealth Test (Headless Mode with Stealth Plugin)', () => {
|
||||
'--disable-gpu',
|
||||
'--disable-features=VizDisplayCompositor',
|
||||
'--disable-webgl',
|
||||
...proxyArgs,
|
||||
],
|
||||
defaultViewport: null
|
||||
});
|
||||
|
||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 300 seconds (5 minutes) for network operations
|
||||
jest.setTimeout(60000*5);
|
||||
|
||||
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty a single
 * `--proxy-server=<host>:<port>` argument is returned, otherwise the list is
 * empty and the browser connects directly.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * launch argument: Chromium has no `--proxy-auth` switch, so that flag never
 * authenticated anything and only exposed the password on the process command
 * line and in the "Using proxy:" log output. Authenticated proxies must be
 * handled per page with `page.authenticate({ username, password })`.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Host and port are both required; a partial configuration means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
describe('CnncecpCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: false, // Change to false to see browser UI
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 300 seconds (5 minutes) for network operations
|
||||
jest.setTimeout(60000*5);
|
||||
|
||||
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty a single
 * `--proxy-server=<host>:<port>` argument is returned, otherwise the list is
 * empty and the browser connects directly.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * launch argument: Chromium has no `--proxy-auth` switch, so that flag never
 * authenticated anything and only exposed the password on the process command
 * line and in the "Using proxy:" log output. Authenticated proxies must be
 * handled per page with `page.authenticate({ username, password })`.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Host and port are both required; a partial configuration means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
describe('CnoocCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: false, // Change to false to see browser UI
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 300 seconds (5 minutes) for network operations
|
||||
jest.setTimeout(60000*5);
|
||||
|
||||
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty a single
 * `--proxy-server=<host>:<port>` argument is returned, otherwise the list is
 * empty and the browser connects directly.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * launch argument: Chromium has no `--proxy-auth` switch, so that flag never
 * authenticated anything and only exposed the password on the process command
 * line and in the "Using proxy:" log output. Authenticated proxies must be
 * handled per page with `page.authenticate({ username, password })`.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Host and port are both required; a partial configuration means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
describe('EpsCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: false, // Change to false to see browser UI
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 300 seconds (5 minutes) for network operations
|
||||
jest.setTimeout(60000*5);
|
||||
|
||||
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty a single
 * `--proxy-server=<host>:<port>` argument is returned, otherwise the list is
 * empty and the browser connects directly.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * launch argument: Chromium has no `--proxy-auth` switch, so that flag never
 * authenticated anything and only exposed the password on the process command
 * line and in the "Using proxy:" log output. Authenticated proxies must be
 * handled per page with `page.authenticate({ username, password })`.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Host and port are both required; a partial configuration means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
describe('EspicCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: false, // Change to false to see browser UI
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 300 seconds (5 minutes) for network operations
|
||||
jest.setTimeout(60000*5);
|
||||
|
||||
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty a single
 * `--proxy-server=<host>:<port>` argument is returned, otherwise the list is
 * empty and the browser connects directly.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * launch argument: Chromium has no `--proxy-auth` switch, so that flag never
 * authenticated anything and only exposed the password on the process command
 * line and in the "Using proxy:" log output. Authenticated proxies must be
 * handled per page with `page.authenticate({ username, password })`.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Host and port are both required; a partial configuration means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
describe('PowerbeijingCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: false, // Change to false to see browser UI
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 300 seconds (5 minutes) for network operations
|
||||
jest.setTimeout(60000*5);
|
||||
|
||||
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty a single
 * `--proxy-server=<host>:<port>` argument is returned, otherwise the list is
 * empty and the browser connects directly.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * launch argument: Chromium has no `--proxy-auth` switch, so that flag never
 * authenticated anything and only exposed the password on the process command
 * line and in the "Using proxy:" log output. Authenticated proxies must be
 * handled per page with `page.authenticate({ username, password })`.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Host and port are both required; a partial configuration means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
describe('SdiccCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: false, // Change to false to see browser UI
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -4,10 +4,32 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 120 seconds for manual inspection and slow sites
|
||||
jest.setTimeout(120000);
|
||||
|
||||
/**
 * Builds the Chromium launch arguments that route browser traffic through the
 * proxy configured via environment variables.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT`; when both are non-empty a single
 * `--proxy-server=<host>:<port>` argument is returned, otherwise the list is
 * empty and the browser connects directly.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a
 * launch argument: Chromium has no `--proxy-auth` switch, so that flag never
 * authenticated anything and only exposed the password on the process command
 * line and in the "Using proxy:" log output. Authenticated proxies must be
 * handled per page with `page.authenticate({ username, password })`.
 *
 * @returns Launch arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const { PROXY_HOST: proxyHost, PROXY_PORT: proxyPort } = process.env;

  // Host and port are both required; a partial configuration means "no proxy".
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
describe('SzecpCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: false, // Run in non-headless mode
|
||||
args: [
|
||||
@@ -16,6 +38,7 @@ describe('SzecpCrawler Real Site Test', () => {
|
||||
'--disable-blink-features=AutomationControlled',
|
||||
'--window-size=1920,1080',
|
||||
'--disable-infobars',
|
||||
...proxyArgs,
|
||||
],
|
||||
defaultViewport: null
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user