diff --git a/jest.config.js b/jest.config.js
new file mode 100644
index 0000000..f4218b0
--- /dev/null
+++ b/jest.config.js
@@ -0,0 +1,13 @@
+module.exports = {
+  moduleFileExtensions: ['js', 'json', 'ts'],
+  rootDir: 'src',
+  testRegex: '.*\\.spec\\.ts$',
+  transform: {
+    '^.+\\.(t|j)s$': 'ts-jest',
+  },
+  collectCoverageFrom: ['**/*.(t|j)s'],
+  coverageDirectory: '../coverage',
+  testEnvironment: 'node',
+  // Load environment variables before tests; rootDir is 'src', so step up one level to the setup file.
+  setupFiles: ['<rootDir>/../jest.setup.js'],
+};
diff --git a/jest.setup.js b/jest.setup.js
new file mode 100644
index 0000000..5bea4f9
--- /dev/null
+++ b/jest.setup.js
@@ -0,0 +1,2 @@
+// Load environment variables from the .env file next to this script, regardless of the working directory.
+require('dotenv').config({ path: require('path').join(__dirname, '.env') });
diff --git a/package.json b/package.json
index 342ac7d..c78f9ae 100644
--- a/package.json
+++ b/package.json
@@ -32,6 +32,7 @@
     "axios": "^1.13.2",
     "class-transformer": "^0.5.1",
     "class-validator": "^0.14.3",
+    "dotenv": "^16.4.7",
     "mysql2": "^3.16.0",
     "puppeteer": "^24.34.0",
     "puppeteer-extra": "^3.3.6",
@@ -66,22 +67,5 @@
     "tsconfig-paths": "^4.2.0",
     "typescript": "^5.7.3",
     "typescript-eslint": "^8.20.0"
-  },
-  "jest": {
-    "moduleFileExtensions": [
-      "js",
-      "json",
-      "ts"
-    ],
-    "rootDir": "src",
-    "testRegex": ".*\\.spec\\.ts$",
-    "transform": {
-      "^.+\\.(t|j)s$": "ts-jest"
-    },
-    "collectCoverageFrom": [
-      "**/*.(t|j)s"
-    ],
-    "coverageDirectory": "../coverage",
-    "testEnvironment": "node"
   }
 }
diff --git a/src/crawler/services/cdt_target.spec.ts b/src/crawler/services/cdt_target.spec.ts
index cf73eb1..004899e 100644
--- a/src/crawler/services/cdt_target.spec.ts
+++ b/src/crawler/services/cdt_target.spec.ts
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
 // Increase timeout to 60 seconds for network operations
 jest.setTimeout(60000*5);
 
+// Builds Chromium proxy launch flags from the PROXY_* environment variables;
+// returns [] unless both PROXY_HOST and PROXY_PORT are set.
+// NOTE(review): Chromium has no documented `--proxy-auth` switch, so the credentials
+// flag is probably ignored; authenticated proxies need page.authenticate() per page.
+const getProxyArgs = (): string[] => {
+  const { PROXY_HOST, PROXY_PORT, PROXY_USERNAME, PROXY_PASSWORD } = process.env;
+  if (!PROXY_HOST || !PROXY_PORT) {
+    return [];
+  }
+
+  const args = [`--proxy-server=${PROXY_HOST}:${PROXY_PORT}`];
+  if (PROXY_USERNAME && PROXY_PASSWORD) {
+    args.push(`--proxy-auth=${PROXY_USERNAME}:${PROXY_PASSWORD}`);
+  }
+  return args;
+};
+
 describe('CdtCrawler Real Site Test', () => {
   let browser: puppeteer.Browser;
 
   beforeAll(async () => {
+    const proxyArgs = getProxyArgs();
+    if (proxyArgs.length > 0) {
+      // Print only the server flag; the full arg list would expose the proxy password.
+      console.log('Using proxy:', proxyArgs[0]);
+    }
     browser = await puppeteer.launch({
       headless: false, // Change to false to see browser UI
-      args: ['--no-sandbox', '--disable-setuid-sandbox'],
+      args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
     });
   });
 
diff --git a/src/crawler/services/ceic_target.spec.ts b/src/crawler/services/ceic_target.spec.ts
index 7de5f81..e37deff 100644
--- a/src/crawler/services/ceic_target.spec.ts
+++ b/src/crawler/services/ceic_target.spec.ts
@@ -4,10 +4,32 @@ import * as puppeteer from 'puppeteer';
 // Increase timeout to 120 seconds for manual inspection and slow sites
 jest.setTimeout(120000);
 
+// Builds Chromium proxy launch flags from the PROXY_* environment variables;
+// returns [] unless both PROXY_HOST and PROXY_PORT are set.
+// NOTE(review): Chromium has no documented `--proxy-auth` switch, so the credentials
+// flag is probably ignored; authenticated proxies need page.authenticate() per page.
+const getProxyArgs = (): string[] => {
+  const { PROXY_HOST, PROXY_PORT, PROXY_USERNAME, PROXY_PASSWORD } = process.env;
+  if (!PROXY_HOST || !PROXY_PORT) {
+    return [];
+  }
+
+  const args = [`--proxy-server=${PROXY_HOST}:${PROXY_PORT}`];
+  if (PROXY_USERNAME && PROXY_PASSWORD) {
+    args.push(`--proxy-auth=${PROXY_USERNAME}:${PROXY_PASSWORD}`);
+  }
+  return args;
+};
+
 describe('CeicCrawler Real Site Test', () => {
   let browser: puppeteer.Browser;
 
   beforeAll(async () => {
+    const proxyArgs = getProxyArgs();
+    if (proxyArgs.length > 0) {
+      // Print only the server flag; the full arg list would expose the proxy password.
+      console.log('Using proxy:', proxyArgs[0]);
+    }
     browser = await puppeteer.launch({
       headless: false, // Run in non-headless mode
       args: [
@@ -16,6 +38,7 @@ describe('CeicCrawler Real Site Test', () => {
         '--disable-blink-features=AutomationControlled',
         '--window-size=1920,1080',
         '--disable-infobars',
+        ...proxyArgs,
       ],
       defaultViewport: null
     });
diff --git a/src/crawler/services/cgnpc_target.spec.ts b/src/crawler/services/cgnpc_target.spec.ts
index 4ff9839..432474e 100644
--- a/src/crawler/services/cgnpc_target.spec.ts
+++ b/src/crawler/services/cgnpc_target.spec.ts
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
 // Increase timeout to 60 seconds for network operations
 jest.setTimeout(60000*5);
 
+// Builds Chromium proxy launch flags from the PROXY_* environment variables;
+// returns [] unless both PROXY_HOST and PROXY_PORT are set.
+// NOTE(review): Chromium has no documented `--proxy-auth` switch, so the credentials
+// flag is probably ignored; authenticated proxies need page.authenticate() per page.
+const getProxyArgs = (): string[] => {
+  const { PROXY_HOST, PROXY_PORT, PROXY_USERNAME, PROXY_PASSWORD } = process.env;
+  if (!PROXY_HOST || !PROXY_PORT) {
+    return [];
+  }
+
+  const args = [`--proxy-server=${PROXY_HOST}:${PROXY_PORT}`];
+  if (PROXY_USERNAME && PROXY_PASSWORD) {
+    args.push(`--proxy-auth=${PROXY_USERNAME}:${PROXY_PASSWORD}`);
+  }
+  return args;
+};
+
 describe('CgnpcCrawler Real Site Test', () => {
   let browser: puppeteer.Browser;
 
   beforeAll(async () => {
+    const proxyArgs = getProxyArgs();
+    if (proxyArgs.length > 0) {
+      // Print only the server flag; the full arg list would expose the proxy password.
+      console.log('Using proxy:', proxyArgs[0]);
+    }
     browser = await puppeteer.launch({
       headless: false, // Change to false to see browser UI
-      args: ['--no-sandbox', '--disable-setuid-sandbox'],
+      args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
     });
   });
 
diff --git a/src/crawler/services/chdtp_target.spec.ts b/src/crawler/services/chdtp_target.spec.ts
index 4ed6de5..eab266e 100644
--- a/src/crawler/services/chdtp_target.spec.ts
+++ b/src/crawler/services/chdtp_target.spec.ts
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
 // Increase timeout to 60 seconds for network operations
 jest.setTimeout(60000);
 
+// Builds Chromium proxy launch flags from the PROXY_* environment variables;
+// returns [] unless both PROXY_HOST and PROXY_PORT are set.
+// NOTE(review): Chromium has no documented `--proxy-auth` switch, so the credentials
+// flag is probably ignored; authenticated proxies need page.authenticate() per page.
+const getProxyArgs = (): string[] => {
+  const { PROXY_HOST, PROXY_PORT, PROXY_USERNAME, PROXY_PASSWORD } = process.env;
+  if (!PROXY_HOST || !PROXY_PORT) {
+    return [];
+  }
+
+  const args = [`--proxy-server=${PROXY_HOST}:${PROXY_PORT}`];
+  if (PROXY_USERNAME && PROXY_PASSWORD) {
+    args.push(`--proxy-auth=${PROXY_USERNAME}:${PROXY_PASSWORD}`);
+  }
+  return args;
+};
+
 describe('ChdtpCrawler Real Site Test', () => {
   let browser: puppeteer.Browser;
 
   beforeAll(async () => {
+    const proxyArgs = getProxyArgs();
+    if (proxyArgs.length > 0) {
+      // Print only the server flag; the full arg list would expose the proxy password.
+      console.log('Using proxy:', proxyArgs[0]);
+    }
     browser = await puppeteer.launch({
       headless: true, // Change to false to see the browser UI
-      args: ['--no-sandbox', '--disable-setuid-sandbox'],
+      args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
     });
   });
 
diff --git a/src/crawler/services/chng_target.spec.ts b/src/crawler/services/chng_target.spec.ts
index 23be984..7253db6 100644
--- a/src/crawler/services/chng_target.spec.ts
+++ b/src/crawler/services/chng_target.spec.ts
@@ -4,6 +4,23 @@ import * as puppeteer from 'puppeteer';
 // Increase timeout to 120 seconds for manual inspection and slow sites
 jest.setTimeout(120000);
 
+// Builds Chromium proxy launch flags from the PROXY_* environment variables;
+// returns [] unless both PROXY_HOST and PROXY_PORT are set.
+// NOTE(review): Chromium has no documented `--proxy-auth` switch, so the credentials
+// flag is probably ignored; authenticated proxies need page.authenticate() per page.
+const getProxyArgs = (): string[] => {
+  const { PROXY_HOST, PROXY_PORT, PROXY_USERNAME, PROXY_PASSWORD } = process.env;
+  if (!PROXY_HOST || !PROXY_PORT) {
+    return [];
+  }
+
+  const args = [`--proxy-server=${PROXY_HOST}:${PROXY_PORT}`];
+  if (PROXY_USERNAME && PROXY_PASSWORD) {
+    args.push(`--proxy-auth=${PROXY_USERNAME}:${PROXY_PASSWORD}`);
+  }
+  return args;
+};
+
 // 模拟人类鼠标移动
 async function simulateHumanMouseMovement(page: puppeteer.Page) {
   const viewport = page.viewport();
@@ -53,6 +70,11 @@ describe('ChngCrawler Real Site Test', () => {
   let browser: puppeteer.Browser;
 
   beforeAll(async () => {
+    const proxyArgs = getProxyArgs();
+    if (proxyArgs.length > 0) {
+      // Print only the server flag; the full arg list would expose the proxy password.
+      console.log('Using proxy:', proxyArgs[0]);
+    }
     browser = await puppeteer.launch({
       headless: false, // Run in non-headless mode
       args: [
@@ -61,6 +83,7 @@ describe('ChngCrawler Real Site Test', () => {
         '--disable-blink-features=AutomationControlled',
         '--window-size=1920,1080',
         "--disable-infobars",
+        ...proxyArgs,
         // "--headless=new",
         // '--disable-dev-shm-usage',
         // '--disable-accelerated-2d-canvas',
@@ -69,7 +92,7 @@ describe('ChngCrawler Real Site Test', () => {
         // '--disable-gpu',
         // '--disable-features=VizDisplayCompositor',
         // '--disable-webgl',
-        // '--disable-javascript',
+        // '--disable-javascript',
 
       ],
       defaultViewport: null
diff --git a/src/crawler/services/chng_target_stealth.spec.ts b/src/crawler/services/chng_target_stealth.spec.ts
index 5e1bce5..e918f18 100644
--- a/src/crawler/services/chng_target_stealth.spec.ts
+++ b/src/crawler/services/chng_target_stealth.spec.ts
@@ -9,6 +9,23 @@ puppeteer.use(StealthPlugin());
 // Increase timeout to 180 seconds for slow sites and stealth mode
 jest.setTimeout(180000);
 
+// Builds Chromium proxy launch flags from the PROXY_* environment variables;
+// returns [] unless both PROXY_HOST and PROXY_PORT are set.
+// NOTE(review): Chromium has no documented `--proxy-auth` switch, so the credentials
+// flag is probably ignored; authenticated proxies need page.authenticate() per page.
+const getProxyArgs = (): string[] => {
+  const { PROXY_HOST, PROXY_PORT, PROXY_USERNAME, PROXY_PASSWORD } = process.env;
+  if (!PROXY_HOST || !PROXY_PORT) {
+    return [];
+  }
+
+  const args = [`--proxy-server=${PROXY_HOST}:${PROXY_PORT}`];
+  if (PROXY_USERNAME && PROXY_PASSWORD) {
+    args.push(`--proxy-auth=${PROXY_USERNAME}:${PROXY_PASSWORD}`);
+  }
+  return args;
+};
+
 // 模拟人类鼠标移动
 async function simulateHumanMouseMovement(page: Page) {
   const viewport = page.viewport();
@@ -58,6 +75,11 @@ describe('ChngCrawler Stealth Test (Headless Mode with Stealth Plugin)', () => {
   let browser: Browser;
 
   beforeAll(async () => {
+    const proxyArgs = getProxyArgs();
+    if (proxyArgs.length > 0) {
+      // Print only the server flag; the full arg list would expose the proxy password.
+      console.log('Using proxy:', proxyArgs[0]);
+    }
     browser = await puppeteer.launch({
       headless: true, // 使用 headless 模式
       args: [
@@ -73,6 +95,7 @@ describe('ChngCrawler Stealth Test (Headless Mode with Stealth Plugin)', () => {
         '--disable-gpu',
         '--disable-features=VizDisplayCompositor',
         '--disable-webgl',
+        ...proxyArgs,
       ],
       defaultViewport: null
     });
diff --git a/src/crawler/services/cnncecp_target.spec.ts b/src/crawler/services/cnncecp_target.spec.ts
index 2be32b8..7bb2620 100644
--- a/src/crawler/services/cnncecp_target.spec.ts
+++ b/src/crawler/services/cnncecp_target.spec.ts
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
 // Increase timeout to 60 seconds for network operations
 jest.setTimeout(60000*5);
 
+// Builds Chromium proxy launch flags from the PROXY_* environment variables;
+// returns [] unless both PROXY_HOST and PROXY_PORT are set.
+// NOTE(review): Chromium has no documented `--proxy-auth` switch, so the credentials
+// flag is probably ignored; authenticated proxies need page.authenticate() per page.
+const getProxyArgs = (): string[] => {
+  const { PROXY_HOST, PROXY_PORT, PROXY_USERNAME, PROXY_PASSWORD } = process.env;
+  if (!PROXY_HOST || !PROXY_PORT) {
+    return [];
+  }
+
+  const args = [`--proxy-server=${PROXY_HOST}:${PROXY_PORT}`];
+  if (PROXY_USERNAME && PROXY_PASSWORD) {
+    args.push(`--proxy-auth=${PROXY_USERNAME}:${PROXY_PASSWORD}`);
+  }
+  return args;
+};
+
 describe('CnncecpCrawler Real Site Test', () => {
   let browser: puppeteer.Browser;
 
   beforeAll(async () => {
+    const proxyArgs = getProxyArgs();
+    if (proxyArgs.length > 0) {
+      // Print only the server flag; the full arg list would expose the proxy password.
+      console.log('Using proxy:', proxyArgs[0]);
+    }
     browser = await puppeteer.launch({
       headless: false, // Change to false to see browser UI
-      args: ['--no-sandbox', '--disable-setuid-sandbox'],
+      args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
     });
   });
 
diff --git a/src/crawler/services/cnooc_target.spec.ts b/src/crawler/services/cnooc_target.spec.ts
index 54b0fdd..543f3b0 100644
--- a/src/crawler/services/cnooc_target.spec.ts
+++ b/src/crawler/services/cnooc_target.spec.ts
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
 // Increase timeout to 60 seconds for network operations
 jest.setTimeout(60000*5);
 
+// Builds Chromium proxy launch flags from the PROXY_* environment variables;
+// returns [] unless both PROXY_HOST and PROXY_PORT are set.
+// NOTE(review): Chromium has no documented `--proxy-auth` switch, so the credentials
+// flag is probably ignored; authenticated proxies need page.authenticate() per page.
+const getProxyArgs = (): string[] => {
+  const { PROXY_HOST, PROXY_PORT, PROXY_USERNAME, PROXY_PASSWORD } = process.env;
+  if (!PROXY_HOST || !PROXY_PORT) {
+    return [];
+  }
+
+  const args = [`--proxy-server=${PROXY_HOST}:${PROXY_PORT}`];
+  if (PROXY_USERNAME && PROXY_PASSWORD) {
+    args.push(`--proxy-auth=${PROXY_USERNAME}:${PROXY_PASSWORD}`);
+  }
+  return args;
+};
+
 describe('CnoocCrawler Real Site Test', () => {
   let browser: puppeteer.Browser;
 
   beforeAll(async () => {
+    const proxyArgs = getProxyArgs();
+    if (proxyArgs.length > 0) {
+      // Print only the server flag; the full arg list would expose the proxy password.
+      console.log('Using proxy:', proxyArgs[0]);
+    }
     browser = await puppeteer.launch({
       headless: false, // Change to false to see browser UI
-      args: ['--no-sandbox', '--disable-setuid-sandbox'],
+      args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
     });
   });
 
diff --git a/src/crawler/services/eps_target.spec.ts b/src/crawler/services/eps_target.spec.ts
index 50e1034..2975bae 100644
--- a/src/crawler/services/eps_target.spec.ts
+++ b/src/crawler/services/eps_target.spec.ts
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
 // Increase timeout to 60 seconds for network operations
 jest.setTimeout(60000*5);
 
+// Builds Chromium proxy launch flags from the PROXY_* environment variables;
+// returns [] unless both PROXY_HOST and PROXY_PORT are set.
+// NOTE(review): Chromium has no documented `--proxy-auth` switch, so the credentials
+// flag is probably ignored; authenticated proxies need page.authenticate() per page.
+const getProxyArgs = (): string[] => {
+  const { PROXY_HOST, PROXY_PORT, PROXY_USERNAME, PROXY_PASSWORD } = process.env;
+  if (!PROXY_HOST || !PROXY_PORT) {
+    return [];
+  }
+
+  const args = [`--proxy-server=${PROXY_HOST}:${PROXY_PORT}`];
+  if (PROXY_USERNAME && PROXY_PASSWORD) {
+    args.push(`--proxy-auth=${PROXY_USERNAME}:${PROXY_PASSWORD}`);
+  }
+  return args;
+};
+
 describe('EpsCrawler Real Site Test', () => {
   let browser: puppeteer.Browser;
 
   beforeAll(async () => {
+    const proxyArgs = getProxyArgs();
+    if (proxyArgs.length > 0) {
+      // Print only the server flag; the full arg list would expose the proxy password.
+      console.log('Using proxy:', proxyArgs[0]);
+    }
     browser = await puppeteer.launch({
       headless: false, // Change to false to see browser UI
-      args: ['--no-sandbox', '--disable-setuid-sandbox'],
+      args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
     });
   });
 
diff --git a/src/crawler/services/espic_target.spec.ts b/src/crawler/services/espic_target.spec.ts
index 50bbf6a..293c251 100644
--- a/src/crawler/services/espic_target.spec.ts
+++ b/src/crawler/services/espic_target.spec.ts
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
 // Increase timeout to 60 seconds for network operations
 jest.setTimeout(60000*5);
 
+// Builds Chromium proxy launch flags from the PROXY_* environment variables;
+// returns [] unless both PROXY_HOST and PROXY_PORT are set.
+// NOTE(review): Chromium has no documented `--proxy-auth` switch, so the credentials
+// flag is probably ignored; authenticated proxies need page.authenticate() per page.
+const getProxyArgs = (): string[] => {
+  const { PROXY_HOST, PROXY_PORT, PROXY_USERNAME, PROXY_PASSWORD } = process.env;
+  if (!PROXY_HOST || !PROXY_PORT) {
+    return [];
+  }
+
+  const args = [`--proxy-server=${PROXY_HOST}:${PROXY_PORT}`];
+  if (PROXY_USERNAME && PROXY_PASSWORD) {
+    args.push(`--proxy-auth=${PROXY_USERNAME}:${PROXY_PASSWORD}`);
+  }
+  return args;
+};
+
 describe('EspicCrawler Real Site Test', () => {
   let browser: puppeteer.Browser;
 
   beforeAll(async () => {
+    const proxyArgs = getProxyArgs();
+    if (proxyArgs.length > 0) {
+      // Print only the server flag; the full arg list would expose the proxy password.
+      console.log('Using proxy:', proxyArgs[0]);
+    }
     browser = await puppeteer.launch({
       headless: false, // Change to false to see browser UI
-      args: ['--no-sandbox', '--disable-setuid-sandbox'],
+      args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
     });
   });
 
diff --git a/src/crawler/services/powerbeijing_target.spec.ts b/src/crawler/services/powerbeijing_target.spec.ts
index 7adf685..b0fa11b 100644
--- a/src/crawler/services/powerbeijing_target.spec.ts
+++ b/src/crawler/services/powerbeijing_target.spec.ts
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
 // Increase timeout to 60 seconds for network operations
 jest.setTimeout(60000*5);
 
+// Builds Chromium proxy launch flags from the PROXY_* environment variables;
+// returns [] unless both PROXY_HOST and PROXY_PORT are set.
+// NOTE(review): Chromium has no documented `--proxy-auth` switch, so the credentials
+// flag is probably ignored; authenticated proxies need page.authenticate() per page.
+const getProxyArgs = (): string[] => {
+  const { PROXY_HOST, PROXY_PORT, PROXY_USERNAME, PROXY_PASSWORD } = process.env;
+  if (!PROXY_HOST || !PROXY_PORT) {
+    return [];
+  }
+
+  const args = [`--proxy-server=${PROXY_HOST}:${PROXY_PORT}`];
+  if (PROXY_USERNAME && PROXY_PASSWORD) {
+    args.push(`--proxy-auth=${PROXY_USERNAME}:${PROXY_PASSWORD}`);
+  }
+  return args;
+};
+
 describe('PowerbeijingCrawler Real Site Test', () => {
   let browser: puppeteer.Browser;
 
   beforeAll(async () => {
+    const proxyArgs = getProxyArgs();
+    if (proxyArgs.length > 0) {
+      // Print only the server flag; the full arg list would expose the proxy password.
+      console.log('Using proxy:', proxyArgs[0]);
+    }
     browser = await puppeteer.launch({
       headless: false, // Change to false to see browser UI
-      args: ['--no-sandbox', '--disable-setuid-sandbox'],
+      args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
     });
   });
 
diff --git a/src/crawler/services/sdicc_target.spec.ts b/src/crawler/services/sdicc_target.spec.ts
index b9b0522..aa461a8 100644
--- a/src/crawler/services/sdicc_target.spec.ts
+++ b/src/crawler/services/sdicc_target.spec.ts
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
 // Increase timeout to 60 seconds for network operations
 jest.setTimeout(60000*5);
 
+// Builds Chromium proxy launch flags from the PROXY_* environment variables;
+// returns [] unless both PROXY_HOST and PROXY_PORT are set.
+// NOTE(review): Chromium has no documented `--proxy-auth` switch, so the credentials
+// flag is probably ignored; authenticated proxies need page.authenticate() per page.
+const getProxyArgs = (): string[] => {
+  const { PROXY_HOST, PROXY_PORT, PROXY_USERNAME, PROXY_PASSWORD } = process.env;
+  if (!PROXY_HOST || !PROXY_PORT) {
+    return [];
+  }
+
+  const args = [`--proxy-server=${PROXY_HOST}:${PROXY_PORT}`];
+  if (PROXY_USERNAME && PROXY_PASSWORD) {
+    args.push(`--proxy-auth=${PROXY_USERNAME}:${PROXY_PASSWORD}`);
+  }
+  return args;
+};
+
 describe('SdiccCrawler Real Site Test', () => {
   let browser: puppeteer.Browser;
 
   beforeAll(async () => {
+    const proxyArgs = getProxyArgs();
+    if (proxyArgs.length > 0) {
+      // Print only the server flag; the full arg list would expose the proxy password.
+      console.log('Using proxy:', proxyArgs[0]);
+    }
     browser = await puppeteer.launch({
       headless: false, // Change to false to see browser UI
-      args: ['--no-sandbox', '--disable-setuid-sandbox'],
+      args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
     });
   });
 
diff --git a/src/crawler/services/szecp_target.spec.ts b/src/crawler/services/szecp_target.spec.ts
index ad0abb2..26455f1 100644
--- a/src/crawler/services/szecp_target.spec.ts
+++ b/src/crawler/services/szecp_target.spec.ts
@@ -4,10 +4,32 @@ import * as puppeteer from 'puppeteer';
 // Increase timeout to 120 seconds for manual inspection and slow sites
 jest.setTimeout(120000);
 
+// Builds Chromium proxy launch flags from the PROXY_* environment variables;
+// returns [] unless both PROXY_HOST and PROXY_PORT are set.
+// NOTE(review): Chromium has no documented `--proxy-auth` switch, so the credentials
+// flag is probably ignored; authenticated proxies need page.authenticate() per page.
+const getProxyArgs = (): string[] => {
+  const { PROXY_HOST, PROXY_PORT, PROXY_USERNAME, PROXY_PASSWORD } = process.env;
+  if (!PROXY_HOST || !PROXY_PORT) {
+    return [];
+  }
+
+  const args = [`--proxy-server=${PROXY_HOST}:${PROXY_PORT}`];
+  if (PROXY_USERNAME && PROXY_PASSWORD) {
+    args.push(`--proxy-auth=${PROXY_USERNAME}:${PROXY_PASSWORD}`);
+  }
+  return args;
+};
+
 describe('SzecpCrawler Real Site Test', () => {
   let browser: puppeteer.Browser;
 
   beforeAll(async () => {
+    const proxyArgs = getProxyArgs();
+    if (proxyArgs.length > 0) {
+      // Print only the server flag; the full arg list would expose the proxy password.
+      console.log('Using proxy:', proxyArgs[0]);
+    }
     browser = await puppeteer.launch({
       headless: false, // Run in non-headless mode
       args: [
@@ -16,6 +38,7 @@ describe('SzecpCrawler Real Site Test', () => {
         '--disable-blink-features=AutomationControlled',
         '--window-size=1920,1080',
         '--disable-infobars',
+        ...proxyArgs,
       ],
       defaultViewport: null
     });
diff --git a/test/jest-e2e.json b/test/jest-e2e.json
new file mode 100644
index 0000000..e9d912f
--- /dev/null
+++ b/test/jest-e2e.json
@@ -0,0 +1,9 @@
+{
+  "moduleFileExtensions": ["js", "json", "ts"],
+  "rootDir": ".",
+  "testEnvironment": "node",
+  "testRegex": ".e2e-spec.ts$",
+  "transform": {
+    "^.+\\.(t|j)s$": "ts-jest"
+  }
+}