Compare commits
5 Commits
8b2f328981
...
f2630ed01c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f2630ed01c | ||
|
|
b1435523e8 | ||
|
|
f1ec37143c | ||
|
|
090e4121ce | ||
|
|
4f37b0fb61 |
100
README.md
100
README.md
@@ -97,26 +97,86 @@ Nest is an MIT-licensed open source project. It can grow thanks to the sponsors
|
||||
|
||||
Nest is [MIT licensed](https://github.com/nestjs/nest/blob/master/LICENSE).
|
||||
|
||||
How to Run:
|
||||
1. Database Setup: Update the .env file with your PostgreSQL credentials.
|
||||
## How to Run
|
||||
|
||||
1 DATABASE_TYPE=postgres
|
||||
2 DATABASE_HOST=localhost
|
||||
3 DATABASE_PORT=5432
|
||||
4 DATABASE_USERNAME=your_username
|
||||
5 DATABASE_PASSWORD=your_password
|
||||
6 DATABASE_NAME=bidding
|
||||
7 DATABASE_SYNCHRONIZE=true
|
||||
2. Install Dependencies:
|
||||
1 npm install
|
||||
2 cd frontend && npm install
|
||||
3. Build and Start:
|
||||
### 1. Database Setup
|
||||
Update the `.env` file with your PostgreSQL credentials:
|
||||
|
||||
1 # From the root directory
|
||||
2 cd frontend && npm run build
|
||||
3 cd ..
|
||||
4 npm run build
|
||||
5 npm run start
|
||||
```env
|
||||
DATABASE_TYPE=postgres
|
||||
DATABASE_HOST=localhost
|
||||
DATABASE_PORT=5432
|
||||
DATABASE_USERNAME=your_username
|
||||
DATABASE_PASSWORD=your_password
|
||||
DATABASE_NAME=bidding
|
||||
DATABASE_SYNCHRONIZE=true
|
||||
```
|
||||
|
||||
The system will automatically initialize with the preset keywords: "山东", "海", "建设", "工程", "采购". You can
|
||||
manage these and view crawled bidding information at http://localhost:3000.
|
||||
### 2. Install Dependencies
|
||||
|
||||
```bash
|
||||
npm install
|
||||
cd frontend && npm install
|
||||
```
|
||||
|
||||
### 3. Build and Start
|
||||
|
||||
```bash
|
||||
# From the root directory
|
||||
cd frontend && npm run build
|
||||
cd ..
|
||||
npm run build
|
||||
npm run start
|
||||
```
|
||||
|
||||
## Features
|
||||
|
||||
### Frontend Features
|
||||
|
||||
- **Dashboard**: View high priority bids and today's bids
|
||||
- **Date Filtering**:
|
||||
- Click "3天" or "7天" buttons to filter bids from the last 3 or 7 days
|
||||
- Only the start date of the range is applied: every bid published on or after the selected start date is shown, including bids newer than the selected end date
|
||||
- **Keyword Filtering**: Filter bids by keywords (saved in localStorage)
|
||||
- **All Bids**: View all bids with pagination and source filtering
|
||||
- **Keyword Management**: Add and delete keywords with weight-based priority
|
||||
|
||||
### Backend Features
|
||||
|
||||
- **Multi-Source Crawling**: Crawls bidding information from multiple sources:
|
||||
- ChdtpCrawler
|
||||
- ChngCrawler
|
||||
- SzecpCrawler
|
||||
- CdtCrawler
|
||||
- EpsCrawler
|
||||
- CnncecpCrawler
|
||||
- CgnpcCrawler
|
||||
- CeicCrawler
|
||||
- EspicCrawler
|
||||
- PowerbeijingCrawler
|
||||
- **Automatic Retry**: If a crawler returns 0 items, it will be retried after all crawlers complete
|
||||
- **Proxy Support**: Configurable proxy settings via environment variables
|
||||
- **Scheduled Tasks**: Automatic crawling at scheduled intervals
|
||||
|
||||
### Environment Variables
|
||||
|
||||
```env
|
||||
# Database
|
||||
DATABASE_TYPE=postgres
|
||||
DATABASE_HOST=localhost
|
||||
DATABASE_PORT=5432
|
||||
DATABASE_USERNAME=your_username
|
||||
DATABASE_PASSWORD=your_password
|
||||
DATABASE_NAME=bidding
|
||||
DATABASE_SYNCHRONIZE=true
|
||||
|
||||
# Proxy (optional)
|
||||
PROXY_HOST=your_proxy_host
|
||||
PROXY_PORT=your_proxy_port
|
||||
PROXY_USERNAME=your_proxy_username
|
||||
PROXY_PASSWORD=your_proxy_password
|
||||
```
|
||||
|
||||
## Initial Setup
|
||||
|
||||
The system will automatically initialize with the preset keywords: "山东", "海", "建设", "工程", "采购". You can manage these and view crawled bidding information at http://localhost:3000.
|
||||
@@ -31,167 +31,43 @@
|
||||
</el-header>
|
||||
|
||||
<el-main>
|
||||
<div v-if="activeIndex === '1'">
|
||||
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px;">
|
||||
<h2 style="margin: 0;">Dashboard</h2>
|
||||
<el-button type="primary" :loading="crawling" :disabled="isCrawling" @click="handleCrawl">
|
||||
<el-icon style="margin-right: 5px"><Refresh /></el-icon>
|
||||
立刻抓取
|
||||
</el-button>
|
||||
</div>
|
||||
<el-row :gutter="20">
|
||||
<el-col :span="24">
|
||||
<el-card class="box-card" shadow="hover">
|
||||
<template #header>
|
||||
<div class="card-header">
|
||||
<span>High Priority Bids</span>
|
||||
<el-tag type="danger">Top 10</el-tag>
|
||||
</div>
|
||||
</template>
|
||||
<el-table :data="highPriorityBids" style="width: 100%" size="small">
|
||||
<el-table-column prop="title" label="Title">
|
||||
<template #default="scope">
|
||||
<a :href="scope.row.url" target="_blank">{{ scope.row.title }}</a>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column prop="source" label="Source" width="240" />
|
||||
<el-table-column prop="publishDate" label="Date" width="120">
|
||||
<template #default="scope">{{ formatDate(scope.row.publishDate) }}</template>
|
||||
</el-table-column>
|
||||
</el-table>
|
||||
</el-card>
|
||||
</el-col>
|
||||
</el-row>
|
||||
<el-divider />
|
||||
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px;">
|
||||
<h3 style="margin: 0;">Today's Bids</h3>
|
||||
<div style="display: flex; gap: 10px;">
|
||||
<el-date-picker
|
||||
v-model="dateRange"
|
||||
type="daterange"
|
||||
range-separator="To"
|
||||
start-placeholder="Start Date"
|
||||
end-placeholder="End Date"
|
||||
format="YYYY-MM-DD"
|
||||
value-format="YYYY-MM-DD"
|
||||
clearable
|
||||
style="width: 240px;"
|
||||
<Dashboard
|
||||
v-if="activeIndex === '1'"
|
||||
:today-bids="todayBids"
|
||||
:high-priority-bids="highPriorityBids"
|
||||
:keywords="keywords"
|
||||
:loading="loading"
|
||||
:is-crawling="isCrawling"
|
||||
@refresh="fetchData"
|
||||
/>
|
||||
<el-button type="primary" @click="setLast3Days">3天</el-button>
|
||||
<el-button type="primary" @click="setLast7Days">7天</el-button>
|
||||
<el-select
|
||||
v-model="selectedKeywords"
|
||||
multiple
|
||||
collapse-tags
|
||||
collapse-tags-tooltip
|
||||
placeholder="Filter by Keywords"
|
||||
clearable
|
||||
style="width: 300px;"
|
||||
>
|
||||
<el-option
|
||||
v-for="keyword in keywords"
|
||||
:key="keyword.id"
|
||||
:label="keyword.word"
|
||||
:value="keyword.word"
|
||||
/>
|
||||
</el-select>
|
||||
</div>
|
||||
</div>
|
||||
<el-table :data="filteredTodayBids" v-loading="loading" style="width: 100%">
|
||||
<el-table-column prop="title" label="Title">
|
||||
<template #default="scope">
|
||||
<a :href="scope.row.url" target="_blank">{{ scope.row.title }}</a>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column prop="source" label="Source" width="220" />
|
||||
<el-table-column prop="publishDate" label="Date" width="150">
|
||||
<template #default="scope">{{ formatDate(scope.row.publishDate) }}</template>
|
||||
</el-table-column>
|
||||
</el-table>
|
||||
</div>
|
||||
|
||||
<div v-if="activeIndex === '2'">
|
||||
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px;">
|
||||
<h2 style="margin: 0;">All Bids</h2>
|
||||
<el-select v-model="selectedSource" placeholder="Filter by Source" clearable style="width: 200px" @change="currentPage = 1; fetchData()">
|
||||
<el-option
|
||||
v-for="source in sourceOptions"
|
||||
:key="source"
|
||||
:label="source"
|
||||
:value="source"
|
||||
/>
|
||||
</el-select>
|
||||
</div>
|
||||
<el-table :data="bids" v-loading="loading" style="width: 100%">
|
||||
<el-table-column prop="title" label="Title">
|
||||
<template #default="scope">
|
||||
<a :href="scope.row.url" target="_blank">{{ scope.row.title }}</a>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column prop="source" label="Source" width="200" />
|
||||
<el-table-column prop="publishDate" label="Date" width="150">
|
||||
<template #default="scope">{{ formatDate(scope.row.publishDate) }}</template>
|
||||
</el-table-column>
|
||||
</el-table>
|
||||
<el-pagination
|
||||
v-model:current-page="currentPage"
|
||||
v-model:page-size="pageSize"
|
||||
:page-sizes="[10, 20, 50, 100]"
|
||||
<Bids
|
||||
v-if="activeIndex === '2'"
|
||||
:bids="bids"
|
||||
:source-options="sourceOptions"
|
||||
:loading="loading"
|
||||
:total="total"
|
||||
layout="total, sizes, prev, pager, next, jumper"
|
||||
@current-change="handlePageChange"
|
||||
@size-change="handleSizeChange"
|
||||
style="margin-top: 20px; justify-content: flex-end;"
|
||||
@fetch="handleFetchBids"
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div v-if="activeIndex === '3'">
|
||||
<div class="card-header" style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px;">
|
||||
<h2>Keyword Management</h2>
|
||||
<el-button type="primary" @click="dialogVisible = true">Add Keyword</el-button>
|
||||
</div>
|
||||
|
||||
<div v-loading="loading" style="min-height: 200px;">
|
||||
<el-tag
|
||||
v-for="keyword in keywords"
|
||||
:key="keyword.id"
|
||||
closable
|
||||
:type="getTagType(keyword.weight)"
|
||||
@close="handleDeleteKeyword(keyword.id)"
|
||||
style="margin: 5px;"
|
||||
>
|
||||
{{ keyword.word }}
|
||||
</el-tag>
|
||||
<el-empty v-if="keywords.length === 0" description="No keywords" />
|
||||
</div>
|
||||
</div>
|
||||
<Keywords
|
||||
v-if="activeIndex === '3'"
|
||||
:keywords="keywords"
|
||||
:loading="loading"
|
||||
@refresh="fetchData"
|
||||
/>
|
||||
</el-main>
|
||||
</el-container>
|
||||
|
||||
<el-dialog v-model="dialogVisible" title="Add Keyword" width="30%">
|
||||
<el-form :model="form" label-width="120px">
|
||||
<el-form-item label="Keyword">
|
||||
<el-input v-model="form.word" />
|
||||
</el-form-item>
|
||||
<el-form-item label="Weight">
|
||||
<el-input-number v-model="form.weight" :min="1" :max="5" />
|
||||
</el-form-item>
|
||||
</el-form>
|
||||
<template #footer>
|
||||
<span class="dialog-footer">
|
||||
<el-button @click="dialogVisible = false">Cancel</el-button>
|
||||
<el-button type="primary" @click="handleAddKeyword">Confirm</el-button>
|
||||
</span>
|
||||
</template>
|
||||
</el-dialog>
|
||||
</el-container>
|
||||
</template>
|
||||
|
||||
<script setup lang="ts">
|
||||
import { ref, onMounted, reactive, computed, watch } from 'vue'
|
||||
import { ref, onMounted } from 'vue'
|
||||
import axios from 'axios'
|
||||
import { ElMessage } from 'element-plus'
|
||||
import { DataBoard, Document, Setting, Refresh } from '@element-plus/icons-vue'
|
||||
import { DataBoard, Document, Setting } from '@element-plus/icons-vue'
|
||||
import Dashboard from './components/Dashboard.vue'
|
||||
import Bids from './components/Bids.vue'
|
||||
import Keywords from './components/Keywords.vue'
|
||||
|
||||
const activeIndex = ref('1')
|
||||
const bids = ref<any[]>([])
|
||||
@@ -199,202 +75,41 @@ const todayBids = ref<any[]>([])
|
||||
const highPriorityBids = ref<any[]>([])
|
||||
const keywords = ref<any[]>([])
|
||||
const loading = ref(false)
|
||||
const crawling = ref(false)
|
||||
const dialogVisible = ref(false)
|
||||
const selectedSource = ref('')
|
||||
const currentPage = ref(1)
|
||||
const pageSize = ref(10)
|
||||
const isCrawling = ref(false)
|
||||
const total = ref(0)
|
||||
const sourceOptions = ref<string[]>([])
|
||||
const isCrawling = ref(false)
|
||||
const selectedKeywords = ref<string[]>([])
|
||||
const dateRange = ref<[string, string] | null>(null)
|
||||
|
||||
// 从 localStorage 加载保存的关键字
|
||||
const loadSavedKeywords = () => {
|
||||
const saved = localStorage.getItem('selectedKeywords')
|
||||
if (saved) {
|
||||
try {
|
||||
selectedKeywords.value = JSON.parse(saved)
|
||||
} catch (e) {
|
||||
console.error('Failed to parse saved keywords:', e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 监听关键字变化并保存到 localStorage
|
||||
watch(selectedKeywords, (newKeywords) => {
|
||||
localStorage.setItem('selectedKeywords', JSON.stringify(newKeywords))
|
||||
}, { deep: true })
|
||||
|
||||
// 监听日期范围变化并显示提示
|
||||
watch(dateRange, () => {
|
||||
const totalBids = bids.value.length
|
||||
const filteredCount = filteredTodayBids.value.length
|
||||
|
||||
if (totalBids > 0 && filteredCount < totalBids) {
|
||||
ElMessage.info(`筛选结果:共 ${filteredCount} 条数据(总共 ${totalBids} 条)`)
|
||||
}
|
||||
})
|
||||
|
||||
const form = reactive({
|
||||
word: '',
|
||||
weight: 1
|
||||
})
|
||||
|
||||
// Pick the Element Plus tag type for a keyword from its weight: at least 5 → danger,
// at least 4 → warning, at least 3 → primary, at least 2 → success, otherwise info.
const getTagType = (weight: number) => {
  // Ordered from the highest threshold down; the first threshold reached wins.
  const tiers = [
    [5, 'danger'],
    [4, 'warning'],
    [3, 'primary'],
    [2, 'success'],
  ] as const

  for (const [threshold, tagType] of tiers) {
    if (weight >= threshold) return tagType
  }
  return 'info'
}
|
||||
|
||||
const handleSelect = (key: string) => {
|
||||
activeIndex.value = key
|
||||
}
|
||||
|
||||
// 处理分页变化
|
||||
const handlePageChange = (page: number) => {
|
||||
currentPage.value = page
|
||||
fetchData()
|
||||
const handleFetchBids = async (page: number, limit: number, source?: string) => {
|
||||
loading.value = true
|
||||
try {
|
||||
const res = await axios.get('/api/bids', {
|
||||
params: {
|
||||
page,
|
||||
limit,
|
||||
source: source || undefined
|
||||
}
|
||||
|
||||
// 处理每页数量变化
|
||||
const handleSizeChange = (size: number) => {
|
||||
pageSize.value = size
|
||||
currentPage.value = 1
|
||||
fetchData()
|
||||
}
|
||||
|
||||
// 设置日期范围为最近3天
|
||||
const setLast3Days = () => {
|
||||
const endDate = new Date()
|
||||
const startDate = new Date()
|
||||
startDate.setDate(startDate.getDate() - 2) // 最近3天(包括今天)
|
||||
|
||||
const formatDateForPicker = (date: Date) => {
|
||||
const year = date.getFullYear()
|
||||
const month = String(date.getMonth() + 1).padStart(2, '0')
|
||||
const day = String(date.getDate()).padStart(2, '0')
|
||||
return `${year}-${month}-${day}`
|
||||
}
|
||||
|
||||
dateRange.value = [formatDateForPicker(startDate), formatDateForPicker(endDate)]
|
||||
|
||||
console.log('setLast3Days called, todayBids:', todayBids.value.length, 'dateRange:', dateRange.value)
|
||||
|
||||
// 直接计算筛选结果并显示提示(只限制开始时间,不限制结束时间)
|
||||
const start = new Date(startDate)
|
||||
start.setHours(0, 0, 0, 0)
|
||||
|
||||
let result = todayBids.value
|
||||
result = result.filter(bid => {
|
||||
if (!bid.publishDate) return false
|
||||
const bidDate = new Date(bid.publishDate)
|
||||
return bidDate >= start
|
||||
})
|
||||
|
||||
const totalBids = todayBids.value.length
|
||||
const filteredCount = result.length
|
||||
|
||||
console.log('setLast3Days result, totalBids:', totalBids, 'filteredCount:', filteredCount)
|
||||
if (totalBids === 0) {
|
||||
ElMessage.warning('暂无数据,请先抓取数据')
|
||||
bids.value = res.data.items
|
||||
total.value = res.data.total
|
||||
} catch (error) {
|
||||
console.error('Failed to fetch bids:', error)
|
||||
} finally {
|
||||
loading.value = false
|
||||
}
|
||||
}
|
||||
|
||||
// 设置日期范围为最近7天
|
||||
const setLast7Days = () => {
|
||||
const endDate = new Date()
|
||||
const startDate = new Date()
|
||||
startDate.setDate(startDate.getDate() - 6) // 最近7天(包括今天)
|
||||
|
||||
const formatDateForPicker = (date: Date) => {
|
||||
const year = date.getFullYear()
|
||||
const month = String(date.getMonth() + 1).padStart(2, '0')
|
||||
const day = String(date.getDate()).padStart(2, '0')
|
||||
return `${year}-${month}-${day}`
|
||||
}
|
||||
|
||||
dateRange.value = [formatDateForPicker(startDate), formatDateForPicker(endDate)]
|
||||
|
||||
console.log('setLast7Days called, todayBids:', todayBids.value.length, 'dateRange:', dateRange.value)
|
||||
|
||||
// 直接计算筛选结果并显示提示(只限制开始时间,不限制结束时间)
|
||||
const start = new Date(startDate)
|
||||
start.setHours(0, 0, 0, 0)
|
||||
|
||||
let result = todayBids.value
|
||||
result = result.filter(bid => {
|
||||
if (!bid.publishDate) return false
|
||||
const bidDate = new Date(bid.publishDate)
|
||||
return bidDate >= start
|
||||
})
|
||||
|
||||
const totalBids = todayBids.value.length
|
||||
const filteredCount = result.length
|
||||
|
||||
console.log('setLast7Days result, totalBids:', totalBids, 'filteredCount:', filteredCount)
|
||||
if (totalBids === 0) {
|
||||
ElMessage.warning('暂无数据,请先抓取数据')
|
||||
}
|
||||
}
|
||||
|
||||
const formatDate = (dateString: string) => {
|
||||
if (!dateString) return '-'
|
||||
return new Date(dateString).toLocaleDateString()
|
||||
}
|
||||
|
||||
// 过滤 Today's Bids,只显示包含所选关键字的项目,并且在日期范围内
|
||||
const filteredTodayBids = computed(() => {
|
||||
let result = todayBids.value
|
||||
|
||||
// 按关键字筛选
|
||||
if (selectedKeywords.value.length > 0) {
|
||||
result = result.filter(bid => {
|
||||
return selectedKeywords.value.some(keyword =>
|
||||
bid.title.toLowerCase().includes(keyword.toLowerCase())
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
// 按日期范围筛选(只限制开始时间,不限制结束时间)
|
||||
if (dateRange.value && dateRange.value.length === 2) {
|
||||
const [startDate] = dateRange.value
|
||||
result = result.filter(bid => {
|
||||
if (!bid.publishDate) return false
|
||||
const bidDate = new Date(bid.publishDate)
|
||||
const start = new Date(startDate)
|
||||
// 设置时间为当天的开始
|
||||
start.setHours(0, 0, 0, 0)
|
||||
return bidDate >= start
|
||||
})
|
||||
}
|
||||
|
||||
return result
|
||||
})
|
||||
|
||||
// 监听筛选结果变化并显示提示
|
||||
watch(filteredTodayBids, (newFilteredBids) => {
|
||||
const totalBids = todayBids.value.length
|
||||
const filteredCount = newFilteredBids.length
|
||||
|
||||
if (totalBids > 0 && filteredCount < totalBids) {
|
||||
ElMessage.info(`筛选结果:共 ${filteredCount} 条数据(总共 ${totalBids} 条)`)
|
||||
}
|
||||
}, { deep: true })
|
||||
|
||||
const fetchData = async () => {
|
||||
loading.value = true
|
||||
try {
|
||||
const [bidsRes, recentRes, highRes, kwRes, sourcesRes, statusRes] = await Promise.all([
|
||||
axios.get('/api/bids', {
|
||||
params: {
|
||||
page: currentPage.value,
|
||||
limit: pageSize.value,
|
||||
source: selectedSource.value || undefined
|
||||
page: 1,
|
||||
limit: 10
|
||||
}
|
||||
}),
|
||||
axios.get('/api/bids/recent'),
|
||||
@@ -411,58 +126,13 @@ const fetchData = async () => {
|
||||
sourceOptions.value = sourcesRes.data
|
||||
isCrawling.value = statusRes.data.isCrawling
|
||||
} catch (error) {
|
||||
ElMessage.error('Failed to fetch data')
|
||||
console.error('Failed to fetch data:', error)
|
||||
} finally {
|
||||
loading.value = false
|
||||
}
|
||||
}
|
||||
|
||||
const handleCrawl = async () => {
|
||||
if (isCrawling.value) {
|
||||
ElMessage.warning('Crawl is already running')
|
||||
return
|
||||
}
|
||||
crawling.value = true
|
||||
try {
|
||||
await axios.post('/api/crawler/run')
|
||||
ElMessage.success('Crawl completed successfully')
|
||||
fetchData() // Refresh data after crawl
|
||||
} catch (error) {
|
||||
ElMessage.error('Failed to run crawl task')
|
||||
} finally {
|
||||
crawling.value = false
|
||||
}
|
||||
}
|
||||
|
||||
const handleAddKeyword = async () => {
|
||||
if (!form.word) {
|
||||
ElMessage.warning('Please enter a keyword')
|
||||
return
|
||||
}
|
||||
try {
|
||||
await axios.post('/api/keywords', form)
|
||||
ElMessage.success('Keyword added')
|
||||
dialogVisible.value = false
|
||||
form.word = ''
|
||||
form.weight = 1
|
||||
fetchData()
|
||||
} catch (error) {
|
||||
ElMessage.error('Failed to add keyword')
|
||||
}
|
||||
}
|
||||
|
||||
const handleDeleteKeyword = async (id: string) => {
|
||||
try {
|
||||
await axios.delete(`/api/keywords/${id}`)
|
||||
ElMessage.success('Keyword deleted')
|
||||
fetchData()
|
||||
} catch (error) {
|
||||
ElMessage.error('Failed to delete keyword')
|
||||
}
|
||||
}
|
||||
|
||||
onMounted(() => {
|
||||
loadSavedKeywords()
|
||||
fetchData()
|
||||
})
|
||||
</script>
|
||||
@@ -486,9 +156,4 @@ onMounted(() => {
|
||||
font-size: 18px;
|
||||
background-color: #434a50;
|
||||
}
|
||||
.card-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
}
|
||||
</style>
|
||||
78
frontend/src/components/Bids.vue
Normal file
78
frontend/src/components/Bids.vue
Normal file
@@ -0,0 +1,78 @@
|
||||
<template>
|
||||
<div>
|
||||
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px;">
|
||||
<h2 style="margin: 0;">All Bids</h2>
|
||||
<el-select v-model="selectedSource" placeholder="Filter by Source" clearable style="width: 200px" @change="handleSourceChange">
|
||||
<el-option
|
||||
v-for="source in sourceOptions"
|
||||
:key="source"
|
||||
:label="source"
|
||||
:value="source"
|
||||
/>
|
||||
</el-select>
|
||||
</div>
|
||||
<el-table :data="bids" v-loading="loading" style="width: 100%">
|
||||
<el-table-column prop="title" label="Title">
|
||||
<template #default="scope">
|
||||
<a :href="scope.row.url" target="_blank">{{ scope.row.title }}</a>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column prop="source" label="Source" width="200" />
|
||||
<el-table-column prop="publishDate" label="Date" width="150">
|
||||
<template #default="scope">{{ formatDate(scope.row.publishDate) }}</template>
|
||||
</el-table-column>
|
||||
</el-table>
|
||||
<el-pagination
|
||||
v-model:current-page="currentPage"
|
||||
v-model:page-size="pageSize"
|
||||
:page-sizes="[10, 20, 50, 100]"
|
||||
:total="total"
|
||||
layout="total, sizes, prev, pager, next, jumper"
|
||||
@current-change="handlePageChange"
|
||||
@size-change="handleSizeChange"
|
||||
style="margin-top: 20px; justify-content: flex-end;"
|
||||
/>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script setup lang="ts">
|
||||
import { ref } from 'vue'
|
||||
|
||||
interface Props {
|
||||
bids: any[]
|
||||
sourceOptions: string[]
|
||||
loading: boolean
|
||||
total: number
|
||||
}
|
||||
|
||||
const props = defineProps<Props>()
|
||||
|
||||
const emit = defineEmits<{
|
||||
fetch: [page: number, limit: number, source?: string]
|
||||
}>()
|
||||
|
||||
const selectedSource = ref('')
|
||||
const currentPage = ref(1)
|
||||
const pageSize = ref(10)
|
||||
|
||||
/**
 * Format a row's publishDate for display in the table.
 *
 * @param dateString - The date value to format (anything `new Date()` can parse).
 * @returns The locale-formatted date, or '-' when the value is empty or cannot be parsed
 *          (an unparseable value used to be rendered as the literal text "Invalid Date").
 */
const formatDate = (dateString: string) => {
  if (!dateString) return '-'
  const parsed = new Date(dateString)
  // An unparseable input produces a Date whose time value is NaN.
  if (Number.isNaN(parsed.getTime())) return '-'
  return parsed.toLocaleDateString()
}
|
||||
|
||||
// Source filter changed: go back to the first page and ask the parent to fetch it.
// An empty selection is sent as `undefined`, i.e. no source filter.
const handleSourceChange = () => {
  const firstPage = 1
  currentPage.value = firstPage
  const sourceFilter = selectedSource.value || undefined
  emit('fetch', firstPage, pageSize.value, sourceFilter)
}
|
||||
|
||||
// Pager callback: remember the page the user moved to, then request it from the parent.
const handlePageChange = (page: number) => {
  currentPage.value = page
  const sourceFilter = selectedSource.value || undefined
  emit('fetch', page, pageSize.value, sourceFilter)
}
|
||||
|
||||
// Page-size callback: store the new size and restart from page 1 before refetching.
const handleSizeChange = (size: number) => {
  const firstPage = 1
  pageSize.value = size
  currentPage.value = firstPage
  emit('fetch', firstPage, size, selectedSource.value || undefined)
}
|
||||
</script>
|
||||
279
frontend/src/components/Dashboard.vue
Normal file
279
frontend/src/components/Dashboard.vue
Normal file
@@ -0,0 +1,279 @@
|
||||
<template>
|
||||
<div>
|
||||
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px;">
|
||||
<h2 style="margin: 0;">Dashboard</h2>
|
||||
<el-button type="primary" :loading="crawling" :disabled="isCrawling" @click="handleCrawl">
|
||||
<el-icon style="margin-right: 5px"><Refresh /></el-icon>
|
||||
立刻抓取
|
||||
</el-button>
|
||||
</div>
|
||||
<el-row :gutter="20">
|
||||
<el-col :span="24">
|
||||
<el-card class="box-card" shadow="hover">
|
||||
<template #header>
|
||||
<div class="card-header">
|
||||
<span>High Priority Bids</span>
|
||||
<el-tag type="danger">Top 10</el-tag>
|
||||
</div>
|
||||
</template>
|
||||
<el-table :data="highPriorityBids" style="width: 100%" size="small">
|
||||
<el-table-column prop="title" label="Title">
|
||||
<template #default="scope">
|
||||
<a :href="scope.row.url" target="_blank">{{ scope.row.title }}</a>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column prop="source" label="Source" width="240" />
|
||||
<el-table-column prop="publishDate" label="Date" width="120">
|
||||
<template #default="scope">{{ formatDate(scope.row.publishDate) }}</template>
|
||||
</el-table-column>
|
||||
</el-table>
|
||||
</el-card>
|
||||
</el-col>
|
||||
</el-row>
|
||||
<el-divider />
|
||||
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px;">
|
||||
<h3 style="margin: 0;">Today's Bids</h3>
|
||||
<div style="display: flex; gap: 10px;">
|
||||
<el-date-picker
|
||||
v-model="dateRange"
|
||||
type="daterange"
|
||||
range-separator="To"
|
||||
start-placeholder="Start Date"
|
||||
end-placeholder="End Date"
|
||||
format="YYYY-MM-DD"
|
||||
value-format="YYYY-MM-DD"
|
||||
clearable
|
||||
style="width: 240px;"
|
||||
/>
|
||||
<el-button type="primary" @click="setLast3Days">3天</el-button>
|
||||
<el-button type="primary" @click="setLast7Days">7天</el-button>
|
||||
<el-select
|
||||
v-model="selectedKeywords"
|
||||
multiple
|
||||
collapse-tags
|
||||
collapse-tags-tooltip
|
||||
placeholder="Filter by Keywords"
|
||||
clearable
|
||||
style="width: 300px;"
|
||||
>
|
||||
<el-option
|
||||
v-for="keyword in keywords"
|
||||
:key="keyword.id"
|
||||
:label="keyword.word"
|
||||
:value="keyword.word"
|
||||
/>
|
||||
</el-select>
|
||||
</div>
|
||||
</div>
|
||||
<el-table :data="filteredTodayBids" v-loading="loading" style="width: 100%">
|
||||
<el-table-column prop="title" label="Title">
|
||||
<template #default="scope">
|
||||
<a :href="scope.row.url" target="_blank">{{ scope.row.title }}</a>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column prop="source" label="Source" width="220" />
|
||||
<el-table-column prop="publishDate" label="Date" width="150">
|
||||
<template #default="scope">{{ formatDate(scope.row.publishDate) }}</template>
|
||||
</el-table-column>
|
||||
</el-table>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script setup lang="ts">
|
||||
import { ref, computed, watch } from 'vue'
|
||||
import axios from 'axios'
|
||||
import { ElMessage } from 'element-plus'
|
||||
import { Refresh } from '@element-plus/icons-vue'
|
||||
|
||||
interface Props {
|
||||
todayBids: any[]
|
||||
highPriorityBids: any[]
|
||||
keywords: any[]
|
||||
loading: boolean
|
||||
isCrawling: boolean
|
||||
}
|
||||
|
||||
const props = defineProps<Props>()
|
||||
|
||||
const emit = defineEmits<{
|
||||
crawl: []
|
||||
refresh: []
|
||||
}>()
|
||||
|
||||
const selectedKeywords = ref<string[]>([])
|
||||
const dateRange = ref<[string, string] | null>(null)
|
||||
const crawling = ref(false)
|
||||
|
||||
// Restore the keyword filter stored in localStorage under the 'selectedKeywords' key.
// Nothing is changed when the key is absent or its content is not a JSON array of strings.
const loadSavedKeywords = () => {
  const saved = localStorage.getItem('selectedKeywords')
  if (!saved) return

  try {
    const parsed: unknown = JSON.parse(saved)
    // Valid JSON is not necessarily a string array (e.g. "null" or "{}"); assigning such a
    // value would break the `.length` / `.some` calls the keyword filter makes on it.
    if (Array.isArray(parsed) && parsed.every((item) => typeof item === 'string')) {
      selectedKeywords.value = parsed
    } else {
      console.error(
        'Failed to parse saved keywords:',
        new TypeError('expected a JSON array of strings'),
      )
    }
  } catch (e) {
    console.error('Failed to parse saved keywords:', e)
  }
}
|
||||
|
||||
// Persist the keyword selection to localStorage every time it changes.
watch(
  selectedKeywords,
  (current) => {
    const serialized = JSON.stringify(current)
    localStorage.setItem('selectedKeywords', serialized)
  },
  { deep: true },
)
|
||||
|
||||
// When the date range changes, tell the user how many bids remain if the filter hides any.
// NOTE(review): the watcher on filteredTodayBids shows the same message, so a date change
// that also changes the filtered list may notify twice — confirm whether both are wanted.
watch(dateRange, () => {
  const totalBids = props.todayBids.length
  const shownBids = filteredTodayBids.value.length
  const nothingHidden = totalBids <= 0 || shownBids >= totalBids
  if (nothingHidden) return
  ElMessage.info(`筛选结果:共 ${shownBids} 条数据(总共 ${totalBids} 条)`)
})
|
||||
|
||||
/**
 * Format a row's publishDate for display in the dashboard tables.
 *
 * @param dateString - The date value to format (anything `new Date()` can parse).
 * @returns The locale-formatted date, or '-' when the value is empty or cannot be parsed
 *          (an unparseable value used to be rendered as the literal text "Invalid Date").
 */
const formatDate = (dateString: string) => {
  if (!dateString) return '-'
  const parsed = new Date(dateString)
  // An unparseable input produces a Date whose time value is NaN.
  if (Number.isNaN(parsed.getTime())) return '-'
  return parsed.toLocaleDateString()
}
|
||||
|
||||
// Rows for the "Today's Bids" table: props.todayBids narrowed by the selected keywords
// (a bid matches when its title contains any of them, case-insensitively) and by the
// start of the selected date range. Only the start date is applied; the end date is
// ignored, so bids newer than the end date stay visible.
const filteredTodayBids = computed(() => {
  let result = props.todayBids

  // Keyword filter.
  if (selectedKeywords.value.length > 0) {
    // Lower-case the keywords once instead of once per bid/keyword pair.
    const needles = selectedKeywords.value.map((keyword) => keyword.toLowerCase())
    result = result.filter((bid) => {
      // A bid without a title matches no keyword instead of throwing.
      const title = String(bid.title ?? '').toLowerCase()
      return needles.some((needle) => title.includes(needle))
    })
  }

  // Date filter (start date only).
  if (dateRange.value && dateRange.value.length === 2) {
    const [startDate] = dateRange.value
    // The picker yields 'YYYY-MM-DD'. A date-only string is parsed as UTC midnight, which in
    // time zones west of UTC falls on the previous local day; adding a time component makes
    // it parse as local midnight of the selected day. Computed once, not once per bid.
    const start = new Date(`${startDate}T00:00:00`)
    result = result.filter((bid) => {
      if (!bid.publishDate) return false
      return new Date(bid.publishDate) >= start
    })
  }

  return result
})
|
||||
|
||||
// Whenever the filtered list changes, report how many bids are shown if some are hidden.
watch(
  filteredTodayBids,
  (visibleBids) => {
    const totalBids = props.todayBids.length
    const shownBids = visibleBids.length
    if (totalBids <= 0 || shownBids >= totalBids) return
    ElMessage.info(`筛选结果:共 ${shownBids} 条数据(总共 ${totalBids} 条)`)
  },
  { deep: true },
)
|
||||
|
||||
// Format a Date as the 'YYYY-MM-DD' string the date picker uses (local calendar date).
const formatDateForPicker = (date: Date) => {
  const year = date.getFullYear()
  const month = String(date.getMonth() + 1).padStart(2, '0')
  const day = String(date.getDate()).padStart(2, '0')
  return `${year}-${month}-${day}`
}

// Select the last `days` calendar days, today included, as the active date range.
// The filtering itself and the "N of M shown" message are driven by the dateRange change,
// so nothing is recomputed or logged here.
const setLastDays = (days: number) => {
  const endDate = new Date()
  const startDate = new Date(endDate)
  // Today counts as one of the days, hence `days - 1` days back.
  startDate.setDate(startDate.getDate() - (days - 1))
  dateRange.value = [formatDateForPicker(startDate), formatDateForPicker(endDate)]

  // With no bids loaded the filter has nothing to show; tell the user to crawl first.
  if (props.todayBids.length === 0) {
    ElMessage.warning('暂无数据,请先抓取数据')
  }
}

// "3天" button: set the date range to the last 3 days (including today).
const setLast3Days = () => {
  setLastDays(3)
}

// "7天" button: set the date range to the last 7 days (including today).
const setLast7Days = () => {
  setLastDays(7)
}
|
||||
|
||||
// Start a crawl via POST /api/crawler/run and, when the request succeeds, emit 'refresh'
// so the parent reloads its data. `crawling` drives the button's loading state.
const handleCrawl = async () => {
  // The button is disabled while props.isCrawling is true; this guards any other call path.
  if (props.isCrawling) {
    ElMessage.warning('Crawl is already running')
    return
  }

  crawling.value = true
  try {
    await axios.post('/api/crawler/run')
    ElMessage.success('Crawl completed successfully')
    emit('refresh') // Refresh data after crawl
  } catch (error) {
    // Keep the cause visible in the console instead of discarding it.
    console.error('Failed to run crawl task:', error)
    ElMessage.error('Failed to run crawl task')
  } finally {
    crawling.value = false
  }
}
|
||||
|
||||
// Load the saved keywords when the component script is first run
|
||||
loadSavedKeywords()
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.card-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
}
|
||||
</style>
|
||||
@@ -1,41 +0,0 @@
|
||||
<script setup lang="ts">
|
||||
import { ref } from 'vue'
|
||||
|
||||
defineProps<{ msg: string }>()
|
||||
|
||||
const count = ref(0)
|
||||
</script>
|
||||
|
||||
<template>
|
||||
<h1>{{ msg }}</h1>
|
||||
|
||||
<div class="card">
|
||||
<button type="button" @click="count++">count is {{ count }}</button>
|
||||
<p>
|
||||
Edit
|
||||
<code>components/HelloWorld.vue</code> to test HMR
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<p>
|
||||
Check out
|
||||
<a href="https://vuejs.org/guide/quick-start.html#local" target="_blank"
|
||||
>create-vue</a
|
||||
>, the official Vue + Vite starter
|
||||
</p>
|
||||
<p>
|
||||
Learn more about IDE Support for Vue in the
|
||||
<a
|
||||
href="https://vuejs.org/guide/scaling-up/tooling.html#ide-support"
|
||||
target="_blank"
|
||||
>Vue Docs Scaling up Guide</a
|
||||
>.
|
||||
</p>
|
||||
<p class="read-the-docs">Click on the Vite and Vue logos to learn more</p>
|
||||
</template>
|
||||
|
||||
<style scoped>
|
||||
.read-the-docs {
|
||||
color: #888;
|
||||
}
|
||||
</style>
|
||||
107
frontend/src/components/Keywords.vue
Normal file
107
frontend/src/components/Keywords.vue
Normal file
@@ -0,0 +1,107 @@
|
||||
<template>
|
||||
<div>
|
||||
<div class="card-header" style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px;">
|
||||
<h2>Keyword Management</h2>
|
||||
<el-button type="primary" @click="dialogVisible = true">Add Keyword</el-button>
|
||||
</div>
|
||||
|
||||
<div v-loading="loading" style="min-height: 200px;">
|
||||
<el-tag
|
||||
v-for="keyword in keywords"
|
||||
:key="keyword.id"
|
||||
closable
|
||||
:type="getTagType(keyword.weight)"
|
||||
@close="handleDeleteKeyword(keyword.id)"
|
||||
style="margin: 5px;"
|
||||
>
|
||||
{{ keyword.word }}
|
||||
</el-tag>
|
||||
<el-empty v-if="keywords.length === 0" description="No keywords" />
|
||||
</div>
|
||||
|
||||
<el-dialog v-model="dialogVisible" title="Add Keyword" width="30%">
|
||||
<el-form :model="form" label-width="120px">
|
||||
<el-form-item label="Keyword">
|
||||
<el-input v-model="form.word" />
|
||||
</el-form-item>
|
||||
<el-form-item label="Weight">
|
||||
<el-input-number v-model="form.weight" :min="1" :max="5" />
|
||||
</el-form-item>
|
||||
</el-form>
|
||||
<template #footer>
|
||||
<span class="dialog-footer">
|
||||
<el-button @click="dialogVisible = false">Cancel</el-button>
|
||||
<el-button type="primary" @click="handleAddKeyword">Confirm</el-button>
|
||||
</span>
|
||||
</template>
|
||||
</el-dialog>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script setup lang="ts">
|
||||
import { ref, reactive } from 'vue'
|
||||
import axios from 'axios'
|
||||
import { ElMessage } from 'element-plus'
|
||||
|
||||
// Props accepted by the keyword management component.
interface Props {
|
||||
// Keywords rendered as tags; the template reads `id`, `word` and `weight` from each entry.
keywords: any[]
|
||||
// Bound to `v-loading` on the tag list container.
loading: boolean
|
||||
}
|
||||
|
||||
// Declare the component props (see Props).
const props = defineProps<Props>()
|
||||
|
||||
// `refresh` is emitted with no payload after a keyword is added or deleted.
const emit = defineEmits<{
|
||||
refresh: []
|
||||
}>()
|
||||
|
||||
// Controls visibility of the "Add Keyword" dialog.
const dialogVisible = ref(false)
|
||||
|
||||
// Model of the "Add Keyword" form; these values are also the reset state after a successful add.
const form = reactive({
|
||||
word: '',
|
||||
weight: 1
|
||||
})
|
||||
|
||||
// Map a keyword weight to the tag type used to render it.
// Thresholds are checked from highest to lowest; anything below 2 falls back to 'info'.
const getTagType = (weight: number) => {
  const tiers = [
    [5, 'danger'],
    [4, 'warning'],
    [3, 'primary'],
    [2, 'success']
  ] as const

  for (const [minimum, tagType] of tiers) {
    if (weight >= minimum) {
      return tagType
    }
  }
  return 'info'
}
|
||||
|
||||
// Validate the "Add Keyword" form, create the keyword on the backend,
// then close the dialog, reset the form and ask the parent to reload.
const handleAddKeyword = async () => {
  // Trim first so a whitespace-only entry is rejected and stored keywords
  // never carry accidental leading/trailing spaces.
  const word = form.word.trim()
  if (!word) {
    ElMessage.warning('Please enter a keyword')
    return
  }

  try {
    await axios.post('/api/keywords', { word, weight: form.weight })
    ElMessage.success('Keyword added')
    dialogVisible.value = false
    // Reset the form to its initial state for the next entry.
    form.word = ''
    form.weight = 1
    emit('refresh')
  } catch (error) {
    ElMessage.error('Failed to add keyword')
  }
}
|
||||
|
||||
// Delete the keyword with the given id on the backend, then ask the parent to reload.
const handleDeleteKeyword = async (id: string) => {
  const endpoint = `/api/keywords/${id}`
  try {
    await axios.delete(endpoint)
    ElMessage.success('Keyword deleted')
    emit('refresh')
  } catch {
    ElMessage.error('Failed to delete keyword')
  }
}
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.card-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
}
|
||||
</style>
|
||||
13
jest.config.js
Normal file
13
jest.config.js
Normal file
@@ -0,0 +1,13 @@
|
||||
/** Jest configuration: runs `*.spec.ts` files under `src` through ts-jest in a Node environment. */
const jestConfig = {
  rootDir: 'src',
  moduleFileExtensions: ['js', 'json', 'ts'],
  testEnvironment: 'node',
  testRegex: '.*\\.spec\\.ts$',
  transform: {
    '^.+\\.(t|j)s$': 'ts-jest',
  },
  collectCoverageFrom: ['**/*.(t|j)s'],
  coverageDirectory: '../coverage',
  // Load environment variables before the tests run.
  setupFiles: ['<rootDir>/../jest.setup.js'],
};

module.exports = jestConfig;
|
||||
2
jest.setup.js
Normal file
2
jest.setup.js
Normal file
@@ -0,0 +1,2 @@
|
||||
// Load environment variables from the `.env` file via dotenv
|
||||
require('dotenv').config({ path: '.env' });
|
||||
19
package.json
19
package.json
@@ -19,6 +19,7 @@
|
||||
"test:debug": "node --inspect-brk -r tsconfig-paths/register -r ts-node/register node_modules/.bin/jest --runInBand",
|
||||
"test:e2e": "jest --config ./test/jest-e2e.json",
|
||||
"crawl": "ts-node -r tsconfig-paths/register src/scripts/crawl.ts",
|
||||
"update-source": "ts-node -r tsconfig-paths/register src/scripts/update-source.ts",
|
||||
"web":"npm --prefix frontend run build"
|
||||
},
|
||||
"dependencies": {
|
||||
@@ -32,6 +33,7 @@
|
||||
"axios": "^1.13.2",
|
||||
"class-transformer": "^0.5.1",
|
||||
"class-validator": "^0.14.3",
|
||||
"dotenv": "^16.4.7",
|
||||
"mysql2": "^3.16.0",
|
||||
"puppeteer": "^24.34.0",
|
||||
"puppeteer-extra": "^3.3.6",
|
||||
@@ -66,22 +68,5 @@
|
||||
"tsconfig-paths": "^4.2.0",
|
||||
"typescript": "^5.7.3",
|
||||
"typescript-eslint": "^8.20.0"
|
||||
},
|
||||
"jest": {
|
||||
"moduleFileExtensions": [
|
||||
"js",
|
||||
"json",
|
||||
"ts"
|
||||
],
|
||||
"rootDir": "src",
|
||||
"testRegex": ".*\\.spec\\.ts$",
|
||||
"transform": {
|
||||
"^.+\\.(t|j)s$": "ts-jest"
|
||||
},
|
||||
"collectCoverageFrom": [
|
||||
"**/*.(t|j)s"
|
||||
],
|
||||
"coverageDirectory": "../coverage",
|
||||
"testEnvironment": "node"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,6 +12,8 @@ import { CgnpcCrawler } from './cgnpc_target';
|
||||
import { CeicCrawler } from './ceic_target';
|
||||
import { EspicCrawler } from './espic_target';
|
||||
import { PowerbeijingCrawler } from './powerbeijing_target';
|
||||
import { SdiccCrawler } from './sdicc_target';
|
||||
import { CnoocCrawler } from './cnooc_target';
|
||||
|
||||
@Injectable()
|
||||
export class BidCrawlerService {
|
||||
@@ -65,7 +67,7 @@ export class BidCrawlerService {
|
||||
args,
|
||||
});
|
||||
|
||||
const crawlers = [ChdtpCrawler, ChngCrawler, SzecpCrawler, CdtCrawler, EpsCrawler, CnncecpCrawler, CgnpcCrawler, CeicCrawler, EspicCrawler, PowerbeijingCrawler];
|
||||
const crawlers = [ChdtpCrawler, ChngCrawler, SzecpCrawler, CdtCrawler, EpsCrawler, CnncecpCrawler, CgnpcCrawler, CeicCrawler, EspicCrawler, PowerbeijingCrawler, SdiccCrawler, CnoocCrawler];
|
||||
|
||||
try {
|
||||
for (const crawler of crawlers) {
|
||||
|
||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 5 minutes (60000 ms * 5) for network operations
|
||||
jest.setTimeout(60000*5);
|
||||
|
||||
/**
 * Build the Chromium launch arguments for the optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * a single `--proxy-server=host:port` flag is returned; otherwise the list
 * is empty.
 *
 * Proxy credentials are deliberately NOT added to the command line:
 * Chromium has no `--proxy-auth` switch, so such a flag is ignored while
 * exposing PROXY_USERNAME / PROXY_PASSWORD in the process arguments and in
 * any log of the returned list. Supply credentials per page with
 * `page.authenticate({ username, password })` instead.
 *
 * @returns Extra arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  if (!proxyHost || !proxyPort) {
    return [];
  }
  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
describe('CdtCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: false, // Change to false to see browser UI
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -137,9 +137,18 @@ export const CdtCrawler = {
|
||||
logger.log('Simulating human scrolling...');
|
||||
await simulateHumanScrolling(page);
|
||||
|
||||
// 等待表格加载完成
|
||||
logger.log('Waiting for table to load...');
|
||||
await page.waitForSelector('table.layui-table', { timeout: 30000 });
|
||||
|
||||
while (currentPage <= maxPages) {
|
||||
// 等待表格数据加载
|
||||
await page.waitForSelector('tbody tr', { timeout: 10000 });
|
||||
|
||||
// 获取当前页面的 HTML 内容
|
||||
const content = await page.content();
|
||||
const pageResults = this.extract(content);
|
||||
|
||||
if (pageResults.length === 0) {
|
||||
logger.warn(`No results found on page ${currentPage}, stopping.`);
|
||||
break;
|
||||
@@ -155,24 +164,45 @@ export const CdtCrawler = {
|
||||
logger.log('Simulating human scrolling before pagination...');
|
||||
await simulateHumanScrolling(page);
|
||||
|
||||
// Find the "Next Page" button - layui pagination
|
||||
// 查找下一页按钮
|
||||
const nextButtonSelector = 'a.layui-laypage-next:not(.layui-disabled)';
|
||||
const nextButton = await page.$(nextButtonSelector);
|
||||
const nextButtonExists = await page.evaluate((selector) => {
|
||||
const btn = document.querySelector(selector);
|
||||
return btn !== null && !btn.classList.contains('layui-disabled');
|
||||
}, nextButtonSelector);
|
||||
|
||||
if (!nextButton) {
|
||||
logger.log('Next page button not found. Reached end of list.');
|
||||
if (!nextButtonExists) {
|
||||
logger.log('Next page button not found or disabled. Reached end of list.');
|
||||
break;
|
||||
}
|
||||
|
||||
logger.log(`Navigating to page ${currentPage + 1}...`);
|
||||
|
||||
try {
|
||||
await Promise.all([
|
||||
page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 60000 }),
|
||||
nextButton.click(),
|
||||
]);
|
||||
} catch (navError) {
|
||||
logger.error(`Navigation to page ${currentPage + 1} failed: ${navError.message}`);
|
||||
// 点击下一页按钮
|
||||
await page.evaluate((selector) => {
|
||||
const btn = document.querySelector(selector) as HTMLElement;
|
||||
if (btn) btn.click();
|
||||
}, nextButtonSelector);
|
||||
|
||||
// 等待 AJAX 请求完成(通过监听网络请求)
|
||||
await page.waitForFunction(() => {
|
||||
// 检查表格是否正在加载
|
||||
const loading = document.querySelector('.layui-table-loading');
|
||||
return !loading;
|
||||
}, { timeout: 30000 }).catch(() => {});
|
||||
|
||||
// 额外等待确保数据加载完成
|
||||
await new Promise(r => setTimeout(r, 2000));
|
||||
|
||||
// 检查是否真的翻页了(通过检查当前页码)
|
||||
const currentActivePage = await page.evaluate(() => {
|
||||
const activeSpan = document.querySelector('.layui-laypage-curr em:last-child');
|
||||
return activeSpan ? parseInt(activeSpan.textContent || '1') : 1;
|
||||
});
|
||||
|
||||
if (currentActivePage <= currentPage) {
|
||||
logger.log('Page did not change, stopping.');
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -188,6 +218,11 @@ export const CdtCrawler = {
|
||||
// Random delay between pages
|
||||
const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000;
|
||||
await new Promise(resolve => setTimeout(resolve, delay));
|
||||
|
||||
} catch (navError) {
|
||||
logger.error(`Navigation to page ${currentPage + 1} failed: ${navError.message}`);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return allResults;
|
||||
|
||||
@@ -4,10 +4,32 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 120 seconds for manual inspection and slow sites
|
||||
jest.setTimeout(120000);
|
||||
|
||||
/**
 * Build the Chromium launch arguments for the optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * a single `--proxy-server=host:port` flag is returned; otherwise the list
 * is empty.
 *
 * Proxy credentials are deliberately NOT added to the command line:
 * Chromium has no `--proxy-auth` switch, so such a flag is ignored while
 * exposing PROXY_USERNAME / PROXY_PASSWORD in the process arguments and in
 * any log of the returned list. Supply credentials per page with
 * `page.authenticate({ username, password })` instead.
 *
 * @returns Extra arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  if (!proxyHost || !proxyPort) {
    return [];
  }
  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
describe('CeicCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: false, // Run in non-headless mode
|
||||
args: [
|
||||
@@ -16,6 +38,7 @@ describe('CeicCrawler Real Site Test', () => {
|
||||
'--disable-blink-features=AutomationControlled',
|
||||
'--window-size=1920,1080',
|
||||
'--disable-infobars',
|
||||
...proxyArgs,
|
||||
],
|
||||
defaultViewport: null
|
||||
});
|
||||
|
||||
@@ -48,7 +48,7 @@ async function simulateHumanScrolling(page: puppeteer.Page) {
|
||||
}
|
||||
|
||||
export const CeicCrawler = {
|
||||
name: '大连能源采购平台',
|
||||
name: '国家能源集团生态协作平台',
|
||||
url: 'https://ceic.dlnyzb.com/3001',
|
||||
baseUrl: 'https://ceic.dlnyzb.com',
|
||||
|
||||
|
||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 5 minutes (60000 ms * 5) for network operations
|
||||
jest.setTimeout(60000*5);
|
||||
|
||||
/**
 * Build the Chromium launch arguments for the optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * a single `--proxy-server=host:port` flag is returned; otherwise the list
 * is empty.
 *
 * Proxy credentials are deliberately NOT added to the command line:
 * Chromium has no `--proxy-auth` switch, so such a flag is ignored while
 * exposing PROXY_USERNAME / PROXY_PASSWORD in the process arguments and in
 * any log of the returned list. Supply credentials per page with
 * `page.authenticate({ username, password })` instead.
 *
 * @returns Extra arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  if (!proxyHost || !proxyPort) {
    return [];
  }
  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
describe('CgnpcCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: false, // Change to false to see browser UI
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 60 seconds for network operations
|
||||
jest.setTimeout(60000);
|
||||
|
||||
/**
 * Build the Chromium launch arguments for the optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * a single `--proxy-server=host:port` flag is returned; otherwise the list
 * is empty.
 *
 * Proxy credentials are deliberately NOT added to the command line:
 * Chromium has no `--proxy-auth` switch, so such a flag is ignored while
 * exposing PROXY_USERNAME / PROXY_PASSWORD in the process arguments and in
 * any log of the returned list. Supply credentials per page with
 * `page.authenticate({ username, password })` instead.
 *
 * @returns Extra arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  if (!proxyHost || !proxyPort) {
    return [];
  }
  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
describe('ChdtpCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: true, // Change to false to see the browser UI
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ export interface ChdtpResult {
|
||||
}
|
||||
|
||||
export const ChdtpCrawler = {
|
||||
name: '中国华能集团',
|
||||
name: '华电集团电子商务平台 ',
|
||||
url: 'https://www.chdtp.com/webs/queryWebZbgg.action?zbggType=1',
|
||||
baseUrl: 'https://www.chdtp.com/webs/',
|
||||
|
||||
|
||||
@@ -4,6 +4,23 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 120 seconds for manual inspection and slow sites
|
||||
jest.setTimeout(120000);
|
||||
|
||||
/**
 * Build the Chromium launch arguments for the optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * a single `--proxy-server=host:port` flag is returned; otherwise the list
 * is empty.
 *
 * Proxy credentials are deliberately NOT added to the command line:
 * Chromium has no `--proxy-auth` switch, so such a flag is ignored while
 * exposing PROXY_USERNAME / PROXY_PASSWORD in the process arguments and in
 * any log of the returned list. Supply credentials per page with
 * `page.authenticate({ username, password })` instead.
 *
 * @returns Extra arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  if (!proxyHost || !proxyPort) {
    return [];
  }
  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
// 模拟人类鼠标移动
|
||||
async function simulateHumanMouseMovement(page: puppeteer.Page) {
|
||||
const viewport = page.viewport();
|
||||
@@ -53,6 +70,11 @@ describe('ChngCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: false, // Run in non-headless mode
|
||||
args: [
|
||||
@@ -61,6 +83,7 @@ describe('ChngCrawler Real Site Test', () => {
|
||||
'--disable-blink-features=AutomationControlled',
|
||||
'--window-size=1920,1080',
|
||||
"--disable-infobars",
|
||||
...proxyArgs,
|
||||
// "--headless=new",
|
||||
// '--disable-dev-shm-usage',
|
||||
// '--disable-accelerated-2d-canvas',
|
||||
|
||||
@@ -9,6 +9,23 @@ puppeteer.use(StealthPlugin());
|
||||
// Increase timeout to 180 seconds for slow sites and stealth mode
|
||||
jest.setTimeout(180000);
|
||||
|
||||
/**
 * Build the Chromium launch arguments for the optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * a single `--proxy-server=host:port` flag is returned; otherwise the list
 * is empty.
 *
 * Proxy credentials are deliberately NOT added to the command line:
 * Chromium has no `--proxy-auth` switch, so such a flag is ignored while
 * exposing PROXY_USERNAME / PROXY_PASSWORD in the process arguments and in
 * any log of the returned list. Supply credentials per page with
 * `page.authenticate({ username, password })` instead.
 *
 * @returns Extra arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  if (!proxyHost || !proxyPort) {
    return [];
  }
  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
// 模拟人类鼠标移动
|
||||
async function simulateHumanMouseMovement(page: Page) {
|
||||
const viewport = page.viewport();
|
||||
@@ -58,6 +75,11 @@ describe('ChngCrawler Stealth Test (Headless Mode with Stealth Plugin)', () => {
|
||||
let browser: Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: true, // 使用 headless 模式
|
||||
args: [
|
||||
@@ -73,6 +95,7 @@ describe('ChngCrawler Stealth Test (Headless Mode with Stealth Plugin)', () => {
|
||||
'--disable-gpu',
|
||||
'--disable-features=VizDisplayCompositor',
|
||||
'--disable-webgl',
|
||||
...proxyArgs,
|
||||
],
|
||||
defaultViewport: null
|
||||
});
|
||||
|
||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 5 minutes (60000 ms * 5) for network operations
|
||||
jest.setTimeout(60000*5);
|
||||
|
||||
/**
 * Build the Chromium launch arguments for the optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * a single `--proxy-server=host:port` flag is returned; otherwise the list
 * is empty.
 *
 * Proxy credentials are deliberately NOT added to the command line:
 * Chromium has no `--proxy-auth` switch, so such a flag is ignored while
 * exposing PROXY_USERNAME / PROXY_PASSWORD in the process arguments and in
 * any log of the returned list. Supply credentials per page with
 * `page.authenticate({ username, password })` instead.
 *
 * @returns Extra arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  if (!proxyHost || !proxyPort) {
    return [];
  }
  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
describe('CnncecpCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: false, // Change to false to see browser UI
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
73
src/crawler/services/cnooc_target.spec.ts
Normal file
73
src/crawler/services/cnooc_target.spec.ts
Normal file
@@ -0,0 +1,73 @@
|
||||
import { CnoocCrawler } from './cnooc_target';
|
||||
import * as puppeteer from 'puppeteer';
|
||||
|
||||
// Increase timeout to 5 minutes (60000 ms * 5) for network operations
|
||||
jest.setTimeout(60000*5);
|
||||
|
||||
/**
 * Build the Chromium launch arguments for the optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * a single `--proxy-server=host:port` flag is returned; otherwise the list
 * is empty.
 *
 * Proxy credentials are deliberately NOT added to the command line:
 * Chromium has no `--proxy-auth` switch, so such a flag is ignored while
 * exposing PROXY_USERNAME / PROXY_PASSWORD in the process arguments and in
 * any log of the returned list. Supply credentials per page with
 * `page.authenticate({ username, password })` instead.
 *
 * @returns Extra arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  if (!proxyHost || !proxyPort) {
    return [];
  }
  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
// Live-site smoke test: launches a real browser, crawls the CNOOC list and
// prints every item found. Requires network access to the target site.
describe('CnoocCrawler Real Site Test', () => {
  let browser: puppeteer.Browser;

  beforeAll(async () => {
    const proxyArgs = getProxyArgs();
    if (proxyArgs.length > 0) {
      console.log('Using proxy:', proxyArgs.join(' '));
    }

    const launchArgs = ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs];
    browser = await puppeteer.launch({
      headless: false, // Headed mode so the browser UI is visible; set to true to hide it
      args: launchArgs,
    });
  });

  afterAll(async () => {
    // The browser may be undefined if the launch in beforeAll failed.
    if (!browser) {
      return;
    }
    await browser.close();
  });

  it('should visit website and list all found bid information', async () => {
    console.log(`\nStarting crawl for: ${CnoocCrawler.name}`);
    console.log(`Target URL: ${CnoocCrawler.url}`);

    const results = await CnoocCrawler.crawl(browser);

    console.log(`\nSuccessfully found ${results.length} items:\n`);
    console.log('----------------------------------------');
    let position = 0;
    for (const item of results) {
      position += 1;
      console.log(`${position}. [${item.publishDate.toLocaleDateString()}] ${item.title}`);
      console.log(` Link: ${item.url}`);
      console.log('----------------------------------------');
    }

    // The crawler must always hand back an array, even when it finds nothing.
    expect(results).toBeDefined();
    expect(Array.isArray(results)).toBeTruthy();

    // An empty list is only a warning: the site may be empty or its markup may have changed.
    if (results.length === 0) {
      console.warn('Warning: No items found. Check if website structure has changed or if list is currently empty.');
      return;
    }

    // Spot-check the shape of the first extracted item.
    const [firstItem] = results;
    expect(firstItem.title).toBeTruthy();
    expect(firstItem.url).toMatch(/^https?:\/\//);
    expect(firstItem.publishDate).toBeInstanceOf(Date);
  });
});
|
||||
195
src/crawler/services/cnooc_target.ts
Normal file
195
src/crawler/services/cnooc_target.ts
Normal file
@@ -0,0 +1,195 @@
|
||||
import * as puppeteer from 'puppeteer';
|
||||
import { Logger } from '@nestjs/common';
|
||||
|
||||
/**
 * Move the mouse to a handful of random points inside the viewport,
 * pausing briefly after each move. Does nothing when the page reports
 * no viewport.
 */
async function simulateHumanMouseMovement(page: puppeteer.Page) {
  const bounds = page.viewport();
  if (!bounds) {
    return;
  }

  // 5 + floor(random * 5): between five and nine moves per call.
  let remainingMoves = 5 + Math.floor(Math.random() * 5);

  while (remainingMoves > 0) {
    remainingMoves -= 1;

    const targetX = Math.floor(Math.random() * bounds.width);
    const targetY = Math.floor(Math.random() * bounds.height);
    // 10-29 intermediate steps so the pointer glides instead of jumping.
    const stepCount = 10 + Math.floor(Math.random() * 20);
    await page.mouse.move(targetX, targetY, { steps: stepCount });

    // Random pause of 100-500 ms between moves.
    const pauseMs = 100 + Math.random() * 400;
    await new Promise((resume) => setTimeout(resume, pauseMs));
  }
}
|
||||
|
||||
/**
 * Scroll the page down in a few random increments with pauses in between,
 * then scroll back to the top and wait one second.
 */
async function simulateHumanScrolling(page: puppeteer.Page) {
  const sleep = (ms: number) => new Promise((wake) => setTimeout(wake, ms));

  // 3 + floor(random * 5): between three and seven scroll steps.
  const totalScrolls = 3 + Math.floor(Math.random() * 5);

  for (let performed = 0; performed < totalScrolls; performed += 1) {
    // Each step scrolls 100-499 px further down.
    const deltaY = 100 + Math.floor(Math.random() * 400);
    await page.evaluate((distance) => {
      window.scrollBy({ top: distance, behavior: 'smooth' });
    }, deltaY);

    // Random pause of 500-1500 ms between steps.
    await sleep(500 + Math.random() * 1000);
  }

  // Return to the top of the page.
  await page.evaluate(() => {
    window.scrollTo({ top: 0, behavior: 'smooth' });
  });
  await sleep(1000);
}
|
||||
|
||||
// One list entry returned by CnoocCrawler.crawl / CnoocCrawler.extract.
export interface CnoocResult {
|
||||
// Trimmed text captured from the entry's <font> element.
title: string;
|
||||
// Parsed from the entry's YYYY-MM-DD date text; falls back to the current time when that text is empty.
publishDate: Date;
|
||||
// Absolute link to the entry (the crawler's baseUrl is prepended when the href does not start with "http").
url: string;
|
||||
}
|
||||
|
||||
/**
 * Crawler for the CNOOC tender list (buy.cnooc.com.cn).
 *
 * `crawl` drives a Puppeteer page through up to five list pages and
 * `extract` parses one page of HTML into CnoocResult entries.
 */
export const CnoocCrawler = {
  name: '中海油招标平台',
  url: 'https://buy.cnooc.com.cn/cbjyweb/001/001001/moreinfo.html',
  baseUrl: 'https://buy.cnooc.com.cn',

  /**
   * Crawl the list pages and collect every entry found.
   *
   * Failures during navigation or extraction are logged and end the crawl
   * early; whatever was collected up to that point is still returned.
   * Failures while preparing the page (authentication, user agent,
   * viewport) propagate to the caller. The page is closed in every case.
   *
   * @param browser An already launched Puppeteer browser.
   * @returns The entries collected from the visited pages (possibly empty).
   */
  async crawl(browser: puppeteer.Browser): Promise<CnoocResult[]> {
    const logger = new Logger('CnoocCrawler');
    const page = await browser.newPage();

    const allResults: CnoocResult[] = [];
    const maxPages = 5;
    let currentPage = 1;

    // Everything after newPage() runs inside try/finally so the page is
    // closed even when the setup calls below throw.
    try {
      // Proxy credentials, when configured, are supplied per page.
      const username = process.env.PROXY_USERNAME;
      const password = process.env.PROXY_PASSWORD;
      if (username && password) {
        await page.authenticate({ username, password });
      }

      // Override navigator properties before any page script runs.
      await page.evaluateOnNewDocument(() => {
        Object.defineProperty(navigator, 'webdriver', { get: () => false });
        Object.defineProperty(navigator, 'language', { get: () => "zh-CN"});
        Object.defineProperty(navigator, 'plugins', { get: () => [1,2,3,4,5]});
      });

      await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36');
      await page.setViewport({ width: 1920, height: 1080 });

      try {
        logger.log(`Navigating to ${this.url}...`);
        await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });

        // Simulate human behaviour before reading the first page.
        logger.log('Simulating human mouse movements...');
        await simulateHumanMouseMovement(page);

        logger.log('Simulating human scrolling...');
        await simulateHumanScrolling(page);

        while (currentPage <= maxPages) {
          logger.log(`Processing page ${currentPage}...`);

          const content = await page.content();
          const pageResults = this.extract(content);

          if (pageResults.length === 0) {
            logger.warn(`No results found on page ${currentPage}, stopping.`);
            break;
          }

          allResults.push(...pageResults);
          logger.log(`Extracted ${pageResults.length} items from page ${currentPage}`);

          // Simulate human behaviour before paginating.
          logger.log('Simulating human mouse movements before pagination...');
          await simulateHumanMouseMovement(page);

          logger.log('Simulating human scrolling before pagination...');
          await simulateHumanScrolling(page);

          // Next-page link format:
          // <a href="https://buy.cnooc.com.cn/cbjyweb/001/001001/2.html" class="pageLink">下页 ></a>
          // NOTE(review): this selector matches every a.pageLink under the list
          // path and page.$ returns the first match — confirm against the live
          // markup that the first match is always the "next" link.
          const nextButtonSelector = 'a.pageLink[href*="/cbjyweb/001/001001/"]';
          const nextButton = await page.$(nextButtonSelector);

          if (!nextButton) {
            logger.log('Next page button not found. Reached end of list.');
            break;
          }

          logger.log(`Navigating to page ${currentPage + 1}...`);

          try {
            // The link is a plain href, so the click starts a full navigation.
            // Wait for that navigation to settle instead of sleeping a fixed
            // time; otherwise page.content() can race the page load.
            await Promise.all([
              page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 60000 }),
              nextButton.click(),
            ]);
          } catch (navError) {
            logger.error(`Navigation to page ${currentPage + 1} failed: ${navError.message}`);
            break;
          }

          currentPage++;

          // Simulate human behaviour after paginating.
          logger.log('Simulating human mouse movements after pagination...');
          await simulateHumanMouseMovement(page);

          logger.log('Simulating human scrolling after pagination...');
          await simulateHumanScrolling(page);

          // Random delay of 1000-3000 ms between pages.
          const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000;
          await new Promise(resolve => setTimeout(resolve, delay));
        }
      } catch (error) {
        // Best effort: keep what was collected before the failure.
        logger.error(`Failed to crawl ${this.name}: ${error.message}`);
      }

      return allResults;
    } finally {
      await page.close();
    }
  },

  /**
   * Parse one list page into result entries.
   *
   * Regex groups for buy.cnooc.com.cn:
   *   1: URL (href attribute of the link)
   *   2: Title (text inside the <font> element)
   *   3: Publish date (format: 2026-01-12)
   *
   * Example of the HTML being matched:
   *   <li class="now-hd-items clearfix">
   *     <a href="https://buy.cnooc.com.cn/cbjyweb/001/001001/20260112/1460280812582768641-zhy.html" target="_blank" class="now-link" title="...">
   *       <font style="font-weight:bold">中海油服-物探事业部2026-2028年度海事许可办理及码头服务(二次)</font>
   *     </a>
   *     <span class="now-span" style="width:100px">2026-01-12</span>
   *   </li>
   *
   * Entries with an empty title or href are skipped. Relative hrefs are
   * prefixed with `baseUrl`. The date string is passed to `new Date(...)`,
   * which reads the date-only ISO form as midnight UTC.
   *
   * @param html Full HTML of a list page.
   * @returns The entries found, in document order (empty when none match).
   */
  extract(html: string): CnoocResult[] {
    const results: CnoocResult[] = [];
    const regex = /<li class="now-hd-items clearfix">[\s\S]*?<a[^>]*href="([^"]*)"[^>]*>[\s\S]*?<font[^>]*>([^<]*)<\/font>[\s\S]*?<span class="now-span"[^>]*>\s*(\d{4}-\d{2}-\d{2})\s*<\/span>[\s\S]*?<\/li>/gs;

    let match: RegExpExecArray | null;
    while ((match = regex.exec(html)) !== null) {
      const url = match[1]?.trim();
      const title = match[2]?.trim();
      const dateStr = match[3]?.trim();

      if (title && url) {
        results.push({
          title,
          publishDate: dateStr ? new Date(dateStr) : new Date(),
          url: url.startsWith('http') ? url : this.baseUrl + url
        });
      }
    }

    return results;
  }
};
|
||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 5 minutes (60000 ms * 5) for network operations
|
||||
jest.setTimeout(60000*5);
|
||||
|
||||
/**
 * Build the Chromium launch arguments for the optional proxy.
 *
 * Reads PROXY_HOST and PROXY_PORT from the environment. When both are set,
 * a single `--proxy-server=host:port` flag is returned; otherwise the list
 * is empty.
 *
 * Proxy credentials are deliberately NOT added to the command line:
 * Chromium has no `--proxy-auth` switch, so such a flag is ignored while
 * exposing PROXY_USERNAME / PROXY_PASSWORD in the process arguments and in
 * any log of the returned list. Supply credentials per page with
 * `page.authenticate({ username, password })` instead.
 *
 * @returns Extra arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  if (!proxyHost || !proxyPort) {
    return [];
  }
  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
describe('EpsCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: false, // Change to false to see browser UI
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 5 minutes (5 x 60 seconds) for network operations
|
||||
jest.setTimeout(60000*5);
|
||||
|
||||
/**
 * Builds the Chromium launch arguments for the optional outbound proxy.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT` from the environment. When both are set
 * it returns a single `--proxy-server=<host>:<port>` switch; otherwise it
 * returns an empty array so the browser connects directly.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a launch
 * argument: Chromium has no `--proxy-auth` switch (an unknown switch is
 * ignored), and the returned arguments are printed by the test setup, so the
 * password would end up in the test output and on the browser command line.
 * Proxy credentials have to be supplied per page via `page.authenticate()`.
 *
 * @returns Arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  // Both parts are required to form a usable proxy address.
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
describe('EspicCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: false, // Change to false to see browser UI
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -53,7 +53,7 @@ export interface EspicResult {
|
||||
}
|
||||
|
||||
export const EspicCrawler = {
|
||||
name: '电能e招采平台',
|
||||
name: '电能e招采平台(国电投)',
|
||||
baseUrl: 'https://ebid.espic.com.cn',
|
||||
|
||||
// 生成动态 URL,使用当前日期
|
||||
|
||||
@@ -4,13 +4,35 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 5 minutes (5 x 60 seconds) for network operations
|
||||
jest.setTimeout(60000*5);
|
||||
|
||||
/**
 * Builds the Chromium launch arguments for the optional outbound proxy.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT` from the environment. When both are set
 * it returns a single `--proxy-server=<host>:<port>` switch; otherwise it
 * returns an empty array so the browser connects directly.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a launch
 * argument: Chromium has no `--proxy-auth` switch (an unknown switch is
 * ignored), and the returned arguments are printed by the test setup, so the
 * password would end up in the test output and on the browser command line.
 * Proxy credentials have to be supplied per page via `page.authenticate()`.
 *
 * @returns Arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  // Both parts are required to form a usable proxy address.
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
describe('PowerbeijingCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: false, // Change to false to see browser UI
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -53,7 +53,7 @@ export interface PowerbeijingResult {
|
||||
}
|
||||
|
||||
export const PowerbeijingCrawler = {
|
||||
name: '北京电力交易平台',
|
||||
name: '北京京能电子商务平台',
|
||||
url: 'https://www.powerbeijing-ec.com/jncms/search/bulletin.html?dates=300&categoryId=2&tabName=%E6%8B%9B%E6%A0%87%E5%85%AC%E5%91%8A&page=1',
|
||||
baseUrl: 'https://www.powerbeijing-ec.com',
|
||||
|
||||
|
||||
73
src/crawler/services/sdicc_target.spec.ts
Normal file
73
src/crawler/services/sdicc_target.spec.ts
Normal file
@@ -0,0 +1,73 @@
|
||||
import { SdiccCrawler } from './sdicc_target';
|
||||
import * as puppeteer from 'puppeteer';
|
||||
|
||||
// Increase timeout to 5 minutes (5 x 60 seconds) for network operations
|
||||
jest.setTimeout(60000*5);
|
||||
|
||||
/**
 * Builds the Chromium launch arguments for the optional outbound proxy.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT` from the environment. When both are set
 * it returns a single `--proxy-server=<host>:<port>` switch; otherwise it
 * returns an empty array so the browser connects directly.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a launch
 * argument: Chromium has no `--proxy-auth` switch (an unknown switch is
 * ignored), and the returned arguments are printed by the test setup, so the
 * password would end up in the test output and on the browser command line.
 * Proxy credentials have to be supplied per page via `page.authenticate()`.
 *
 * @returns Arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  // Both parts are required to form a usable proxy address.
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
/**
 * Live smoke test for SdiccCrawler.
 *
 * Launches a real (headed) browser, runs the crawler against the real site and
 * prints every announcement it finds. An empty result only produces a warning;
 * when items are returned, the first one is checked for a title, an absolute
 * http(s) URL and a Date publish date.
 */
describe('SdiccCrawler Real Site Test', () => {
  let browser: puppeteer.Browser;

  beforeAll(async () => {
    const proxyArgs = getProxyArgs();
    if (proxyArgs.length > 0) {
      // Never echo proxy credentials: mask the value of any auth argument
      // before the launch arguments are written to the test output.
      const printableArgs = proxyArgs.map((arg) =>
        arg.startsWith('--proxy-auth=') ? '--proxy-auth=***' : arg,
      );
      console.log('Using proxy:', printableArgs.join(' '));
    }

    browser = await puppeteer.launch({
      // Headed on purpose so the run can be watched; switch to true for unattended runs.
      headless: false,
      args: ['--no-sandbox', '--disable-setuid-sandbox', ...proxyArgs],
    });
  });

  afterAll(async () => {
    // `browser` stays undefined when launch itself failed.
    if (browser) {
      await browser.close();
    }
  });

  it('should visit website and list all found bid information', async () => {
    console.log(`\nStarting crawl for: ${SdiccCrawler.name}`);
    console.log(`Target URL: ${SdiccCrawler.url}`);

    const results = await SdiccCrawler.crawl(browser);

    console.log(`\nSuccessfully found ${results.length} items:\n`);
    console.log('----------------------------------------');
    results.forEach((item, index) => {
      console.log(`${index + 1}. [${item.publishDate.toLocaleDateString()}] ${item.title}`);
      console.log(`   Link: ${item.url}`);
      console.log('----------------------------------------');
    });

    // Basic assertions to ensure the crawler is working.
    expect(results).toBeDefined();
    expect(Array.isArray(results)).toBeTruthy();

    // Warn but don't fail if the site returns 0 items (could be empty or changed structure).
    if (results.length === 0) {
      console.warn('Warning: No items found. Check if website structure has changed or if list is currently empty.');
    } else {
      // Check data integrity of the first item.
      const firstItem = results[0];
      expect(firstItem.title).toBeTruthy();
      expect(firstItem.url).toMatch(/^https?:\/\//);
      expect(firstItem.publishDate).toBeInstanceOf(Date);
    }
  });
});
|
||||
203
src/crawler/services/sdicc_target.ts
Normal file
203
src/crawler/services/sdicc_target.ts
Normal file
@@ -0,0 +1,203 @@
|
||||
import * as puppeteer from 'puppeteer';
|
||||
import { Logger } from '@nestjs/common';
|
||||
|
||||
/**
 * Imitates a person idly moving the mouse around the page.
 *
 * Performs 5-9 moves to uniformly random points inside the current viewport,
 * each interpolated over 10-29 steps, and waits 100-500 ms after every move.
 * Does nothing when the page reports no viewport.
 *
 * @param page Puppeteer page whose mouse is driven.
 */
async function simulateHumanMouseMovement(page: puppeteer.Page) {
  const bounds = page.viewport();
  if (!bounds) {
    return;
  }

  const pause = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
  const totalMoves = 5 + Math.floor(Math.random() * 5);

  for (let remaining = totalMoves; remaining > 0; remaining -= 1) {
    const targetX = Math.floor(Math.random() * bounds.width);
    const targetY = Math.floor(Math.random() * bounds.height);
    // More intermediate steps make the pointer path smoother.
    const stepCount = 10 + Math.floor(Math.random() * 20);

    await page.mouse.move(targetX, targetY, { steps: stepCount });

    // Random pause before the next move.
    await pause(100 + Math.random() * 400);
  }
}
|
||||
|
||||
/**
 * Imitates a person scrolling through the page.
 *
 * Scrolls down 3-7 times by 100-499 px with smooth behaviour, waiting
 * 500-1500 ms after each scroll, then scrolls back to the top and waits one
 * more second.
 *
 * @param page Puppeteer page to scroll.
 */
async function simulateHumanScrolling(page: puppeteer.Page) {
  const pause = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
  const totalScrolls = 3 + Math.floor(Math.random() * 5);

  for (let remaining = totalScrolls; remaining > 0; remaining -= 1) {
    const offset = 100 + Math.floor(Math.random() * 400);

    // The scroll itself runs inside the page context.
    await page.evaluate((distance) => {
      window.scrollBy({ top: distance, behavior: 'smooth' });
    }, offset);

    // Random pause before the next scroll.
    await pause(500 + Math.random() * 1000);
  }

  // Return to the top of the page.
  await page.evaluate(() => {
    window.scrollTo({ top: 0, behavior: 'smooth' });
  });
  await pause(1000);
}
|
||||
|
||||
/** One announcement row extracted from the sdicc.com.cn list page. */
export interface SdiccResult {
  /** Announcement title text taken from the list row. */
  title: string;
  /** Publish date parsed from the row's `YYYY-MM-DD` cell. */
  publishDate: Date;
  /** Absolute URL of the announcement detail page. */
  url: string;
}
|
||||
|
||||
/**
 * Crawler for the announcement list at sdicc.com.cn.
 *
 * `crawl` drives a Puppeteer page through up to five list pages, and `extract`
 * turns the rendered HTML of one list page into result records.
 */
export const SdiccCrawler = {
  name: '国投集团电子采购平台',
  url: 'https://www.sdicc.com.cn/cgxx/ggList',
  baseUrl: 'https://www.sdicc.com.cn',

  /**
   * Crawls the announcement list and returns every distinct item found.
   *
   * Errors raised while navigating or paginating are logged and the items
   * collected so far are returned; errors raised while preparing the page
   * (authentication, user agent, viewport) propagate to the caller. The page
   * is closed in both cases.
   *
   * @param browser Already launched Puppeteer browser; it is not closed here.
   * @returns Items in page order, without duplicates (keyed by detail URL).
   */
  async crawl(browser: puppeteer.Browser): Promise<SdiccResult[]> {
    const logger = new Logger('SdiccCrawler');
    const describeError = (err: unknown): string =>
      err instanceof Error ? err.message : String(err);

    const allResults: SdiccResult[] = [];
    const seenUrls = new Set<string>();
    const maxPages = 5;
    let currentPage = 1;

    const page = await browser.newPage();

    // Everything after newPage() sits inside try/finally so the page is closed
    // even when the setup calls below throw.
    try {
      // Answer proxy authentication challenges when credentials are configured.
      const username = process.env.PROXY_USERNAME;
      const password = process.env.PROXY_PASSWORD;
      if (username && password) {
        await page.authenticate({ username, password });
      }

      // Override a few navigator properties before any script of the site runs.
      await page.evaluateOnNewDocument(() => {
        Object.defineProperty(navigator, 'webdriver', { get: () => false });
        Object.defineProperty(navigator, 'language', { get: () => "zh-CN"});
        Object.defineProperty(navigator, 'plugins', { get: () => [1,2,3,4,5]});
      });

      await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36');
      await page.setViewport({ width: 1920, height: 1080 });

      try {
        logger.log(`Navigating to ${this.url}...`);
        await page.goto(this.url, { waitUntil: 'networkidle2', timeout: 60000 });

        // Simulate human behaviour before reading the list.
        logger.log('Simulating human mouse movements...');
        await simulateHumanMouseMovement(page);

        logger.log('Simulating human scrolling...');
        await simulateHumanScrolling(page);

        // Wait for the table rows; a timeout is only logged because the
        // extraction below copes with an empty page.
        logger.log('Waiting for table to load...');
        await page.waitForSelector('.tbody table tbody tr', { timeout: 30000 }).catch(() => {
          logger.warn('Table rows not found, trying alternative selectors...');
        });

        while (currentPage <= maxPages) {
          logger.log(`Processing page ${currentPage}...`);

          const content = await page.content();
          const pageResults = this.extract(content);

          if (pageResults.length === 0) {
            logger.warn(`No results found on page ${currentPage}, stopping.`);
            break;
          }

          // Keep only items not seen on an earlier page. If clicking "next" did
          // not actually change the list, the same rows would otherwise be
          // collected again on every remaining iteration.
          const freshResults: SdiccResult[] = [];
          for (const item of pageResults) {
            if (!seenUrls.has(item.url)) {
              seenUrls.add(item.url);
              freshResults.push(item);
            }
          }

          if (freshResults.length === 0) {
            logger.warn(`Page ${currentPage} contained no new items, stopping.`);
            break;
          }

          allResults.push(...freshResults);
          logger.log(`Extracted ${freshResults.length} items from page ${currentPage}`);

          // The last allowed page has been read: do not click through to a
          // page whose content would never be extracted.
          if (currentPage >= maxPages) {
            break;
          }

          // Simulate human behaviour before paginating.
          logger.log('Simulating human mouse movements before pagination...');
          await simulateHumanMouseMovement(page);

          logger.log('Simulating human scrolling before pagination...');
          await simulateHumanScrolling(page);

          // Locate the "next page" button.
          const nextButtonSelector = '#page_btnLas';
          const nextButton = await page.$(nextButtonSelector);

          if (!nextButton) {
            logger.log('Next page button not found. Reached end of list.');
            break;
          }

          logger.log(`Navigating to page ${currentPage + 1}...`);

          try {
            // Start waiting BEFORE clicking so a fast navigation cannot finish
            // unobserved. A navigation timeout is ignored on purpose
            // (presumably the list can refresh without a full page load).
            await Promise.all([
              page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 60000 }).catch(() => undefined),
              nextButton.click(),
            ]);
            // Extra wait so the new rows are rendered before reading them.
            await new Promise(r => setTimeout(r, 2000));
          } catch (navError) {
            logger.error(`Navigation to page ${currentPage + 1} failed: ${describeError(navError)}`);
            break;
          }

          currentPage++;

          // Simulate human behaviour after paginating.
          logger.log('Simulating human mouse movements after pagination...');
          await simulateHumanMouseMovement(page);

          logger.log('Simulating human scrolling after pagination...');
          await simulateHumanScrolling(page);

          // Random 1-3 s delay between pages.
          const delay = Math.floor(Math.random() * (3000 - 1000 + 1)) + 1000;
          await new Promise(resolve => setTimeout(resolve, delay));
        }

        return allResults;
      } catch (error) {
        // Best effort: keep whatever was collected before the failure.
        logger.error(`Failed to crawl ${this.name}: ${describeError(error)}`);
        return allResults;
      }
    } finally {
      await page.close();
    }
  },

  /**
   * Parses announcement rows out of the rendered list HTML.
   *
   * Capture groups of the row regex, in order:
   *   1: first argument of the row's `urlChange(...)` handler (used as ggGuid)
   *   2: second argument of `urlChange(...)` (used as gcGuid)
   *   3: title (text of the first `<td><span>` cell)
   *   4: publish date, `YYYY-MM-DD`
   *
   * NOTE(review): the previous comment listed gcGuid before ggGuid, while the
   * code maps the first argument to ggGuid. The mapping is kept unchanged here;
   * confirm it against the site's `urlChange` implementation.
   *
   * Example row:
   * <tr onclick="urlChange('a853e226-09bd-441c-8f05-badb945932f0','ec2ccdd8-1464-4a96-ad99-24a5396d028c')">
   *   <td colspan="1" rowspan="1" style="text-align: center;">1</td>
   *   <td colspan="1" rowspan="1"><span style="margin-right: 5px;">国投罗钾公司硫酸钾厂球磨机控制系统升级项目公开招标公告</span></td>
   *   <td colspan="1" rowspan="1"><span>服务</span></td>
   *   <td colspan="1" rowspan="1"><span> 2026-01-09 </span></td>
   * </tr>
   *
   * @param html Rendered HTML of one list page.
   * @returns One record per matching row; empty when nothing matches.
   */
  extract(html: string): SdiccResult[] {
    const results: SdiccResult[] = [];
    const regex = /<tr[^>]*onclick="urlChange\('([^']+)','([^']+)'\)"[^>]*>[\s\S]*?<td[^>]*><span[^>]*>([^<]+)<\/span><\/td>[\s\S]*?<td[^>]*><span[^>]*>\s*(\d{4}-\d{2}-\d{2})\s*<\/span><\/td>[\s\S]*?<\/tr>/gs;

    let match;
    while ((match = regex.exec(html)) !== null) {
      const ggGuid = match[1]?.trim();
      const gcGuid = match[2]?.trim();
      const title = match[3]?.trim();
      const dateStr = match[4]?.trim();

      if (title && ggGuid && gcGuid) {
        // The pattern only checks the digit layout, so e.g. month 13 still
        // matches; fall back to "now" instead of emitting an Invalid Date.
        const parsedDate = dateStr ? new Date(dateStr) : new Date();
        const publishDate = Number.isNaN(parsedDate.getTime()) ? new Date() : parsedDate;

        results.push({
          title,
          publishDate,
          url: `${this.baseUrl}/cgxx/ggDetail?gcGuid=${gcGuid}&ggGuid=${ggGuid}`
        });
      }
    }

    return results;
  }
};
|
||||
@@ -4,10 +4,32 @@ import * as puppeteer from 'puppeteer';
|
||||
// Increase timeout to 120 seconds for manual inspection and slow sites
|
||||
jest.setTimeout(120000);
|
||||
|
||||
/**
 * Builds the Chromium launch arguments for the optional outbound proxy.
 *
 * Reads `PROXY_HOST` and `PROXY_PORT` from the environment. When both are set
 * it returns a single `--proxy-server=<host>:<port>` switch; otherwise it
 * returns an empty array so the browser connects directly.
 *
 * `PROXY_USERNAME` / `PROXY_PASSWORD` are deliberately NOT turned into a launch
 * argument: Chromium has no `--proxy-auth` switch (an unknown switch is
 * ignored), and the returned arguments are printed by the test setup, so the
 * password would end up in the test output and on the browser command line.
 * Proxy credentials have to be supplied per page via `page.authenticate()`.
 *
 * @returns Arguments to spread into `puppeteer.launch({ args })`.
 */
const getProxyArgs = (): string[] => {
  const proxyHost = process.env.PROXY_HOST;
  const proxyPort = process.env.PROXY_PORT;

  // Both parts are required to form a usable proxy address.
  if (!proxyHost || !proxyPort) {
    return [];
  }

  return [`--proxy-server=${proxyHost}:${proxyPort}`];
};
|
||||
|
||||
describe('SzecpCrawler Real Site Test', () => {
|
||||
let browser: puppeteer.Browser;
|
||||
|
||||
beforeAll(async () => {
|
||||
const proxyArgs = getProxyArgs();
|
||||
if (proxyArgs.length > 0) {
|
||||
console.log('Using proxy:', proxyArgs.join(' '));
|
||||
}
|
||||
|
||||
browser = await puppeteer.launch({
|
||||
headless: false, // Run in non-headless mode
|
||||
args: [
|
||||
@@ -16,6 +38,7 @@ describe('SzecpCrawler Real Site Test', () => {
|
||||
'--disable-blink-features=AutomationControlled',
|
||||
'--window-size=1920,1080',
|
||||
'--disable-infobars',
|
||||
...proxyArgs,
|
||||
],
|
||||
defaultViewport: null
|
||||
});
|
||||
|
||||
@@ -14,8 +14,8 @@ export class BidCrawlTask {
|
||||
|
||||
@Cron(CronExpression.EVERY_DAY_AT_MIDNIGHT)
|
||||
async handleCron() {
|
||||
this.logger.debug('Scheduled crawl task started');
|
||||
await this.crawlerService.crawlAll();
|
||||
// this.logger.debug('Scheduled crawl task started');
|
||||
// await this.crawlerService.crawlAll();
|
||||
}
|
||||
|
||||
@Cron(CronExpression.EVERY_DAY_AT_MIDNIGHT)
|
||||
|
||||
57
src/scripts/update-source.ts
Normal file
57
src/scripts/update-source.ts
Normal file
@@ -0,0 +1,57 @@
|
||||
import { NestFactory } from '@nestjs/core';
|
||||
import { AppModule } from '../app.module';
|
||||
import { getRepositoryToken } from '@nestjs/typeorm';
|
||||
import { Repository } from 'typeorm';
|
||||
import { BidItem } from '../bids/entities/bid-item.entity';
|
||||
import { CustomLogger } from '../common/logger/logger.service';
|
||||
|
||||
/**
 * One-off maintenance script that renames the `source` value of stored
 * BidItem rows from the old platform name to the new one.
 *
 * Boots a standalone Nest application context, counts the rows that still
 * carry the old name, updates them in a single statement when there are any,
 * then closes the context and exits with code 0. Any failure inside the
 * try block is logged and the process exits with code 1.
 */
async function updateSource() {
  const app = await NestFactory.createApplicationContext(AppModule);

  // Route framework and script output through the project's custom logger.
  const logger = await app.resolve(CustomLogger);
  app.useLogger(logger);
  logger.setContext('UpdateSourceScript');

  const oldSource = '北京电力交易平台';
  const newSource = '北京京能电子商务平台';

  try {
    // Repository for the BidItem entity.
    const bidItemRepository = app.get<Repository<BidItem>>(getRepositoryToken(BidItem));

    logger.log(`开始更新 source 字段: "${oldSource}" -> "${newSource}"`);

    // How many rows still carry the old name?
    const pendingCount = await bidItemRepository.count({
      where: { source: oldSource },
    });
    logger.log(`找到 ${pendingCount} 条需要更新的记录`);

    if (pendingCount === 0) {
      logger.log('没有需要更新的记录');
    } else {
      // Rename all matching rows in one UPDATE.
      const outcome = await bidItemRepository
        .createQueryBuilder()
        .update(BidItem)
        .set({ source: newSource })
        .where('source = :oldSource', { oldSource })
        .execute();

      logger.log(`成功更新 ${outcome.affected} 条记录`);
    }

    await app.close();
    process.exit(0);
  } catch (error) {
    logger.error('更新失败:', error);
    await app.close();
    process.exit(1);
  }
}
|
||||
|
||||
updateSource();
|
||||
9
test/jest-e2e.json
Normal file
9
test/jest-e2e.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"moduleFileExtensions": ["js", "json", "ts"],
|
||||
"rootDir": ".",
|
||||
"testEnvironment": "node",
|
||||
"testRegex": ".e2e-spec.ts$",
|
||||
"transform": {
|
||||
"^.+\\.(t|j)s$": "ts-jest"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user