From 211e94549152b15d2408175682f5ad3713b9c5cd Mon Sep 17 00:00:00 2001 From: nczitzk <42264778+nczitzk@users.noreply.github.com> Date: Mon, 6 Mar 2023 01:11:41 +0800 Subject: [PATCH 1/2] =?UTF-8?q?feat(route):=20add=20=E6=B5=99=E6=B1=9F?= =?UTF-8?q?=E5=9C=A8=E7=BA=BF=E6=B5=99=E6=8A=A5=E9=9B=86=E5=9B=A2=E7=B3=BB?= =?UTF-8?q?=E5=88=97=E6=8A=A5=E5=88=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/traditional-media.md | 12 ++++++ lib/v2/zjol/maintainer.js | 3 ++ lib/v2/zjol/paper.js | 85 +++++++++++++++++++++++++++++++++++++++ lib/v2/zjol/radar.js | 61 ++++++++++++++++++++++++++++ lib/v2/zjol/router.js | 3 ++ 5 files changed, 164 insertions(+) create mode 100644 lib/v2/zjol/maintainer.js create mode 100644 lib/v2/zjol/paper.js create mode 100644 lib/v2/zjol/radar.js create mode 100644 lib/v2/zjol/router.js diff --git a/docs/traditional-media.md b/docs/traditional-media.md index 862784f6802469..10e86230e1e568 100644 --- a/docs/traditional-media.md +++ b/docs/traditional-media.md @@ -2433,6 +2433,18 @@ category 对应的关键词有 +## 浙江在线 + +### 浙报集团系列报刊 + + + +| 浙江日报 | 钱江晚报 | 美术报 | 浙江老年报 | 浙江法制报 | 江南游报 | +| ---- | ---- | --- | ----- | ----- | ---- | +| zjrb | qjwb | msb | zjlnb | zjfzb | jnyb | + + + ## 中国日报 ### 英语点津 diff --git a/lib/v2/zjol/maintainer.js b/lib/v2/zjol/maintainer.js new file mode 100644 index 00000000000000..f146840f72f871 --- /dev/null +++ b/lib/v2/zjol/maintainer.js @@ -0,0 +1,3 @@ +module.exports = { + '/paper/:id?': ['nczitzk'], +}; diff --git a/lib/v2/zjol/paper.js b/lib/v2/zjol/paper.js new file mode 100644 index 00000000000000..85d5e4e51c65bd --- /dev/null +++ b/lib/v2/zjol/paper.js @@ -0,0 +1,85 @@ +const got = require('@/utils/got'); +const cheerio = require('cheerio'); +const { parseDate } = require('@/utils/parse-date'); + +module.exports = async (ctx) => { + const id = ctx.params.id ?? 'zjrb'; + const limit = ctx.query.limit ? parseInt(ctx.query.limit) : 100; + + const query = id === 'jnyb' ? 'map[name="PagePicMap"] area' : 'ul.main-ed-articlenav-list li a'; + + const rootUrl = id === 'qjwb' ? 'http://qjwb.thehour.cn' : `https://${id}.zjol.com.cn`; + let currentUrl = `${rootUrl}/paperindex.htm`; + + let response = await got({ + method: 'get', + url: currentUrl, + }); + + const url = response.data.match(/URL=(.*)"/)[1]; + const pubDate = parseDate(url.match(/(\d{4}-\d{2}\/\d{2})/)[1], 'YYYY-MM/DD'); + + currentUrl = `${rootUrl}/${url.replace(`/${url.split('/').pop()}`, '')}`; + + response = await got({ + method: 'get', + url: `${rootUrl}/${url}`, + }); + + const $ = cheerio.load(response.data); + + let items = $(query) + .toArray() + .map((a) => `${currentUrl}/${$(a).attr('href').split('?')[0]}`); + + await Promise.all( + $('#pageLink') + .slice(1) + .toArray() + .map((p) => `${currentUrl}/${$(p).attr('href')}`) + .map(async (p) => { + const pageResponse = await got({ + method: 'get', + url: p, + }); + + const page = cheerio.load(pageResponse.data); + + items.push( + ...page(query) + .toArray() + .map((a) => `${currentUrl}/${page(a).attr('href').split('?')[0]}`) + ); + }) + ); + + items = await Promise.all( + items.slice(0, limit).map((link) => + ctx.cache.tryGet(link, async () => { + const detailResponse = await got({ + method: 'get', + url: link, + }); + + const content = cheerio.load(detailResponse.data); + + const title = content('.main-article-title').text(); + + content('.main-article-alltitle').remove(); + + return { + link, + title, + pubDate, + description: content('.main-article-content').html(), + }; + }) + ) + ); + + ctx.state.data = { + title: $('title').text(), + link: rootUrl, + item: items, + }; +}; diff --git a/lib/v2/zjol/radar.js b/lib/v2/zjol/radar.js new file mode 100644 index 00000000000000..25ce162e254699 --- /dev/null +++ b/lib/v2/zjol/radar.js @@ -0,0 +1,61 @@ +module.exports = { + 'zjol.com.cn': { + _name: '浙江在线', + '.': [ + { + title: '浙报集团系列报刊', + docs: 'https://docs.rsshub.app/traditional-media.html#zhe-jiang-zai-xian-zhe-bao-ji-tuan-xi-lie-bao-kan', + source: ['/'], + target: (params, url) => `/zjol/paper/${new URL(url).toString().match(/\/\/(.*?)\.zjol/)[1]}`, + }, + ], + zjrb: [ + { + title: '浙江日报', + docs: 'https://docs.rsshub.app/traditional-media.html#zhe-jiang-zai-xian-zhe-bao-ji-tuan-xi-lie-bao-kan', + source: ['/'], + target: '/zjol/paper/zjrb', + }, + ], + qjwb: [ + { + title: '钱江晚报', + docs: 'https://docs.rsshub.app/traditional-media.html#zhe-jiang-zai-xian-zhe-bao-ji-tuan-xi-lie-bao-kan', + source: ['/'], + target: '/zjol/paper/qjwb', + }, + ], + msb: [ + { + title: '美术报', + docs: 'https://docs.rsshub.app/traditional-media.html#zhe-jiang-zai-xian-zhe-bao-ji-tuan-xi-lie-bao-kan', + source: ['/'], + target: '/zjol/paper/msb', + }, + ], + zjlnb: [ + { + title: '浙江老年报', + docs: 'https://docs.rsshub.app/traditional-media.html#zhe-jiang-zai-xian-zhe-bao-ji-tuan-xi-lie-bao-kan', + source: ['/'], + target: '/zjol/paper/zjlnb', + }, + ], + zjfzb: [ + { + title: '浙江法制报', + docs: 'https://docs.rsshub.app/traditional-media.html#zhe-jiang-zai-xian-zhe-bao-ji-tuan-xi-lie-bao-kan', + source: ['/'], + target: '/zjol/paper/zjfzb', + }, + ], + jnyb: [ + { + title: '江南游报', + docs: 'https://docs.rsshub.app/traditional-media.html#zhe-jiang-zai-xian-zhe-bao-ji-tuan-xi-lie-bao-kan', + source: ['/'], + target: '/zjol/paper/jnyb', + }, + ], + }, +}; diff --git a/lib/v2/zjol/router.js b/lib/v2/zjol/router.js new file mode 100644 index 00000000000000..896721b6d87a8a --- /dev/null +++ b/lib/v2/zjol/router.js @@ -0,0 +1,3 @@ +module.exports = function (router) { + router.get('/paper/:id?', require('./paper')); +}; From 50f4530c32c6db8697540e7794fc498b238571b2 Mon Sep 17 00:00:00 2001 From: Ethan Shen <42264778+nczitzk@users.noreply.github.com> Date: Wed, 8 Mar 2023 22:27:21 +0800 Subject: [PATCH 2/2] fix: duplicated content in jnyb --- lib/v2/zjol/paper.js | 49 +++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/lib/v2/zjol/paper.js b/lib/v2/zjol/paper.js index 85d5e4e51c65bd..67126d0ebc77a0 100644 --- a/lib/v2/zjol/paper.js +++ b/lib/v2/zjol/paper.js @@ -30,7 +30,7 @@ module.exports = async (ctx) => { let items = $(query) .toArray() - .map((a) => `${currentUrl}/${$(a).attr('href').split('?')[0]}`); + .map((a) => `${currentUrl}/${$(a).attr('href')}`); await Promise.all( $('#pageLink') @@ -48,33 +48,36 @@ module.exports = async (ctx) => { items.push( ...page(query) .toArray() - .map((a) => `${currentUrl}/${page(a).attr('href').split('?')[0]}`) + .map((a) => `${currentUrl}/${page(a).attr('href')}`) ); }) ); items = await Promise.all( - items.slice(0, limit).map((link) => - ctx.cache.tryGet(link, async () => { - const detailResponse = await got({ - method: 'get', - url: link, - }); - - const content = cheerio.load(detailResponse.data); - - const title = content('.main-article-title').text(); - - content('.main-article-alltitle').remove(); - - return { - link, - title, - pubDate, - description: content('.main-article-content').html(), - }; - }) - ) + items + .filter((a) => (id === 'jnyb' ? /\?div=1$/.test(a) : true)) + .slice(0, limit) + .map((link) => + ctx.cache.tryGet(link, async () => { + const detailResponse = await got({ + method: 'get', + url: link, + }); + + const content = cheerio.load(detailResponse.data); + + const title = content('.main-article-title').text(); + + content('.main-article-alltitle').remove(); + + return { + title, + pubDate, + link: link.split('?')[0], + description: content('.main-article-content').html(), + }; + }) + ) ); ctx.state.data = {