From 211e94549152b15d2408175682f5ad3713b9c5cd Mon Sep 17 00:00:00 2001
From: nczitzk <42264778+nczitzk@users.noreply.github.com>
Date: Mon, 6 Mar 2023 01:11:41 +0800
Subject: [PATCH 1/2] =?UTF-8?q?feat(route):=20add=20=E6=B5=99=E6=B1=9F?=
=?UTF-8?q?=E5=9C=A8=E7=BA=BF=E6=B5=99=E6=8A=A5=E9=9B=86=E5=9B=A2=E7=B3=BB?=
=?UTF-8?q?=E5=88=97=E6=8A=A5=E5=88=8A?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
docs/traditional-media.md | 12 ++++++
lib/v2/zjol/maintainer.js | 3 ++
lib/v2/zjol/paper.js | 85 +++++++++++++++++++++++++++++++++++++++
lib/v2/zjol/radar.js | 61 ++++++++++++++++++++++++++++
lib/v2/zjol/router.js | 3 ++
5 files changed, 164 insertions(+)
create mode 100644 lib/v2/zjol/maintainer.js
create mode 100644 lib/v2/zjol/paper.js
create mode 100644 lib/v2/zjol/radar.js
create mode 100644 lib/v2/zjol/router.js
diff --git a/docs/traditional-media.md b/docs/traditional-media.md
index 862784f6802469..10e86230e1e568 100644
--- a/docs/traditional-media.md
+++ b/docs/traditional-media.md
@@ -2433,6 +2433,18 @@ category 对应的关键词有
+## 浙江在线
+
+### 浙报集团系列报刊
+
+
+
+| 浙江日报 | 钱江晚报 | 美术报 | 浙江老年报 | 浙江法制报 | 江南游报 |
+| ---- | ---- | --- | ----- | ----- | ---- |
+| zjrb | qjwb | msb | zjlnb | zjfzb | jnyb |
+
+
+
## 中国日报
### 英语点津
diff --git a/lib/v2/zjol/maintainer.js b/lib/v2/zjol/maintainer.js
new file mode 100644
index 00000000000000..f146840f72f871
--- /dev/null
+++ b/lib/v2/zjol/maintainer.js
@@ -0,0 +1,3 @@
+module.exports = { // maps each route path of this namespace to its list of maintainers
+ '/paper/:id?': ['nczitzk'], // same path pattern that router.js registers; `id` is optional
+};
diff --git a/lib/v2/zjol/paper.js b/lib/v2/zjol/paper.js
new file mode 100644
index 00000000000000..85d5e4e51c65bd
--- /dev/null
+++ b/lib/v2/zjol/paper.js
@@ -0,0 +1,85 @@
+const got = require('@/utils/got');
+const cheerio = require('cheerio');
+const { parseDate } = require('@/utils/parse-date');
+
+module.exports = async (ctx) => {
+ const id = ctx.params.id ?? 'zjrb';
+ const limit = ctx.query.limit ? parseInt(ctx.query.limit) : 100;
+
+ const query = id === 'jnyb' ? 'map[name="PagePicMap"] area' : 'ul.main-ed-articlenav-list li a';
+
+ const rootUrl = id === 'qjwb' ? 'http://qjwb.thehour.cn' : `https://${id}.zjol.com.cn`;
+ let currentUrl = `${rootUrl}/paperindex.htm`;
+
+ let response = await got({
+ method: 'get',
+ url: currentUrl,
+ });
+
+ const url = response.data.match(/URL=(.*)"/)[1];
+ const pubDate = parseDate(url.match(/(\d{4}-\d{2}\/\d{2})/)[1], 'YYYY-MM/DD');
+
+ currentUrl = `${rootUrl}/${url.replace(`/${url.split('/').pop()}`, '')}`;
+
+ response = await got({
+ method: 'get',
+ url: `${rootUrl}/${url}`,
+ });
+
+ const $ = cheerio.load(response.data);
+
+ let items = $(query)
+ .toArray()
+ .map((a) => `${currentUrl}/${$(a).attr('href').split('?')[0]}`);
+
+ await Promise.all(
+ $('#pageLink')
+ .slice(1)
+ .toArray()
+ .map((p) => `${currentUrl}/${$(p).attr('href')}`)
+ .map(async (p) => {
+ const pageResponse = await got({
+ method: 'get',
+ url: p,
+ });
+
+ const page = cheerio.load(pageResponse.data);
+
+ items.push(
+ ...page(query)
+ .toArray()
+ .map((a) => `${currentUrl}/${page(a).attr('href').split('?')[0]}`)
+ );
+ })
+ );
+
+ items = await Promise.all(
+ items.slice(0, limit).map((link) =>
+ ctx.cache.tryGet(link, async () => {
+ const detailResponse = await got({
+ method: 'get',
+ url: link,
+ });
+
+ const content = cheerio.load(detailResponse.data);
+
+ const title = content('.main-article-title').text();
+
+ content('.main-article-alltitle').remove();
+
+ return {
+ link,
+ title,
+ pubDate,
+ description: content('.main-article-content').html(),
+ };
+ })
+ )
+ );
+
+ ctx.state.data = {
+ title: $('title').text(),
+ link: rootUrl,
+ item: items,
+ };
+};
diff --git a/lib/v2/zjol/radar.js b/lib/v2/zjol/radar.js
new file mode 100644
index 00000000000000..25ce162e254699
--- /dev/null
+++ b/lib/v2/zjol/radar.js
@@ -0,0 +1,61 @@
+module.exports = {
+ 'zjol.com.cn': {
+ _name: '浙江在线',
+ '.': [
+ {
+ title: '浙报集团系列报刊',
+ docs: 'https://docs.rsshub.app/traditional-media.html#zhe-jiang-zai-xian-zhe-bao-ji-tuan-xi-lie-bao-kan',
+ source: ['/'],
+ target: (params, url) => `/zjol/paper/${new URL(url).toString().match(/\/\/(.*?)\.zjol/)[1]}`,
+ },
+ ],
+ zjrb: [
+ {
+ title: '浙江日报',
+ docs: 'https://docs.rsshub.app/traditional-media.html#zhe-jiang-zai-xian-zhe-bao-ji-tuan-xi-lie-bao-kan',
+ source: ['/'],
+ target: '/zjol/paper/zjrb',
+ },
+ ],
+ qjwb: [
+ {
+ title: '钱江晚报',
+ docs: 'https://docs.rsshub.app/traditional-media.html#zhe-jiang-zai-xian-zhe-bao-ji-tuan-xi-lie-bao-kan',
+ source: ['/'],
+ target: '/zjol/paper/qjwb',
+ },
+ ],
+ msb: [
+ {
+ title: '美术报',
+ docs: 'https://docs.rsshub.app/traditional-media.html#zhe-jiang-zai-xian-zhe-bao-ji-tuan-xi-lie-bao-kan',
+ source: ['/'],
+ target: '/zjol/paper/msb',
+ },
+ ],
+ zjlnb: [
+ {
+ title: '浙江老年报',
+ docs: 'https://docs.rsshub.app/traditional-media.html#zhe-jiang-zai-xian-zhe-bao-ji-tuan-xi-lie-bao-kan',
+ source: ['/'],
+ target: '/zjol/paper/zjlnb',
+ },
+ ],
+ zjfzb: [
+ {
+ title: '浙江法制报',
+ docs: 'https://docs.rsshub.app/traditional-media.html#zhe-jiang-zai-xian-zhe-bao-ji-tuan-xi-lie-bao-kan',
+ source: ['/'],
+ target: '/zjol/paper/zjfzb',
+ },
+ ],
+ jnyb: [
+ {
+ title: '江南游报',
+ docs: 'https://docs.rsshub.app/traditional-media.html#zhe-jiang-zai-xian-zhe-bao-ji-tuan-xi-lie-bao-kan',
+ source: ['/'],
+ target: '/zjol/paper/jnyb',
+ },
+ ],
+ },
+};
diff --git a/lib/v2/zjol/router.js b/lib/v2/zjol/router.js
new file mode 100644
index 00000000000000..896721b6d87a8a
--- /dev/null
+++ b/lib/v2/zjol/router.js
@@ -0,0 +1,3 @@
+module.exports = function (router) { // registers this namespace's routes on the router passed in
+ router.get('/paper/:id?', require('./paper')); // `id` is optional; the handler in ./paper falls back to 'zjrb'
+};
From 50f4530c32c6db8697540e7794fc498b238571b2 Mon Sep 17 00:00:00 2001
From: Ethan Shen <42264778+nczitzk@users.noreply.github.com>
Date: Wed, 8 Mar 2023 22:27:21 +0800
Subject: [PATCH 2/2] fix: duplicated content in jnyb
---
lib/v2/zjol/paper.js | 49 +++++++++++++++++++++++---------------------
1 file changed, 26 insertions(+), 23 deletions(-)
diff --git a/lib/v2/zjol/paper.js b/lib/v2/zjol/paper.js
index 85d5e4e51c65bd..67126d0ebc77a0 100644
--- a/lib/v2/zjol/paper.js
+++ b/lib/v2/zjol/paper.js
@@ -30,7 +30,7 @@ module.exports = async (ctx) => {
let items = $(query)
.toArray()
- .map((a) => `${currentUrl}/${$(a).attr('href').split('?')[0]}`);
+ .map((a) => `${currentUrl}/${$(a).attr('href')}`);
await Promise.all(
$('#pageLink')
@@ -48,33 +48,36 @@ module.exports = async (ctx) => {
items.push(
...page(query)
.toArray()
- .map((a) => `${currentUrl}/${page(a).attr('href').split('?')[0]}`)
+ .map((a) => `${currentUrl}/${page(a).attr('href')}`)
);
})
);
items = await Promise.all(
- items.slice(0, limit).map((link) =>
- ctx.cache.tryGet(link, async () => {
- const detailResponse = await got({
- method: 'get',
- url: link,
- });
-
- const content = cheerio.load(detailResponse.data);
-
- const title = content('.main-article-title').text();
-
- content('.main-article-alltitle').remove();
-
- return {
- link,
- title,
- pubDate,
- description: content('.main-article-content').html(),
- };
- })
- )
+ items
+ .filter((a) => (id === 'jnyb' ? /\?div=1$/.test(a) : true))
+ .slice(0, limit)
+ .map((link) =>
+ ctx.cache.tryGet(link, async () => {
+ const detailResponse = await got({
+ method: 'get',
+ url: link,
+ });
+
+ const content = cheerio.load(detailResponse.data);
+
+ const title = content('.main-article-title').text();
+
+ content('.main-article-alltitle').remove();
+
+ return {
+ title,
+ pubDate,
+ link: link.split('?')[0],
+ description: content('.main-article-content').html(),
+ };
+ })
+ )
);
ctx.state.data = {