From 5be926f66c064ab5cd4f505ad30abb874ff92e98 Mon Sep 17 00:00:00 2001 From: Hengyu <76626546+5upernova-heng@users.noreply.github.com> Date: Thu, 2 Mar 2023 16:34:55 +0800 Subject: [PATCH 01/18] feat(route): add NBER working papers --- docs/journal.md | 14 ++++++++++++ lib/v2/nber/maintainer.js | 4 ++++ lib/v2/nber/news.js | 39 ++++++++++++++++++++++++++++++++ lib/v2/nber/papers.js | 47 +++++++++++++++++++++++++++++++++++++++ lib/v2/nber/radar.js | 19 ++++++++++++++++ lib/v2/nber/router.js | 4 ++++ lib/v2/nber/utils.js | 29 ++++++++++++++++++++++++ 7 files changed, 156 insertions(+) create mode 100644 lib/v2/nber/maintainer.js create mode 100644 lib/v2/nber/news.js create mode 100644 lib/v2/nber/papers.js create mode 100644 lib/v2/nber/radar.js create mode 100644 lib/v2/nber/router.js create mode 100644 lib/v2/nber/utils.js diff --git a/docs/journal.md b/docs/journal.md index 44eee8800eb5b8..a5a9108596471d 100644 --- a/docs/journal.md +++ b/docs/journal.md @@ -284,6 +284,20 @@ You can get all short name of a journal from +## National Bureau of Economic Research (NBER) Working Papers + +### All Papers + + + +- Page size should be one of 20, 50, 100. By default the number is 50. + +### New Papers + + + +Papers that are labeled by "new" at the website. + ## Network and Distributed System Security (NDSS) Symposium ### Accepted papers diff --git a/lib/v2/nber/maintainer.js b/lib/v2/nber/maintainer.js new file mode 100644 index 00000000000000..f23aa42520fd76 --- /dev/null +++ b/lib/v2/nber/maintainer.js @@ -0,0 +1,4 @@ +module.exports = { + '/articles/:perPage?': ['5upernova-heng'], + '/news': ['5upernova-heng'], +}; diff --git a/lib/v2/nber/news.js b/lib/v2/nber/news.js new file mode 100644 index 00000000000000..43bb354f0db11e --- /dev/null +++ b/lib/v2/nber/news.js @@ -0,0 +1,39 @@ +const { get_html, get_elements } = require('./utils'); +const got = require('@/utils/got'); +const cheerio = require('cheerio'); + +module.exports = async (ctx) => { + url = `https://www.nber.org/papers?page=1&perPage=20&sortBy=public_date`; + const html = await get_html(url); + const elements = await get_elements(html, '.digest-card.is-new .digest-card__title a'); + + // Get Author and Abstarct + const baseUrl = 'https://www.nber.org'; + const items = await Promise.all( + elements.map((item) => + ctx.cache.tryGet(item.link, async () => { + const detailResponse = await got({ + method: 'get', + url: `${baseUrl}${item.link}`, + }); + const content = cheerio.load(detailResponse.data); + const authors = []; + content('.page-header__author-item a').each((index, element) => { + const text = content(element).text(); + const link = content(element).attr('href'); + authors.push({ name: text, link }); + }); + item.authors = authors; + item.abstract = content('.page-header__intro-inner p').text(); + return item; + }) + ) + ); + + ctx.state.data = { + title: 'NBER Working Paper News', + link: url, + item: items, + description: 'National Bureau of Economic Research Working Papers -- News', + }; +}; diff --git a/lib/v2/nber/papers.js b/lib/v2/nber/papers.js new file mode 100644 index 00000000000000..9854b0ed93e2e4 --- /dev/null +++ b/lib/v2/nber/papers.js @@ -0,0 +1,47 @@ +const { get_html, get_elements } = require('./utils'); +const got = require('@/utils/got'); +const cheerio = require('cheerio'); + +module.exports = async (ctx) => { + let { perPage = '50' } = ctx.params; + perPage = parseInt(perPage); + // perPage has to be one of 20, 50, 100 + if (perPage <= 35) {perPage = 20;} + if (35 < perPage && perPage <= 70) {perPage = 50;} + if (70 < perPage) {perPage = 100;} + + // Get title and link + url = `https://www.nber.org/papers?page=1&perPage=${perPage}&sortBy=public_date`; + const html = await get_html(url); + const elements = await get_elements(html, '.digest-card .digest-card__title a'); + + // Get Author and Abstarct + const baseUrl = 'https://www.nber.org'; + const items = await Promise.all( + elements.map((item) => + ctx.cache.tryGet(item.link, async () => { + const detailResponse = await got({ + method: 'get', + url: `${baseUrl}${item.link}`, + }); + const content = cheerio.load(detailResponse.data); + const authors = []; + content('.page-header__author-item a').each((index, element) => { + const text = content(element).text(); + const link = content(element).attr('href'); + authors.push({ name: text, link }); + }); + item.authors = authors; + item.abstract = content('.page-header__intro-inner p').text(); + return item; + }) + ) + ); + + ctx.state.data = { + title: 'NBER Working Paper', + link: url, + item: items, + description: `National Bureau of Economic Research Working Papers -- ${perPage} articles`, + }; +}; diff --git a/lib/v2/nber/radar.js b/lib/v2/nber/radar.js new file mode 100644 index 00000000000000..4b9c395a2b719a --- /dev/null +++ b/lib/v2/nber/radar.js @@ -0,0 +1,19 @@ +module.exports = { + 'nber.org': { + __name: 'NBER', + '.': [ + { + title: 'New working paper', + docs: 'todo', + source: ['/news'], + target: '/nber/news', + }, + { + title: 'All working paper', + docs: 'todo', + source: ['/papers'], + target: '/nber/papers', + }, + ], + }, +}; diff --git a/lib/v2/nber/router.js b/lib/v2/nber/router.js new file mode 100644 index 00000000000000..932196ce3d943c --- /dev/null +++ b/lib/v2/nber/router.js @@ -0,0 +1,4 @@ +module.exports = (router) => { + router.get('/papers/:perPage?', require('./papers')); + router.get('/news', require('./news')); +}; diff --git a/lib/v2/nber/utils.js b/lib/v2/nber/utils.js new file mode 100644 index 00000000000000..4262f7b14187c1 --- /dev/null +++ b/lib/v2/nber/utils.js @@ -0,0 +1,29 @@ +const cheerio = require('cheerio'); + +async function get_html(url) { + const browser = await require('@/utils/puppeteer')(); + const page = await browser.newPage(); + await page.goto(url); + await page.waitForSelector('.promo-grid'); + const html = await page.content(); + await browser.close(); + + return html; + // Get title and link +} + +async function get_elements(html, selector) { + const elements = []; + const $ = cheerio.load(html); + $(selector).each((index, element) => { + const text = $(element).text(); + const href = $(element).attr('href'); + elements.push({ title: text, link: href }); + }); + return elements; +} + +module.exports = { + get_html, + get_elements, +}; From 4ad0c06a0394dc762409472b1d5b84c11965b712 Mon Sep 17 00:00:00 2001 From: Hengyu <76626546+5upernova-heng@users.noreply.github.com> Date: Thu, 2 Mar 2023 16:43:13 +0800 Subject: [PATCH 02/18] Modify the documentation --- docs/en/journal.md | 14 ++++++++++++++ docs/journal.md | 8 ++++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/docs/en/journal.md b/docs/en/journal.md index 0b0cd8b0cb9e90..a247206c1cf8f8 100644 --- a/docs/en/journal.md +++ b/docs/en/journal.md @@ -313,6 +313,20 @@ Only some journals are supported. +## National Bureau of Economic Research (NBER) Working Papers + +### All Papers + + + +- Page size should be one of 20, 50, 100. By default the number is 50. + +### New Papers + + + +Papers that are labeled by "new" at the website. + ## Network and Distributed System Security (NDSS) Symposium ### Accepted papers diff --git a/docs/journal.md b/docs/journal.md index a5a9108596471d..9dc662210dbc11 100644 --- a/docs/journal.md +++ b/docs/journal.md @@ -286,17 +286,17 @@ You can get all short name of a journal from ## National Bureau of Economic Research (NBER) Working Papers -### All Papers +### 全部论文 -- Page size should be one of 20, 50, 100. By default the number is 50. +- 页面文章数量必须为以下值之一:20,50,100. 默认为 50 -### New Papers +### 新论文 -Papers that are labeled by "new" at the website. +在网站上被标记为 "new" 的论文 ## Network and Distributed System Security (NDSS) Symposium From 0c2d0b3c2458482b9b845bf611bf09b1cb4ef5f7 Mon Sep 17 00:00:00 2001 From: Hengyu <76626546+5upernova-heng@users.noreply.github.com> Date: Thu, 2 Mar 2023 16:51:39 +0800 Subject: [PATCH 03/18] Fix 'url is not defined' bug --- lib/v2/nber/news.js | 2 +- lib/v2/nber/papers.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/v2/nber/news.js b/lib/v2/nber/news.js index 43bb354f0db11e..31344d2f4fd46b 100644 --- a/lib/v2/nber/news.js +++ b/lib/v2/nber/news.js @@ -3,7 +3,7 @@ const got = require('@/utils/got'); const cheerio = require('cheerio'); module.exports = async (ctx) => { - url = `https://www.nber.org/papers?page=1&perPage=20&sortBy=public_date`; + const url = `https://www.nber.org/papers?page=1&perPage=20&sortBy=public_date`; const html = await get_html(url); const elements = await get_elements(html, '.digest-card.is-new .digest-card__title a'); diff --git a/lib/v2/nber/papers.js b/lib/v2/nber/papers.js index 9854b0ed93e2e4..76fa15f400ad57 100644 --- a/lib/v2/nber/papers.js +++ b/lib/v2/nber/papers.js @@ -11,7 +11,7 @@ module.exports = async (ctx) => { if (70 < perPage) {perPage = 100;} // Get title and link - url = `https://www.nber.org/papers?page=1&perPage=${perPage}&sortBy=public_date`; + const url = `https://www.nber.org/papers?page=1&perPage=${perPage}&sortBy=public_date`; const html = await get_html(url); const elements = await get_elements(html, '.digest-card .digest-card__title a'); From de01cfaf811f1a39fcb743418d36f717829b1794 Mon Sep 17 00:00:00 2001 From: Hengyu <76626546+5upernova-heng@users.noreply.github.com> Date: Thu, 2 Mar 2023 16:55:25 +0800 Subject: [PATCH 04/18] Remove async of get_element function --- lib/v2/nber/news.js | 2 +- lib/v2/nber/papers.js | 2 +- lib/v2/nber/utils.js | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/v2/nber/news.js b/lib/v2/nber/news.js index 31344d2f4fd46b..a6ac39a27108d3 100644 --- a/lib/v2/nber/news.js +++ b/lib/v2/nber/news.js @@ -5,7 +5,7 @@ const cheerio = require('cheerio'); module.exports = async (ctx) => { const url = `https://www.nber.org/papers?page=1&perPage=20&sortBy=public_date`; const html = await get_html(url); - const elements = await get_elements(html, '.digest-card.is-new .digest-card__title a'); + const elements = get_elements(html, '.digest-card.is-new .digest-card__title a'); // Get Author and Abstarct const baseUrl = 'https://www.nber.org'; diff --git a/lib/v2/nber/papers.js b/lib/v2/nber/papers.js index 76fa15f400ad57..4d20cb475bcd94 100644 --- a/lib/v2/nber/papers.js +++ b/lib/v2/nber/papers.js @@ -13,7 +13,7 @@ module.exports = async (ctx) => { // Get title and link const url = `https://www.nber.org/papers?page=1&perPage=${perPage}&sortBy=public_date`; const html = await get_html(url); - const elements = await get_elements(html, '.digest-card .digest-card__title a'); + const elements = get_elements(html, '.digest-card .digest-card__title a'); // Get Author and Abstarct const baseUrl = 'https://www.nber.org'; diff --git a/lib/v2/nber/utils.js b/lib/v2/nber/utils.js index 4262f7b14187c1..0aeb25952736d4 100644 --- a/lib/v2/nber/utils.js +++ b/lib/v2/nber/utils.js @@ -12,7 +12,7 @@ async function get_html(url) { // Get title and link } -async function get_elements(html, selector) { +function get_elements(html, selector) { const elements = []; const $ = cheerio.load(html); $(selector).each((index, element) => { From 0c276b72a141c2911a039036b8fa6dbf87ad8f4d Mon Sep 17 00:00:00 2001 From: Hengyu <76626546+5upernova-heng@users.noreply.github.com> Date: Thu, 2 Mar 2023 17:11:38 +0800 Subject: [PATCH 05/18] Remove meaningless comments --- lib/v2/nber/utils.js | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/v2/nber/utils.js b/lib/v2/nber/utils.js index 0aeb25952736d4..5546c445d97eac 100644 --- a/lib/v2/nber/utils.js +++ b/lib/v2/nber/utils.js @@ -9,7 +9,6 @@ async function get_html(url) { await browser.close(); return html; - // Get title and link } function get_elements(html, selector) { From 4abe9d8bd655e1b0b97c0b8c3448646a06d9954c Mon Sep 17 00:00:00 2001 From: Hengyu <76626546+5upernova-heng@users.noreply.github.com> Date: Thu, 2 Mar 2023 21:01:17 +0800 Subject: [PATCH 06/18] Update docs/en/journal.md Co-authored-by: Tony --- docs/en/journal.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/journal.md b/docs/en/journal.md index a247206c1cf8f8..a3f365f55b6555 100644 --- a/docs/en/journal.md +++ b/docs/en/journal.md @@ -313,7 +313,7 @@ Only some journals are supported. -## National Bureau of Economic Research (NBER) Working Papers +## National Bureau of Economic Research ### All Papers From 12408ba9dbd57d224e75775c35213da833645bab Mon Sep 17 00:00:00 2001 From: Hengyu <76626546+5upernova-heng@users.noreply.github.com> Date: Thu, 2 Mar 2023 21:01:59 +0800 Subject: [PATCH 07/18] Update docs/en/journal.md Co-authored-by: Tony --- docs/en/journal.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/journal.md b/docs/en/journal.md index a3f365f55b6555..4b8a58c5f69fa1 100644 --- a/docs/en/journal.md +++ b/docs/en/journal.md @@ -317,7 +317,7 @@ Only some journals are supported. ### All Papers - + - Page size should be one of 20, 50, 100. By default the number is 50. From 83c974c20be3843bcaeec2285fbb503ae33b0e1c Mon Sep 17 00:00:00 2001 From: Hengyu <76626546+5upernova-heng@users.noreply.github.com> Date: Thu, 2 Mar 2023 23:54:58 +0800 Subject: [PATCH 08/18] Change data acquisition methods to call api --- docs/en/journal.md | 6 ++-- lib/v2/nber/maintainer.js | 2 +- lib/v2/nber/news.js | 57 +++++++++++++++--------------- lib/v2/nber/papers.js | 61 ++++++++++++++------------------- lib/v2/nber/radar.js | 6 ++-- lib/v2/nber/router.js | 2 +- lib/v2/nber/template/author.art | 5 +++ lib/v2/nber/utils.js | 32 +++++++---------- 8 files changed, 78 insertions(+), 93 deletions(-) create mode 100644 lib/v2/nber/template/author.art diff --git a/docs/en/journal.md b/docs/en/journal.md index 4b8a58c5f69fa1..e867731649a429 100644 --- a/docs/en/journal.md +++ b/docs/en/journal.md @@ -317,15 +317,13 @@ Only some journals are supported. ### All Papers - - -- Page size should be one of 20, 50, 100. By default the number is 50. + ### New Papers -Papers that are labeled by "new" at the website. +Papers that are published in this week. ## Network and Distributed System Security (NDSS) Symposium diff --git a/lib/v2/nber/maintainer.js b/lib/v2/nber/maintainer.js index f23aa42520fd76..3d9a574641c17f 100644 --- a/lib/v2/nber/maintainer.js +++ b/lib/v2/nber/maintainer.js @@ -1,4 +1,4 @@ module.exports = { - '/articles/:perPage?': ['5upernova-heng'], + '/articles': ['5upernova-heng'], '/news': ['5upernova-heng'], }; diff --git a/lib/v2/nber/news.js b/lib/v2/nber/news.js index a6ac39a27108d3..3a5739d26d8f8f 100644 --- a/lib/v2/nber/news.js +++ b/lib/v2/nber/news.js @@ -1,39 +1,38 @@ -const { get_html, get_elements } = require('./utils'); const got = require('@/utils/got'); const cheerio = require('cheerio'); +const path = require('path'); +const { art } = require('@/utils/render'); +const { parseDate } = require('@/utils/parse-date'); +const { getData, parseAuthor } = require('./utils'); module.exports = async (ctx) => { - const url = `https://www.nber.org/papers?page=1&perPage=20&sortBy=public_date`; - const html = await get_html(url); - const elements = get_elements(html, '.digest-card.is-new .digest-card__title a'); - - // Get Author and Abstarct + const url = 'https://www.nber.org/api/v1/working_page_listing/contentType/working_paper/_/_/search'; const baseUrl = 'https://www.nber.org'; - const items = await Promise.all( - elements.map((item) => - ctx.cache.tryGet(item.link, async () => { - const detailResponse = await got({ - method: 'get', - url: `${baseUrl}${item.link}`, - }); - const content = cheerio.load(detailResponse.data); - const authors = []; - content('.page-header__author-item a').each((index, element) => { - const text = content(element).text(); - const link = content(element).attr('href'); - authors.push({ name: text, link }); - }); - item.authors = authors; - item.abstract = content('.page-header__intro-inner p').text(); - return item; - }) - ) - ); + const data = await getData(url, ctx); + const items = data + .filter((article) => article.newthisweek) + .map(async (article) => { + const link = `${baseUrl}${article.url}`; + const pubDate = await ctx.cache.tryGet(link, async () => { + const response = await got.get(link); + const $ = cheerio.load(response.data); + return parseDate($('meta[name="citation_publication_date"]').attr('content'), 'YYYY/MM/DD'); + }); + const parsedAuthors = parseAuthor(article.authors); + return { + title: article.title, + author: art(path.join(__dirname, 'template/author.art'), { + authors: parsedAuthors, + }), + pubDate, + description: article.abstract, + }; + }); ctx.state.data = { - title: 'NBER Working Paper News', - link: url, + title: 'NBER Working Paper', + link: 'https://www.nber.org/papers', item: items, - description: 'National Bureau of Economic Research Working Papers -- News', + description: `National Bureau of Economic Research Working Papers articles`, }; }; diff --git a/lib/v2/nber/papers.js b/lib/v2/nber/papers.js index 4d20cb475bcd94..9ac02920850993 100644 --- a/lib/v2/nber/papers.js +++ b/lib/v2/nber/papers.js @@ -1,47 +1,36 @@ -const { get_html, get_elements } = require('./utils'); const got = require('@/utils/got'); const cheerio = require('cheerio'); +const path = require('path'); +const { art } = require('@/utils/render'); +const { parseDate } = require('@/utils/parse-date'); +const { getData, parseAuthor } = require('./utils'); module.exports = async (ctx) => { - let { perPage = '50' } = ctx.params; - perPage = parseInt(perPage); - // perPage has to be one of 20, 50, 100 - if (perPage <= 35) {perPage = 20;} - if (35 < perPage && perPage <= 70) {perPage = 50;} - if (70 < perPage) {perPage = 100;} - - // Get title and link - const url = `https://www.nber.org/papers?page=1&perPage=${perPage}&sortBy=public_date`; - const html = await get_html(url); - const elements = get_elements(html, '.digest-card .digest-card__title a'); - - // Get Author and Abstarct + const url = 'https://www.nber.org/api/v1/working_page_listing/contentType/working_paper/_/_/search'; const baseUrl = 'https://www.nber.org'; - const items = await Promise.all( - elements.map((item) => - ctx.cache.tryGet(item.link, async () => { - const detailResponse = await got({ - method: 'get', - url: `${baseUrl}${item.link}`, - }); - const content = cheerio.load(detailResponse.data); - const authors = []; - content('.page-header__author-item a').each((index, element) => { - const text = content(element).text(); - const link = content(element).attr('href'); - authors.push({ name: text, link }); - }); - item.authors = authors; - item.abstract = content('.page-header__intro-inner p').text(); - return item; - }) - ) - ); + const data = await getData(url, ctx); + const items = data.map(async (article) => { + const link = `${baseUrl}${article.url}`; + const pubDate = await ctx.cache.tryGet(link, async () => { + const response = await got.get(link); + const $ = cheerio.load(response.data); + return parseDate($('meta[name="citation_publication_date"]').attr('content'), 'YYYY/MM/DD'); + }); + const parsedAuthors = parseAuthor(article.authors); + return { + title: article.title, + author: art(path.join(__dirname, 'template/author.art'), { + authors: parsedAuthors, + }), + pubDate, + description: article.abstract, + }; + }); ctx.state.data = { title: 'NBER Working Paper', - link: url, + link: 'https://www.nber.org/papers', item: items, - description: `National Bureau of Economic Research Working Papers -- ${perPage} articles`, + description: `National Bureau of Economic Research Working Papers articles`, }; }; diff --git a/lib/v2/nber/radar.js b/lib/v2/nber/radar.js index 4b9c395a2b719a..c95922a60264dc 100644 --- a/lib/v2/nber/radar.js +++ b/lib/v2/nber/radar.js @@ -1,16 +1,16 @@ module.exports = { 'nber.org': { - __name: 'NBER', + __name: 'National Bureau of Economic Research', '.': [ { title: 'New working paper', - docs: 'todo', + docs: 'https://docs.rsshub.app/en/journal.html#national-bureau-of-economic-research', source: ['/news'], target: '/nber/news', }, { title: 'All working paper', - docs: 'todo', + docs: 'https://docs.rsshub.app/en/journal.html#national-bureau-of-economic-research', source: ['/papers'], target: '/nber/papers', }, diff --git a/lib/v2/nber/router.js b/lib/v2/nber/router.js index 932196ce3d943c..b620bada0f6613 100644 --- a/lib/v2/nber/router.js +++ b/lib/v2/nber/router.js @@ -1,4 +1,4 @@ module.exports = (router) => { - router.get('/papers/:perPage?', require('./papers')); + router.get('/papers', require('./papers')); router.get('/news', require('./news')); }; diff --git a/lib/v2/nber/template/author.art b/lib/v2/nber/template/author.art new file mode 100644 index 00000000000000..6ca36fed610aef --- /dev/null +++ b/lib/v2/nber/template/author.art @@ -0,0 +1,5 @@ +{{ if authors }} +{{ each authors author }} + {{ author.name }} +{{ /each }} +{{ /if }} \ No newline at end of file diff --git a/lib/v2/nber/utils.js b/lib/v2/nber/utils.js index 5546c445d97eac..cf89b145199fab 100644 --- a/lib/v2/nber/utils.js +++ b/lib/v2/nber/utils.js @@ -1,28 +1,22 @@ +const got = require('@/utils/got'); const cheerio = require('cheerio'); -async function get_html(url) { - const browser = await require('@/utils/puppeteer')(); - const page = await browser.newPage(); - await page.goto(url); - await page.waitForSelector('.promo-grid'); - const html = await page.content(); - await browser.close(); - - return html; +async function getData(url) { + const response = await got(url).json(); + return response.results; } -function get_elements(html, selector) { - const elements = []; - const $ = cheerio.load(html); - $(selector).each((index, element) => { - const text = $(element).text(); - const href = $(element).attr('href'); - elements.push({ title: text, link: href }); +function parseAuthor(authors) { + return authors.map((author) => { + const $ = cheerio.load(author); + return { + name: $('a').text(), + link: $('a').attr('href'), + }; }); - return elements; } module.exports = { - get_html, - get_elements, + getData, + parseAuthor, }; From f2987590a37294d0f9d85980458688d312e27788 Mon Sep 17 00:00:00 2001 From: Hengyu <76626546+5upernova-heng@users.noreply.github.com> Date: Thu, 2 Mar 2023 23:58:42 +0800 Subject: [PATCH 09/18] Remove wrong param 'ctx' --- lib/v2/nber/news.js | 2 +- lib/v2/nber/papers.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/v2/nber/news.js b/lib/v2/nber/news.js index 3a5739d26d8f8f..5d20795843f6c3 100644 --- a/lib/v2/nber/news.js +++ b/lib/v2/nber/news.js @@ -8,7 +8,7 @@ const { getData, parseAuthor } = require('./utils'); module.exports = async (ctx) => { const url = 'https://www.nber.org/api/v1/working_page_listing/contentType/working_paper/_/_/search'; const baseUrl = 'https://www.nber.org'; - const data = await getData(url, ctx); + const data = await getData(url); const items = data .filter((article) => article.newthisweek) .map(async (article) => { diff --git a/lib/v2/nber/papers.js b/lib/v2/nber/papers.js index 9ac02920850993..e5ff1ffdecd1fe 100644 --- a/lib/v2/nber/papers.js +++ b/lib/v2/nber/papers.js @@ -8,7 +8,7 @@ const { getData, parseAuthor } = require('./utils'); module.exports = async (ctx) => { const url = 'https://www.nber.org/api/v1/working_page_listing/contentType/working_paper/_/_/search'; const baseUrl = 'https://www.nber.org'; - const data = await getData(url, ctx); + const data = await getData(url); const items = data.map(async (article) => { const link = `${baseUrl}${article.url}`; const pubDate = await ctx.cache.tryGet(link, async () => { From ab489f8eb284c91f40eb82427723e84f18fab510 Mon Sep 17 00:00:00 2001 From: Hengyu <76626546+5upernova-heng@users.noreply.github.com> Date: Fri, 3 Mar 2023 00:44:09 +0800 Subject: [PATCH 10/18] Change documentation and radar --- docs/en/journal.md | 6 ++++-- docs/journal.md | 10 +++++----- lib/v2/nber/radar.js | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/docs/en/journal.md b/docs/en/journal.md index e867731649a429..865db8443db842 100644 --- a/docs/en/journal.md +++ b/docs/en/journal.md @@ -317,14 +317,16 @@ Only some journals are supported. ### All Papers - + ### New Papers - + Papers that are published in this week. + + ## Network and Distributed System Security (NDSS) Symposium ### Accepted papers diff --git a/docs/journal.md b/docs/journal.md index 9dc662210dbc11..77bdd9913deff4 100644 --- a/docs/journal.md +++ b/docs/journal.md @@ -284,20 +284,20 @@ You can get all short name of a journal from -## National Bureau of Economic Research (NBER) Working Papers +## National Bureau of Economic Research ### 全部论文 - - -- 页面文章数量必须为以下值之一:20,50,100. 默认为 50 + ### 新论文 - + 在网站上被标记为 "new" 的论文 + + ## Network and Distributed System Security (NDSS) Symposium ### Accepted papers diff --git a/lib/v2/nber/radar.js b/lib/v2/nber/radar.js index c95922a60264dc..aca2d38459e6dd 100644 --- a/lib/v2/nber/radar.js +++ b/lib/v2/nber/radar.js @@ -5,7 +5,7 @@ module.exports = { { title: 'New working paper', docs: 'https://docs.rsshub.app/en/journal.html#national-bureau-of-economic-research', - source: ['/news'], + source: ['/papers'], target: '/nber/news', }, { From adffaf99982814f44f518a5446f937ccd26930fe Mon Sep 17 00:00:00 2001 From: Hengyu <76626546+5upernova-heng@users.noreply.github.com> Date: Fri, 3 Mar 2023 02:16:31 +0800 Subject: [PATCH 11/18] Combine to file together & Cache each item & Add doi & Add pdf download link in description & Styling the description --- lib/v2/nber/index.js | 48 ++++++++++++++++++++++++++++ lib/v2/nber/news.js | 38 ---------------------- lib/v2/nber/papers.js | 36 --------------------- lib/v2/nber/router.js | 4 +-- lib/v2/nber/template/author.art | 5 --- lib/v2/nber/template/description.art | 6 ++++ lib/v2/nber/utils.js | 22 ------------- 7 files changed, 56 insertions(+), 103 deletions(-) create mode 100644 lib/v2/nber/index.js delete mode 100644 lib/v2/nber/news.js delete mode 100644 lib/v2/nber/papers.js delete mode 100644 lib/v2/nber/template/author.art create mode 100644 lib/v2/nber/template/description.art delete mode 100644 lib/v2/nber/utils.js diff --git a/lib/v2/nber/index.js b/lib/v2/nber/index.js new file mode 100644 index 00000000000000..209cb384e624d3 --- /dev/null +++ b/lib/v2/nber/index.js @@ -0,0 +1,48 @@ +const got = require('@/utils/got'); +const cheerio = require('cheerio'); +const path = require('path'); +const { art } = require('@/utils/render'); +const { parseDate } = require('@/utils/parse-date'); + +async function getData(url) { + const response = await got(url).json(); + return response.results; +} + +module.exports = async (ctx) => { + const url = 'https://www.nber.org/api/v1/working_page_listing/contentType/working_paper/_/_/search'; + const baseUrl = 'https://www.nber.org'; + const data = await ctx.cache.tryGet(url, async () => { + return getData(url); + }); + const items = await Promise.all( + data + .filter((article) => ctx.path === '/papers' || article.newthisweek) + .map((article) => { + const link = `${baseUrl}${article.url}`; + const item = ctx.cache.tryGet(link, async () => { + const response = await got.get(link); + const $ = cheerio.load(response.data); + const downloadLink = $('meta[name="citation_pdf_url"]').attr('content'); + return { + title: article.title, + author: $('meta[name="dcterms.creator"]').attr('content'), + pubDate: parseDate($('meta[name="citation_publication_date"]').attr('content'), 'YYYY/MM/DD'), + link: $('meta[name="citation_doi"]').attr('content'), + description: art(path.join(__dirname, 'template/description.art'), { + article, + downloadLink, + }), + }; + }); + return item; + }) + ); + + ctx.state.data = { + title: 'NBER Working Paper', + link: 'https://www.nber.org/papers', + item: items, + description: `National Bureau of Economic Research Working Papers articles`, + }; +}; diff --git a/lib/v2/nber/news.js b/lib/v2/nber/news.js deleted file mode 100644 index 5d20795843f6c3..00000000000000 --- a/lib/v2/nber/news.js +++ /dev/null @@ -1,38 +0,0 @@ -const got = require('@/utils/got'); -const cheerio = require('cheerio'); -const path = require('path'); -const { art } = require('@/utils/render'); -const { parseDate } = require('@/utils/parse-date'); -const { getData, parseAuthor } = require('./utils'); - -module.exports = async (ctx) => { - const url = 'https://www.nber.org/api/v1/working_page_listing/contentType/working_paper/_/_/search'; - const baseUrl = 'https://www.nber.org'; - const data = await getData(url); - const items = data - .filter((article) => article.newthisweek) - .map(async (article) => { - const link = `${baseUrl}${article.url}`; - const pubDate = await ctx.cache.tryGet(link, async () => { - const response = await got.get(link); - const $ = cheerio.load(response.data); - return parseDate($('meta[name="citation_publication_date"]').attr('content'), 'YYYY/MM/DD'); - }); - const parsedAuthors = parseAuthor(article.authors); - return { - title: article.title, - author: art(path.join(__dirname, 'template/author.art'), { - authors: parsedAuthors, - }), - pubDate, - description: article.abstract, - }; - }); - - ctx.state.data = { - title: 'NBER Working Paper', - link: 'https://www.nber.org/papers', - item: items, - description: `National Bureau of Economic Research Working Papers articles`, - }; -}; diff --git a/lib/v2/nber/papers.js b/lib/v2/nber/papers.js deleted file mode 100644 index e5ff1ffdecd1fe..00000000000000 --- a/lib/v2/nber/papers.js +++ /dev/null @@ -1,36 +0,0 @@ -const got = require('@/utils/got'); -const cheerio = require('cheerio'); -const path = require('path'); -const { art } = require('@/utils/render'); -const { parseDate } = require('@/utils/parse-date'); -const { getData, parseAuthor } = require('./utils'); - -module.exports = async (ctx) => { - const url = 'https://www.nber.org/api/v1/working_page_listing/contentType/working_paper/_/_/search'; - const baseUrl = 'https://www.nber.org'; - const data = await getData(url); - const items = data.map(async (article) => { - const link = `${baseUrl}${article.url}`; - const pubDate = await ctx.cache.tryGet(link, async () => { - const response = await got.get(link); - const $ = cheerio.load(response.data); - return parseDate($('meta[name="citation_publication_date"]').attr('content'), 'YYYY/MM/DD'); - }); - const parsedAuthors = parseAuthor(article.authors); - return { - title: article.title, - author: art(path.join(__dirname, 'template/author.art'), { - authors: parsedAuthors, - }), - pubDate, - description: article.abstract, - }; - }); - - ctx.state.data = { - title: 'NBER Working Paper', - link: 'https://www.nber.org/papers', - item: items, - description: `National Bureau of Economic Research Working Papers articles`, - }; -}; diff --git a/lib/v2/nber/router.js b/lib/v2/nber/router.js index b620bada0f6613..e4dc695191999d 100644 --- a/lib/v2/nber/router.js +++ b/lib/v2/nber/router.js @@ -1,4 +1,4 @@ module.exports = (router) => { - router.get('/papers', require('./papers')); - router.get('/news', require('./news')); + router.get('/papers', require('.')); + router.get('/news', require('.')); }; diff --git a/lib/v2/nber/template/author.art b/lib/v2/nber/template/author.art deleted file mode 100644 index 6ca36fed610aef..00000000000000 --- a/lib/v2/nber/template/author.art +++ /dev/null @@ -1,5 +0,0 @@ -{{ if authors }} -{{ each authors author }} - {{ author.name }} -{{ /each }} -{{ /if }} \ No newline at end of file diff --git a/lib/v2/nber/template/description.art b/lib/v2/nber/template/description.art new file mode 100644 index 00000000000000..44f9ea5eddad5a --- /dev/null +++ b/lib/v2/nber/template/description.art @@ -0,0 +1,6 @@ +{{ if article.abstract }} +

{{ article.abstract }}

+{{ /if}} +{{ if downloadLink }} +Download PDF +{{ /if }} \ No newline at end of file diff --git a/lib/v2/nber/utils.js b/lib/v2/nber/utils.js deleted file mode 100644 index cf89b145199fab..00000000000000 --- a/lib/v2/nber/utils.js +++ /dev/null @@ -1,22 +0,0 @@ -const got = require('@/utils/got'); -const cheerio = require('cheerio'); - -async function getData(url) { - const response = await got(url).json(); - return response.results; -} - -function parseAuthor(authors) { - return authors.map((author) => { - const $ = cheerio.load(author); - return { - name: $('a').text(), - link: $('a').attr('href'), - }; - }); -} - -module.exports = { - getData, - parseAuthor, -}; From 347d650b02e30c0a7f9713772ece1de923e0b34e Mon Sep 17 00:00:00 2001 From: Hengyu <76626546+5upernova-heng@users.noreply.github.com> Date: Fri, 3 Mar 2023 02:22:42 +0800 Subject: [PATCH 12/18] Add await in async function --- lib/v2/nber/index.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/v2/nber/index.js b/lib/v2/nber/index.js index 209cb384e624d3..b4805597db6c44 100644 --- a/lib/v2/nber/index.js +++ b/lib/v2/nber/index.js @@ -12,8 +12,9 @@ async function getData(url) { module.exports = async (ctx) => { const url = 'https://www.nber.org/api/v1/working_page_listing/contentType/working_paper/_/_/search'; const baseUrl = 'https://www.nber.org'; - const data = await ctx.cache.tryGet(url, async () => { - return getData(url); + const data = ctx.cache.tryGet(url, async () => { + const response = await getData(url); + return response; }); const items = await Promise.all( data From 6443cefdfa13b9df02df3196ec38df8e23f8c4be Mon Sep 17 00:00:00 2001 From: Hengyu <76626546+5upernova-heng@users.noreply.github.com> Date: Fri, 3 Mar 2023 21:36:08 +0800 Subject: [PATCH 13/18] Clean the code --- lib/v2/nber/index.js | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/v2/nber/index.js b/lib/v2/nber/index.js index b4805597db6c44..34756f077b561b 100644 --- a/lib/v2/nber/index.js +++ b/lib/v2/nber/index.js @@ -12,7 +12,7 @@ async function getData(url) { module.exports = async (ctx) => { const url = 'https://www.nber.org/api/v1/working_page_listing/contentType/working_paper/_/_/search'; const baseUrl = 'https://www.nber.org'; - const data = ctx.cache.tryGet(url, async () => { + const data = await ctx.cache.tryGet(url, async () => { const response = await getData(url); return response; }); @@ -21,8 +21,8 @@ module.exports = async (ctx) => { .filter((article) => ctx.path === '/papers' || article.newthisweek) .map((article) => { const link = `${baseUrl}${article.url}`; - const item = ctx.cache.tryGet(link, async () => { - const response = await got.get(link); + return ctx.cache.tryGet(link, async () => { + const response = await got(link); const $ = cheerio.load(response.data); const downloadLink = $('meta[name="citation_pdf_url"]').attr('content'); return { @@ -36,7 +36,6 @@ module.exports = async (ctx) => { }), }; }); - return item; }) ); From 693c7187855e2a8915781fe0ef5ecf1126743ba2 Mon Sep 17 00:00:00 2001 From: Hengyu <76626546+5upernova-heng@users.noreply.github.com> Date: Fri, 3 Mar 2023 22:56:40 +0800 Subject: [PATCH 14/18] Add config.cache.expire --- lib/v2/nber/index.js | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/v2/nber/index.js b/lib/v2/nber/index.js index 34756f077b561b..9e744ebbdab2b1 100644 --- a/lib/v2/nber/index.js +++ b/lib/v2/nber/index.js @@ -12,10 +12,8 @@ async function getData(url) { module.exports = async (ctx) => { const url = 'https://www.nber.org/api/v1/working_page_listing/contentType/working_paper/_/_/search'; const baseUrl = 'https://www.nber.org'; - const data = await ctx.cache.tryGet(url, async () => { - const response = await getData(url); - return response; - }); + const config = require('@/config').value; + const data = await ctx.cache.tryGet(url, () => getData(url), config.cache.expire, false); const items = await Promise.all( data .filter((article) => ctx.path === '/papers' || article.newthisweek) From 067f16d7f5d1fd6e0697ef2c47db0ccbeb3343e0 Mon Sep 17 00:00:00 2001 From: TonyRL Date: Fri, 3 Mar 2023 16:09:48 +0000 Subject: [PATCH 15/18] fix: typo --- docs/en/journal.md | 28 ++++++++++++++-------------- docs/journal.md | 28 ++++++++++++++-------------- lib/v2/nber/index.js | 5 +++-- 3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/docs/en/journal.md b/docs/en/journal.md index 865db8443db842..3bc19150a18754 100644 --- a/docs/en/journal.md +++ b/docs/en/journal.md @@ -234,6 +234,20 @@ Return results from 2020 | smart-cities | /technologyreview/smart-cities| | space | /technologyreview/space | +## National Bureau of Economic Research + +### All Papers + + + +### New Papers + + + +Papers that are published in this week. + + + ## Nature Journal ::: tip Tips @@ -313,20 +327,6 @@ Only some journals are supported. -## National Bureau of Economic Research - -### All Papers - - - -### New Papers - - - -Papers that are published in this week. - - - ## Network and Distributed System Security (NDSS) Symposium ### Accepted papers diff --git a/docs/journal.md b/docs/journal.md index 77bdd9913deff4..4f640ea3ff7ed9 100644 --- a/docs/journal.md +++ b/docs/journal.md @@ -206,6 +206,20 @@ pageClass: routes +## National Bureau of Economic Research + +### 全部论文 + + + +### 新论文 + + + +在网站上被标记为 "new" 的论文 + + + ## Nature 系列 ::: tip Tips @@ -284,20 +298,6 @@ You can get all short name of a journal from -## National Bureau of Economic Research - -### 全部论文 - - - -### 新论文 - - - -在网站上被标记为 "new" 的论文 - - - ## Network and Distributed System Security (NDSS) Symposium ### Accepted papers diff --git a/lib/v2/nber/index.js b/lib/v2/nber/index.js index 9e744ebbdab2b1..20c9cd5f56bf44 100644 --- a/lib/v2/nber/index.js +++ b/lib/v2/nber/index.js @@ -13,7 +13,7 @@ module.exports = async (ctx) => { const url = 'https://www.nber.org/api/v1/working_page_listing/contentType/working_paper/_/_/search'; const baseUrl = 'https://www.nber.org'; const config = require('@/config').value; - const data = await ctx.cache.tryGet(url, () => getData(url), config.cache.expire, false); + const data = await ctx.cache.tryGet(url, () => getData(url), config.cache.routeExpire, false); const items = await Promise.all( data .filter((article) => ctx.path === '/papers' || article.newthisweek) @@ -27,7 +27,8 @@ module.exports = async (ctx) => { title: article.title, author: $('meta[name="dcterms.creator"]').attr('content'), pubDate: parseDate($('meta[name="citation_publication_date"]').attr('content'), 'YYYY/MM/DD'), - link: $('meta[name="citation_doi"]').attr('content'), + link, + doi: $('meta[name="citation_doi"]').attr('content'), description: art(path.join(__dirname, 'template/description.art'), { article, downloadLink, From e1af81079005e8a26de31a8bff77e5fb70459177 Mon Sep 17 00:00:00 2001 From: Hengyu <76626546+5upernova-heng@users.noreply.github.com> Date: Sat, 4 Mar 2023 01:49:53 +0800 Subject: [PATCH 16/18] Change 'abstract' acquisition method to parsing html from calling api --- lib/v2/nber/index.js | 3 ++- lib/v2/nber/template/description.art | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/v2/nber/index.js b/lib/v2/nber/index.js index 20c9cd5f56bf44..5a7ebda392ce4f 100644 --- a/lib/v2/nber/index.js +++ b/lib/v2/nber/index.js @@ -23,6 +23,7 @@ module.exports = async (ctx) => { const response = await got(link); const $ = cheerio.load(response.data); const downloadLink = $('meta[name="citation_pdf_url"]').attr('content'); + const fullAbstract = $('.page-header__intro-inner').html(); return { title: article.title, author: $('meta[name="dcterms.creator"]').attr('content'), @@ -30,7 +31,7 @@ module.exports = async (ctx) => { link, doi: $('meta[name="citation_doi"]').attr('content'), description: art(path.join(__dirname, 'template/description.art'), { - article, + fullAbstract, downloadLink, }), }; diff --git a/lib/v2/nber/template/description.art b/lib/v2/nber/template/description.art index 44f9ea5eddad5a..c013cb155e4b5d 100644 --- a/lib/v2/nber/template/description.art +++ b/lib/v2/nber/template/description.art @@ -1,5 +1,5 @@ -{{ if article.abstract }} -

{{ article.abstract }}

+{{ if fullAbstract }} +{{@ fullAbstract }} {{ /if}} {{ if downloadLink }} Download PDF From b6479bd0969f7af40e4d27a5e4911c671d3c8960 Mon Sep 17 00:00:00 2001 From: Hengyu <76626546+5upernova-heng@users.noreply.github.com> Date: Sat, 4 Mar 2023 01:56:05 +0800 Subject: [PATCH 17/18] Update lib/v2/nber/radar.js Co-authored-by: Tony --- lib/v2/nber/radar.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/v2/nber/radar.js b/lib/v2/nber/radar.js index aca2d38459e6dd..3aab15ed675eea 100644 --- a/lib/v2/nber/radar.js +++ b/lib/v2/nber/radar.js @@ -1,6 +1,6 @@ module.exports = { 'nber.org': { - __name: 'National Bureau of Economic Research', + _name: 'National Bureau of Economic Research', '.': [ { title: 'New working paper', From 8de2594f2e4bd825b2fe9ed8a163ee1914118d4a Mon Sep 17 00:00:00 2001 From: TonyRL Date: Fri, 3 Mar 2023 18:26:07 +0000 Subject: [PATCH 18/18] docs: fix typos --- docs/en/journal.md | 2 +- docs/journal.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/journal.md b/docs/en/journal.md index 3bc19150a18754..ef1fa82a584654 100644 --- a/docs/en/journal.md +++ b/docs/en/journal.md @@ -238,7 +238,7 @@ Return results from 2020 ### All Papers - + ### New Papers diff --git a/docs/journal.md b/docs/journal.md index 4f640ea3ff7ed9..a254d7fbd8c4f3 100644 --- a/docs/journal.md +++ b/docs/journal.md @@ -210,7 +210,7 @@ pageClass: routes ### 全部论文 - + ### 新论文