diff --git a/docs/blog.md b/docs/blog.md index 6356db3943fbfd..acf7827ac62249 100644 --- a/docs/blog.md +++ b/docs/blog.md @@ -152,6 +152,60 @@ username 为博主用户名,而非`xxx.hashnode.dev`中`xxx`所代表的 blog +## Medium + +### List + + + +List ID 取的是网址中最后一部分 `-` 后面的内容,例如 `https://medium.com/@imsingee/list/collection-7e67004f23f9` 的用户名为 imsingee、ID 为 `7e67004f23f9` + +::: warning 注意 + +想要获取 Private 的 List 则只支持自建 + +::: + + + +### 个性推荐 - For You + + + +::: warning 注意 + +个性推荐需要登录后的 Cookie 值,所以只能自建,详情见部署页面的配置模块。 + +::: + + + +### 个性推荐 - Following + + + +::: warning 注意 + +个性推荐需要登录后的 Cookie 值,所以只能自建,详情见部署页面的配置模块。 + +::: + + + +### 个性推荐 - Tag + + + +Tag 有很多,可从首页点进 Tag 以后的 URL 获取,例如 `https://medium.com/?tag=web3` 则 tag 为 `web3` + +::: warning 注意 + +个性推荐需要登录后的 Cookie 值,所以只能自建,详情见部署页面的配置模块。 + +::: + + + ## Miris Whispers ### 博客 diff --git a/docs/en/blog.md b/docs/en/blog.md index 5568b6a15c630e..04ac9fa28ec761 100644 --- a/docs/en/blog.md +++ b/docs/en/blog.md @@ -70,6 +70,60 @@ pageClass: routes +## Medium + +### List + + + +The List ID is the last part of the URL after `-`, for example, the username in "https://medium.com/@imsingee/list/collection-7e67004f23f9" is `imsingee`, and the ID is `7e67004f23f9`. + +::: warning Note + +To access private lists, only self-hosting is supported. + +::: + + + +### Personalized Recommendations - For You + + + +::: warning Note + +Personalized recommendations require the cookie value after logging in, so only self-hosting is supported. See the configuration module on the deployment page for details. + +::: + + + +### Personalized Recommendations - Following + + + +::: warning Note + +Personalized recommendations require the cookie value after logging in, so only self-hosting is supported. See the configuration module on the deployment page for details. + +::: + + + +### Personalized Recommendations - Tag + + + +There are many tags, which can be obtained by clicking on a tag from the homepage and looking at the URL. 
For example, if the URL is `https://medium.com/?tag=web3`, then the tag is `web3`. + +::: warning Note + +Personalized recommendations require the cookie value after logging in, so only self-hosting is supported. See the configuration module on the deployment page for details. + +::: + + + ## Miris Whispers ### Blog diff --git a/docs/en/install/README.md b/docs/en/install/README.md index 3f2f82c81e987f..4f687317573b3d 100644 --- a/docs/en/install/README.md +++ b/docs/en/install/README.md @@ -758,6 +758,11 @@ See docs of the specified route and `lib/config.js` for detailed information. - `MASTODON_API_ACCESS_TOKEN`: user access token - `MASTODON_API_ACCT_DOMAIN`: acct domain for particular instance +- Medium related routes: Open the console, copy the cookie (in theory, only uid and sid are required) + + - `MEDIUM_ARTICLE_COOKIE`: Cookie used when requesting the full article, can access the full text of paid content when there is an active Member subscription. + - `MEDIUM_COOKIE_{username}`: Cookie of the user corresponding to the username, required for personalized recommendation related routes. 
+ - nhentai torrent: [Registration](https://nhentai.net/register/) - `NHENTAI_USERNAME`: nhentai username or email diff --git a/docs/install/README.md b/docs/install/README.md index a797e5d1cedb1e..029d0626ef96d7 100644 --- a/docs/install/README.md +++ b/docs/install/README.md @@ -783,6 +783,11 @@ RSSHub 支持使用访问密钥 / 码,白名单和黑名单三种方式进行 - `MASTODON_API_ACCESS_TOKEN`: 用户 access token, 申请应用后,在应用配置页可以看到申请者的 access token - `MASTODON_API_ACCT_DOMAIN`: 该实例本地用户 acct 标识的域名 +- Medium 相关路由:打开控制台,复制 Cookie(理论上只需要 uid 和 sid 即可) + + - `MEDIUM_ARTICLE_COOKIE`:请求全文时使用的 Cookie,存在活跃的 Member 订阅时可获取付费内容全文 + - `MEDIUM_COOKIE_{username}`:对应 username 的用户的 Cookie,个性推荐相关路由需要 + - MiniFlux 全部路由: - `MINIFLUX_INSTANCE`: 用户所用的实例,默认为 MiniFlux 官方提供的 [付费服务地址](https://reader.miniflux.app) diff --git a/lib/config.js b/lib/config.js index 82f32d87e309c9..2b693bcbc5a9f3 100644 --- a/lib/config.js +++ b/lib/config.js @@ -9,6 +9,7 @@ const calculateValue = () => { const twitter_tokens = {}; const email_config = {}; const discuz_cookies = {}; + const medium_cookies = {}; for (const name in envs) { if (name.startsWith('BILIBILI_COOKIE_')) { @@ -23,6 +24,9 @@ const calculateValue = () => { } else if (name.startsWith('DISCUZ_COOKIE_')) { const cid = name.slice(14); discuz_cookies[cid] = envs[name]; + } else if (name.startsWith('MEDIUM_COOKIE_')) { + const username = name.slice(14).toLowerCase(); + medium_cookies[username] = envs[name]; } } @@ -208,6 +212,10 @@ const calculateValue = () => { accessToken: envs.MASTODON_API_ACCESS_TOKEN, acctDomain: envs.MASTODON_API_ACCT_DOMAIN, }, + medium: { + cookies: medium_cookies, + articleCookie: envs.MEDIUM_ARTICLE_COOKIE || '', + }, miniflux: { instance: envs.MINIFLUX_INSTANCE || 'https://reader.miniflux.app', token: envs.MINIFLUX_TOKEN || '', diff --git a/lib/v2/medium/following.js b/lib/v2/medium/following.js new file mode 100644 index 00000000000000..71bd5a8f0d3cd0 --- /dev/null +++ b/lib/v2/medium/following.js @@ -0,0 +1,31 @@ +const config = 
require('@/config').value; + +const parseArticle = require('./parse-article.js'); +const { getFollowingFeedQuery } = require('./graphql.js'); + +module.exports = async (ctx) => { + const user = ctx.params.user; + + const cookie = config.medium.cookies[user]; + if (cookie === undefined) { + throw Error(`缺少 Medium 用户 ${user} 登录后的 Cookie 值`); + } + + const posts = await getFollowingFeedQuery(user, cookie); + ctx.state.json = posts; + + if (!posts) { + // login failed + throw Error(`Medium 用户 ${user} 的 Cookie 无效或已过期`); + } + + const urls = posts.items.map((data) => data.post.mediumUrl); + + const parsedArticles = await Promise.all(urls.map((url) => parseArticle(ctx, url))); + + ctx.state.data = { + title: `${user} Medium Following`, + link: 'https://medium.com/?feed=following', + item: parsedArticles, + }; +}; diff --git a/lib/v2/medium/for-you.js b/lib/v2/medium/for-you.js new file mode 100644 index 00000000000000..5954615775bdf3 --- /dev/null +++ b/lib/v2/medium/for-you.js @@ -0,0 +1,31 @@ +const config = require('@/config').value; + +const parseArticle = require('./parse-article.js'); +const { getWebInlineRecommendedFeedQuery } = require('./graphql.js'); + +module.exports = async (ctx) => { + const user = ctx.params.user; + + const cookie = config.medium.cookies[user]; + if (cookie === undefined) { + throw Error(`缺少 Medium 用户 ${user} 登录后的 Cookie 值`); + } + + const posts = await getWebInlineRecommendedFeedQuery(user, cookie); + ctx.state.json = posts; + + if (!posts) { + // login failed + throw Error(`Medium 用户 ${user} 的 Cookie 无效或已过期`); + } + + const urls = posts.items.map((data) => data.post.mediumUrl); + + const parsedArticles = await Promise.all(urls.map((url) => parseArticle(ctx, url))); + + ctx.state.data = { + title: `${user} Medium For You`, + link: 'https://medium.com/', + item: parsedArticles, + }; +}; diff --git a/lib/v2/medium/graphql.js b/lib/v2/medium/graphql.js new file mode 100644 index 00000000000000..75b9fcc7ecfbd0 --- /dev/null +++ 
/**
 * Send a single GraphQL operation to Medium's `/_/graphql` endpoint.
 *
 * The operation is wrapped in a one-element array before being posted, and
 * the `data` member of the first element of the response is returned.
 *
 * @param {object} body - GraphQL operation (`operationName`, `variables`, `query`).
 * @param {string} [cookie] - Cookie header value sent with the request.
 * @returns {Promise<object|undefined|null>} The `data` payload of the first
 *   response entry, or `undefined` when the response carries no such entry.
 */
async function graphqlRequest(body, cookie) {
    const { data } = await got('https://medium.com/_/graphql', {
        method: 'POST',
        // NOTE(review): these headers look like a capture of a browser request
        // made by the Medium web client; the version and tracer values are fixed.
        headers: {
            accept: '*/*',
            'accept-language': 'en-US,en;q=0.9,zh;q=0.8,zh-CN;q=0.7',
            'apollographql-client-name': 'lite',
            'apollographql-client-version': 'main-20230505-195233-209f54c418',
            'cache-control': 'no-cache',
            'content-type': 'application/json',
            'medium-frontend-app': 'lite/main-20230505-195233-209f54c418',
            'medium-frontend-path': '/',
            'medium-frontend-route': 'homepage',
            'ot-tracer-sampled': 'true',
            'ot-tracer-spanid': '2db0b0d7263ffad8',
            'ot-tracer-traceid': '679eb621b33147c4',
            pragma: 'no-cache',
            'sec-ch-ua': '"Chromium";v="113", "Not-A.Brand";v="24"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"macOS"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            'graphql-operation': body.operationName,
            cookie,
        },
        data: JSON.stringify([body]),
    });

    // Callers already treat a missing result as a failed login / missing list
    // (they chain `?.` on the return value). Tolerate an empty or malformed
    // response here so they can raise their descriptive error instead of a
    // bare TypeError.
    return data?.[0]?.data;
}
/**
 * Build the GraphQL operation that requests the "Following" feed.
 *
 * @param {number} [pagingLimit=5] - Value placed in `variables.paging.limit`.
 * @returns {{operationName: string, variables: object, query: string}}
 *   The operation object to hand to the GraphQL request helper.
 */
function newFollowingFeedQuery(pagingLimit = 5) {
    const variables = {
        paging: {
            limit: pagingLimit,
        },
    };

    // Only `mediumUrl` is requested per post; paging info is requested as well.
    const query = `query FollowingFeedQuery($paging: PagingOptions) {
        followingFeed(paging: $paging) {
            items {
                feedId
                post {
                    mediumUrl
                    __typename
                }
                __typename
            }
            pagingInfo {
                next {
                    to
                    from
                    limit
                    source
                    __typename
                }
                __typename
            }
            __typename
        }
    }`;

    return {
        operationName: 'FollowingFeedQuery',
        variables,
        query,
    };
}
on Catalog { + name + itemsConnection(pagingOptions: $pagingOptions) { + items { + entity { + ... on Post { + mediumUrl + } + } + __typename + } + __typename + } + } + } + }`, + }; +} diff --git a/lib/v2/medium/list.js b/lib/v2/medium/list.js new file mode 100644 index 00000000000000..09becc04cbbb12 --- /dev/null +++ b/lib/v2/medium/list.js @@ -0,0 +1,32 @@ +const config = require('@/config').value; + +const parseArticle = require('./parse-article.js'); +const { getUserCatalogMainContentQuery } = require('./graphql.js'); + +module.exports = async (ctx) => { + const user = ctx.params.user; + const catalogId = ctx.params.catalogId; + + const cookie = config.medium.cookies[user]; + + const catalog = await getUserCatalogMainContentQuery(user, catalogId, cookie); + ctx.state.json = catalog; + + if (catalog && catalog.__typename === 'Forbidden') { + throw Error(`无权访问 id 为 ${catalogId} 的 List(可能是未设置 Cookie 或 Cookie 已过期)`); + } + if (!catalog || !catalog.itemsConnection) { + throw Error(`id 为 ${catalogId} 的 List 不存在`); + } + + const name = catalog.name; + const urls = catalog.itemsConnection.items.map((item) => item.entity.mediumUrl); + + const parsedArticles = await Promise.all(urls.map((url) => parseArticle(ctx, url))); + + ctx.state.data = { + title: `List: ${name}`, + link: `https://medium.com/@${user}/list/${catalogId}`, + item: parsedArticles, + }; +}; diff --git a/lib/v2/medium/maintainer.js b/lib/v2/medium/maintainer.js new file mode 100644 index 00000000000000..6f22b81b0c105c --- /dev/null +++ b/lib/v2/medium/maintainer.js @@ -0,0 +1,6 @@ +module.exports = { + '/following/:user': ['ImSingee'], + '/for-you/:user': ['ImSingee'], + '/list/:user/:catalogId': ['ImSingee'], + '/tag/:user/:tag': ['ImSingee'], +}; diff --git a/lib/v2/medium/parse-article.js b/lib/v2/medium/parse-article.js new file mode 100644 index 00000000000000..634abd47d22698 --- /dev/null +++ b/lib/v2/medium/parse-article.js @@ -0,0 +1,60 @@ +const got = require('@/utils/got'); +const cheerio = 
/**
 * Download a Medium article page and extract its metadata and body HTML.
 *
 * The `<article>` element is trimmed in place before its HTML is returned:
 * the header, the first `<h1>` and the blocks around the subtitle (or, when
 * there is no subtitle, the first sibling of the body paragraphs) are removed.
 *
 * @param {string} url - Article URL to fetch.
 * @param {string} [cookie=''] - Cookie header value sent with the request.
 * @returns {Promise<{title: string, subtitle: string, author: (string|undefined),
 *   publishedTime: (string|undefined), html: (string|null), url: string}>}
 */
async function parse(url, cookie = '') {
    const response = await got(url, {
        headers: {
            cookie,
        },
    });
    const $ = cheerio.load(response.data);

    // Metadata comes from <meta> tags in the page head.
    const readMeta = (selector) => $(selector).attr('content');
    const publishedTime = readMeta('meta[property="article:published_time"]');
    const author = readMeta('meta[name="author"]');

    const article = $('body article');

    // Drop the header actions.
    article.find('header').remove();

    // Capture the title text, then take the heading out of the body HTML.
    const heading = article.find('h1').first();
    const titleText = heading.text();
    heading.remove();

    // Capture the subtitle text before pruning around it.
    const subtitleNode = article.find('.pw-subtitle-paragraph');
    const subtitleText = subtitleNode.text();
    if (subtitleNode.length === 0) {
        // No subtitle: remove the first sibling of the body paragraphs
        // (the author-actions block, per the original author's note).
        article.find('.pw-post-body-paragraph').siblings().first().remove();
    } else {
        // Keep the subtitle itself in the HTML and remove its siblings.
        subtitleNode.siblings().remove();
    }

    const html = article.html();

    return {
        title: titleText,
        subtitle: subtitleText,
        author,
        publishedTime,
        html,
        url,
    };
}
'https://docs.rsshub.app/blog.html#medium-list', + }, + ], + }, +}; diff --git a/lib/v2/medium/router.js b/lib/v2/medium/router.js new file mode 100644 index 00000000000000..4e4112b88b5b5a --- /dev/null +++ b/lib/v2/medium/router.js @@ -0,0 +1,6 @@ +module.exports = (router) => { + router.get('/following/:user', require('./following')); + router.get('/for-you/:user', require('./for-you')); + router.get('/list/:user/:catalogId', require('./list')); + router.get('/tag/:user/:tag', require('./tag')); +}; diff --git a/lib/v2/medium/tag.js b/lib/v2/medium/tag.js new file mode 100644 index 00000000000000..c76ed60dfd2769 --- /dev/null +++ b/lib/v2/medium/tag.js @@ -0,0 +1,32 @@ +const config = require('@/config').value; + +const parseArticle = require('./parse-article.js'); +const { getWebInlineTopicFeedQuery } = require('./graphql.js'); + +module.exports = async (ctx) => { + const user = ctx.params.user; + const tag = ctx.params.tag; + + const cookie = config.medium.cookies[user]; + if (cookie === undefined) { + throw Error(`缺少 Medium 用户 ${user} 登录后的 Cookie 值`); + } + + const posts = await getWebInlineTopicFeedQuery(user, tag, cookie); + ctx.state.json = posts; + + if (!posts) { + // login failed + throw Error(`Medium 用户 ${user} 的 Cookie 无效或已过期`); + } + + const urls = posts.items.map((data) => data.post.mediumUrl); + + const parsedArticles = await Promise.all(urls.map((url) => parseArticle(ctx, url))); + + ctx.state.data = { + title: `${user} Medium Following Tag ${tag}`, + link: `https://medium.com/?tag=${tag}`, + item: parsedArticles, + }; +}; diff --git a/test/config.js b/test/config.js index 7ea6773e398cb0..f82a9ea5d03adc 100644 --- a/test/config.js +++ b/test/config.js @@ -59,6 +59,20 @@ describe('config', () => { delete process.env.DISCUZ_COOKIE_34; }); + it('medium cookie', () => { + process.env.MEDIUM_COOKIE_12 = 'cookie1'; + process.env.MEDIUM_COOKIE_34 = 'cookie2'; + + const config = require('../lib/config').value; + 
expect(config.medium.cookies).toMatchObject({ + 12: 'cookie1', + 34: 'cookie2', + }); + + delete process.env.MEDIUM_COOKIE_12; + delete process.env.MEDIUM_COOKIE_34; + }); + it('no random ua', () => { process.env.NO_RANDOM_UA = true;