feat(parameter): Add summary by ChatGPT (#13611)

* feat: 添加使用 ChatGPT 进行总结的功能 * feat: 添加使用 ChatGPT 进行总结的功能 * feat: 添加使用 ChatGPT 进行总结的功能 * feat: 添加使用 ChatGPT 进行总结的功能 * feat: 添加使用 ChatGPT 进行总结的功能 * feat: 添加使用 ChatGPT 进行总结的功能 * feat: 添加使用 ChatGPT 进行总结的功能 * feat: 添加使用 ChatGPT 进行总结的功能 * feat: 添加使用 ChatGPT 进行总结的功能 * style: camelCase ---------
DIYgod · Nov 6, 2023 · e594309 · e594309
1 parent f421736
commit e594309
Show file tree

Hide file tree

Showing 10 changed files with 187 additions and 0 deletions.
diff --git a/lib/config.js b/lib/config.js
@@ -112,6 +112,14 @@ const calculateValue = () => {
         },
         suffix: envs.SUFFIX,
         titleLengthLimit: parseInt(envs.TITLE_LENGTH_LIMIT) || 150,
+        openai: {
+            apiKey: envs.OPENAI_API_KEY,
+            model: envs.OPENAI_MODEL || 'gpt-3.5-turbo-16k',
+            temperature: envs.OPENAI_TEMPERATURE || 0.2,
+            maxTokens: envs.OPENAI_MAX_TOKENS || null,
+            endpoint: envs.OPENAI_API_ENDPOINT || 'https://api.openai.com/v1',
+            prompt: envs.OPENAI_PROMPT || 'Please summarize the following article and reply with markdown format.',
+        },
 
         // Route-specific Configurations
         bilibili: {

diff --git a/lib/middleware/parameter.js b/lib/middleware/parameter.js
@@ -4,6 +4,10 @@ const { simplecc } = require('simplecc-wasm');
 const got = require('@/utils/got');
 const config = require('@/config').value;
 const { RE2JS } = require('re2js');
+const md = require('markdown-it')({
+    html: true,
+});
+const htmlToText = require('html-to-text');
 
 let mercury_parser;
 
@@ -23,6 +27,26 @@ const resolveRelativeLink = ($, elem, attr, baseUrl) => {
     }
 };
 
+const summarizeArticle = async (articleText) => {
+    const apiUrl = `${config.openai.endpoint}/chat/completions`;
+    const response = await got.post(apiUrl, {
+        json: {
+            model: config.openai.model,
+            max_tokens: config.openai.maxTokens,
+            messages: [
+                { role: 'system', content: config.openai.prompt },
+                { role: 'user', content: articleText },
+            ],
+            temperature: config.openai.temperature,
+        },
+        headers: {
+            Authorization: `Bearer ${config.openai.apiKey}`,
+        },
+    });
+
+    return response.data.choices[0].message.content;
+};
+
 module.exports = async (ctx, next) => {
     await next();
 
@@ -286,6 +310,33 @@ module.exports = async (ctx, next) => {
                 await Promise.all(tasks);
             }
 
+            // openai: when the `chatgpt` query parameter is set and an API key is configured, prepend a ChatGPT summary to each item that has a description
+            if (ctx.query.chatgpt && config.openai.apiKey) {
+                ctx.state.data.item = await Promise.all(
+                    ctx.state.data.item.map(async (item) => {
+                        if (item.description) {
+                            try {
+                                const summary = await ctx.cache.tryGet(`openai:${item.link}`, async () => {
+                                    const text = htmlToText.htmlToText(item.description);
+                                    if (text.length < 300) {
+                                        return '';
+                                    }
+                                    const summary_md = await summarizeArticle(text);
+                                    return md.render(summary_md);
+                                });
+                                // Prepend the rendered summary to the item's description; an empty summary (plain text shorter than 300 characters) leaves the item unchanged
+                                if (summary !== '') {
+                                    item.description = summary + '<hr/><br/>' + item.description;
+                                }
+                            } catch (e) {
+                                // when openai failed, return default description and not write cache. NOTE(review): the cache key is built from item.link, so items without a link would share `openai:undefined` - confirm every route sets link
+                            }
+                        }
+                        return item;
+                    })
+                );
+            }
+
             // scihub
             if (ctx.query.scihub) {
                 ctx.state.data.item.map((item) => {

diff --git a/lib/v2/test/index.js b/lib/v2/test/index.js
@@ -231,6 +231,43 @@ module.exports = async (ctx) => {
                 author: null,
             }
         );
+    } else if (ctx.params.id === 'gpt') {
+        item.push(
+            {
+                title: 'Title0',
+                description: 'Description0',
+                pubDate: new Date(`2019-3-1`).toUTCString(),
+                link: 'https://github.com/DIYgod/RSSHub/issues/0',
+            },
+            {
+                title: 'Title1',
+                description:
+                    '快速开始\n' +
+                    '如果您在使用 RSSHub 过程中遇到了问题或者有建议改进，我们很乐意听取您的意见！您可以通过 Pull Request 来提交您的修改。无论您对 Pull Request 的使用是否熟悉，我们都欢迎不同经验水平的开发者参与贡献。如果您不懂编程，也可以通过 报告错误 的方式来帮助我们。\n' +
+                    '\n' +
+                    '参与讨论\n' +
+                    'Telegram 群组 GitHub Issues GitHub 讨论\n' +
+                    '\n' +
+                    '开始之前\n' +
+                    '要制作一个 RSS 订阅，您需要结合使用 Git、HTML、JavaScript、jQuery 和 Node.js。\n' +
+                    '\n' +
+                    '如果您对它们不是很了解，但想要学习它们，以下是一些好的资源：\n' +
+                    '\n' +
+                    'MDN Web Docs 上的 JavaScript 指南\n' +
+                    'W3Schools\n' +
+                    'Codecademy 上的 Git 课程\n' +
+                    '如果您想查看其他开发人员如何使用这些技术来制作 RSS 订阅的示例，您可以查看 我们的代码库 中的一些代码。\n' +
+                    '\n' +
+                    '提交新的 RSSHub 规则\n' +
+                    '如果您发现一个网站没有提供 RSS 订阅，您可以使用 RSSHub 制作一个 RSS 规则。RSS 规则是一个短小的 Node.js 程序代码（以下简称 “路由”），它告诉 RSSHub 如何从网站中提取内容并生成 RSS 订阅。通过制作新的 RSS 路由，您可以帮助让您喜爱的网站的内容被更容易访问和关注。\n' +
+                    '\n' +
+                    '在您开始编写 RSS 路由之前，请确保源站点没有提供 RSS。一些网页会在 HTML 头部中包含一个 type 为 application/atom+xml 或 application/rss+xml 的 link 元素来指示 RSS 链接。\n' +
+                    '\n' +
+                    '这是在 HTML 头部中看到 RSS 链接可能会长成这样：<link rel="alternate" type="application/rss+xml" href="http://example.com/rss.xml" />。如果您看到这样的链接，这意味着这个网站已经有了一个 RSS 订阅，您不需要为它制作一个新的 RSS 路由。',
+                pubDate: new Date(`2019-3-1`).toUTCString(),
+                link: 'https://github.com/DIYgod/RSSHub/issues/1',
+            }
+        );
     }
 
     for (let i = 1; i < 6; i++) {

diff --git a/package.json b/package.json
@@ -104,6 +104,7 @@
     "git-rev-sync": "3.0.2",
     "googleapis": "128.0.0",
     "got": "11.8.6",
+    "html-to-text": "9.0.5",
     "https-proxy-agent": "7.0.2",
     "iconv-lite": "0.6.3",
     "imapflow": "1.0.147",

diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
diff --git a/test/middleware/parameter.js b/test/middleware/parameter.js
@@ -5,6 +5,8 @@ const request = supertest(server);
 const Parser = require('rss-parser');
 const parser = new Parser();
 const config = require('../../lib/config').value;
+const got = require('../../lib/utils/got');
+jest.mock('../../lib/utils/got');
 
 afterAll(() => {
     server.close();
@@ -413,3 +415,36 @@ describe('multi parameter', () => {
         expect(parsed.items[1].title).toBe('Title3');
     });
 });
+
+describe('openai', () => {
+    it(`chatgpt`, async () => {
+        config.openai.apiKey = 'sk-1234567890';
+        // 模拟 openai 请求的响应
+        const openaiResponse = {
+            data: {
+                choices: [
+                    {
+                        message: {
+                            content: 'Summary of the article.',
+                        },
+                    },
+                ],
+            },
+        };
+
+        got.post.mockResolvedValue(openaiResponse);
+        const responseWithGpt = await request.get('/test/gpt?chatgpt=true');
+        const responseNormal = await request.get('/test/gpt');
+
+        expect(responseWithGpt.status).toBe(200);
+        expect(responseNormal.status).toBe(200);
+
+        const parsedGpt = await parser.parseString(responseWithGpt.text);
+        const parsedNormal = await parser.parseString(responseNormal.text);
+
+        expect(parsedGpt.items[0].content).not.toBe(undefined);
+        expect(parsedGpt.items[0].content).toBe(parsedNormal.items[0].content);
+        expect(parsedGpt.items[1].content).not.toBe(undefined);
+        expect(parsedGpt.items[1].content).not.toBe(parsedNormal.items[1].content);
+    });
+});
diff --git a/website/docs/install/README.md b/website/docs/install/README.md
@@ -739,6 +739,18 @@ Configs in this sections are in beta stage, and **are turn off by default**. Ple
 
 `TITLE_LENGTH_LIMIT`: limit the length of feed title generated in bytes, an English alphabet counts as 1 byte, the rest such as Chinese, Japanese, Korean or Arabic counts as 2 bytes by design, default to `150`
 
+`OPENAI_API_KEY`: OpenAI API key, required to summarize articles with ChatGPT
+
+`OPENAI_MODEL`: OpenAI model name used to summarize articles, default to `gpt-3.5-turbo-16k`, see [OpenAI API reference](https://platform.openai.com/docs/models/overview) for details
+
+`OPENAI_TEMPERATURE`: OpenAI temperature parameter used when summarizing articles, default to `0.2`, see [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature) for details
+
+`OPENAI_MAX_TOKENS`: maximum number of tokens OpenAI may generate for a summary, default to `null`, see [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens) for details
+
+`OPENAI_API_ENDPOINT`: OpenAI API base URL used to summarize articles, default to `https://api.openai.com/v1`, see [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat) for details
+
+`OPENAI_PROMPT`: OpenAI system prompt used to summarize articles, default to `Please summarize the following article and reply with markdown format.`, see [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat) for details
+
 ### Route-specific Configurations
 
 :::tip

diff --git a/website/docs/parameter.md b/website/docs/parameter.md
@@ -171,3 +171,17 @@ Set the parameter `brief` to generate a brief pure-text introduction with a limi
 For example：
 
 -   Brief introduction with 100 characters: `?brief=100`
+
+## Summarized by ChatGPT (Self-hosted)
+
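+Set the parameter `chatgpt` to prepend a summary generated by ChatGPT to each item's description. Items whose plain-text description is shorter than 300 characters are not summarized. See [Install](/install#other-application-configurations) for details. Enable this feature only when you need it, because every summary consumes OpenAI tokens.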
+
+-   `chatgpt`: set to any value
+
+Requirements：
+
+-   `OPENAI_API_KEY` environment variable has been set
+
+For example：
+
+-   `/meituan/tech/home?chatgpt=true`
diff --git a/website/i18n/zh/docusaurus-plugin-content-docs/current/install/README.md b/website/i18n/zh/docusaurus-plugin-content-docs/current/install/README.md
@@ -734,6 +734,18 @@ RSSHub 支持使用访问密钥 / 码，允许清单和拒绝清单三种方式
 
 `TITLE_LENGTH_LIMIT`: 限制输出标题的字节长度，一个英文字符的长度为 1 字节，部分语言如中文，日文，韩文或阿拉伯文等，统一算作 2 字节，默认 `150`
 
+`OPENAI_API_KEY`: OpenAI API Key，用于使用 ChatGPT 总结文章
+
+`OPENAI_MODEL`: OpenAI 模型名称，用于使用 ChatGPT 总结文章，默认`gpt-3.5-turbo-16k`，详见 [OpenAI API 文档](https://platform.openai.com/docs/models/overview)
+
+`OPENAI_TEMPERATURE`: OpenAI 温度参数，用于使用 ChatGPT 总结文章，默认`0.2`，详见 [OpenAI API 文档](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature)
+
+`OPENAI_MAX_TOKENS`: OpenAI 最大 token 数，用于使用 ChatGPT 总结文章，默认`null`，详见 [OpenAI API 文档](https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens)
+
+`OPENAI_API_ENDPOINT`: OpenAI API 地址，用于使用 ChatGPT 总结文章，默认`https://api.openai.com/v1`，详见 [OpenAI API 文档](https://platform.openai.com/docs/api-reference/chat)
+
+`OPENAI_PROMPT`: OpenAI 提示语，用于使用 ChatGPT 总结文章，详见 [OpenAI API 文档](https://platform.openai.com/docs/api-reference/chat)
+
 ### 部分 RSS 模块配置
 
 :::tip

diff --git a/website/i18n/zh/docusaurus-plugin-content-docs/current/parameter.md b/website/i18n/zh/docusaurus-plugin-content-docs/current/parameter.md
@@ -193,3 +193,17 @@ RSSHub 同时支持 RSS 2.0、Atom 和 JSON Feed 输出格式，在路由末尾
 举例：
 
 -   输出 100 字简讯: `?brief=100`
+
+## 输出 GPT 总结的内容（仅自建）
+
+可以使用 `chatgpt` 参数输出 GPT 总结的内容，详细配置请见[部署](/zh/install#other-application-configurations)。请考虑是否有必要开启，因为这会消耗一些 tokens
+
+-   `chatgpt`: 任意值开启
+
+要求：
+
+-   已设置 `OPENAI_API_KEY` 环境变量
+
+举例：
+
+-   `/meituan/tech/home?chatgpt=true`