From 7a6be9a229b822d0bb407424c82c6ffc6005b3ed Mon Sep 17 00:00:00 2001 From: Rongrong Date: Sun, 1 May 2022 21:00:29 +0800 Subject: [PATCH] feat(core): customizable Chromium executable path (#9670) * feat(core): customizable Chromium executable path also build Chromium-bundled Docker image for arm/arm64 Signed-off-by: Rongrong * chore: fix typo Signed-off-by: Rongrong * chore(CI/test): using build matrix Signed-off-by: Rongrong * docs(install): fix punctuation Signed-off-by: Rongrong --- .github/workflows/docker-release.yml | 4 +- .github/workflows/test.yml | 55 +++++++++++++++++++++++++--- Dockerfile | 31 ++++++++++++---- docker-compose.yml | 4 +- docs/en/install/README.md | 36 +++++++++++++++--- docs/install/README.md | 42 +++++++++++++++++---- lib/config.js | 1 + lib/utils/puppeteer.js | 9 ++++- 8 files changed, 151 insertions(+), 31 deletions(-) diff --git a/.github/workflows/docker-release.yml b/.github/workflows/docker-release.yml index cbcd02ec7c128a..e4dfb13907e504 100644 --- a/.github/workflows/docker-release.yml +++ b/.github/workflows/docker-release.yml @@ -89,10 +89,10 @@ jobs: push: true tags: ${{ steps.meta-chromium-bundled.outputs.tags }} labels: ${{ steps.meta-chromium-bundled.outputs.labels }} - platforms: linux/amd64 # bundled Chromium is only available on amd64 + platforms: linux/amd64,linux/arm/v7,linux/arm64 cache-from: | - type=gha,scope=docker-release type=registry,ref=${{ secrets.DOCKER_USERNAME }}/rsshub:chromium-bundled + # type=gha,scope=docker-release # not needed, Docker automatically uses local cache from the builder # type=registry,ref=${{ secrets.DOCKER_USERNAME }}/rsshub:buildcache cache-to: type=inline,ref=${{ secrets.DOCKER_USERNAME }}/rsshub:chromium-bundled # inline cache is enough diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 46ecbc6d51d6f1..52a696c32a6476 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,6 +1,6 @@ name: test -on: [push, pull_request] +on: [ push, 
pull_request ] permissions: contents: read @@ -16,7 +16,7 @@ jobs: options: --entrypoint redis-server strategy: matrix: - node-version: ['14', '16'] + node-version: [ '14', '16' ] name: Jest on Node ${{ matrix.node-version }} steps: - uses: actions/checkout@v3 @@ -24,8 +24,10 @@ jobs: with: node-version: ${{ matrix.node-version }} cache: 'yarn' - - run: yarn - - run: npm run jest:coverage + - name: Install dependencies (yarn) + run: yarn + - name: Test all and generate coverage + run: npm run jest:coverage env: REDIS_URL: redis://localhost:${{ job.services.redis.ports[6379] }}/ - name: Upload coverage to Codecov @@ -34,11 +36,52 @@ jobs: with: token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos as documented, but seems broken + puppeteer: + runs-on: ubuntu-latest + strategy: + matrix: + node-version: [ '14', '16' ] + chromium: + - name: bundled Chromium + dependency: '' + environment: '{}' + - name: Chromium from Ubuntu + dependency: chromium-browser + environment: '{ "CHROMIUM_EXECUTABLE_PATH": "chromium-browser" }' + - name: Chrome from Google + dependency: google-chrome-stable + environment: '{ "CHROMIUM_EXECUTABLE_PATH": "google-chrome-stable" }' + name: Jest puppeteer on Node ${{ matrix.node-version }} with ${{ matrix.chromium.name }} + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-node@v3 + with: + node-version: ${{ matrix.node-version }} + cache: 'yarn' + - name: Install dependencies (yarn) + run: yarn + - name: Install Chromium + if: ${{ matrix.chromium.dependency != '' }} + # Chromium from Ubuntu is too old (85), but can still pass the tests + # That's not really a problem since Chromium-bundled Docker image is based on Debian bullseye, + # which updates Chromium frequently, and only on arm/arm64 the image needs Chromium from Debian. 
+ run: | + set -ex + curl -s "https://dl.google.com/linux/linux_signing_key.pub" | gpg --dearmor | + sudo tee /etc/apt/trusted.gpg.d/google-chrome.gpg > /dev/null + echo "deb [arch=amd64] https://dl.google.com/linux/chrome/deb/ stable main" | + sudo tee /etc/apt/sources.list.d/google-chrome.list > /dev/null + sudo apt-get update + sudo apt-get install -yq --no-install-recommends ${{ matrix.chromium.dependency }} + - name: Test puppeteer + run: npm run jest puppeteer + env: ${{ fromJSON(matrix.chromium.environment) }} + docs: runs-on: ubuntu-latest strategy: matrix: - node-version: ['14', '16'] + node-version: [ '14', '16' ] name: Build docs on Node ${{ matrix.node-version }} steps: - uses: actions/checkout@v3 @@ -54,7 +97,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - node-version: ['14', '16'] + node-version: [ '14', '16' ] name: Build radar and maintainer on Node ${{ matrix.node-version }} steps: - uses: actions/checkout@v3 diff --git a/Dockerfile b/Dockerfile index b742a950f882e1..95968997b6a3b4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -92,12 +92,14 @@ FROM node:16-bullseye-slim as chromium-downloader WORKDIR /app COPY --from=dep-version-parser /ver/.puppeteer_version /app/.puppeteer_version +ARG TARGETPLATFORM ARG USE_CHINA_NPM_REGISTRY=0 ARG PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=1 +# The official recommended way to use Puppeteer on x86(_64) is to use the bundled Chromium from Puppeteer: # https://github.com/puppeteer/puppeteer#q-why-doesnt-puppeteer-vxxx-work-with-chromium-vyyy RUN \ set -ex ; \ - if [ "$PUPPETEER_SKIP_CHROMIUM_DOWNLOAD" = 0 ]; then \ + if [ "$PUPPETEER_SKIP_CHROMIUM_DOWNLOAD" = 0 ] && [ "$TARGETPLATFORM" = 'linux/amd64' ]; then \ if [ "$USE_CHINA_NPM_REGISTRY" = 1 ]; then \ npm config set registry https://registry.npmmirror.com && \ yarn config set registry https://registry.npmmirror.com ; \ @@ -122,10 +124,13 @@ ENV TZ Asia/Shanghai WORKDIR /app # install deps first to avoid cache miss or disturbing buildkit to build concurrently +ARG 
TARGETPLATFORM ARG PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=1 # https://github.com/puppeteer/puppeteer/blob/main/docs/troubleshooting.md#chrome-headless-doesnt-launch-on-unix # https://github.com/puppeteer/puppeteer/issues/7822 # https://www.debian.org/releases/bullseye/amd64/release-notes/ch-information.en.html#noteworthy-obsolete-packages +# The official recommended way to use Puppeteer on arm/arm64 is to install Chromium from the distribution repositories: +# https://github.com/puppeteer/puppeteer/blob/94cb08c85955c0688d12b6ed10e61a4581a01280/src/node/BrowserFetcher.ts#L116-L119 RUN \ set -ex && \ apt-get update && \ @@ -133,12 +138,19 @@ RUN \ dumb-init \ ; \ if [ "$PUPPETEER_SKIP_CHROMIUM_DOWNLOAD" = 0 ]; then \ - apt-get install -yq --no-install-recommends \ - ca-certificates fonts-liberation wget xdg-utils \ - libasound2 libatk-bridge2.0-0 libatk1.0-0 libatspi2.0-0 libcairo2 libcups2 libdbus-1-3 libdrm2 libexpat1 \ - libgbm1 libglib2.0-0 libnspr4 libnss3 libpango-1.0-0 libx11-6 libxcb1 libxcomposite1 libxdamage1 libxext6 \ - libxfixes3 libxkbcommon0 libxrandr2 \ - ; \ + if [ "$TARGETPLATFORM" = 'linux/amd64' ]; then \ + apt-get install -yq --no-install-recommends \ + ca-certificates fonts-liberation wget xdg-utils \ + libasound2 libatk-bridge2.0-0 libatk1.0-0 libatspi2.0-0 libcairo2 libcups2 libdbus-1-3 libdrm2 \ + libexpat1 libgbm1 libglib2.0-0 libnspr4 libnss3 libpango-1.0-0 libx11-6 libxcb1 libxcomposite1 \ + libxdamage1 libxext6 libxfixes3 libxkbcommon0 libxrandr2 \ + ; \ + else \ + apt-get install -yq --no-install-recommends \ + chromium \ + && \ + echo 'CHROMIUM_EXECUTABLE_PATH=chromium' | tee /app/.env ; \ + fi; \ fi; \ rm -rf /var/lib/apt/lists/* @@ -147,7 +159,7 @@ COPY --from=chromium-downloader /app/node_modules/puppeteer /app/node_modules/pu # if grep matches nothing then it will exit with 1, thus, we cannot `set -e` here RUN \ set -x && \ - if [ "$PUPPETEER_SKIP_CHROMIUM_DOWNLOAD" = 0 ]; then \ + if [ "$PUPPETEER_SKIP_CHROMIUM_DOWNLOAD" = 0 ] && [ 
"$TARGETPLATFORM" = 'linux/amd64' ]; then \ echo 'Verifying Chromium installation...' && \ ldd $(find /app/node_modules/puppeteer/ -name chrome) | grep "not found" ; \ if [ "$?" = 0 ]; then \ @@ -170,6 +182,7 @@ CMD ["npm", "run", "start"] # In case Chromium has unmet shared libs, here is some magic to find and install the packages they belong to: # In most case you can just stop at `grep ^lib` and add those packages to the above stage. # +# set -ex && \ # apt-get update && \ # apt install -yq --no-install-recommends \ # apt-file \ @@ -183,3 +196,5 @@ CMD ["npm", "run", "start"] # apt purge -yq --auto-remove \ # apt-file \ # rm -rf /tmp/.chromium_path /var/lib/apt/lists/* + +# !!! If you manually build Docker image but with buildx/BuildKit disabled, set TARGETPLATFORM yourself !!! diff --git a/docker-compose.yml b/docker-compose.yml index 8e475dfcf4074d..41fe95a71e0409 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,8 +3,8 @@ version: '3' services: rsshub: # two ways to enable puppeteer: - # * (only on amd64/x86_64) comment out marked lines, then use this image instead: diygod/rsshub:chromium-bundled - # * (on all supported architectures, but consumes more disk space and memory) leave anything unchanged + # * comment out marked lines, then use this image instead: diygod/rsshub:chromium-bundled + # * (consumes more disk space and memory) leave everything unchanged image: diygod/rsshub restart: always ports: diff --git a/docs/en/install/README.md b/docs/en/install/README.md index 3c75b9e08285cf..d70b3f79c8c8ad 100644 --- a/docs/en/install/README.md +++ b/docs/en/install/README.md @@ -33,9 +33,9 @@ $ docker pull diygod/rsshub:2021-06-18 You can back to the latest version when the code has been fixed and rebuild the image. -If you need to enable puppeteer on x86_64 (amd64), `diygod/rsshub:chromium-bundled` is a good choice. If date specified, it will become: `diygod/rsshub:chromium-bundled-2021-06-18`. 
+To enable puppeteer, `diygod/rsshub:chromium-bundled` is a good choice. If date specified, it will become: `diygod/rsshub:chromium-bundled-2021-06-18`. -On all supported architectures, to enable puppeteer, using Docker Compose is another good choice. However, it consumes more disk space and memory. By modifiying the `docker-compose.yml` file, you can use `diygod/rsshub:chromium-bundled` instead to reduce the disk space and memory consumption. +Another approach to enable puppeteer is deploying with Docker Compose. However, it consumes more disk space and memory. By modifying `docker-compose.yml`, you can use `diygod/rsshub:chromium-bundled` instead to reduce the disk space and memory consumption. ## Docker Compose Deployment @@ -87,7 +87,11 @@ Edit `environment` in [docker-compose.yml](https://github.com/DIYgod/RSSHub/blob ## Docker Deployment -> **To enable puppeteer, replace `diygod/rsshub` with `diygod/rsshub:chromium-bundled` in EACH command (only on x86_64)** +::: tip Tip + +To enable puppeteer, replace `diygod/rsshub` with `diygod/rsshub:chromium-bundled` in **EACH** command. + +::: ### Install @@ -132,7 +136,7 @@ For example, adding `-e CACHE_EXPIRE=3600` will set the cache time to 1 hour. $ docker run -d --name rsshub -p 1200:1200 -e CACHE_EXPIRE=3600 -e GITHUB_ACCESS_TOKEN=example diygod/rsshub ``` -This deployment method does not include puppeteer (unless using `diygod/rsshub:chromium-bundled` instead on x86_64) and Redis dependencies. Use the Docker Compose deployment method or deploy external dependencies yourself if you need it. +This deployment method does not include puppeteer (unless using `diygod/rsshub:chromium-bundled` instead) and Redis dependencies. Use the Docker Compose deployment method or deploy external dependencies yourself if you need it. To configure more options please refer to [Configuration](#configuration). @@ -218,6 +222,26 @@ Refer to our [Guide](https://docs.rsshub.app/en/) for usage. 
Replace `https://rs ### Configuration +::: tip Tip + +On arm/arm64, this deployment method does not include puppeteer dependencies. To enable puppeteer, install Chromium from your distribution repositories first, then set `CHROMIUM_EXECUTABLE_PATH` to its executable path. + +Debian: +```bash +$ apt install chromium +$ echo >> .env +$ echo 'CHROMIUM_EXECUTABLE_PATH=chromium' >> .env +``` + +Ubuntu/Raspbian: +```bash +$ apt install chromium-browser +$ echo >> .env +$ echo 'CHROMIUM_EXECUTABLE_PATH=chromium-browser' >> .env +``` + +::: + RSSHub can be configured by setting environment variables. Create a `.env` file in the root directory of your project. Add environment-specific variables on new lines in the form of `NAME=VALUE`. For example: @@ -229,7 +253,7 @@ CACHE_EXPIRE=600 Please notice that it will not override already existed environment variables, more rules please refer to [dotenv](https://github.com/motdotla/dotenv) -This deployment method does not include puppeteer and Redis dependencies. Use the Docker Compose deployment method or deploy external dependencies yourself if you need it. +This deployment method does not include Redis dependencies. Use the Docker Compose deployment method or deploy external dependencies yourself if you need it. To configure more options please refer to [Configuration](#configuration). @@ -511,6 +535,8 @@ See the relation between access key/code and white/blacklisting. `PUPPETEER_WS_ENDPOINT`: browser WebSocket endpoint which can be used as an argument to puppeteer.connect, refer to [browserWSEndpoint](https://pptr.dev/#?product=Puppeteer&show=api-browserwsendpoint) +`CHROMIUM_EXECUTABLE_PATH`: path to the Chromium (or Chrome) executable. If puppeteer is not bundled with Chromium (manually skipped downloading or system architecture is arm/arm64), configuring this can effectively enable puppeteer. Or alternatively, if you prefer Chrome to Chromium, this configuration will help. 
**WARNING**: only effective when `PUPPETEER_WS_ENDPOINT` is not set; only useful for manual deployment, for Docker, please use the `chromium-bundled` image instead. + `TITLE_LENGTH_LIMIT`: limit the length of feed title generated in bytes, an English alphabet counts as 1 byte, the rest such as Chinese, Japanese, Korean or Arabic counts as 2 bytes by design, default to `150` ### Route-specific Configurations diff --git a/docs/install/README.md b/docs/install/README.md index f0c0ff6663735a..4d21343af5275b 100644 --- a/docs/install/README.md +++ b/docs/install/README.md @@ -33,9 +33,9 @@ $ docker pull diygod/rsshub:2021-06-18 待最新镜像更新后再切换回 `diygod/rsshub:latest` 最新版镜像。 -如需在 x86\_64 (amd64) 架构上部署启用 puppeteer 的版本,可使用 `diygod/rsshub:chromium-bundled`,若指定日期则为 `diygod/rsshub:chromium-bundled-2021-06-18`。 +如需启用 puppeteer,可使用 `diygod/rsshub:chromium-bundled`;若指定日期则为 `diygod/rsshub:chromium-bundled-2021-06-18`。 -在所有受支持的架构上,均可使用 Docker Compose 部署以启用 puppeteer,但更消耗磁盘空间和内存。通过修改 `docker-compose.yml` 文件,也可以使用 `diygod/rsshub:chromium-bundled`,这样就没有更消耗资源的问题了。 +亦可使用 Docker Compose 部署以启用 puppeteer,但更消耗磁盘空间和内存。通过修改 `docker-compose.yml`,也可以使用 `diygod/rsshub:chromium-bundled`,这样就没有更消耗资源的问题了。 ## Docker Compose 部署 @@ -87,7 +87,11 @@ $ docker pull diygod/rsshub ## Docker 部署 -> **如需启用 puppeteer,请在每条命令中均将 `diygod/rsshub` 替换为 `diygod/rsshub:chromium-bundled` (仅限 x86\_64 架构)** +::: tip 提示 + +如需启用 puppeteer,请在**每条**命令中均将 `diygod/rsshub` 替换为 `diygod/rsshub:chromium-bundled`。 + +::: ### 安装 @@ -132,7 +136,7 @@ $ docker rm rsshub $ docker run -d --name rsshub -p 1200:1200 -e CACHE_EXPIRE=3600 -e GITHUB_ACCESS_TOKEN=example diygod/rsshub ``` -该部署方式不包括 puppeteer (除非在 x86\_64 架构上改用 `diygod/rsshub:chromium-bundled`) 和 redis 依赖,如有需要请改用 Docker Compose 部署方式或自行部署外部依赖 +该部署方式不包括 puppeteer(除非改用 `diygod/rsshub:chromium-bundled`)和 redis 依赖,如有需要请改用 Docker Compose 部署方式或自行部署外部依赖 更多配置项请看 [#配置](#pei-zhi) @@ -220,18 +224,40 @@ $ pm2 start lib/index.js --name rsshub ### 添加配置 +::: tip 提示 + +在 arm/arm64 上,此部署方式不包含 
puppeteer 依赖。要启用 puppeteer,你需要先从发行版安装 Chromium,然后设置 `CHROMIUM_EXECUTABLE_PATH` 为其可执行路径。 + +Debian: + +```bash +$ apt install chromium +$ echo >> .env +$ echo 'CHROMIUM_EXECUTABLE_PATH=chromium' >> .env +``` + +Ubuntu/Raspbian: + +```bash +$ apt install chromium-browser +$ echo >> .env +$ echo 'CHROMIUM_EXECUTABLE_PATH=chromium-browser' >> .env +``` + +::: + 可以通过设置环境变量来配置 RSSHub 在项目根目录新建一个 `.env` 文件,每行以 `NAME=VALUE` 格式添加环境变量,例如 ```env - CACHE_TYPE=redis - CACHE_EXPIRE=600 +CACHE_TYPE=redis +CACHE_EXPIRE=600 ``` 注意它不会覆盖已有的环境变量,更多规则请参考 [dotenv](https://github.com/motdotla/dotenv) -该部署方式不包括 puppeteer 和 redis 依赖,如有需要请改用 Docker Compose 部署方式或自行部署外部依赖 +该部署方式不包括 redis 依赖,如有需要请改用 Docker Compose 部署方式或自行部署外部依赖 更多配置项请看 [#配置](#pei-zhi) @@ -515,6 +541,8 @@ RSSHub 支持使用访问密钥 / 码,白名单和黑名单三种方式进行 `PUPPETEER_WS_ENDPOINT`: 用于 puppeteer.connect 的浏览器 websocket 链接,见 [browserWSEndpoint](https://zhaoqize.github.io/puppeteer-api-zh_CN/#?product=Puppeteer\&show=api-browserwsendpoint) +`CHROMIUM_EXECUTABLE_PATH`: Chromium(或 Chrome)的可执行路径。若 puppeteer 没有下载捆绑的 Chromium(主动跳过下载或体系架构为 arm/arm64),设置此项可启用 puppeteer。或者,偏好 Chrome 而不是 Chromium 时,此项也很有用。**注意**:`PUPPETEER_WS_ENDPOINT` 被设置时,此项不生效;仅在手动部署时有用,对于 Docker 部署,请改用 `chromium-bundled` 版本镜像。 + `TITLE_LENGTH_LIMIT`: 限制输出标题的字节长度,一个英文字符的长度为 1 字节,部分语言如中文,日文,韩文或阿拉伯文等,统一算作 2 字节,默认 `150` ### 部分 RSS 模块配置 diff --git a/lib/config.js b/lib/config.js index de73c5e73d6943..f61b34df016cb7 100644 --- a/lib/config.js +++ b/lib/config.js @@ -32,6 +32,7 @@ const calculateValue = () => { isPackage: envs.IS_PACKAGE, nodeName: envs.NODE_NAME, puppeteerWSEndpoint: envs.PUPPETEER_WS_ENDPOINT, + chromiumExecutablePath: envs.CHROMIUM_EXECUTABLE_PATH, // network connect: { port: envs.PORT || 1200, // 监听端口 diff --git a/lib/utils/puppeteer.js b/lib/utils/puppeteer.js index 34a55bf434256c..d7cf779f767072 100644 --- a/lib/utils/puppeteer.js +++ b/lib/utils/puppeteer.js @@ -14,7 +14,14 @@ module.exports = async () => { browserWSEndpoint: config.puppeteerWSEndpoint, }); } else { - 
browser = await puppeteer.launch(options); + browser = await puppeteer.launch( + config.chromiumExecutablePath + ? { + executablePath: config.chromiumExecutablePath, + ...options, + } + : options + ); } setTimeout(() => { browser.close();