From 9329a44c80a37b9aec18a233c9e3bdb3f1c7621d Mon Sep 17 00:00:00 2001 From: DIYgod Date: Tue, 6 Sep 2022 17:07:04 +0100 Subject: [PATCH] feat(core): support proxy config for puppeteer (#10714) * feat: support proxy config for puppeteer * test: add puppeteer proxy detection * fix: package.json * test: fix regex --- docs/en/install/README.md | 2 +- lib/utils/puppeteer.js | 11 +++++++++++ lib/utils/request-wrapper.js | 10 ++++++++-- package.json | 1 + test/utils/puppeteer.js | 26 ++++++++++++++++++++++++++ yarn.lock | 9 ++++++++- 6 files changed, 55 insertions(+), 4 deletions(-) diff --git a/docs/en/install/README.md b/docs/en/install/README.md index 07b6c6405d3b06..ae38b3be96a38e 100644 --- a/docs/en/install/README.md +++ b/docs/en/install/README.md @@ -574,7 +574,7 @@ Configs in this sections are in beta stage, and are turn off by default. Please `NODE_NAME`: node name, used for load balancing, identify the current node -`PUPPETEER_WS_ENDPOINT`: browser WebSocket endpoint which can be used as an argument to puppeteer.connect, refer to [browserWSEndpoint](https://pptr.dev/#?product=Puppeteer&show=api-browserwsendpoint) +`PUPPETEER_WS_ENDPOINT`: browser WebSocket endpoint which can be used as an argument to puppeteer.connect, refer to [browserWSEndpoint](https://pptr.dev/api/puppeteer.browser.wsendpoint) `CHROMIUM_EXECUTABLE_PATH`: path to the Chromium (or Chrome) executable. If puppeteer is not bundled with Chromium (manually skipped downloading or system architecture is arm/arm64), configuring this can effectively enable puppeteer. Or alternatively, if you prefer Chrome to Chromium, this configuration will help. **WARNING**: only effective when `PUPPETEER_WS_ENDPOINT` is not set; only useful for manual deployment, for Docker, please use the `chromium-bundled` image instead. 
diff --git a/lib/utils/puppeteer.js b/lib/utils/puppeteer.js index f80184c0dc1f67..1f45868665ec4b 100644 --- a/lib/utils/puppeteer.js +++ b/lib/utils/puppeteer.js @@ -1,5 +1,6 @@ const config = require('@/config').value; let puppeteer = require('puppeteer'); +const proxyChain = require('proxy-chain'); const options = { args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-infobars', '--window-position=0,0', '--ignore-certificate-errors', '--ignore-certificate-errors-spki-list', `--user-agent=${config.ua}`], @@ -7,6 +8,13 @@ const options = { ignoreHTTPSErrors: true, }; +let proxyUri; +if (config.proxyUri && typeof config.proxyUri === 'string') { + proxyUri = config.proxyUri; +} else if (config.proxy && config.proxy.protocol && config.proxy.host && config.proxy.port) { + proxyUri = `${config.proxy.protocol}://${config.proxy.host}:${config.proxy.port}`; +} + /** * @param {Object} extraOptions * @param {boolean} extraOptions.stealth - Use puppeteer-extra-plugin-stealth @@ -41,6 +49,9 @@ module.exports = async (extraOptions = {}) => { puppeteer.use(require('puppeteer-extra-plugin-stealth')()); } let browser; + if (proxyUri) { + options.args.push(`--proxy-server=${await proxyChain.anonymizeProxy(proxyUri)}`); + } if (config.puppeteerWSEndpoint) { browser = await puppeteer.connect({ browserWSEndpoint: config.puppeteerWSEndpoint, diff --git a/lib/utils/request-wrapper.js b/lib/utils/request-wrapper.js index 0e06e7a4b8e671..0c56de464cf035 100644 --- a/lib/utils/request-wrapper.js +++ b/lib/utils/request-wrapper.js @@ -73,11 +73,17 @@ const requestWrapper = (url, options) => { if (agent && new RegExp(config.proxy.url_regex).test(url)) { let agentResult; try { - agentResult = agent[(options.protocol || url.match(/(https?:)/)[1]).slice(0, -1)]; + agentResult = agent[(options.protocol || url.match(/(^https?:)/)[1]).slice(0, -1)]; } catch (error) { agentResult = null; } - options.agent = agentResult; + try { + if (new URL(url).host !== new URL(config.proxyUri).host) { + 
options.agent = agentResult; + } + } catch (error) { + options.agent = agentResult; + } if (config.proxy.auth) { if (!options.headers) { diff --git a/package.json b/package.json index 8d725bc71e5f49..561956ae2a28e4 100644 --- a/package.json +++ b/package.json @@ -121,6 +121,7 @@ "parse-torrent": "9.1.5", "pidusage": "3.0.0", "plist": "3.0.6", + "proxy-chain": "2.0.7", "puppeteer": "17.1.1", "puppeteer-extra": "3.3.4", "puppeteer-extra-plugin-stealth": "2.11.1", diff --git a/test/utils/puppeteer.js b/test/utils/puppeteer.js index 57882b5a51f01c..ecdc8f83363f3c 100644 --- a/test/utils/puppeteer.js +++ b/test/utils/puppeteer.js @@ -12,6 +12,12 @@ afterEach(() => { browser.close(); browser = null; } + delete process.env.PROXY_URI; + delete process.env.PROXY_PROTOCOL; + delete process.env.PROXY_HOST; + delete process.env.PROXY_PORT; + delete process.env.PROXY_AUTH; + jest.resetModules(); }); @@ -73,4 +79,24 @@ describe('puppeteer', () => { expect(webDriverTest).toBe('missing (passed)'); expect(chromeTest).toBe('present (passed)'); }, 10000); + + it('puppeteer accept proxy uri', async () => { + process.env.PROXY_URI = 'http://user:pass@rsshub.proxy:2333'; + + puppeteer = require('../../lib/utils/puppeteer'); + browser = await puppeteer(); + + expect(browser.process().spawnargs.some((arg) => /^--proxy-server=http:\/\/.*$/.test(arg))).toBe(true); + }); + + it('puppeteer accept proxy', async () => { + process.env.PROXY_PROTOCOL = 'http'; + process.env.PROXY_HOST = 'rsshub.proxy'; + process.env.PROXY_PORT = '2333'; + + puppeteer = require('../../lib/utils/puppeteer'); + browser = await puppeteer(); + + expect(browser.process().spawnargs.some((arg) => /^--proxy-server=http:\/\/.*$/.test(arg))).toBe(true); + }, 10000); }); diff --git a/yarn.lock b/yarn.lock index ddebad79074d6a..83af22771bc8c9 100644 --- a/yarn.lock +++ b/yarn.lock @@ -11475,6 +11475,13 @@ proxy-agent@^4.0.0: proxy-from-env "^1.0.0" socks-proxy-agent "^5.0.0" +proxy-chain@2.0.7: + version "2.0.7" + resolved 
"https://registry.yarnpkg.com/proxy-chain/-/proxy-chain-2.0.7.tgz#ce368e7783ddd204231b284874323af32270cbe7" + integrity sha512-JYoszbLmEWkjltnzpe7XImWqTLBs1mIwYtObznqm3DF8z1P33iLvZKHrv2Guh+FwRcmjxOiJUzjrjpOMGvEk2Q== + dependencies: + tslib "^2.3.1" + proxy-from-env@1.1.0, proxy-from-env@^1.0.0: version "1.1.0" resolved "https://registry.yarnpkg.com/proxy-from-env/-/proxy-from-env-1.1.0.tgz#e102f16ca355424865755d2c9e8ea4f24d58c3e2" @@ -13610,7 +13617,7 @@ tslib@^1.9.0, tslib@^1.9.3: resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.14.1.tgz#cf2d38bdc34a134bcaf1091c41f6619e2f672d00" integrity sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg== -tslib@^2.0.1: +tslib@^2.0.1, tslib@^2.3.1: version "2.4.0" resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.4.0.tgz#7cecaa7f073ce680a05847aa77be941098f36dc3" integrity sha512-d6xOpEDfsi2CZVlPQzGeux8XMwLT9hssAsaPYExaQMuYskwb+x1x7J371tWlbBdWHroy99KnVB6qIkUbs5X3UQ==