Skip to content

Commit

Permalink
feat(core): support proxy config for puppeteer (DIYgod#10714)
Browse files Browse the repository at this point in the history
* feat: support proxy config for puppeteer

* test: add puppeteer proxy detection

* fix: package.json

* test: fix regex
  • Loading branch information
DIYgod authored Sep 6, 2022
1 parent 57b4669 commit 9329a44
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 4 deletions.
2 changes: 1 addition & 1 deletion docs/en/install/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,7 @@ Configs in this section are in beta stage and are turned off by default. Please

`NODE_NAME`: node name, used for load balancing, identify the current node

`PUPPETEER_WS_ENDPOINT`: browser WebSocket endpoint which can be used as an argument to puppeteer.connect, refer to [browserWSEndpoint](https://pptr.dev/#?product=Puppeteer&show=api-browserwsendpoint)
`PUPPETEER_WS_ENDPOINT`: browser WebSocket endpoint which can be used as an argument to puppeteer.connect, refer to [browserWSEndpoint](https://pptr.dev/api/puppeteer.browser.wsendpoint)

`CHROMIUM_EXECUTABLE_PATH`: path to the Chromium (or Chrome) executable. If puppeteer is not bundled with Chromium (manually skipped downloading or system architecture is arm/arm64), configuring this can effectively enable puppeteer. Or alternatively, if you prefer Chrome to Chromium, this configuration will help. **WARNING**: only effective when `PUPPETEER_WS_ENDPOINT` is not set; only useful for manual deployment, for Docker, please use the `chromium-bundled` image instead.

Expand Down
11 changes: 11 additions & 0 deletions lib/utils/puppeteer.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
const config = require('@/config').value;
let puppeteer = require('puppeteer');
const proxyChain = require('proxy-chain');

// Base browser options, built once when this module is loaded.
// The exported function appends a `--proxy-server=...` entry to `args` when a proxy URI is configured.
const options = {
// Chromium command-line flags; the user agent string is taken from config.ua.
args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-infobars', '--window-position=0,0', '--ignore-certificate-errors', '--ignore-certificate-errors-spki-list', `--user-agent=${config.ua}`],
headless: true,
// NOTE(review): together with the --ignore-certificate-errors flags above, this presumably lets pages with invalid TLS certificates load — confirm against the puppeteer docs.
ignoreHTTPSErrors: true,
};

// Decide which upstream proxy, if any, the browser should be pointed at.
// A non-empty string in config.proxyUri wins; otherwise the URI is assembled from
// config.proxy, but only when protocol, host and port are all set.
// If neither source applies, proxyUri is left undefined.
let proxyUri;
if (typeof config.proxyUri === 'string' && config.proxyUri !== '') {
    proxyUri = config.proxyUri;
} else {
    const { protocol, host, port } = config.proxy || {};
    if (protocol && host && port) {
        proxyUri = `${protocol}://${host}:${port}`;
    }
}

/**
* @param {Object} extraOptions
* @param {boolean} extraOptions.stealth - Use puppeteer-extra-plugin-stealth
Expand Down Expand Up @@ -41,6 +49,9 @@ module.exports = async (extraOptions = {}) => {
puppeteer.use(require('puppeteer-extra-plugin-stealth')());
}
let browser;
if (proxyUri) {
options.args.push(`--proxy-server=${await proxyChain.anonymizeProxy(proxyUri)}`);
}
if (config.puppeteerWSEndpoint) {
browser = await puppeteer.connect({
browserWSEndpoint: config.puppeteerWSEndpoint,
Expand Down
10 changes: 8 additions & 2 deletions lib/utils/request-wrapper.js
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,17 @@ const requestWrapper = (url, options) => {
if (agent && new RegExp(config.proxy.url_regex).test(url)) {
let agentResult;
try {
agentResult = agent[(options.protocol || url.match(/(https?:)/)[1]).slice(0, -1)];
agentResult = agent[(options.protocol || url.match(/(^https?:)/)[1]).slice(0, -1)];
} catch (error) {
agentResult = null;
}
options.agent = agentResult;
try {
if (new URL(url).host !== new URL(config.proxyUri).host) {
options.agent = agentResult;
}
} catch (error) {
options.agent = agentResult;
}

if (config.proxy.auth) {
if (!options.headers) {
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@
"parse-torrent": "9.1.5",
"pidusage": "3.0.0",
"plist": "3.0.6",
"proxy-chain": "2.0.7",
"puppeteer": "17.1.1",
"puppeteer-extra": "3.3.4",
"puppeteer-extra-plugin-stealth": "2.11.1",
Expand Down
26 changes: 26 additions & 0 deletions test/utils/puppeteer.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ afterEach(() => {
browser.close();
browser = null;
}
delete process.env.PROXY_URI;
delete process.env.PROXY_PROTOCOL;
delete process.env.PROXY_HOST;
delete process.env.PROXY_PORT;
delete process.env.PROXY_AUTH;

jest.resetModules();
});

Expand Down Expand Up @@ -73,4 +79,24 @@ describe('puppeteer', () => {
expect(webDriverTest).toBe('missing (passed)');
expect(chromeTest).toBe('present (passed)');
}, 10000);

// Checks that a proxy given as a single URI results in the spawned browser
// process carrying a `--proxy-server=http://...` argument.
// NOTE(review): assumes the config module maps PROXY_URI to config.proxyUri — confirm in lib/config.
it('puppeteer accept proxy uri', async () => {
    // The env var must be set before the module is required so that it is seen on load.
    process.env.PROXY_URI = 'http://user:pass@rsshub.proxy:2333';

    puppeteer = require('../../lib/utils/puppeteer');
    browser = await puppeteer();

    expect(browser.process().spawnargs.some((arg) => /^--proxy-server=http:\/\/.*$/.test(arg))).toBe(true);
    // Explicit 10 s timeout, matching the other browser-launching tests in this file,
    // so a slow browser start does not trip Jest's shorter default limit.
}, 10000);

// Checks that a proxy described by separate protocol/host/port settings results in
// the spawned browser process carrying a `--proxy-server=http://...` argument.
it('puppeteer accept proxy', async () => {
    // Set the env vars before requiring the module so that they are seen on load.
    Object.assign(process.env, {
        PROXY_PROTOCOL: 'http',
        PROXY_HOST: 'rsshub.proxy',
        PROXY_PORT: '2333',
    });

    puppeteer = require('../../lib/utils/puppeteer');
    browser = await puppeteer();

    const proxyFlagPattern = /^--proxy-server=http:\/\/.*$/;
    const { spawnargs } = browser.process();
    const hasProxyFlag = spawnargs.some((flag) => proxyFlagPattern.test(flag));

    expect(hasProxyFlag).toBe(true);
}, 10000);
});
9 changes: 8 additions & 1 deletion yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -11475,6 +11475,13 @@ proxy-agent@^4.0.0:
proxy-from-env "^1.0.0"
socks-proxy-agent "^5.0.0"

proxy-chain@2.0.7:
version "2.0.7"
resolved "https://registry.yarnpkg.com/proxy-chain/-/proxy-chain-2.0.7.tgz#ce368e7783ddd204231b284874323af32270cbe7"
integrity sha512-JYoszbLmEWkjltnzpe7XImWqTLBs1mIwYtObznqm3DF8z1P33iLvZKHrv2Guh+FwRcmjxOiJUzjrjpOMGvEk2Q==
dependencies:
tslib "^2.3.1"

proxy-from-env@1.1.0, proxy-from-env@^1.0.0:
version "1.1.0"
resolved "https://registry.yarnpkg.com/proxy-from-env/-/proxy-from-env-1.1.0.tgz#e102f16ca355424865755d2c9e8ea4f24d58c3e2"
Expand Down Expand Up @@ -13610,7 +13617,7 @@ tslib@^1.9.0, tslib@^1.9.3:
resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.14.1.tgz#cf2d38bdc34a134bcaf1091c41f6619e2f672d00"
integrity sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==

tslib@^2.0.1:
tslib@^2.0.1, tslib@^2.3.1:
version "2.4.0"
resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.4.0.tgz#7cecaa7f073ce680a05847aa77be941098f36dc3"
integrity sha512-d6xOpEDfsi2CZVlPQzGeux8XMwLT9hssAsaPYExaQMuYskwb+x1x7J371tWlbBdWHroy99KnVB6qIkUbs5X3UQ==
Expand Down

0 comments on commit 9329a44

Please sign in to comment.