Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(parameter): use re2js instead #13072

Merged
merged 2 commits into from
Aug 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions lib/middleware/anti-hotlink.js
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ const validateTemplate = (template) => {
module.exports = async (ctx, next) => {
await next();

let image_hotlink_template = undefined;
let multimedia_hotlink_template = undefined;
let image_hotlink_template;
let multimedia_hotlink_template;
const shouldWrapInIframe = ctx.query.wrap_multimedia_in_iframe === '1';

// Read params if enabled
Expand Down
94 changes: 47 additions & 47 deletions lib/middleware/parameter.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ const cheerio = require('cheerio');
const { simplecc } = require('simplecc-wasm');
const got = require('@/utils/got');
const config = require('@/config').value;
const { RE2 } = require('re2-wasm');
const { resolve } = require('path');
const { RE2JS } = require('re2js');

let mercury_parser;

Expand All @@ -28,17 +27,6 @@ module.exports = async (ctx, next) => {
await next();

if (!ctx.state.data && !ctx._matchedRoute) {
// Given that the official demo has a cache TTL of 2h, a "wrong path" page will be cached by Cloudflare for
// 200h (8.33d).
// What makes it worse is that the documentation contains status badges to detect the availability of routes,
// but the documentation is updated more promptly than the official demo, so every example path of every
// new route will probably have a "wrong path" page cached for at least 200h soon after it is accepted. That is
// to say, the example paths of a new route will probably be unavailable on the public demo for the first 200h
// after it is accepted.
// In conclusion, the next 3 lines have been commented out. (exactly the same behavior as any internal error)
// ctx.set({
// 'Cache-Control': `public, max-age=${config.cache.routeExpire * 100}`,
// });
throw Error('wrong path');
}

Expand Down Expand Up @@ -160,41 +148,37 @@ module.exports = async (ctx, next) => {
}

// filter
const engine = config.feature.filter_regex_engine;
const makeRegex = (string) => {
if (!string) {
return null;
}
// default: case_sensitive = true
const engine = config.feature.filter_regex_engine;
if (ctx.query.filter_case_sensitive === 'false') {
switch (engine) {
case 'regexp':
return new RegExp(string, 'i');
case 're2':
return new RE2(string, 'iu');
default:
throw Error(`Invalid Engine Value: ${engine}, please check your config.`);
}
} else {
switch (engine) {
case 'regexp':
return new RegExp(string);
case 're2':
return new RE2(string, 'u');
default:
throw Error(`Invalid Engine Value: ${engine}, please check your config.`);
}
const insensitive = ctx.query.filter_case_sensitive === 'false';
switch (engine) {
case 'regexp':
return new RegExp(string, insensitive ? 'i' : '');
case 're2':
return RE2JS.compile(string, insensitive ? RE2JS.CASE_INSENSITIVE : 0);
default:
throw Error(`Invalid Engine Value: ${engine}, please check your config.`);
}
};

if (ctx.query.filter) {
// workaround for @vercel/nft removing wasm file
resolve('node_modules/re2-wasm/build/wasm/re2.wasm');
const regex = makeRegex(ctx.query.filter);

ctx.state.data.item = ctx.state.data.item.filter((item) => {
const title = item.title || '';
const description = item.description || title;
const author = item.author || '';
const category = item.category ? (Array.isArray(item.category) ? item.category : [item.category]) : [];
const categoryArray = Array.isArray(item.category) ? item.category : [item.category];
const category = item.category ? categoryArray : [];
const isFilter =
title.match(makeRegex(ctx.query.filter)) || description.match(makeRegex(ctx.query.filter)) || author.match(makeRegex(ctx.query.filter)) || category.some((c) => c.match(makeRegex(ctx.query.filter)));
engine === 're2'
? regex.matcher(title).find() || regex.matcher(description).find() || regex.matcher(author).find() || category.some((c) => regex.matcher(c).find())
: title.match(regex) || description.match(regex) || author.match(regex) || category.some((c) => c.match(regex));

return isFilter;
});
}
Expand All @@ -205,12 +189,20 @@ module.exports = async (ctx, next) => {
const title = item.title || '';
const description = item.description || title;
const author = item.author || '';
const category = item.category ? (Array.isArray(item.category) ? item.category : [item.category]) : [];
const categoryArray = Array.isArray(item.category) ? item.category : [item.category];
const category = item.category ? categoryArray : [];
let isFilter = true;
ctx.query.filter_title && (isFilter = title.match(makeRegex(ctx.query.filter_title)));
ctx.query.filter_description && (isFilter = isFilter && description.match(makeRegex(ctx.query.filter_description)));
ctx.query.filter_author && (isFilter = isFilter && author.match(makeRegex(ctx.query.filter_author)));
ctx.query.filter_category && (isFilter = isFilter && category.some((c) => c.match(makeRegex(ctx.query.filter_category))));

const titleRegex = makeRegex(ctx.query.filter_title);
const descriptionRegex = makeRegex(ctx.query.filter_description);
const authorRegex = makeRegex(ctx.query.filter_author);
const categoryRegex = makeRegex(ctx.query.filter_category);

ctx.query.filter_title && (isFilter = engine === 're2' ? titleRegex.matcher(title).find() : title.match(titleRegex));
ctx.query.filter_description && (isFilter = isFilter && (engine === 're2' ? descriptionRegex.matcher(description).find() : description.match(descriptionRegex)));
ctx.query.filter_author && (isFilter = isFilter && (engine === 're2' ? authorRegex.matcher(author).find() : author.match(authorRegex)));
ctx.query.filter_category && (isFilter = isFilter && category.some((c) => (engine === 're2' ? categoryRegex.matcher(c).find() : c.match(categoryRegex))));

return isFilter;
});
}
Expand All @@ -224,12 +216,20 @@ module.exports = async (ctx, next) => {
const title = item.title;
const description = item.description || title;
const author = item.author || '';
const category = item.category ? (Array.isArray(item.category) ? item.category : [item.category]) : [];
const categoryArray = Array.isArray(item.category) ? item.category : [item.category];
const category = item.category ? categoryArray : [];
let isFilter = true;
ctx.query.filterout_title && (isFilter = !title.match(makeRegex(ctx.query.filterout_title)));
ctx.query.filterout_description && (isFilter = isFilter && !description.match(makeRegex(ctx.query.filterout_description)));
ctx.query.filterout_author && (isFilter = isFilter && !author.match(makeRegex(ctx.query.filterout_author)));
ctx.query.filterout_category && (isFilter = isFilter && !category.some((c) => c.match(makeRegex(ctx.query.filterout_category))));

const titleRegex = makeRegex(ctx.query.filterout_title);
const descriptionRegex = makeRegex(ctx.query.filterout_description);
const authorRegex = makeRegex(ctx.query.filterout_author);
const categoryRegex = makeRegex(ctx.query.filterout_category);

ctx.query.filterout_title && (isFilter = engine === 're2' ? !titleRegex.matcher(title).find() : !title.match(titleRegex));
ctx.query.filterout_description && (isFilter = isFilter && (engine === 're2' ? !descriptionRegex.matcher(description).find() : !description.match(descriptionRegex)));
ctx.query.filterout_author && (isFilter = isFilter && (engine === 're2' ? !authorRegex.matcher(author).find() : !author.match(authorRegex)));
ctx.query.filterout_category && (isFilter = isFilter && !category.some((c) => (engine === 're2' ? categoryRegex.matcher(c).find() : c.match(categoryRegex))));

return isFilter;
});
}
Expand Down Expand Up @@ -308,7 +308,7 @@ module.exports = async (ctx, next) => {
if (item.description) {
text = item.description.replace(/<\/?[^>]+(>|$)/g, '');
}
if (text && text.length) {
if (text?.length) {
if (text.length > ctx.query.brief) {
item.description = `<p>${text.substring(0, ctx.query.brief)}…</p>`;
} else {
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@
"puppeteer-extra-plugin-user-preferences": "2.4.1",
"query-string": "7.1.3",
"rand-user-agent": "1.0.109",
"re2-wasm": "1.0.2",
"re2js": "0.3.2",
"require-all": "3.0.0",
"rfc4648": "1.5.2",
"rss-parser": "3.13.0",
Expand Down
11 changes: 5 additions & 6 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion test/middleware/filter-engine.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ describe('filter-engine', () => {

const response = await request.get('/test/1?filter=abc(%3F%3Ddef)');
expect(response.status).toBe(404);
expect(response.text).toMatch(/SyntaxError/);
expect(response.text).toMatch(/RE2JSSyntaxException/);
server.close();
});

Expand Down