From 6d8a769e45e08b8a43cc11b8b282afacd0cad834 Mon Sep 17 00:00:00 2001 From: lyngai Date: Mon, 6 Jun 2022 08:49:47 +0000 Subject: [PATCH] feat(emphasis): add chinese punctuations as word's boundary --- src/core/hooks/Emphasis.js | 6 +++--- src/utils/regexp.js | 13 +++++++++++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/core/hooks/Emphasis.js b/src/core/hooks/Emphasis.js index 85967177a..4805bd57b 100644 --- a/src/core/hooks/Emphasis.js +++ b/src/core/hooks/Emphasis.js @@ -18,7 +18,7 @@ import { compileRegExp, DO_NOT_STARTS_AND_END_WITH_SPACES_MULTILINE_ALLOW_EMPTY, ALLOW_WHITESPACE_MULTILINE, - UNDERSCORE_EMPHASIS_BORDER, + UNDERSCORE_EMPHASIS_BOUNDARY, } from '@/utils/regexp'; export default class Emphasis extends SyntaxBase { @@ -97,9 +97,9 @@ export default class Emphasis extends SyntaxBase { // UNDERSCORE_EMPHASIS_BORDER:允许除下划线以外的「标点符号」和空格出现,使用[^\w\S \t]或[\W\s]会有性能问题 const underscore = { - begin: `(^|${UNDERSCORE_EMPHASIS_BORDER})(_+)`, // ?, ? + begin: `(^|${UNDERSCORE_EMPHASIS_BOUNDARY})(_+)`, // ?, ? content: `(${REGEX})`, // ? - end: `\\2(?=${UNDERSCORE_EMPHASIS_BORDER}|$)`, + end: `\\2(?=${UNDERSCORE_EMPHASIS_BOUNDARY}|$)`, }; asterisk.reg = compileRegExp(asterisk, 'g'); diff --git a/src/utils/regexp.js b/src/utils/regexp.js index 4cb27790f..dbc8593cd 100644 --- a/src/utils/regexp.js +++ b/src/utils/regexp.js @@ -43,6 +43,7 @@ export const NOT_ALL_WHITE_SPACES_INLINE = '(?:[^\\n]*?\\S[^\\n]*?)'; export const NORMAL_INDENT = '[ ]{0, 3}|\\t'; export const NO_BACKSLASH_BEFORE_CAPTURE = '[^\\\\]'; + // https://spec.commonmark.org/0.29/#ascii-punctuation-character // !, ", #, $, %, &, ', (, ), *, +, ,, -, ., / (U+0021–2F), // :, ;, <, =, >, ?, @ (U+003A–0040), @@ -50,9 +51,17 @@ export const NO_BACKSLASH_BEFORE_CAPTURE = '[^\\\\]'; // {, |, }, or ~ (U+007B–007E). export const PUNCTUATION = '[\\u0021-\\u002F\\u003a-\\u0040\\u005b-\\u0060\\u007b-\\u007e]'; +// extra punctuations +export const CHINESE_PUNCTUATION = '[!“”¥‘’(),。—:;《》?【】「」·~|]'; + // 下划线强调语法允许的边界符号 -export const UNDERSCORE_EMPHASIS_BORDER = - '[\\u0021-\\u002F\\u003a-\\u0040\\u005b\\u005d\\u005e\\u0060\\u007b-\\u007e \\t\\n]'; +export const UNDERSCORE_EMPHASIS_BOUNDARY = + '[' + + '\\u0021-\\u002F\\u003a-\\u0040\\u005b\\u005d\\u005e\\u0060\\u007b-\\u007e' + // punctuations defined in commonmark + ' ' + + '\\t\\n' + + '!“”¥‘’(),。—:;《》?【】「」·~|' + // chinese punctuations + ']'; // https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail) export const EMAIL_INLINE = new RegExp(