Skip to content

Commit

Permalink
Use WordList to lookup HTML void tag, avoid check void tag twice, issue #957.
Browse files Browse the repository at this point in the history
  • Loading branch information
zufuliu committed Jan 9, 2025
1 parent 1396609 commit bbe02d6
Show file tree
Hide file tree
Showing 9 changed files with 67 additions and 78 deletions.
48 changes: 19 additions & 29 deletions scintilla/lexers/LexHTML.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,11 @@ namespace {
//KeywordIndex++Autogenerated -- start of section automatically generated
enum {
KeywordIndex_Tag = 0,
KeywordIndex_JavaScript = 1,
KeywordIndex_VBScript = 2,
KeywordIndex_SGML = 3,
KeywordIndex_Attribute = 4,
KeywordIndex_VoidTag = 1,
KeywordIndex_JavaScript = 2,
KeywordIndex_VBScript = 3,
KeywordIndex_SGML = 4,
KeywordIndex_Attribute = 5,
MaxKeywordSize = 32,
};
//KeywordIndex--Autogenerated -- end of section automatically generated
Expand Down Expand Up @@ -185,50 +186,39 @@ bool isHTMLCustomElement(const char *tag, size_t length, bool dashColon) noexcep
}

int classifyTagHTML(Sci_PositionU end, LexerWordList keywordLists, LexAccessor &styler, bool &tagDontFold, bool isXml, bool allowScripts) {
char withSpace[126 + 2];
withSpace[0] = ' ';
withSpace[1] = '\0';
char tag[127 + 1];
// Copy after the '<' and stop before space
Sci_PositionU i = 1;
Sci_PositionU length = 0;
bool dashColon = false;
const Sci_PositionU start = styler.GetStartSegment();
for (Sci_PositionU cPos = start; cPos < end && i < sizeof(withSpace) - 2; cPos++) {
for (Sci_PositionU cPos = start; cPos < end && length < sizeof(tag) - 1; cPos++) {
const char ch = styler[cPos];
if (static_cast<unsigned char>(ch) <= ' ') {
break;
}
if ((ch != '<') && (ch != '/')) {
withSpace[i++] = isXml ? ch : MakeLowerCase(ch);
tag[length++] = isXml ? ch : MakeLowerCase(ch);
if (ch == ':' || ch == '-') {
dashColon = true;
}
}
}

//The following is only a quick hack, to see if this whole thing would work
//we first need the tagname with a trailing space...
withSpace[i] = ' ';
withSpace[i + 1] = '\0';

// if the current language is XML, I can fold any tag
// if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
//...to find it in the list of no-container-tags
tagDontFold = (!isXml) && IsHtmlVoidTag(withSpace);

//now we can remove the trailing space
withSpace[i] = '\0';
const char * const tag = withSpace + 1;

// No keywords -> all are known
tag[length] = '\0';
int chAttr = SCE_H_TAGUNKNOWN;
bool customElement = false;
if (tag[0] == '!') {
chAttr = SCE_H_SGML_DEFAULT;
} else if (isXml || keywordLists[KeywordIndex_Tag].InList(tag)) {
chAttr = SCE_H_TAG;
} else if (!isXml && isHTMLCustomElement(tag, i - 1, dashColon)) {
customElement = true;
} else if (isXml) {
chAttr = SCE_H_TAG;
} else {
tagDontFold = keywordLists[KeywordIndex_VoidTag].InList(tag);
if (tagDontFold || keywordLists[KeywordIndex_Tag].InList(tag)) {
chAttr = SCE_H_TAG;
} else if (isHTMLCustomElement(tag, length, dashColon)) {
customElement = true;
chAttr = SCE_H_TAG;
}
}
if (chAttr != SCE_H_TAGUNKNOWN) {
styler.ColorTo(end, chAttr);
Expand Down
29 changes: 11 additions & 18 deletions scintilla/lexers/LexPHP.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ enum {
KeywordIndex_PredefinedVariable = 4,
KeywordIndex_MagicConstant = 5,
KeywordIndex_MagicMethod = 6,
KeywordIndex_JavaScript = 10,
KeywordIndex_VoidTag = 10,
KeywordIndex_JavaScript = 11,
MaxKeywordSize = 40,
};
//KeywordIndex--Autogenerated -- end of section automatically generated
Expand Down Expand Up @@ -206,7 +207,7 @@ struct PHPLexer {
return sc.Match(hereDocId.c_str()) && !IsIdentifierCharEx(sc.GetRelative(hereDocId.length()));
}

void ClassifyHtmlTag();
void ClassifyHtmlTag(LexerWordList keywordLists);
bool HandleBlockEnd(HtmlTextBlock block);

void HandlePHPTag();
Expand All @@ -219,29 +220,21 @@ struct PHPLexer {
bool ClassifyCssWord();
};

void PHPLexer::ClassifyHtmlTag() {
void PHPLexer::ClassifyHtmlTag(LexerWordList keywordLists) {
if (sc.state == SCE_H_OTHER) {
sc.SetState((tagType == HtmlTagType::Question) ? SCE_H_QUESTION : SCE_H_TAG);
} else if (tagType == HtmlTagType::None) {
char s[16]{};
sc.GetCurrentLowered(s, sizeof(s) - 1);
char *p = s + 1;
char s[16];
sc.GetCurrentLowered(s, sizeof(s));
const char * const p = s + 1;
if (StrEqual(p, "script")) {
tagType = HtmlTagType::Script;
} else if (StrEqual(p, "style")) {
tagType = HtmlTagType::Style;
} else {
tagType = HtmlTagType::Normal;
const size_t length = sc.LengthCurrent();
if (length <= maxHtmlVoidTagLen + 2) {
s[length] = ' ';
if (*p != '/') {
--p;
}
*p = ' ';
if (IsHtmlVoidTag(p)) {
tagType = HtmlTagType::Void;
}
if (keywordLists[KeywordIndex_VoidTag].InList(p)) {
tagType = HtmlTagType::Void;
}
}
}
Expand Down Expand Up @@ -1001,7 +994,7 @@ void ColourisePHPDoc(Sci_PositionU startPos, Sci_Position lengthDoc, int initSty
case SCE_H_QUESTION:
if (sc.ch == '>' || sc.Match('/', '>') || IsASpace(sc.ch)
|| (lexer.tagType == HtmlTagType::Question && sc.Match('?', '>'))) {
lexer.ClassifyHtmlTag();
lexer.ClassifyHtmlTag(keywordLists);
}
break;

Expand Down Expand Up @@ -1052,7 +1045,7 @@ void ColourisePHPDoc(Sci_PositionU startPos, Sci_Position lengthDoc, int initSty

case SCE_H_OTHER:
if (sc.ch == '>' || sc.Match('/', '>') || (lexer.tagType == HtmlTagType::Question && sc.Match('?', '>'))) {
lexer.ClassifyHtmlTag();
lexer.ClassifyHtmlTag(keywordLists);
break;
}
if (sc.ch == '<') {
Expand Down
10 changes: 0 additions & 10 deletions scintilla/lexlib/DocUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,6 @@

namespace Lexilla {

constexpr size_t maxHtmlVoidTagLen = CStrLen("basefont");

inline bool IsHtmlVoidTag(const char *tag) noexcept {
return nullptr != strstr(
// void elements
" area base basefont br col command embed frame hr img input isindex keygen link meta param source track wbr "
// end tag can be omitted
"p ", tag);
}

// based on CommonMark Spec 6.6 Raw HTML
constexpr bool IsHtmlTagStart(int ch) noexcept {
return IsAlpha(ch);
Expand Down
6 changes: 3 additions & 3 deletions src/EditAutoC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -708,8 +708,8 @@ enum {
GroovyKeywordIndex_Annotation = 7,
GroovyKeywordIndex_GroovyDoc = 9,
HTMLKeywordIndex_Tag = 0,
HTMLKeywordIndex_Attribute = 4,
HTMLKeywordIndex_Value = 5,
HTMLKeywordIndex_Attribute = 5,
HTMLKeywordIndex_Value = 6,
HaxeKeywordIndex_Preprocessor = 1,
HaxeKeywordIndex_CommentTag = 8,
InnoKeywordIndex_Directive = 4,
Expand All @@ -723,7 +723,7 @@ enum {
KotlinKeywordIndex_KDoc = 8,
NSISKeywordIndex_PredefinedVariable = 5,
PHPKeywordIndex_PredefinedVariable = 4,
PHPKeywordIndex_Phpdoc = 11,
PHPKeywordIndex_Phpdoc = 12,
PerlKeywordIndex_Variable = 2,
PowerShellKeywordIndex_PredefinedVariable = 4,
PythonKeywordIndex_Decorator = 7,
Expand Down
14 changes: 9 additions & 5 deletions src/EditLexers/stlPHP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -387,16 +387,19 @@ static KEYWORDLIST Keywords_PHP = {{
"param_count params postname protocol_version queryString reconnect recurrences replacement report_mode "
"server_info server_version sqlstate start stream thread_id type warning_count "

, // 10 JavaScript
, // 10 void tag
" area base basefont br col command embed frame hr img input isindex keygen link meta p param source track wbr "

, // 11 JavaScript
"Infinity NaN arguments async await break case catch class const continue debugger default delete do else export extends "
"false finally for function get globalThis if import in instanceof let new null of return set static super switch "
"this throw true try typeof undefined var void while with yield "

, // 11 phpdoc
, // 12 phpdoc
"api author category copyright deprecated example filesource global ignore internal license link method "
"package param property return see since source subpackage throws todo used uses var version "

, nullptr, nullptr, nullptr
, nullptr, nullptr
//--Autogenerated -- end of section automatically generated

, // 15 Code Snippet
Expand Down Expand Up @@ -455,8 +458,9 @@ EDITLEXER lexPHP = {
| KeywordAttr64(7, KeywordAttr_NoLexer) // constant
| KeywordAttr64(8, KeywordAttr_NoLexer) // function
| KeywordAttr64(9, KeywordAttr_NoLexer) // misc
| KeywordAttr64(10, KeywordAttr_PreSorted | KeywordAttr_NoAutoComp) // JavaScript
| KeywordAttr64(11, KeywordAttr_NoLexer | KeywordAttr_NoAutoComp) // phpdoc
| KeywordAttr64(10, KeywordAttr_PreSorted | KeywordAttr_NoAutoComp) // void tag
| KeywordAttr64(11, KeywordAttr_PreSorted | KeywordAttr_NoAutoComp) // JavaScript
| KeywordAttr64(12, KeywordAttr_NoLexer | KeywordAttr_NoAutoComp) // phpdoc
, 0,
SCE_PHP_ESCAPECHAR, SCE_PHP_NOWDOC_ID,
//Settings--Autogenerated -- end of section automatically generated
Expand Down
21 changes: 12 additions & 9 deletions src/EditLexers/stlXML.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,27 @@ static KEYWORDLIST Keywords_XML = {{
//++Autogenerated -- start of section automatically generated
nullptr

, // 1 JavaScript
, // 1 void tag
nullptr

, // 2 VBScript
, // 2 JavaScript
nullptr

, // 3 SGML
, // 3 VBScript
nullptr

, // 4 SGML
"ANY ATTLIST CDATA DOCTYPE ELEMENT EMPTY ENTITIES ENTITY FIXED ID IDREF IDREFS IGNORE IMPLIED INCLUDE "
"NDATA NMTOKEN NMTOKENS NOTATION PCDATA PUBLIC REQUIRED SGML SYSTEM doctype "

, // 4 attribute
, // 5 attribute
"alternate charset encoding href id media name standalone title type value version "
"xml xml-stylesheet xml:lang xmlns xmlns:xsi xsi:noNamespaceSchemaLocation xsi:schemaLocation "

, // 5 value
, // 6 value
"ISO-8859-1 UTF-8 false no off on true utf-8 yes "

, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr
, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr
//--Autogenerated -- end of section automatically generated
}};

Expand Down Expand Up @@ -52,9 +55,9 @@ EDITLEXER lexXML = {
0,
0, 0,
0, 0
, KeywordAttr32(3, KeywordAttr_PreSorted) // SGML
| KeywordAttr32(4, KeywordAttr_NoLexer) // attribute
| KeywordAttr32(5, KeywordAttr_NoLexer) // value
, KeywordAttr32(4, KeywordAttr_PreSorted) // SGML
| KeywordAttr32(5, KeywordAttr_NoLexer) // attribute
| KeywordAttr32(6, KeywordAttr_NoLexer) // value
, 0,
0, 0,
//Settings--Autogenerated -- end of section automatically generated
Expand Down
7 changes: 3 additions & 4 deletions src/Helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,9 @@ constexpr bool IsSchemeNameChar(int ch) noexcept {

inline bool IsHtmlVoidTag(const char *tag) noexcept {
return nullptr != StrStrIA(
// void elements
" area base basefont br col command embed frame hr img input isindex keygen link meta param source track wbr "
// end tag can be omitted
"p ", tag);
// void elements, should keep sync with EditLexers/stlHTML.cpp
" area base basefont br col command embed frame hr img input isindex keygen link meta p param source track wbr "
, tag);
}

constexpr int ToUpperA(int ch) noexcept {
Expand Down
9 changes: 9 additions & 0 deletions tools/KeywordCore.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
ColorNameList = set()
CSharpKeywordMap = {}
GroovyKeyword = []
HtmlVoidTagList = """area base basefont br col command embed frame hr
img input isindex keygen link meta param source track wbr
p""".split()
JavaKeywordMap = {}
JavaScriptKeywordMap = {}
SGMLKeyword = []
Expand Down Expand Up @@ -97,6 +100,9 @@ def BuildKeywordContent(rid, lexer, keywordList, keywordCount=16):
length = len(lines) + sum(len(line) for line in lines)
if length >= 0xffff:
print(rid, comment, 'string exceeds 64 KiB:', length)
if attr & KeywordAttr.PrefixSpace:
attr &= ~KeywordAttr.PrefixSpace
lines[0] = ' ' + lines[0]
output.extend('"' + line + ' "' for line in lines)
else:
output.append('nullptr')
Expand Down Expand Up @@ -1299,6 +1305,7 @@ def parse_html_api_file(path):
values = set(values) - set(keywords) - set(attributes)
return [
('tag', keywords, KeywordAttr.Special),
('void tag', HtmlVoidTagList, KeywordAttr.NoAutoComp | KeywordAttr.PrefixSpace),
('JavaScript', JavaScriptKeywordMap['keywords'], KeywordAttr.NoAutoComp),
('VBScript', VBScriptKeyword, KeywordAttr.MakeLower | KeywordAttr.NoAutoComp),
('SGML', SGMLKeyword, KeywordAttr.Default),
Expand Down Expand Up @@ -1911,6 +1918,7 @@ def parse_php_api_file(path):
('constant', keywordMap['constant'], KeywordAttr.NoLexer),
('function', keywordMap['function'], KeywordAttr.NoLexer),
('misc', keywordMap['misc'], KeywordAttr.NoLexer),
('void tag', HtmlVoidTagList, KeywordAttr.NoAutoComp | KeywordAttr.PrefixSpace),
('JavaScript', JavaScriptKeywordMap['keywords'], KeywordAttr.NoAutoComp),
('phpdoc', keywordMap['phpdoc'], KeywordAttr.NoLexer | KeywordAttr.NoAutoComp | KeywordAttr.Special),
]
Expand Down Expand Up @@ -2762,6 +2770,7 @@ def parse_xml_api_file(path):
SGMLKeyword.extend(keywords)
return [
('tag', [], KeywordAttr.Default),
('void tag', [], KeywordAttr.Default),
('JavaScript', [], KeywordAttr.Default),
('VBScript', [], KeywordAttr.Default),
('SGML', keywords, KeywordAttr.Default),
Expand Down
1 change: 1 addition & 0 deletions tools/LexerConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class KeywordAttr(IntFlag):
NoLexer = 4 # not used by lexer, listed for auto-completion.
NoAutoComp = 8 # don't add to default auto-completion list.
Special = 256 # used by context based auto-completion.
PrefixSpace = 512 # prefix first item with extra space.

TabSettings_Default = {
'tab_width': 4,
Expand Down

0 comments on commit bbe02d6

Please sign in to comment.