From aeeb03a824e70bf6db5cc3447eda66f15e3acbb5 Mon Sep 17 00:00:00 2001 From: Andrew Dupont Date: Sun, 21 Jan 2024 10:33:01 -0800 Subject: [PATCH] =?UTF-8?q?Adopt=20the=20`support.storage`=20compromise=20?= =?UTF-8?q?found=20in=20legacy=20Tree-sitter=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …in which all _value types_ are scoped under `support.storage`. The goal is to avoid drawing an arbitrary distinction between, say, `int` and `SomeCustomCType` — those things are different, but not _so_ different that they belong in separate root scope namespaces. `storage.type` can be used for core language constructs — `var`, `class`, `function`, `struct`, `enum`, `namespace`, and so on. `support.storage.type` should be used for any _recognized_ value type — certainly core language types like `int`, but also other things if they make sense. `support.other.storage.type` can be used for user-defined types, or any other types that we don't recognize. (Using the `support.other` ordering here adheres to our new convention of reserving all of `support.other` for user-defined things.) The point of this compromise is that both the `syntax--storage` and `syntax--support` class names are present so that many themes will still highlight these tokens as though they were in the root `storage` namespace. This commit changes the C/C++ and TypeScript `highlights.scm` files. The results appear to be satisfactory to my eye after testing on two of the builtin themes. 
I’ve updated the taxonomy document to reflect this new proposal: https://gist.github.com/savetheclocktower/c9607b97477d4817911e4f2f8db89679#file-scope-taxonomy-md --- .../grammars/tree-sitter-c/highlights.scm | 37 ++++++---- .../grammars/tree-sitter-cpp/highlights.scm | 38 ++++++---- .../grammars/common/highlights.scm | 70 +++++++++++-------- 3 files changed, 90 insertions(+), 55 deletions(-) diff --git a/packages/language-c/grammars/tree-sitter-c/highlights.scm b/packages/language-c/grammars/tree-sitter-c/highlights.scm index 0fedcb3cfc..1d5012c401 100644 --- a/packages/language-c/grammars/tree-sitter-c/highlights.scm +++ b/packages/language-c/grammars/tree-sitter-c/highlights.scm @@ -47,30 +47,41 @@ (type_identifier) @_IGNORE_ (#set! capture.final true)) -(primitive_type) @support.type.builtin.c -(type_identifier) @support.type.other.c +(primitive_type) @support.storage.type.builtin.c +(type_identifier) @support.other.storage.type.c ; These types are all reserved words; if we see an identifier with this name, ; it must be a type. -((identifier) @support.type.builtin.c - (#match? @support.type.builtin.c "^(char|int|float|double|long)$")) +((identifier) @support.storage.type.builtin.c + (#match? @support.storage.type.builtin.c "^(char|int|float|double|long)$")) ; Assume any identifier that ends in `_t` is a type. This convention is not ; always followed, but it's a very strong indicator when it's present. -((identifier) @support.type.other.c - (#match? @support.type.other.c "_t$")) +((identifier) @support.other.storage.type.c + (#match? @support.other.storage.type.c "_t$")) +; These refer to language constructs and remain in the `storage` namespace. [ "enum" - "long" - "short" - "signed" "struct" "typedef" "union" - "unsigned" ] @storage.type.c +; These refer to value types and go under `support`. +[ + "long" + "short" +] @support.storage.type.builtin.c + +; These act as modifiers to value types and also go under `support`. 
+[ + "signed" + "unsigned" +] @support.storage.modifier.builtin.c + +; These act as general language modifiers and remain in the `storage` +; namespace. [ "const" "extern" @@ -79,10 +90,10 @@ "restrict" "static" "volatile" -] @storage.modifier.c +] @storage.modifier._TYPE_.c -((primitive_type) @support.type.stdint.c - (#match? @support.type.stdint.c "^(int8_t|int16_t|int32_t|int64_t|uint8_t|uint16_t|uint32_t|uint64_t|int_least8_t|int_least16_t|int_least32_t|int_least64_t|uint_least8_t|uint_least16_t|uint_least32_t|uint_least64_t|int_fast8_t|int_fast16_t|int_fast32_t|int_fast64_t|uint_fast8_t|uint_fast16_t|uint_fast32_t|uint_fast64_t|intptr_t|uintptr_t|intmax_t|intmax_t|uintmax_t|uintmax_t)$")) +((primitive_type) @support.storage.type.stdint.c + (#match? @support.storage.type.stdint.c "^(int8_t|int16_t|int32_t|int64_t|uint8_t|uint16_t|uint32_t|uint64_t|int_least8_t|int_least16_t|int_least32_t|int_least64_t|uint_least8_t|uint_least16_t|uint_least32_t|uint_least64_t|int_fast8_t|int_fast16_t|int_fast32_t|int_fast64_t|uint_fast8_t|uint_fast16_t|uint_fast32_t|uint_fast64_t|intptr_t|uintptr_t|intmax_t|intmax_t|uintmax_t|uintmax_t)$")) (enum_specifier name: (type_identifier) @variable.other.declaration.type.c) diff --git a/packages/language-c/grammars/tree-sitter-cpp/highlights.scm b/packages/language-c/grammars/tree-sitter-cpp/highlights.scm index 8ec2abb7bf..b1243b46ce 100644 --- a/packages/language-c/grammars/tree-sitter-cpp/highlights.scm +++ b/packages/language-c/grammars/tree-sitter-cpp/highlights.scm @@ -73,28 +73,38 @@ ; These types are all reserved words; if we see an identifier with this name, ; it must be a type. -((identifier) @support.type.builtin.cpp - (#match? @support.type.builtin.cpp "^(char|int|float|double|long)$")) +((identifier) @support.storage.type.builtin.cpp + (#match? @support.storage.type.builtin.cpp "^(char|int|float|double|long)$")) ; Assume any identifier that ends in `_t` is a type. 
This convention is not ; always followed, but it's a very strong indicator when it's present. -((identifier) @support.type.other.cpp - (#match? @support.type.other.cpp "_t$")) +((identifier) @support.other.storage.type.cpp + (#match? @support.other.storage.type.cpp "_t$")) +; These refer to language constructs and remain in the `storage` namespace. [ "enum" - "long" - "short" - "signed" "struct" "typedef" "union" - "unsigned" - "template" ] @storage.type.cpp +; These refer to value types and go under `support`. +[ + "long" + "short" +] @support.storage.type.builtin.cpp + +; These act as modifiers to value types and also go under `support`. +[ + "signed" + "unsigned" +] @support.storage.modifier.builtin.cpp + +; These act as general language modifiers and remain in the `storage` +; namespace. [ "const" "extern" @@ -114,15 +124,15 @@ "override" "final" "noexcept" -] @storage.modifier.cpp + + "typename" +] @storage.modifier._TYPE_.cpp ( - (primitive_type) @support.type.stdint.cpp - (#match? @support.type.stdint.cpp "^(int8_t|int16_t|int32_t|int64_t|uint8_t|uint16_t|uint32_t|uint64_t|int_least8_t|int_least16_t|int_least32_t|int_least64_t|uint_least8_t|uint_least16_t|uint_least32_t|uint_least64_t|int_fast8_t|int_fast16_t|int_fast32_t|int_fast64_t|uint_fast8_t|uint_fast16_t|uint_fast32_t|uint_fast64_t|intptr_t|uintptr_t|intmax_t|intmax_t|uintmax_t|uintmax_t)$") + (primitive_type) @support.storage.type.stdint.cpp + (#match? 
@support.storage.type.stdint.cpp "^(int8_t|int16_t|int32_t|int64_t|uint8_t|uint16_t|uint32_t|uint64_t|int_least8_t|int_least16_t|int_least32_t|int_least64_t|uint_least8_t|uint_least16_t|uint_least32_t|uint_least64_t|int_fast8_t|int_fast16_t|int_fast32_t|int_fast64_t|uint_fast8_t|uint_fast16_t|uint_fast32_t|uint_fast64_t|intptr_t|uintptr_t|intmax_t|intmax_t|uintmax_t|uintmax_t)$") ) -"typename" @storage.modifier.typename.cpp - ; FUNCTIONS ; ========= diff --git a/packages/language-typescript/grammars/common/highlights.scm b/packages/language-typescript/grammars/common/highlights.scm index 26cbdbdab2..36c54b9e11 100644 --- a/packages/language-typescript/grammars/common/highlights.scm +++ b/packages/language-typescript/grammars/common/highlights.scm @@ -78,9 +78,6 @@ (asserts "asserts" @keyword.type.asserts._LANG_) (asserts (identifier) @variable.other.type._LANG_) - -["var" "const" "let"] @storage.type._TYPE_._LANG_ - ; A simple variable declaration: ; The "foo" in `let foo = true` (variable_declarator @@ -303,11 +300,11 @@ ; TYPES ; ===== -["var" "let" "const"] @storage.modifier._TYPE_._LANG_ +; These go under `storage.type`/`storage.modifier` because they’re core +; language constructs. +["var" "let" "const" "class" "function"] @storage.type._TYPE_._LANG_ ["extends" "static" "async" "infer"] @storage.modifier._TYPE_._LANG_ -["class" "function"] @storage.type._TYPE_._LANG_ - (type_arguments "<" @punctuation.definition.parameters.begin.bracket.angle.js (#set! capture.final)) (type_arguments ">" @punctuation.definition.parameters.end.bracket.angle.js @@ -318,48 +315,68 @@ ; TODO: If I allow scopes like `storage.type.string._LANG_`, I will make a lot of ; text look like strings by accident. This really needs to be fixed in syntax ; themes. 
-(predefined_type _ @storage.type._LANG_ @support.type._LANG_) +; +; NOTE: To settle the long debate (in my head) about whether value types are +; `support.type` or `storage.type`, I’ve adopted the same compromise used +; by legacy Tree-sitter: value types are filed under `support.storage.type`. + +; These appear to be the primitives like `number`, `string`, `boolean`, `void`, +; et cetera. `null` and `undefined` get their own nodes. +(predefined_type _ @support.storage.type.predefined._LANG_) (type_alias_declaration name: (type_identifier) @variable.declaration.type._LANG_) -((literal_type [(null) (undefined)]) @storage.type._TEXT_._LANG_) -((literal_type [(null) (undefined)]) @support.type._TEXT_._LANG_ - (#set! capture.final true)) +((literal_type [(null) (undefined)]) @support.storage.type._TEXT_._LANG_ + (#set! capture.final)) ; TODO: Decide whether other literal types — strings, booleans, and whatnot — ; should be highlighted as they are in JS, or should be highlighted like other ; types in annotations. +; These are `storage.type` because they are core language constructs rather +; than value types. [ - "implements" "namespace" "enum" "interface" "module" "declare" +] @storage.type._TYPE_._LANG_ +"type" @storage.type._LANG_ + +; These are `storage.modifier` because they act as adjectives and verbs for +; language constructs. +[ + "implements" "public" "private" "protected" "readonly" "satisfies" - "type" ] @storage.modifier._TYPE_._LANG_ (index_signature name: (identifier) @entity.other.attribute-name.type._LANG_) -((type_identifier) @storage.type._LANG_ - ; (#is? test.descendantOfType "type_annotation type_arguments satisfies_expression type_parameter") - ) +; The utility types documented at +; https://www.typescriptlang.org/docs/handbook/utility-types.html. +(generic_type + (type_identifier) @support.storage.type.builtin.utility._LANG_ + (#match? 
@support.storage.type.builtin.utility._LANG_ "^(Awaited|Partial|Required|Readonly|Record|Pick|Omit|Exclude|Extract|NonNullable|(?:Constructor)?Parameters|(?:Return|Instance|(?:Omit)?ThisParameter|This)Type|(?:Upper|Lower)case|Capitalize|Uncapitalize)$") + (#set! capture.final)) -; A capture can satisfy more than one of these criteria, so we need to guard -; against multiple matches. That's why we use `test.final` here, and why the -; two capture names are applied in separate captures — otherwise `test.final` -; would be applied after the first capture. -((type_identifier) @support.type._LANG_ - ; (#is? test.descendantOfType "type_annotation type_arguments satisfies_expression type_parameter") - (#set! capture.final true)) +; All core language builtin types. +((type_identifier) @support.storage.type.builtin._LANG_ +(#match? @support.storage.type.builtin._LANG_ "^(AggregateError|Array|ArrayBuffer|BigInt|BigInt64Array|BigUint64Array|DataView|Date|Error|EvalError|FinalizationRegistry|Float32Array|Float64Array|Function|ImageCapture|Int8Array|Int16Array|Int32Array|Map|Object|Promise|Proxy|RangeError|ReferenceError|RegExp|Set|Symbol|SyntaxError|TypeError|Uint8Array|Uint8ClampedArray|Uint16Array|Uint32Array|URIError|URL|WeakMap|WeakRef|WeakSet|XMLHttpRequest)$") + (#set! capture.final)) + +; TODO: We could add a special scope name to the entire suite of DOM types, but +; I don't have the strength for that right now. 
+ +; Everything else: user-defined or otherwise unrecognized types. +((type_identifier) @support.storage.other.type._LANG_ + ) ; SUPPORT ; ======= @@ -577,13 +594,9 @@ (property_identifier) @entity.other.attribute-name._LANG_) - ; FUNCTIONS ; ========= -(method_definition - name: (property_identifier) @entity.name.function.method._LANG_) - (call_expression function: (member_expression property: (property_identifier) @support.other.function.method._LANG_)) @@ -644,12 +657,15 @@ key: (property_identifier) @entity.name.function.method.definition._LANG_ value: [(function) (arrow_function)]) +; Function is `storage.type` because it's a core language construct. (function "function" @storage.type.function._LANG_) (function_declaration "function" @storage.type.function._LANG_) (generator_function "function" @storage.type.function._LANG_) (generator_function_declaration "function" @storage.type.function._LANG_) +; The `*` sigil acts as a modifier on a core language construct, hence +; `storage.modifier`. (generator_function "*" @storage.modifier.generator._LANG_) (generator_function_declaration "*" @storage.modifier.generator._LANG_) (method_definition "*" @storage.modifier.generator._LANG_) @@ -661,8 +677,6 @@ function: (identifier) @support.other.function._LANG_ (#set! capture.shy true)) - - ; Things that `LOOK_LIKE_CONSTANTS`. ([(property_identifier) (identifier)] @constant.other._LANG_ (#match? @constant.other._LANG_ "^[A-Z_][A-Z0-9_]*$")