Skip to content

Commit

Permalink
Give newline token symbolic name; print raw tokens c-escaped.
Browse files Browse the repository at this point in the history
The newline token can stand for `'\n'`, but also for `'\r\n'`, so printing the
token's symbolic name as simply `<<\n>>` can be misleading when the text is
actually `'\r\n'`; instead, print it under the symbolic name `<<newline>>`.

Also: the actual raw text of the token is now printed C-escaped,
which makes it easier for humans to read when special characters are involved
(and possibly easier to process when grepping through results).
  • Loading branch information
hzeller committed Oct 3, 2024
1 parent 56d75de commit 30fbee8
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 9 deletions.
1 change: 1 addition & 0 deletions common/text/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ cc_library(
"//common/util:iterator-range",
"//common/util:logging",
"//common/util:range",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:string_view",
],
)
Expand Down
6 changes: 4 additions & 2 deletions common/text/token_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <string>
#include <vector>

#include "absl/strings/escaping.h"
#include "absl/strings/string_view.h"
#include "common/strings/rebase.h"
#include "common/text/constants.h"
Expand Down Expand Up @@ -53,14 +54,15 @@ std::ostream &TokenInfo::ToStream(std::ostream &output_stream,
output_stream << "(#";
context.token_enum_translator(output_stream, token_enum_);
output_stream << " @" << left(context.base) << '-' << right(context.base)
<< ": \"" << text_ << "\")";
<< ": \"" << absl::CEscape(text_) << "\")";
const auto dist = std::distance(context.base.end(), text_.end());
CHECK(IsSubRange(text_, context.base)) << "text.end() is off by " << dist;
return output_stream;
}

std::ostream &TokenInfo::ToStream(std::ostream &output_stream) const {
return output_stream << "(#" << token_enum_ << ": \"" << text_ << "\")";
return output_stream << "(#" << token_enum_ << ": \"" << absl::CEscape(text_)
<< "\")";
}

std::string TokenInfo::ToString(const Context &context) const {
Expand Down
6 changes: 3 additions & 3 deletions verilog/parser/verilog.y
Original file line number Diff line number Diff line change
Expand Up @@ -667,7 +667,7 @@ is not locally defined, so the grammar here uses only generic identifiers.
%token TK_COMMENT_BLOCK "/&lowast;comment&lowast;/"
%token TK_EOL_COMMENT "// end of line comment"
%token TK_SPACE "<<space>>" /* includes tabs */
%token TK_NEWLINE "<<\\n>>"
%token TK_NEWLINE "<<newline>>"
%token TK_LINE_CONT "<<\\line-cont>>"
%token TK_ATTRIBUTE "(*attribute*)"

Expand Down Expand Up @@ -3518,12 +3518,12 @@ instantiation_base
{ $$ = MakeInstantiationBase($1, $2); }
/*
* TODO: support mixed anonymous declarations
*
*
* This production rule was commented out because it caused
* verible-verilog-syntax to crash for some inputs. It may be necessary to
* re-enable it in the future to support declarations that mix anonymous and
* named instances.
*
*
* For more details, see https://github.com/chipsalliance/verible/issues/2181
*/
// | reference call_base ',' gate_instance_or_register_variable_list
Expand Down
6 changes: 2 additions & 4 deletions verilog/tools/syntax/verilog_syntax_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -307,11 +307,9 @@ All lexed tokens:
(#"<<space>>" @6-7: " ")
(#SymbolIdentifier @7-9: "mm")
(#';' @9-10: ";")
(#"<<\\\\n>>" @10-11: "
")
(#"<<newline>>" @10-11: "\\n")
(#"endmodule" @11-20: "endmodule")
(#"<<\\\\n>>" @20-21: "
")
(#"<<newline>>" @20-21: "\\n")
(#"end of file" @21-21: "")
EOF

Expand Down

0 comments on commit 30fbee8

Please sign in to comment.