From 13e0cd0c7f3524cfd49a43ab44e045f431d2c067 Mon Sep 17 00:00:00 2001
From: Henner Zeller
Date: Wed, 2 Oct 2024 19:27:25 -0700
Subject: [PATCH] If non-mmap: use stdio reading of files instead of
std::stream
std::fstream is much slower than the good ol' standard implementation.
In addition, there are no risks of throwing exceptions.
---
.github/workflows/verible-ci.yml | 4 +-
common/util/file_util.cc | 56 +++++++++++--------
.../checkers/token_stream_lint_rule.cc | 3 +-
.../checkers/token_stream_lint_rule_test.cc | 6 ++
verilog/tools/obfuscator/verilog_obfuscate.cc | 11 ++++
5 files changed, 55 insertions(+), 25 deletions(-)
diff --git a/.github/workflows/verible-ci.yml b/.github/workflows/verible-ci.yml
index 5202674bb..f4b83770f 100644
--- a/.github/workflows/verible-ci.yml
+++ b/.github/workflows/verible-ci.yml
@@ -462,8 +462,8 @@ jobs:
uses: actions/cache@v3
with:
path: "c:/users/runneradmin/_bazel_runneradmin"
- key: bazelcache_windows2_${{ steps.cache_timestamp.outputs.time }}
- restore-keys: bazelcache_windows2_
+ key: bazelcache_windows_${{ steps.cache_timestamp.outputs.time }}
+ restore-keys: bazelcache_windows_
- name: Install dependencies
run: |
diff --git a/common/util/file_util.cc b/common/util/file_util.cc
index e75ccba4e..03a8a299c 100644
--- a/common/util/file_util.cc
+++ b/common/util/file_util.cc
@@ -14,13 +14,15 @@
#include "common/util/file_util.h"
+#include
+
#include
#include
+#include
+#include
#include
#include
#include
-#include
-#include
#include
#include
#include
@@ -34,10 +36,11 @@
#include "common/util/logging.h"
#ifndef _WIN32
-#include
#include
#include
#include
+#else
+#include
#endif
namespace fs = std::filesystem;
@@ -154,34 +157,35 @@ absl::Status FileExists(const std::string &filename) {
absl::StatusOr GetContentAsString(absl::string_view filename) {
std::string content;
- std::ifstream fs;
- std::istream *stream = nullptr;
+ FILE *stream = nullptr;
const bool use_stdin = IsStdin(filename);
if (use_stdin) {
- stream = &std::cin;
+#ifdef _WIN32
+ _setmode(_fileno(stdin), _O_BINARY); // Work around DOS/Win silliness.
+#endif
+ stream = stdin;
} else {
const std::string filename_str = std::string{filename};
if (absl::Status status = FileExists(filename_str); !status.ok()) {
return status; // Bail
}
- fs.open(filename_str.c_str());
+ stream = fopen(filename_str.c_str(), "rb");
std::error_code err;
const size_t prealloc = fs::file_size(filename_str, err);
if (err.value() == 0) content.reserve(prealloc);
- stream = &fs;
}
- if (!stream->good()) {
+ if (!stream) {
return CreateErrorStatusFromErrno(filename, "can't read");
}
char buffer[4096];
- while (stream->good() && !stream->eof()) {
- stream->read(buffer, sizeof(buffer));
- content.append(buffer, stream->gcount());
- }
-
- // Allow stdin to be reopened for more input.
- if (use_stdin && std::cin.eof()) std::cin.clear();
- return std::move(content);
+ int bytes_read;
+ do {
+ bytes_read = fread(buffer, 1, sizeof(buffer), stream);
+ content.append(buffer, bytes_read);
+ } while (bytes_read > 0);
+ fclose(stream);
+
+ return content;
}
static absl::StatusOr> AttemptMemMapFile(
@@ -244,11 +248,19 @@ absl::StatusOr> GetContentAsMemBlock(
absl::Status SetContents(absl::string_view filename,
absl::string_view content) {
VLOG(1) << __FUNCTION__ << ": Writing file: " << filename;
- std::ofstream f(std::string(filename).c_str());
- if (!f.good()) return CreateErrorStatusFromErrno(filename, "can't write.");
- f << content;
- f.close();
- if (!f.good()) return CreateErrorStatusFromErrno(filename, "closing.");
+ FILE *out = fopen(std::string(filename).c_str(), "wb");
+ if (!out) return CreateErrorStatusFromErrno(filename, "can't write.");
+ const int64_t expected_write = content.size();
+ int64_t total_written = 0;
+ while (!content.empty()) {
+ int64_t w = fwrite(content.data(), 1, content.size(), out);
+ total_written += w;
+ content.remove_prefix(w);
+ }
+ const bool written_completely = (total_written == expected_write);
+ if (fclose(out) != 0 || !written_completely) {
+ return CreateErrorStatusFromErrno(filename, "closing.");
+ }
return absl::OkStatus();
}
diff --git a/verilog/analysis/checkers/token_stream_lint_rule.cc b/verilog/analysis/checkers/token_stream_lint_rule.cc
index 7d05cfcb7..10362624b 100644
--- a/verilog/analysis/checkers/token_stream_lint_rule.cc
+++ b/verilog/analysis/checkers/token_stream_lint_rule.cc
@@ -75,7 +75,8 @@ void TokenStreamLintRule::HandleSymbol(const verible::Symbol &symbol,
return p->Tag().tag == TK_StringLiteral;
});
const auto &string_literal = SymbolCastToLeaf(**literal);
- if (absl::StrContains(string_literal.get().text(), "\\\n")) {
+ if (absl::StrContains(string_literal.get().text(), "\\\n") ||
+ absl::StrContains(string_literal.get().text(), "\\\r")) {
violations_.insert(LintViolation(string_literal, kMessage, context));
}
}
diff --git a/verilog/analysis/checkers/token_stream_lint_rule_test.cc b/verilog/analysis/checkers/token_stream_lint_rule_test.cc
index eb145d3ae..88fcf287c 100644
--- a/verilog/analysis/checkers/token_stream_lint_rule_test.cc
+++ b/verilog/analysis/checkers/token_stream_lint_rule_test.cc
@@ -56,6 +56,12 @@ TEST(StringLiteralConcatenationTest, FunctionFailures) {
"\"Humpty Dumpty sat on a wall. \\\nHumpty Dumpty had a great fall.\""},
";",
"\nendmodule"},
+ {"module m;\n",
+ "string tmp=",
+ {kToken,
+ "\"Humpty Dumpty discovers CRLF \\\r\nHumpty Dumpty CRinged.\""},
+ ";",
+ "\nendmodule"},
{"module m;\n",
"string tmp=",
{kToken,
diff --git a/verilog/tools/obfuscator/verilog_obfuscate.cc b/verilog/tools/obfuscator/verilog_obfuscate.cc
index 30604b529..bde9a7d6c 100644
--- a/verilog/tools/obfuscator/verilog_obfuscate.cc
+++ b/verilog/tools/obfuscator/verilog_obfuscate.cc
@@ -25,6 +25,11 @@
#include // IWYU pragma: keep // for ostringstream
#include // for string, allocator, etc
+#ifdef _WIN32
+#include
+#include
+#endif
+
#include "absl/flags/flag.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
@@ -70,6 +75,12 @@ static constexpr absl::string_view kBuiltinFunctions[] = {
};
int main(int argc, char **argv) {
+#ifdef _WIN32
+ // stdio: Windows messes with newlines by default. Fix this here.
+ _setmode(_fileno(stdin), _O_BINARY);
+ _setmode(_fileno(stdout), _O_BINARY);
+#endif
+
const auto usage = absl::StrCat("usage: ", argv[0],
" [options] < original > output\n"
R"(