Skip to content

Commit

Permalink
Replace llvm::MD5 with StableHasher
Browse files Browse the repository at this point in the history
  • Loading branch information
CodaFi committed Jan 22, 2021
1 parent f5cb08e commit 73ac8d3
Show file tree
Hide file tree
Showing 11 changed files with 53 additions and 42 deletions.
2 changes: 1 addition & 1 deletion include/swift/AST/ParseRequests.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ class ParseAbstractFunctionBodyRequest :
struct SourceFileParsingResult {
ArrayRef<Decl *> TopLevelDecls;
Optional<ArrayRef<Token>> CollectedTokens;
Optional<llvm::MD5> InterfaceHash;
Optional<StableHasher> InterfaceHasher;
Optional<syntax::SourceFileSyntax> SyntaxRoot;
};

Expand Down
5 changes: 3 additions & 2 deletions include/swift/AST/SourceFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,10 @@ class SourceFile final : public FileUnit {
SourceLoc MainDeclDiagLoc;

/// A hash of all interface-contributing tokens that have been lexed for
/// this source file so far.
/// this source file.
///
/// We only collect interface hash for primary input files.
llvm::Optional<llvm::MD5> InterfaceHash;
llvm::Optional<StableHasher> InterfaceHasher;

/// The ID for the memory buffer containing this file's source.
///
Expand Down
33 changes: 23 additions & 10 deletions include/swift/Basic/Fingerprint.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
#ifndef SWIFT_BASIC_FINGERPRINT_H
#define SWIFT_BASIC_FINGERPRINT_H

#include "swift/Basic/StableHasher.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MD5.h"

#include <string>

Expand Down Expand Up @@ -52,11 +52,6 @@ namespace swift {
/// iterable decl contexts to detect when the tokens in their bodies have
/// changed. This makes them a coarse - yet safe - overapproximation for when a
/// decl has changed semantically.
///
/// \c Fingerprints are currently implemented as a thin wrapper around an MD5
/// hash. MD5 is known to be neither the fastest nor the most
/// cryptographically capable algorithm, but it does afford us the avalanche
/// effect we desire. We should revisit the modeling decision here.
class Fingerprint final {
public:
/// The size (in bytes) of the raw value of all fingerprints.
Expand All @@ -66,6 +61,8 @@ class Fingerprint final {
private:
Core core;

friend struct StableHasher::Combiner<swift::Fingerprint>;

public:
/// Creates a fingerprint value from a pair of 64-bit integers.
explicit Fingerprint(Fingerprint::Core value) : core(value) {}
Expand All @@ -76,9 +73,9 @@ class Fingerprint final {
/// Strings that violate this invariant will return a null optional.
static llvm::Optional<Fingerprint> fromString(llvm::StringRef value);

/// Creates a fingerprint value by consuming the given \c MD5Result from LLVM.
explicit Fingerprint(llvm::MD5::MD5Result &&MD5Value)
: core{MD5Value.words()} {}
/// Creates a fingerprint value by consuming the given \c StableHasher.
explicit Fingerprint(StableHasher &&stableHasher)
: core{std::move(stableHasher).finalize()} {}

public:
/// Retrieve the raw underlying bytes of this fingerprint.
Expand All @@ -100,7 +97,7 @@ class Fingerprint final {
public:
/// The fingerprint value consisting of 32 bytes of zeroes.
///
/// This fingerprint is a perfectly fine value for an MD5 hash, but it is
/// This fingerprint is a perfectly fine value for a hash, but it is
/// completely arbitrary.
static Fingerprint ZERO() {
return Fingerprint(Fingerprint::Core{0, 0});
Expand All @@ -118,6 +115,22 @@ class Fingerprint final {
void simple_display(llvm::raw_ostream &out, const Fingerprint &fp);
}; // namespace swift

namespace swift {

/// Specialization that lets a \c StableHasher absorb a \c Fingerprint.
template <> struct StableHasher::Combiner<Fingerprint> {
  /// Mixes both halves of \p Val's core into \p hasher: \c first, then
  /// \c second, each as an 8-byte array.
  static void combine(StableHasher &hasher, const Fingerprint &Val) {
    // The core is already stored byte-swapped, so feed each half to the
    // hasher as raw bytes rather than as an integer value.
    auto combineRawHalf = [&hasher](const auto &half) {
      uint8_t rawBytes[8];
      memcpy(rawBytes, &half, sizeof(rawBytes));
      hasher.combine(rawBytes);
    };

    combineRawHalf(Val.core.first);
    combineRawHalf(Val.core.second);
  }
};

}; // namespace swift

namespace llvm {
class raw_ostream;
raw_ostream &operator<<(raw_ostream &OS, const swift::Fingerprint &fp);
Expand Down
2 changes: 1 addition & 1 deletion include/swift/Parse/Parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ class Parser {

/// The current token hash, or \c None if the parser isn't computing a hash
/// for the token stream.
Optional<llvm::MD5> CurrentTokenHash;
Optional<StableHasher> CurrentTokenHash;

void recordTokenHash(const Token Tok) {
if (!Tok.getText().empty())
Expand Down
19 changes: 8 additions & 11 deletions lib/AST/Module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1092,22 +1092,21 @@ Fingerprint SourceFile::getInterfaceHash() const {
assert(hasInterfaceHash() && "Interface hash not enabled");
auto &eval = getASTContext().evaluator;
auto *mutableThis = const_cast<SourceFile *>(this);
auto md5 = *evaluateOrDefault(eval, ParseSourceFileRequest{mutableThis}, {})
.InterfaceHash;
llvm::MD5::MD5Result result;
md5.final(result);
return Fingerprint{std::move(result)};
Optional<StableHasher> interfaceHasher =
evaluateOrDefault(eval, ParseSourceFileRequest{mutableThis}, {})
.InterfaceHasher;
return Fingerprint{StableHasher{interfaceHasher.getValue()}.finalize()};
}

Fingerprint SourceFile::getInterfaceHashIncludingTypeMembers() const {
/// FIXME: Gross. Hashing multiple "hash" values.
llvm::MD5 hash;
hash.update(getInterfaceHash().getRawValue());
auto hash = StableHasher::defaultHasher();
hash.combine(getInterfaceHash());

std::function<void(IterableDeclContext *)> hashTypeBodyFingerprints =
[&](IterableDeclContext *IDC) {
if (auto fp = IDC->getBodyFingerprint())
hash.update(fp->getRawValue());
hash.combine(*fp);
for (auto *member : IDC->getParsedMembers())
if (auto *childIDC = dyn_cast<IterableDeclContext>(member))
hashTypeBodyFingerprints(childIDC);
Expand All @@ -1118,9 +1117,7 @@ Fingerprint SourceFile::getInterfaceHashIncludingTypeMembers() const {
hashTypeBodyFingerprints(IDC);
}

llvm::MD5::MD5Result result;
hash.final(result);
return Fingerprint{std::move(result)};
return Fingerprint{std::move(hash)};
}

syntax::SourceFileSyntax SourceFile::getSyntaxRoot() const {
Expand Down
1 change: 1 addition & 0 deletions lib/Basic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ add_swift_host_library(swiftBasic STATIC
Program.cpp
QuotedString.cpp
SourceLoc.cpp
StableHasher.cpp
Statistic.cpp
StringExtras.cpp
TaskQueue.cpp
Expand Down
11 changes: 6 additions & 5 deletions lib/Parse/ParseDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4800,7 +4800,8 @@ Parser::parseDeclList(SourceLoc LBLoc, SourceLoc &RBLoc, Diag<> ErrorDiag,

// If we're hashing the type body separately, record the curly braces but
// nothing inside for the interface hash.
llvm::SaveAndRestore<Optional<llvm::MD5>> MemberHashingScope{CurrentTokenHash, llvm::MD5()};
llvm::SaveAndRestore<Optional<StableHasher>> MemberHashingScope{
CurrentTokenHash, StableHasher::defaultHasher()};
recordTokenHash("{");
recordTokenHash("}");

Expand Down Expand Up @@ -4833,9 +4834,9 @@ Parser::parseDeclList(SourceLoc LBLoc, SourceLoc &RBLoc, Diag<> ErrorDiag,
if (RBLoc.isInvalid())
hadError = true;

llvm::MD5::MD5Result result;
CurrentTokenHash->final(result);
return std::make_pair(decls, Fingerprint{std::move(result)});
// Clone the current hasher and extract a Fingerprint.
StableHasher currentHash{*CurrentTokenHash};
return std::make_pair(decls, Fingerprint{std::move(currentHash)});
}

bool Parser::canDelayMemberDeclParsing(bool &HasOperatorDeclarations,
Expand Down Expand Up @@ -6725,7 +6726,7 @@ void Parser::parseAbstractFunctionBody(AbstractFunctionDecl *AFD) {
recordTokenHash("{");
recordTokenHash("}");

llvm::SaveAndRestore<Optional<llvm::MD5>> T(CurrentTokenHash, None);
llvm::SaveAndRestore<Optional<StableHasher>> T(CurrentTokenHash, None);

// If we can delay parsing this body, or this is the first pass of code
// completion, skip until the end. If we encounter a code completion token
Expand Down
4 changes: 2 additions & 2 deletions lib/Parse/ParseIfConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,7 @@ ParserResult<IfConfigDecl> Parser::parseIfConfig(
SourceMgr.getCodeCompletionBufferID() == L->getBufferID() &&
SourceMgr.isBeforeInBuffer(Tok.getLoc(),
SourceMgr.getCodeCompletionLoc())) {
llvm::SaveAndRestore<Optional<llvm::MD5>> H(CurrentTokenHash, None);
llvm::SaveAndRestore<Optional<StableHasher>> H(CurrentTokenHash, None);
BacktrackingScope backtrack(*this);
do {
auto startLoc = Tok.getLoc();
Expand Down Expand Up @@ -706,7 +706,7 @@ ParserResult<IfConfigDecl> Parser::parseIfConfig(
llvm::SaveAndRestore<bool> S(InInactiveClauseEnvironment,
InInactiveClauseEnvironment || !isActive);
// Disable updating the interface hash inside inactive blocks.
Optional<llvm::SaveAndRestore<Optional<llvm::MD5>>> T;
Optional<llvm::SaveAndRestore<Optional<StableHasher>>> T;
if (!isActive)
T.emplace(CurrentTokenHash, None);

Expand Down
4 changes: 2 additions & 2 deletions lib/Parse/ParseRequests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,15 +206,15 @@ ParseSourceFileRequest::getCachedResult() const {
syntaxRoot.emplace(*rootPtr);

return SourceFileParsingResult{*decls, SF->AllCollectedTokens,
SF->InterfaceHash, syntaxRoot};
SF->InterfaceHasher, syntaxRoot};
}

void ParseSourceFileRequest::cacheResult(SourceFileParsingResult result) const {
auto *SF = std::get<0>(getStorage());
assert(!SF->Decls);
SF->Decls = result.TopLevelDecls;
SF->AllCollectedTokens = result.CollectedTokens;
SF->InterfaceHash = result.InterfaceHash;
SF->InterfaceHasher = result.InterfaceHasher;

if (auto &root = result.SyntaxRoot)
SF->SyntaxRoot = std::make_unique<SourceFileSyntax>(std::move(*root));
Expand Down
10 changes: 4 additions & 6 deletions lib/Parse/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
#include "swift/SyntaxParse/SyntaxTreeCreator.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/ADT/PointerUnion.h"
Expand Down Expand Up @@ -148,7 +147,7 @@ void Parser::performCodeCompletionSecondPassImpl(
SyntaxContext->disable();

// Disable updating the interface hash
llvm::SaveAndRestore<Optional<llvm::MD5>> CurrentTokenHashSaver(
llvm::SaveAndRestore<Optional<StableHasher>> CurrentTokenHashSaver(
CurrentTokenHash, None);

auto BufferID = L->getBufferID();
Expand Down Expand Up @@ -540,7 +539,7 @@ Parser::Parser(std::unique_ptr<Lexer> Lex, SourceFile &SF,

// If the interface hash is enabled, set up the initial hash.
if (SF.hasInterfaceHash())
CurrentTokenHash.emplace();
CurrentTokenHash.emplace(StableHasher::defaultHasher());

// Set the token to a sentinel so that we know the lexer isn't primed yet.
// This cannot be tok::unknown, since that is a token the lexer could produce.
Expand Down Expand Up @@ -590,10 +589,9 @@ SourceLoc Parser::consumeTokenWithoutFeedingReceiver() {
void Parser::recordTokenHash(StringRef token) {
assert(!token.empty());
if (CurrentTokenHash) {
CurrentTokenHash->update(token);
CurrentTokenHash->combine(token);
// Add null byte to separate tokens.
uint8_t a[1] = {0};
CurrentTokenHash->update(a);
CurrentTokenHash->combine(uint8_t{0});
}
}

Expand Down
4 changes: 2 additions & 2 deletions test/Serialization/sourceinfo.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ import MyModule
// RUN: %target-swiftc_driver -emit-module -module-name MyModule -o %t/Modules/MyModule.swiftmodule %S/Inputs/SourceInfo/File1.swift %S/Inputs/SourceInfo/File2.swift
// RUN: %target-swift-ide-test -print-module-metadata -module-to-print MyModule -enable-swiftsourceinfo -I %t/Modules -source-filename %s | %FileCheck %s

// CHECK: filepath=SOURCE_DIR{{[/\\]}}test{{[/\\]}}Serialization{{[/\\]}}Inputs{{[/\\]}}SourceInfo{{[/\\]}}File1.swift; hash=b44bab617797a7239a9fa948f11eb90b; mtime={{[0-9]{4}-[0-9]{2}-[0-9]{2} .*}}; size=35
// CHECK: filepath=SOURCE_DIR{{[/\\]}}test{{[/\\]}}Serialization{{[/\\]}}Inputs{{[/\\]}}SourceInfo{{[/\\]}}File2.swift; hash=c989d6b98d505a1f52749d43ea0569a1; mtime={{[0-9]{4}-[0-9]{2}-[0-9]{2} .*}}; size=57
// CHECK: filepath=SOURCE_DIR{{[/\\]}}test{{[/\\]}}Serialization{{[/\\]}}Inputs{{[/\\]}}SourceInfo{{[/\\]}}File1.swift; hash=9da710e9b2de1fff2915639236b8929c; mtime={{[0-9]{4}-[0-9]{2}-[0-9]{2} .*}}; size=35
// CHECK: filepath=SOURCE_DIR{{[/\\]}}test{{[/\\]}}Serialization{{[/\\]}}Inputs{{[/\\]}}SourceInfo{{[/\\]}}File2.swift; hash=4ce628834bb98fd822ac840ea341de26; mtime={{[0-9]{4}-[0-9]{2}-[0-9]{2} .*}}; size=57

0 comments on commit 73ac8d3

Please sign in to comment.