Skip to content

Commit

Permalink
Implement name hashing algorithm in incremental compiler.
Browse files Browse the repository at this point in the history
Provide implementation of invalidation logic that takes computed
name hashes into account. The implementation is spread amongst two
classes:

  1. `IncrementalNameHashing` which implements a variant of
     incremental compilation algorithm that computes modified
     names and delegates to `MemberReferenceInvalidationStrategy`
     when invalidating member reference dependencies
  2. `MemberReferenceInvalidationStrategy` which implements the
     core logic of dealing with dependencies introduced by member
     reference. See documentation of that class for details.

The name hashing optimization is applied when invalidating source files
having both internal and external dependencies (in initial iteration),
check `invalidateByExternal` and `invalidateSource` methods for details.

As seen in implementation of `MemberReferenceInvalidationStrategy`
the name hashing optimization is not applied when implicit members
change.

NOTE: All functionality introduced in this commit is enabled only
when `IncOptions.nameHashing` flag is set to true.

The `source-dependencies/transitive-memberRef` test has been changed
to test name hashing variant of incremental compilation. The change
to invalidated files reflects the difference between the old and the
new algorithm.

Also, there a few new tests added that cover issues previously found
while testing name hashing algorithm and are fixed in this commit.
Each paragraph describes a single test.

Add a test case which shows that detect properly changes to type aliases
in the name hashing algorithm. See gkossakowski#6 for details.

Add test covering bug with use of symbolic names (issue
gkossakowski#5).

Add a test which covers the case where we refer to a name that is
declared in the same file. See issue gkossakowski#3 for details.
  • Loading branch information
gkossakowski authored and eed3si9n committed Mar 21, 2014
1 parent 1c0553c commit 921c903
Show file tree
Hide file tree
Showing 19 changed files with 309 additions and 8 deletions.
33 changes: 33 additions & 0 deletions compile/inc/src/main/scala/sbt/inc/Changes.scala
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ package inc

import xsbt.api.NameChanges
import java.io.File
import xsbti.api.{_internalOnly_NameHashes => NameHashes}
import xsbti.api.{_internalOnly_NameHash => NameHash}

final case class InitialChanges(internalSrc: Changes[File], removedProducts: Set[File], binaryDeps: Set[File], external: APIChanges[String])
final class APIChanges[T](val apiChanges: Iterable[APIChange[T]])
Expand All @@ -22,6 +24,37 @@ sealed abstract class APIChange[T](val modified: T)
*/
case class APIChangeDueToMacroDefinition[T](modified0: T) extends APIChange(modified0)
case class SourceAPIChange[T](modified0: T) extends APIChange(modified0)
/**
* An APIChange that carries information about modified names.
*
* This class is used only when name hashing algorithm is enabled.
*/
case class NamesChange[T](modified0: T, modifiedNames: ModifiedNames) extends APIChange(modified0)

/**
* ModifiedNames are determined by comparing name hashes in two versions of an API representation.
*
* Note that we distinguish between sets of regular (non-implicit) and implicit modified names.
* This distinction is needed because the name hashing algorithm makes different decisions based
* on whether modified name is implicit or not. Implicit names are much more difficult to handle
* due to difficulty of reasoning about the implicit scope.
*/
case class ModifiedNames(regularNames: Set[String], implicitNames: Set[String]) {
override def toString: String =
s"ModifiedNames(regularNames = ${regularNames mkString ", "}, implicitNames = ${implicitNames mkString ", "})"
}
object ModifiedNames {
def compareTwoNameHashes(a: NameHashes, b: NameHashes): ModifiedNames = {
val modifiedRegularNames = calculateModifiedNames(a.regularMembers.toSet, b.regularMembers.toSet)
val modifiedImplicitNames = calculateModifiedNames(a.implicitMembers.toSet, b.implicitMembers.toSet)
ModifiedNames(modifiedRegularNames, modifiedImplicitNames)
}
private def calculateModifiedNames(xs: Set[NameHash], ys: Set[NameHash]): Set[String] = {
val differentNameHashes = (xs union ys) diff (xs intersect ys)
differentNameHashes.map(_.name)
}
}


trait Changes[A]
{
Expand Down
95 changes: 89 additions & 6 deletions compile/inc/src/main/scala/sbt/inc/Incremental.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,11 @@ object Incremental
log: Logger,
options: IncOptions)(implicit equivS: Equiv[Stamp]): (Boolean, Analysis) =
{
assert(!options.nameHashing, "We don't support name hashing algorithm yet.")
val incremental = new IncrementalDefaultImpl(log, options)
val incremental: IncrementalCommon =
if (!options.nameHashing)
new IncrementalDefaultImpl(log, options)
else
new IncrementalNameHashing(log, options)
val initialChanges = incremental.changedInitial(entry, sources, previous, current, forEntry)
val binaryChanges = new DependencyChanges {
val modifiedBinaries = initialChanges.binaryDeps.toArray
Expand Down Expand Up @@ -128,7 +131,8 @@ private abstract class IncrementalCommon(log: Logger, options: IncOptions) {
apiChanges foreach {
case APIChangeDueToMacroDefinition(src) =>
log.debug(s"Public API is considered to be changed because $src contains a macro definition.")
case SourceAPIChange(src) =>
case apiChange@(_: SourceAPIChange[T] | _: NamesChange[T]) =>
val src = apiChange.modified
val oldApi = oldAPIMapping(src)
val newApi = newAPIMapping(src)
val apiUnifiedPatch = apiDiff.generateApiDiff(src.toString, oldApi.api, newApi.api, contextSize)
Expand Down Expand Up @@ -176,7 +180,7 @@ private abstract class IncrementalCommon(log: Logger, options: IncOptions) {
}
}

protected def sameAPI[T](src: T, a: Source, b: Source): Option[SourceAPIChange[T]]
protected def sameAPI[T](src: T, a: Source, b: Source): Option[APIChange[T]]

def shortcutSameSource(a: Source, b: Source): Boolean = !a.hash.isEmpty && !b.hash.isEmpty && sameCompilation(a.compilation, b.compilation) && (a.hash.deep equals b.hash.deep)
def sameCompilation(a: Compilation, b: Compilation): Boolean = a.startTime == b.startTime && a.outputs.corresponds(b.outputs){
Expand Down Expand Up @@ -475,11 +479,90 @@ private final class IncrementalDefaultImpl(log: Logger, options: IncOptions) ext
log.debug("Invalidated by transitive public inheritance: " + transitiveInherited)
val direct = transitiveInherited flatMap directDeps
log.debug("Invalidated by direct dependency: " + direct)
val all = transitiveInherited ++ direct
all
transitiveInherited ++ direct
}

override protected def allDeps(relations: Relations): File => Set[File] =
f => relations.direct.internal.reverse(f)

}

/**
* Implementation of incremental algorithm known as "name hashing". It differs from the default implementation
* by applying pruning (filter) of member reference dependencies based on used and modified simple names.
*
* See MemberReferenceInvalidationStrategy for some more information.
*/
private final class IncrementalNameHashing(log: Logger, options: IncOptions) extends IncrementalCommon(log, options) {

private val memberRefInvalidator = new MemberRefInvalidator(log)

// Package objects are fragile: if they inherit from an invalidated source, get "class file needed by package is missing" error
// This might be too conservative: we probably only need package objects for packages of invalidated sources.
override protected def invalidatedPackageObjects(invalidated: Set[File], relations: Relations): Set[File] =
invalidated flatMap relations.inheritance.internal.reverse filter { _.getName == "package.scala" }

override protected def sameAPI[T](src: T, a: Source, b: Source): Option[APIChange[T]] = {
if (SameAPI(a,b))
None
else {
val aNameHashes = a._internalOnly_nameHashes
val bNameHashes = b._internalOnly_nameHashes
val modifiedNames = ModifiedNames.compareTwoNameHashes(aNameHashes, bNameHashes)
val apiChange = NamesChange(src, modifiedNames)
Some(apiChange)
}
}

/** Invalidates sources based on initially detected 'changes' to the sources, products, and dependencies.*/
override protected def invalidateByExternal(relations: Relations, externalAPIChange: APIChange[String]): Set[File] = {
val modified = externalAPIChange.modified
val invalidationReason = memberRefInvalidator.invalidationReason(externalAPIChange)
log.debug(s"$invalidationReason\nAll member reference dependencies will be considered within this context.")
// Propagate inheritance dependencies transitively.
// This differs from normal because we need the initial crossing from externals to sources in this project.
val externalInheritanceR = relations.inheritance.external
val byExternalInheritance = externalInheritanceR.reverse(modified)
log.debug(s"Files invalidated by inheriting from (external) $modified: $byExternalInheritance; now invalidating by inheritance (internally).")
val transitiveInheritance = byExternalInheritance flatMap { file =>
invalidateByInheritance(relations, file)
}
val memberRefInvalidationInternal = memberRefInvalidator.get(relations.memberRef.internal,
relations.names, externalAPIChange)
val memberRefInvalidationExternal = memberRefInvalidator.get(relations.memberRef.external,
relations.names, externalAPIChange)

// Get the member reference dependencies of all sources transitively invalidated by inheritance
log.debug("Getting direct dependencies of all sources transitively invalidated by inheritance.")
val memberRefA = transitiveInheritance flatMap memberRefInvalidationInternal
// Get the sources that depend on externals by member reference.
// This includes non-inheritance dependencies and is not transitive.
log.debug(s"Getting sources that directly depend on (external) $modified.")
val memberRefB = memberRefInvalidationExternal(modified)
transitiveInheritance ++ memberRefA ++ memberRefB
}

private def invalidateByInheritance(relations: Relations, modified: File): Set[File] = {
val inheritanceDeps = relations.inheritance.internal.reverse _
log.debug(s"Invalidating (transitively) by inheritance from $modified...")
val transitiveInheritance = transitiveDeps(Set(modified))(inheritanceDeps)
log.debug("Invalidated by transitive inheritance dependency: " + transitiveInheritance)
transitiveInheritance
}

override protected def invalidateSource(relations: Relations, change: APIChange[File]): Set[File] = {
log.debug(s"Invalidating ${change.modified}...")
val transitiveInheritance = invalidateByInheritance(relations, change.modified)
val reasonForInvalidation = memberRefInvalidator.invalidationReason(change)
log.debug(s"$reasonForInvalidation\nAll member reference dependencies will be considered within this context.")
val memberRefInvalidation = memberRefInvalidator.get(relations.memberRef.internal,
relations.names, change)
val memberRef = transitiveInheritance flatMap memberRefInvalidation
val all = transitiveInheritance ++ memberRef
all
}

override protected def allDeps(relations: Relations): File => Set[File] =
f => relations.memberRef.internal.reverse(f)

}
124 changes: 124 additions & 0 deletions compile/inc/src/main/scala/sbt/inc/MemberRefInvalidator.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
package sbt.inc

import sbt.Relation
import java.io.File
import sbt.Logger
import xsbt.api.APIUtil

/**
* Implements various strategies for invalidating dependencies introduced by member reference.
*
* The strategy is represented as function T => Set[File] where T is a source file that other
* source files depend on. When you apply that function to given element `src` you get set of
* files that depend on `src` by member reference and should be invalidated due to api change
* that was passed to a method constructing that function. There are two questions that arise:
*
* 1. Why is signature T => Set[File] and not T => Set[T] or File => Set[File]?
* 2. Why would we apply that function to any other `src` that then one that got modified
* and the modification is described by APIChange?
*
* Let's address the second question with the following example of source code structure:
*
* // A.scala
* class A
*
* // B.scala
* class B extends A
*
* // C.scala
* class C { def foo(a: A) = ??? }
*
* // D.scala
* class D { def bar(b: B) = ??? }
*
* Member reference dependencies on A.scala are B.scala, C.scala. When the api of A changes
* then we would consider B and C for invalidation. However, B is also a dependency by inheritance
* so we always invalidate it. The api change to A is relevant when B is considered (because
* of how inheritance works) so we would invalidate B by inheritance and then we would like to
* invalidate member reference dependencies of B as well. In other words, we have a function
* because we want to apply it (with the same api change in mind) to all src files invalidated
* by inheritance of the originally modified file.
*
* The first question is a bit more straightforward to answer. We always invalidate internal
* source files (in given project) that are represented as File but they might depend either on
* internal source files (then T=File) or they can depend on external class name (then T=String).
*
* The specific invalidation strategy is determined based on APIChange that describes a change to api
* of a single source file.
*
* For example, if we get APIChangeDueToMacroDefinition then we invalidate all member reference
* dependencies unconditionally. On the other hand, if api change is due to modified name hashes
* of regular members then we'll invalidate sources that use those names.
*/
private[inc] class MemberRefInvalidator(log: Logger) {
def get[T](memberRef: Relation[File, T], usedNames: Relation[File, String], apiChange: APIChange[_]):
T => Set[File] = apiChange match {
case _: APIChangeDueToMacroDefinition[_] =>
new InvalidateUnconditionally(memberRef)
case NamesChange(_, modifiedNames) if !modifiedNames.implicitNames.isEmpty =>
new InvalidateUnconditionally(memberRef)
case NamesChange(modifiedSrcFile, modifiedNames) =>
new NameHashFilteredInvalidator[T](usedNames, memberRef, modifiedNames.regularNames)
case _: SourceAPIChange[_] =>
sys.error(wrongAPIChangeMsg)
}

def invalidationReason(apiChange: APIChange[_]): String = apiChange match {
case APIChangeDueToMacroDefinition(modifiedSrcFile) =>
s"The $modifiedSrcFile source file declares a macro."
case NamesChange(modifiedSrcFile, modifiedNames) if !modifiedNames.implicitNames.isEmpty =>
s"""|The $modifiedSrcFile source file has the following implicit definitions changed:
|\t${modifiedNames.implicitNames.mkString(", ")}.""".stripMargin
case NamesChange(modifiedSrcFile, modifiedNames) =>
s"""|The $modifiedSrcFile source file has the following regular definitions changed:
|\t${modifiedNames.regularNames.mkString(", ")}.""".stripMargin
case _: SourceAPIChange[_] =>
sys.error(wrongAPIChangeMsg)
}

private val wrongAPIChangeMsg =
"MemberReferenceInvalidator.get should be called when name hashing is enabled " +
"and in that case we shouldn't have SourceAPIChange as an api change."

private class InvalidateUnconditionally[T](memberRef: Relation[File, T]) extends (T => Set[File]) {
def apply(from: T): Set[File] = {
val invalidated = memberRef.reverse(from)
if (!invalidated.isEmpty)
log.debug(s"The following member ref dependencies of $from are invalidated:\n" +
formatInvalidated(invalidated))
invalidated
}
private def formatInvalidated(invalidated: Set[File]): String = {
val sortedFiles = invalidated.toSeq.sortBy(_.getAbsolutePath)
sortedFiles.map(file => "\t"+file).mkString("\n")
}
}

private class NameHashFilteredInvalidator[T](
usedNames: Relation[File, String],
memberRef: Relation[File, T],
modifiedNames: Set[String]) extends (T => Set[File]) {

def apply(to: T): Set[File] = {
val dependent = memberRef.reverse(to)
filteredDependencies(dependent)
}
private def filteredDependencies(dependent: Set[File]): Set[File] = {
dependent.filter {
case from if APIUtil.isScalaSourceName(from.getName) =>
val usedNamesInDependent = usedNames.forward(from)
val modifiedAndUsedNames = modifiedNames intersect usedNamesInDependent
if (modifiedAndUsedNames.isEmpty) {
log.debug(s"None of the modified names appears in $from. This dependency is not being considered for invalidation.")
false
} else {
log.debug(s"The following modified names cause invalidation of $from: $modifiedAndUsedNames")
true
}
case from =>
log.debug(s"Name hashing optimization doesn't apply to non-Scala dependency: $from")
true
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
object A {
def `=` = 3
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
object B extends App {
println(A.`=`)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
incOptions := incOptions.value.withNameHashing(true)
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
object A {
def asdf = 3
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
> compile

# rename def with symbolic name (`=`)
$ copy-file changes/A.scala A.scala

# Both A.scala and B.scala should be recompiled, producing a compile error
-> compile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
object A {
def x = 3

def y = {
import B._
x
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
object B {
// def x = 3
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
incOptions := incOptions.value.withNameHashing(true)
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
object B {
def x = 3
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
> compile

# uncomment definition of `x` that leads to ambiguity error in A
$ copy-file changes/B.scala B.scala

# Both A.scala and B.scala should be recompiled, producing a compile error
-> compile
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
logLevel := Level.Debug

incOptions := incOptions.value.withNameHashing(true)

// disable sbt's heauristic which recompiles everything in case
// some fraction (e.g. 50%) of files is scheduled to be recompiled
// in this test we want precise information about recompiled files
Expand All @@ -24,13 +26,13 @@ TaskKey[Unit]("check-compilations") <<= (compile in Compile, scalaSource in Comp
assert(recompiledFiles(iteration) == files, "%s != %s".format(recompiledFiles(iteration), files))
}
// Y.scala is compiled only at the beginning as changes to A.scala do not affect it
recompiledFilesInIteration(0, Set("Y.scala"))
recompiledFilesInIteration(0, Set("X.scala", "Y.scala"))
// A.scala is changed and recompiled
recompiledFilesInIteration(1, Set("A.scala"))
// change in A.scala causes recompilation of B.scala, C.scala, D.scala which depend on transtiviely
// and by inheritance on A.scala
// X.scala is also recompiled because it depends by member reference on B.scala
// Note that Y.scala is not recompiled because it depends just on X through member reference dependency
recompiledFilesInIteration(2, Set("B.scala", "C.scala", "D.scala", "X.scala"))
recompiledFilesInIteration(2, Set("B.scala", "C.scala", "D.scala"))
assert(allCompilations.size == 3)
}
4 changes: 4 additions & 0 deletions sbt/src/sbt-test/source-dependencies/type-alias/A.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
object A {
type X = Option[Int]
}

3 changes: 3 additions & 0 deletions sbt/src/sbt-test/source-dependencies/type-alias/B.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
object B {
def y: A.X = Option(3)
}
3 changes: 3 additions & 0 deletions sbt/src/sbt-test/source-dependencies/type-alias/build.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
logLevel in compile := Level.Debug

incOptions := incOptions.value.withNameHashing(true)
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
object A {
type X = Int
}
7 changes: 7 additions & 0 deletions sbt/src/sbt-test/source-dependencies/type-alias/test
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
> compile

# change type alias
$ copy-file changes/A.scala A.scala

# Both A.scala and B.scala should be recompiled, producing a compile error
-> compile

0 comments on commit 921c903

Please sign in to comment.