Add support for tracking names used in Scala source files.

Tracking of used names is a component needed by the name hashing algorithm. The extraction and storage of used names is active only when `AnalysisCallback.nameHashing` flag is enabled and it's disabled by default. This change constists of two parts: 1. Modification of Relations to include a new `names` relation that allows us to track used names in Scala source files 2. Implementation of logic that extracts used names from Scala compilation units (that correspond to Scala source files) The first part is straightforward: add standard set of methods in Relations (along with their implementation) and update the logic which serializes and deserializes Relations. The second part is implemented as tree walk that collects all symbols associated with trees. For each symbol we extract a simple, decoded name and add it to a set of extracted names. Check documentation of `ExtractUsedNames` for discussion of implementation details. The `ExtractUsedNames` comes with unit tests grouped in `ExtractUsedNamesSpecification`. Check that class for details. Given the fact that we fork while running tests in `compiler-interface` subproject and tests are ran in parallel which involves allocating multiple Scala compiler instances we had to bump the default memory limit. This commit contains fixes for gkossakowski#3, gkossakowski#5 and gkossakowski#6 issues.
sbt · Mar 21, 2014 · 81f5086 · 81f5086
1 parent 3206f77
commit 81f5086
Show file tree

Hide file tree

Showing 11 changed files with 306 additions and 22 deletions.
diff --git a/compile/inc/src/main/scala/sbt/inc/Compile.scala b/compile/inc/src/main/scala/sbt/inc/Compile.scala
@@ -67,6 +67,7 @@ private final class AnalysisCallback(internalMap: File => Option[File], external
 	import collection.mutable.{HashMap, HashSet, ListBuffer, Map, Set}
 
 	private[this] val apis = new HashMap[File, (Int, SourceAPI)]
+	private[this] val usedNames = new HashMap[File, Set[String]]
 	private[this] val unreporteds = new HashMap[File, ListBuffer[Problem]]
 	private[this] val reporteds = new HashMap[File, ListBuffer[Problem]]
 	private[this] val binaryDeps = new HashMap[File, Set[File]]
@@ -151,9 +152,11 @@ private final class AnalysisCallback(internalMap: File => Option[File], external
 		apis(sourceFile) = (HashAPI(source), savedSource)
 	}
 
+	def usedName(sourceFile: File, name: String) = add(usedNames, sourceFile, name)
+
 	def nameHashing: Boolean = false // TODO: define the flag in IncOptions which controls this
 
-	def get: Analysis = addCompilation( addExternals( addBinaries( addProducts( addSources(Analysis.Empty) ) ) ) )
+	def get: Analysis = addUsedNames( addCompilation( addExternals( addBinaries( addProducts( addSources(Analysis.Empty) ) ) ) ) )
 	def addProducts(base: Analysis): Analysis = addAll(base, classes) { case (a, src, (prod, name)) => a.addProduct(src, prod, current product prod, name ) }
 	def addBinaries(base: Analysis): Analysis = addAll(base, binaryDeps)( (a, src, bin) => a.addBinaryDep(src, bin, binaryClassName(bin), current binary bin) )
 	def addSources(base: Analysis): Analysis =
@@ -171,6 +174,9 @@ private final class AnalysisCallback(internalMap: File => Option[File], external
 	def getOrNil[A,B](m: collection.Map[A, Seq[B]], a: A): Seq[B] = m.get(a).toList.flatten
 	def addExternals(base: Analysis): Analysis = (base /: extSrcDeps) { case (a, (source, name, api, inherited)) => a.addExternalDep(source, name, api, inherited) }
 	def addCompilation(base: Analysis): Analysis = base.copy(compilations = base.compilations.add(compilation))
+	def addUsedNames(base: Analysis): Analysis = (base /: usedNames) { case (a, (src, names)) =>
+	  (a /: names) { case (a, name) => a.copy(relations = a.relations.addUsedName(src, name)) }
+	}
 
 	def addAll[A,B](base: Analysis, m: Map[A, Set[B]])( f: (Analysis, A, B) => Analysis): Analysis =
 		(base /: m) { case (outer, (a, bs)) =>

diff --git a/compile/inc/src/main/scala/sbt/inc/Relations.scala b/compile/inc/src/main/scala/sbt/inc/Relations.scala
@@ -58,6 +58,8 @@ trait Relations
 	/** Internal source dependencies that depend on external source file `dep`.  This includes both direct and inherited dependencies.  */
 	def usesExternal(dep: String): Set[File]
 
+	private[inc] def usedNames(src: File): Set[String]
+
 	/** Records internal source file `src` as generating class file `prod` with top-level class `name`. */
 	def addProduct(src: File, prod: File, name: String): Relations
 
@@ -74,6 +76,8 @@ trait Relations
 	* this method does not automatically record direct dependencies like `addExternalDep` does.*/
 	def addInternalSrcDeps(src: File, directDependsOn: Iterable[File], inheritedDependsOn: Iterable[File]): Relations
 
+	private[inc] def addUsedName(src: File, name: String): Relations
+
 	/** Concatenates the two relations. Acts naively, i.e., doesn't internalize external deps on added files. */
 	def ++ (o: Relations): Relations
 
@@ -163,6 +167,10 @@ trait Relations
 	 * relations is illegal and will cause runtime exception being thrown.
 	 */
 	private[inc] def nameHashing: Boolean
+	/**
+	 * Relation between source files and _unqualified_ term and type names used in given source file.
+	 */
+	private[inc] def names: Relation[File, String]
 }
 
 
@@ -221,16 +229,18 @@ object Relations
 	def empty: Relations = empty(nameHashing = false)
 	private[inc] def empty(nameHashing: Boolean): Relations =
 		if (nameHashing)
-			new MRelationsNameHashing(e, e, emptySourceDependencies, emptySourceDependencies, estr)
+			new MRelationsNameHashing(e, e, emptySourceDependencies, emptySourceDependencies, estr, estr)
 		else
 			new MRelationsDefaultImpl(e, e, es, es, estr)
 
 	def make(srcProd: Relation[File, File], binaryDep: Relation[File, File], direct: Source, publicInherited: Source, classes: Relation[File, String]): Relations =
 		new MRelationsDefaultImpl(srcProd, binaryDep, direct = direct, publicInherited = publicInherited, classes)
 
 	private[inc] def make(srcProd: Relation[File, File], binaryDep: Relation[File, File],
-			memberRef: SourceDependencies, inheritance: SourceDependencies, classes: Relation[File, String]): Relations =
-		new MRelationsNameHashing(srcProd, binaryDep, memberRef = memberRef, inheritance = inheritance, classes)
+			memberRef: SourceDependencies, inheritance: SourceDependencies, classes: Relation[File, String],
+			names: Relation[File, String]): Relations =
+		new MRelationsNameHashing(srcProd, binaryDep, memberRef = memberRef, inheritance = inheritance,
+			classes, names)
 	def makeSource(internal: Relation[File,File], external: Relation[File,String]): Source = new Source(internal, external)
 	private[inc] def makeSourceDependencies(internal: Relation[File,File], external: Relation[File,String]): SourceDependencies = new SourceDependencies(internal, external)
 }
@@ -281,6 +291,8 @@ private abstract class MRelationsCommon(val srcProd: Relation[File, File], val b
 	def externalDeps(src: File): Set[String] = externalDep.forward(src)
 	def usesExternal(dep: String): Set[File] = externalDep.reverse(dep)
 
+	def usedNames(src: File): Set[String] = names.forward(src)
+
   /** Making large Relations a little readable. */
   private val userDir = sys.props("user.dir").stripSuffix("/") + "/"
   private def nocwd(s: String)              = s stripPrefix userDir
@@ -348,6 +360,14 @@ private class MRelationsDefaultImpl(srcProd: Relation[File, File], binaryDep: Re
 			new MRelationsDefaultImpl( srcProd, binaryDep, direct = newD, publicInherited = newI, classes)
 		}
 
+	def names: Relation[File, String] =
+		throw new UnsupportedOperationException("Tracking of used names is not supported " +
+			"when `nameHashing` is disabled.")
+
+	def addUsedName(src: File, name: String): Relations =
+		throw new UnsupportedOperationException("Tracking of used names is not supported " +
+			"when `nameHashing` is disabled.")
+
 	def addBinaryDep(src: File, dependsOn: File): Relations =
 		new MRelationsDefaultImpl( srcProd, binaryDep + (src, dependsOn), direct = direct,
 			publicInherited = publicInherited, classes)
@@ -368,7 +388,8 @@ private class MRelationsDefaultImpl(srcProd: Relation[File, File], binaryDep: Re
 	{
 		type MapRel[T] = Map[K, Relation[File, T]]
 		def outerJoin(srcProdMap: MapRel[File], binaryDepMap: MapRel[File], direct: Map[K, Source],
-					  inherited: Map[K, Source], classesMap: MapRel[String]): Map[K, Relations] =
+					  inherited: Map[K, Source], classesMap: MapRel[String],
+					  namesMap: MapRel[String]): Map[K, Relations] =
 		{
 			def kRelations(k: K): Relations = {
 				def get[T](m: Map[K, Relation[File, T]]) = Relations.getOrEmpty(m, k)
@@ -385,7 +406,7 @@ private class MRelationsDefaultImpl(srcProd: Relation[File, File], binaryDep: Re
 		def f1[B](item: (File, B)): K = f(item._1)
 
 		outerJoin(srcProd.groupBy(f1), binaryDep.groupBy(f1), direct.groupBySource(f),
-				  publicInherited.groupBySource(f), classes.groupBy(f1))
+				  publicInherited.groupBySource(f), classes.groupBy(f1), names.groupBy(f1))
     }
 
 	override def equals(other: Any) = other match {
@@ -413,7 +434,8 @@ private class MRelationsDefaultImpl(srcProd: Relation[File, File], binaryDep: Re
 	  |  src deps: %s
 	  |  ext deps: %s
 	  |  class names: %s
-	  """.trim.stripMargin.format(List(srcProd, binaryDep, internalSrcDep, externalDep, classes) map relation_s : _*)
+	  |  used names: %s
+	  """.trim.stripMargin.format(List(srcProd, binaryDep, internalSrcDep, externalDep, classes, names) map relation_s : _*)
 	)
 }
 
@@ -425,7 +447,8 @@ private class MRelationsDefaultImpl(srcProd: Relation[File, File], binaryDep: Re
 private class MRelationsNameHashing(srcProd: Relation[File, File], binaryDep: Relation[File, File],
 	// memberRef should include everything in inherited
 	val memberRef: SourceDependencies, val inheritance: SourceDependencies,
-	classes: Relation[File, String]) extends MRelationsCommon(srcProd, binaryDep, classes)
+	classes: Relation[File, String],
+	val names: Relation[File, String]) extends MRelationsCommon(srcProd, binaryDep, classes)
 {
 	def direct: Source =
 		throw new UnsupportedOperationException("The `direct` source dependencies relation is not supported " +
@@ -441,35 +464,42 @@ private class MRelationsNameHashing(srcProd: Relation[File, File], binaryDep: Re
 
 	def addProduct(src: File, prod: File, name: String): Relations =
 		new MRelationsNameHashing(srcProd + (src, prod), binaryDep, memberRef = memberRef,
-			inheritance = inheritance, classes + (src, name))
+			inheritance = inheritance, classes + (src, name), names = names)
 
 	def addExternalDep(src: File, dependsOn: String, inherited: Boolean): Relations = {
 		val newIH = if(inherited) inheritance.addExternal(src, dependsOn) else inheritance
 		val newMR = memberRef.addExternal(src, dependsOn)
-		new MRelationsNameHashing( srcProd, binaryDep, memberRef = newMR, inheritance = newIH, classes)
+		new MRelationsNameHashing( srcProd, binaryDep, memberRef = newMR, inheritance = newIH, classes,
+			names = names)
 	}
 
 	def addInternalSrcDeps(src: File, dependsOn: Iterable[File], inherited: Iterable[File]): Relations = {
 		val newIH = inheritance.addInternal(src, inherited)
 		val newMR = memberRef.addInternal(src, dependsOn)
-		new MRelationsNameHashing( srcProd, binaryDep, memberRef = newMR, inheritance = newIH, classes)
+		new MRelationsNameHashing( srcProd, binaryDep, memberRef = newMR, inheritance = newIH, classes,
+			names = names)
 	}
 
+	def addUsedName(src: File, name: String): Relations =
+		new MRelationsNameHashing(srcProd, binaryDep, memberRef = memberRef,
+			inheritance = inheritance, classes, names = names + (src, name))
+
 	def addBinaryDep(src: File, dependsOn: File): Relations =
 		new MRelationsNameHashing(srcProd, binaryDep + (src, dependsOn), memberRef = memberRef,
-			inheritance = inheritance, classes)
+			inheritance = inheritance, classes, names = names)
 
 	def ++ (o: Relations): Relations = {
 		if (!o.nameHashing)
 			throw new UnsupportedOperationException("The `++` operation is not supported for relations " +
 				"with different values of `nameHashing` flag.")
 		new MRelationsNameHashing(srcProd ++ o.srcProd, binaryDep ++ o.binaryDep,
 			memberRef = memberRef ++ o.memberRef, inheritance = inheritance ++ o.inheritance,
-			classes ++ o.classes)
+			classes ++ o.classes, names = names ++ o.names)
 	}
 	def -- (sources: Iterable[File]) =
 		new MRelationsNameHashing(srcProd -- sources, binaryDep -- sources,
-			memberRef = memberRef -- sources, inheritance = inheritance -- sources, classes -- sources)
+			memberRef = memberRef -- sources, inheritance = inheritance -- sources, classes -- sources,
+			names = names -- sources)
 
 	def groupBy[K](f: File => K): Map[K, Relations] = {
 		throw new UnsupportedOperationException("Merging of Analyses that have" +

diff --git a/compile/interface/src/main/scala/xsbt/API.scala b/compile/interface/src/main/scala/xsbt/API.scala
@@ -43,6 +43,12 @@ final class API(val global: CallbackGlobal) extends Compat
 			val extractApi = new ExtractAPI[global.type](global, sourceFile)
 			val traverser = new TopLevelHandler(extractApi)
 			traverser.apply(unit.body)
+			if (global.callback.nameHashing) {
+				val extractUsedNames = new ExtractUsedNames[global.type](global)
+				val names = extractUsedNames.extract(unit)
+				debug("The " + sourceFile + " contains the following used names " + names)
+				names foreach { (name: String) => callback.usedName(sourceFile, name) }
+			}
 			val packages = traverser.packages.toArray[String].map(p => new xsbti.api.Package(p))
 			val source = new xsbti.api.SourceAPI(packages, traverser.definitions.toArray[xsbti.api.Definition])
 			extractApi.forceStructures()

diff --git a/compile/interface/src/main/scala/xsbt/ExtractUsedNames.scala b/compile/interface/src/main/scala/xsbt/ExtractUsedNames.scala
@@ -0,0 +1,103 @@
+package xsbt
+
+import scala.tools.nsc._
+
+/**
+ * Extracts simple names used in given compilation unit.
+ *
+ * Extracts simple (unqualified) names mentioned in given in non-definition position by collecting
+ * all symbols associated with non-definition trees and extracting names from all collected symbols.
+ *
+ * If given symbol is mentioned both in definition and in non-definition position (e.g. in member
+ * selection) then that symbol is collected. It means that names of symbols defined and used in the
+ * same compilation unit are extracted. We've considered not extracting names of those symbols
+ * as an optimization strategy. It turned out that this is not correct.  Check
+ * https://github.com/gkossakowski/sbt/issues/3 for an example of scenario where it matters.
+ *
+ * All extracted names are returned in _decoded_ form. This way we stay consistent with the rest
+ * of incremental compiler which works with names in decoded form.
+ *
+ * Names mentioned in Import nodes are handled properly but require some special logic for two
+ * reasons:
+ *
+ *   1. import node itself has a term symbol associated with it with a name `<import`>.
+ *      I (gkossakowski) tried to track down what role this symbol serves but I couldn't.
+ *      It doesn't look like there are many places in Scala compiler that refer to
+ *      that kind of symbols explicitly.
+ *   2. ImportSelector is not subtype of Tree therefore is not processed by `Tree.foreach`
+ *
+ * Another type of tree nodes that requires special handling is TypeTree. TypeTree nodes
+ * has a little bit odd representation:
+ *
+ *   1. TypeTree.hasSymbol always returns false even when TypeTree.symbol
+ *      returns a symbol
+ *   2. The original tree from which given TypeTree was derived is stored
+ *      in TypeTree.original but Tree.forech doesn't walk into original
+ *      tree so we missed it
+ *
+ * The tree walking algorithm walks into TypeTree.original explicitly.
+ *
+ */
+class ExtractUsedNames[GlobalType <: CallbackGlobal](val global: GlobalType) {
+	import global._
+
+	def extract(unit: CompilationUnit): Set[String] = {
+		val tree = unit.body
+		val extractedByTreeWalk = extractByTreeWalk(tree)
+		extractedByTreeWalk
+	}
+
+	private def extractByTreeWalk(tree: Tree): Set[String] = {
+		val namesBuffer = collection.mutable.ListBuffer.empty[String]
+		def addSymbol(symbol: Symbol): Unit = {
+			val symbolNameAsString = symbol.name.decode.trim
+			namesBuffer += symbolNameAsString
+		}
+		def handleTreeNode(node: Tree): Unit = node match {
+			case _: DefTree | _: Template => ()
+			// turns out that Import node has a TermSymbol associated with it
+			// I (Grzegorz) tried to understand why it's there and what does it represent but
+			// that logic was introduced in 2005 without any justification I'll just ignore the
+			// import node altogether and just process the selectors in the import node
+			case Import(_, selectors: List[ImportSelector]) =>
+				def usedNameInImportSelector(name: Name): Unit =
+					if ((name != null) && (name != nme.WILDCARD)) namesBuffer += name.toString
+				selectors foreach { selector =>
+					usedNameInImportSelector(selector.name)
+					usedNameInImportSelector(selector.rename)
+				}
+			// TODO: figure out whether we should process the original tree or walk the type
+			// the argument for processing the original tree: we process what user wrote
+			// the argument for processing the type: we catch all transformations that typer applies
+			// to types but that might be a bad thing because it might expand aliases eagerly which
+			// not what we need
+			case t: TypeTree if t.original != null =>
+				t.original.foreach(handleTreeNode)
+			case t if t.hasSymbol && eligibleAsUsedName(t.symbol) =>
+				addSymbol(t.symbol)
+			case _ => ()
+		}
+		tree.foreach(handleTreeNode)
+		namesBuffer.toSet
+	}
+
+
+	/**
+	 * Needed for compatibility with Scala 2.8 which doesn't define `tpnme`
+	 */
+	private object tpnme {
+		val EMPTY = nme.EMPTY.toTypeName
+		val EMPTY_PACKAGE_NAME = nme.EMPTY_PACKAGE_NAME.toTypeName
+	}
+
+	private def eligibleAsUsedName(symbol: Symbol): Boolean = {
+		def emptyName(name: Name): Boolean = name match {
+			case nme.EMPTY | nme.EMPTY_PACKAGE_NAME | tpnme.EMPTY | tpnme.EMPTY_PACKAGE_NAME => true
+			case _ => false
+		}
+
+		(symbol != NoSymbol) &&
+		!symbol.isSynthetic &&
+		!emptyName(symbol.name)
+	}
+}