Skip to content

Commit

Permalink
Merge pull request #190 from smacker/exclude_vendor
Browse files (browse the repository at this point in the history)
exclude vendored code from hashing
  • Loading branch information
smacker authored Feb 5, 2019
2 parents 067e4b3 + 69c408c commit 52ba94e
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 4 deletions.
3 changes: 2 additions & 1 deletion src/main/scala/tech/sourced/gemini/Hash.scala
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ import org.apache.spark.storage.StorageLevel
import org.bblfsh.client.BblfshClient
import org.slf4j.{Logger => Slf4jLogger}
import tech.sourced.engine._
import tech.sourced.enry.Enry
import tech.sourced.featurext.{Extractor, FEClient, SparkFEClient}
import tech.sourced.featurext.generated.service.Feature
import tech.sourced.gemini.util.MapAccumulator

import scala.collection.JavaConverters._
import scala.collection.mutable


Expand Down Expand Up @@ -125,6 +125,7 @@ class Hash(session: SparkSession,
.getCommits
.getTreeEntries
.getBlobs
.filter(r => !Enry.isVendor(r.getAs[String]("path")))
.filter('is_binary === false)
}

Expand Down
6 changes: 3 additions & 3 deletions src/test/scala/tech/sourced/gemini/SparkHashSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,13 @@ class SparkHashSpec extends FlatSpec
val hashResult = hashWithNewGemini()
val files = hashResult.files
// num of files * num of repos
- files.count() shouldEqual 6
+ files.count() shouldEqual 4
// make sure DataFrame contains correct fields
val row = files.limit(1).select("blob_id", "repository_id", "commit_hash", "path").collect().last
- row.getAs[String]("blob_id") shouldEqual "d8c728246ae60060da0d199f530f47772f89c77b"
+ row.getAs[String]("blob_id") shouldEqual "6b600b3f0a6172af59eddecef8ea39fde80fe66c"
row.getAs[String]("repository_id") shouldEqual "github.com/erizocosmico/borges.git"
row.getAs[String]("commit_hash") shouldEqual "b1fcd3bf0ba810c05cb418babc09cc7f7783cc03"
- row.getAs[String]("path") shouldEqual ".gitignore"
+ row.getAs[String]("path") shouldEqual "archiver.go"
}

"Hash" should "calculate hashes" in {
Expand Down

0 comments on commit 52ba94e

Please sign in to comment.