Skip to content

Commit

Permalink
Go repo scan (#480)
Browse files Browse the repository at this point in the history
* Real fast

* Addressing some golint stuff
  • Loading branch information
zricethezav authored Dec 8, 2020
1 parent 65a91ae commit 65f4202
Show file tree
Hide file tree
Showing 11 changed files with 307 additions and 241 deletions.
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ module github.com/zricethezav/gitleaks/v7

go 1.15

replace github.com/go-git/go-git/v5 => github.com/zricethezav/go-git/v5 v5.2.1

require (
github.com/BurntSushi/toml v0.3.1
github.com/go-git/go-git/v5 v5.2.0
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJy
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/xanzy/ssh-agent v0.2.1 h1:TCbipTQL2JiiCprBWx9frJ2eJlCYT00NmctrHxVAr70=
github.com/xanzy/ssh-agent v0.2.1/go.mod h1:mLlQY/MoOhWBj+gOGMQkOeiEvkx+8pJSI+0Bx9h2kr4=
github.com/zricethezav/go-git/v5 v5.2.1 h1:snaoKv8ksDSz7NfBRXsBr9Yr8IKEKWRWf1zdhFmcFvI=
github.com/zricethezav/go-git/v5 v5.2.1/go.mod h1:kh02eMX+wdqqxgNMEyq8YgwlIOsDOa9homkUq1PoTMs=
golang.org/x/crypto v0.0.0-20190219172222-a4c6cb3142f2/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073 h1:xMPOj6Pz6UipU1wXLkrtqpHbR0AVFnyPEQq/wRWz9lM=
Expand All @@ -78,6 +80,7 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190221075227-b4e8571b14e0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894 h1:Cz4ceDQGXuKRnVBDTS23GTn/pU5OE2C0WrNTOYK1Uuc=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527 h1:uYVVQ9WP/Ds2ROhcaGPeIdVq0RIXVLwsHlnvJ+cT1So=
golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
Expand All @@ -88,6 +91,7 @@ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGm
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
Expand Down
33 changes: 12 additions & 21 deletions scan/commit.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ type CommitScanner struct {
repo *git.Repository
repoName string
commit *object.Commit
patch *object.Patch
}

// NewCommitScanner creates and returns a commit scanner
Expand All @@ -40,12 +39,6 @@ func (cs *CommitScanner) SetRepoName(repoName string) {
cs.repoName = repoName
}

// SetPatch sets the patch to be inspected by the commit scanner. This is used to avoid
// a race condition when running a threaded repo scan
func (cs *CommitScanner) SetPatch(patch *object.Patch) {
cs.patch = patch
}

// Scan kicks off a CommitScanner Scan
func (cs *CommitScanner) Scan() (Report, error) {
var scannerReport Report
Expand All @@ -54,25 +47,23 @@ func (cs *CommitScanner) Scan() (Report, error) {
return facScanner.Scan()
}

if cs.patch == nil {
parent, err := cs.commit.Parent(0)
if err != nil {
return scannerReport, err
}
parent, err := cs.commit.Parent(0)
if err != nil {
return scannerReport, err
}

if parent == nil {
return scannerReport, nil
}
if parent == nil {
return scannerReport, nil
}

cs.patch, err = parent.Patch(cs.commit)
if err != nil {
return scannerReport, fmt.Errorf("could not generate Patch")
}
patch, err := parent.Patch(cs.commit)
if err != nil {
return scannerReport, fmt.Errorf("could not generate Patch")
}

patchContent := cs.patch.String()
patchContent := patch.String()

for _, f := range cs.patch.FilePatches() {
for _, f := range patch.FilePatches() {
if f.IsBinary() {
continue
}
Expand Down
135 changes: 58 additions & 77 deletions scan/repo.go
Original file line number Diff line number Diff line change
@@ -1,53 +1,48 @@
package scan

import (
"fmt"
"sync"
"context"

"golang.org/x/sync/errgroup"

"github.com/zricethezav/gitleaks/v7/config"
"github.com/zricethezav/gitleaks/v7/options"

log "github.com/sirupsen/logrus"

"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing/object"
"github.com/go-git/go-git/v5/plumbing/storer"
log "github.com/sirupsen/logrus"
)

// RepoScanner is a repo scanner
type RepoScanner struct {
opts options.Options
cfg config.Config
repo *git.Repository
throttle *Throttle
repoName string

leakChan chan Leak
leakWG *sync.WaitGroup
leakCache map[string]bool
leaks []Leak
}

// NewRepoScanner returns a new repo scanner (go figure). This function also
// sets up the leak listener for multi-threaded awesomeness.
func NewRepoScanner(opts options.Options, cfg config.Config, repo *git.Repository) *RepoScanner {
rs := &RepoScanner{
opts: opts,
cfg: cfg,
repo: repo,
leakChan: make(chan Leak),
leakWG: &sync.WaitGroup{},
leakCache: make(map[string]bool),
repoName: getRepoName(opts),
opts: opts,
cfg: cfg,
repo: repo,
throttle: NewThrottle(opts),
repoName: getRepoName(opts),
}

go rs.receiveLeaks()

return rs
}

// Scan kicks off a repo scan
func (rs *RepoScanner) Scan() (Report, error) {
var scannerReport Report
var (
scannerReport Report
commits chan *object.Commit
)
logOpts, err := logOptions(rs.repo, rs.opts)
if err != nil {
return scannerReport, err
Expand All @@ -56,80 +51,66 @@ func (rs *RepoScanner) Scan() (Report, error) {
if err != nil {
return scannerReport, err
}
semaphore := make(chan bool, howManyThreads(rs.opts.Threads))
wg := sync.WaitGroup{}

err = cIter.ForEach(func(c *object.Commit) error {
if c == nil || depthReached(scannerReport.Commits, rs.opts) {
return storer.ErrStop
}
g, _ := errgroup.WithContext(context.Background())
commits = make(chan *object.Commit)
leaks := make(chan Leak)

if rs.cfg.Allowlist.CommitAllowed(c.Hash.String()) {
return nil
}

// Check if at root
if len(c.ParentHashes) == 0 {
scannerReport.Commits++
facScanner := NewFilesAtCommitScanner(rs.opts, rs.cfg, rs.repo, c)
facScanner.repoName = rs.repoName
facReport, err := facScanner.Scan()
if err != nil {
return err
commitNum := 0
g.Go(func() error {
defer close(commits)
err = cIter.ForEach(func(c *object.Commit) error {
if c == nil || depthReached(commitNum, rs.opts) {
return storer.ErrStop
}

if rs.cfg.Allowlist.CommitAllowed(c.Hash.String()) {
return nil
}
commitNum++
commits <- c
if c.Hash.String() == rs.opts.CommitTo {
return storer.ErrStop
}
scannerReport.Leaks = append(scannerReport.Leaks, facReport.Leaks...)
return nil
}

// inspect first parent only as all other parents will be eventually reached
// (they exist as the tip of other branches, etc)
// See https://github.com/zricethezav/gitleaks/issues/413 for details
parent, err := c.Parent(0)
if err != nil || parent == nil {
return err
}
patch, err := parent.Patch(c)
if err != nil {
return fmt.Errorf("could not generate Patch")
}

scannerReport.Commits++
wg.Add(1)
semaphore <- true
go func(c *object.Commit, patch *object.Patch) {
defer func() {
<-semaphore
wg.Done()
}()
})
cIter.Close()
return nil
})

for commit := range commits {
c := commit
rs.throttle.Limit()
g.Go(func() error {
commitScanner := NewCommitScanner(rs.opts, rs.cfg, rs.repo, c)
commitScanner.SetRepoName(rs.repoName)
commitScanner.SetPatch(patch)
report, err := commitScanner.Scan()
rs.throttle.Release()
if err != nil {
log.Error(err)
}
for _, leak := range report.Leaks {
rs.leakWG.Add(1)
rs.leakChan <- leak
leaks <- leak
}
}(c, patch)
return nil
})
}

if c.Hash.String() == rs.opts.CommitTo {
return storer.ErrStop
}
return nil
})
go func() {
g.Wait()
close(leaks)
}()

wg.Wait()
rs.leakWG.Wait()
scannerReport.Leaks = append(scannerReport.Leaks, rs.leaks...)
return scannerReport, nil
for leak := range leaks {
scannerReport.Leaks = append(scannerReport.Leaks, leak)
}

scannerReport.Commits = commitNum
return scannerReport, g.Wait()
}

func (rs *RepoScanner) receiveLeaks() {
for leak := range rs.leakChan {
rs.leaks = append(rs.leaks, leak)
rs.leakWG.Done()
}
// SetRepoName overrides the repository name stored on the scanner,
// replacing whatever value was assigned to repoName at construction time.
// The stored name is what Scan hands to each per-commit scanner.
func (rs *RepoScanner) SetRepoName(repoName string) {
rs.repoName = repoName
}
71 changes: 56 additions & 15 deletions scan/scan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,11 @@ import (
"fmt"
"io/ioutil"
"os"
"reflect"
"runtime"
"sort"
"testing"

"github.com/zricethezav/gitleaks/v7/config"
"github.com/zricethezav/gitleaks/v7/options"

"github.com/sergi/go-diff/diffmatchpatch"
)

const testRepoBase = "../test_data/test_repos/"
Expand Down Expand Up @@ -202,6 +198,7 @@ func TestScan(t *testing.T) {
Path: "../test_data/test_repos/",
Report: "../test_data/test_local_owner_aws_leak.json.got",
ReportFormat: "json",
Threads: runtime.GOMAXPROCS(0),
},
wantPath: "../test_data/test_local_owner_aws_leak.json",
},
Expand Down Expand Up @@ -643,24 +640,68 @@ func fileCheck(wantPath, gotPath string) error {
return err
}

sort.Slice(gotLeaks, func(i, j int) bool {
return (gotLeaks)[i].Offender+(gotLeaks)[i].File < (gotLeaks)[j].Offender+(gotLeaks)[j].File
})
sort.Slice(wantLeaks, func(i, j int) bool {
return (wantLeaks)[i].Offender+(wantLeaks)[i].File < (wantLeaks)[j].Offender+(wantLeaks)[j].File
})

if !reflect.DeepEqual(gotLeaks, wantLeaks) {
dmp := diffmatchpatch.New()
diffs := dmp.DiffMain(string(want), string(got), false)
return fmt.Errorf("%s does not equal %s: %s", wantPath, gotPath, dmp.DiffPrettyText(diffs))
if len(wantLeaks) != len(gotLeaks) {
return fmt.Errorf("got %d leaks, want %d leaks", len(gotLeaks), len(wantLeaks))
}

for _, wantLeak := range wantLeaks {
found := false
for _, gotLeak := range gotLeaks {
if same(gotLeak, wantLeak) {
found = true
}
}
if !found {
return fmt.Errorf("unable to find %+v in got leaks", wantLeak)
}
}

if err := os.Remove(gotPath); err != nil {
return err
}
return nil
}

// same reports whether two leaks agree on every field this test compares:
// Commit, Offender, Line, Tags, LineNumber, Author, LeakURL, RepoURL and Repo.
// Any Leak field not listed here is ignored by the comparison.
func same(l1, l2 Leak) bool {
	// Fields are checked in a fixed order and evaluation short-circuits on
	// the first mismatch.
	return l1.Commit == l2.Commit &&
		l1.Offender == l2.Offender &&
		l1.Line == l2.Line &&
		l1.Tags == l2.Tags &&
		l1.LineNumber == l2.LineNumber &&
		l1.Author == l2.Author &&
		l1.LeakURL == l2.LeakURL &&
		l1.RepoURL == l2.RepoURL &&
		l1.Repo == l2.Repo
}

func moveDotGit(from, to string) error {
repoDirs, err := ioutil.ReadDir("../test_data/test_repos")
if err != nil {
Expand Down
Loading

0 comments on commit 65f4202

Please sign in to comment.