Skip to content

Commit

Permalink
refactor: fix #722 properly (#1250)
Browse files Browse the repository at this point in the history
* refactor: fix #722 properly

* fix(deps): update go-gitdiff to v0.9.0

* refactor: second attempt

* refactor: better git package api

* fix: add comments
  • Loading branch information
savely-krasovsky authored Aug 22, 2023
1 parent b1a2ce7 commit 4526655
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 73 deletions.
86 changes: 49 additions & 37 deletions detect/detect.go
Original file line number Diff line number Diff line change
Expand Up @@ -357,80 +357,92 @@ func (d *Detector) detectRule(fragment Fragment, rule config.Rule) []report.Find
return findings
}

// GitScan accepts a *gitdiff.File channel which contents a git history generated from
// the output of `git log -p ...`. startGitScan will look at each file (patch) in the history
// and determine if the patch contains any findings.
// DetectGit accepts source directory, log opts and GitScanType and returns a slice of report.Finding.
func (d *Detector) DetectGit(source string, logOpts string, gitScanType GitScanType) ([]report.Finding, error) {
var (
gitdiffFiles <-chan *gitdiff.File
diffFilesCmd *git.DiffFilesCmd
err error
)
switch gitScanType {
case DetectType:
gitdiffFiles, err = git.GitLog(source, logOpts)
diffFilesCmd, err = git.NewGitLogCmd(source, logOpts)
if err != nil {
return d.findings, err
}
case ProtectType:
gitdiffFiles, err = git.GitDiff(source, false)
diffFilesCmd, err = git.NewGitDiffCmd(source, false)
if err != nil {
return d.findings, err
}
case ProtectStagedType:
gitdiffFiles, err = git.GitDiff(source, true)
diffFilesCmd, err = git.NewGitDiffCmd(source, true)
if err != nil {
return d.findings, err
}
}
defer diffFilesCmd.Wait()
diffFilesCh := diffFilesCmd.DiffFilesCh()
errCh := diffFilesCmd.ErrCh()

s := semgroup.NewGroup(context.Background(), 4)

for gitdiffFile := range gitdiffFiles {
gitdiffFile := gitdiffFile

// skip binary files
if gitdiffFile.IsBinary || gitdiffFile.IsDelete {
continue
}
// loop to range over both DiffFiles (stdout) and ErrCh (stderr)
for diffFilesCh != nil || errCh != nil {
select {
case gitdiffFile, open := <-diffFilesCh:
if !open {
diffFilesCh = nil
break
}

// Check if commit is allowed
commitSHA := ""
if gitdiffFile.PatchHeader != nil {
commitSHA = gitdiffFile.PatchHeader.SHA
if d.Config.Allowlist.CommitAllowed(gitdiffFile.PatchHeader.SHA) {
// skip binary files
if gitdiffFile.IsBinary || gitdiffFile.IsDelete {
continue
}
}
d.addCommit(commitSHA)

s.Go(func() error {
for _, textFragment := range gitdiffFile.TextFragments {
if textFragment == nil {
return nil
// Check if commit is allowed
commitSHA := ""
if gitdiffFile.PatchHeader != nil {
commitSHA = gitdiffFile.PatchHeader.SHA
if d.Config.Allowlist.CommitAllowed(gitdiffFile.PatchHeader.SHA) {
continue
}
}
d.addCommit(commitSHA)

fragment := Fragment{
Raw: textFragment.Raw(gitdiff.OpAdd),
CommitSHA: commitSHA,
FilePath: gitdiffFile.NewName,
}
s.Go(func() error {
for _, textFragment := range gitdiffFile.TextFragments {
if textFragment == nil {
return nil
}

for _, finding := range d.Detect(fragment) {
d.addFinding(augmentGitFinding(finding, textFragment, gitdiffFile))
fragment := Fragment{
Raw: textFragment.Raw(gitdiff.OpAdd),
CommitSHA: commitSHA,
FilePath: gitdiffFile.NewName,
}

for _, finding := range d.Detect(fragment) {
d.addFinding(augmentGitFinding(finding, textFragment, gitdiffFile))
}
}
return nil
})
case err, open := <-errCh:
if !open {
errCh = nil
break
}
return nil
})

return d.findings, err
}
}

if err := s.Wait(); err != nil {
return d.findings, err
}
log.Info().Msgf("%d commits scanned.", len(d.commitMap))
log.Debug().Msg("Note: this number might be smaller than expected due to commits with no additions")
if git.ErrEncountered {
return d.findings, fmt.Errorf("%s", "git error encountered, see logs")
}
return d.findings, nil
}

Expand Down
100 changes: 72 additions & 28 deletions detect/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,30 @@ package git

import (
"bufio"
"errors"
"io"
"os/exec"
"path/filepath"
"regexp"
"strings"
"time"

"github.com/gitleaks/go-gitdiff/gitdiff"
"github.com/rs/zerolog/log"
)

var ErrEncountered bool

// GitLog returns a channel of gitdiff.File objects from the
// git log -p command for the given source.
var quotedOptPattern = regexp.MustCompile(`^(?:"[^"]+"|'[^']+')$`)
func GitLog(source string, logOpts string) (<-chan *gitdiff.File, error) {

// DiffFilesCmd helps to work with Git's output.
type DiffFilesCmd struct {
cmd *exec.Cmd
diffFilesCh <-chan *gitdiff.File
errCh <-chan error
}

// NewGitLogCmd returns `*DiffFilesCmd` with two channels: `<-chan *gitdiff.File` and `<-chan error`.
// Caller should read everything from channels until receiving a signal about their closure and call
// the `func (*DiffFilesCmd) Wait()` error in order to release resources.
func NewGitLogCmd(source string, logOpts string) (*DiffFilesCmd, error) {
sourceClean := filepath.Clean(source)
var cmd *exec.Cmd
if logOpts != "" {
Expand Down Expand Up @@ -54,21 +61,29 @@ func GitLog(source string, logOpts string) (<-chan *gitdiff.File, error) {
if err != nil {
return nil, err
}
if err := cmd.Start(); err != nil {
return nil, err
}

go listenForStdErr(stderr)
errCh := make(chan error)
go listenForStdErr(stderr, errCh)

if err := cmd.Start(); err != nil {
gitdiffFiles, err := gitdiff.Parse(stdout)
if err != nil {
return nil, err
}
// HACK: to avoid https://github.com/zricethezav/gitleaks/issues/722
time.Sleep(50 * time.Millisecond)

return gitdiff.Parse(cmd, stdout)
return &DiffFilesCmd{
cmd: cmd,
diffFilesCh: gitdiffFiles,
errCh: errCh,
}, nil
}

// GitDiff returns a channel of gitdiff.File objects from
// the git diff command for the given source.
func GitDiff(source string, staged bool) (<-chan *gitdiff.File, error) {
// NewGitDiffCmd returns `*DiffFilesCmd` with two channels: `<-chan *gitdiff.File` and `<-chan error`.
// Caller should read everything from channels until receiving a signal about their closure and call
// the `func (*DiffFilesCmd) Wait()` error in order to release resources.
func NewGitDiffCmd(source string, staged bool) (*DiffFilesCmd, error) {
sourceClean := filepath.Clean(source)
var cmd *exec.Cmd
cmd = exec.Command("git", "-C", sourceClean, "diff", "-U0", ".")
Expand All @@ -86,21 +101,50 @@ func GitDiff(source string, staged bool) (<-chan *gitdiff.File, error) {
if err != nil {
return nil, err
}
if err := cmd.Start(); err != nil {
return nil, err
}

go listenForStdErr(stderr)
errCh := make(chan error)
go listenForStdErr(stderr, errCh)

if err := cmd.Start(); err != nil {
gitdiffFiles, err := gitdiff.Parse(stdout)
if err != nil {
return nil, err
}
// HACK: to avoid https://github.com/zricethezav/gitleaks/issues/722
time.Sleep(50 * time.Millisecond)

return gitdiff.Parse(cmd, stdout)
return &DiffFilesCmd{
cmd: cmd,
diffFilesCh: gitdiffFiles,
errCh: errCh,
}, nil
}

// DiffFilesCh returns a channel with *gitdiff.File.
func (c *DiffFilesCmd) DiffFilesCh() <-chan *gitdiff.File {
return c.diffFilesCh
}

// ErrCh returns a channel that could produce an error if there is something in stderr.
func (c *DiffFilesCmd) ErrCh() <-chan error {
return c.errCh
}

// Wait waits for the command to exit and waits for any copying to
// stdin or copying from stdout or stderr to complete.
//
// Wait also closes underlying stdout and stderr.
func (c *DiffFilesCmd) Wait() (err error) {
return c.cmd.Wait()
}

// listenForStdErr listens for stderr output from git and prints it to stdout
// then exits with exit code 1
func listenForStdErr(stderr io.ReadCloser) {
// listenForStdErr listens for stderr output from git, prints it to stdout,
// sends to errCh and closes it.
func listenForStdErr(stderr io.ReadCloser, errCh chan<- error) {
defer close(errCh)

var errEncountered bool

scanner := bufio.NewScanner(stderr)
for scanner.Scan() {
// if git throws one of the following errors:
Expand All @@ -126,12 +170,12 @@ func listenForStdErr(stderr io.ReadCloser) {
log.Warn().Msg(scanner.Text())
} else {
log.Error().Msgf("[git] %s", scanner.Text())

// asynchronously set this error flag to true so that we can
// capture a log message and exit with a non-zero exit code
// This value should get set before the `git` command exits so it's
// safe-ish, although I know I know, bad practice.
ErrEncountered = true
errEncountered = true
}
}

if errEncountered {
errCh <- errors.New("stderr is not empty")
return
}
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ go 1.19
require (
github.com/charmbracelet/lipgloss v0.5.0
github.com/fatih/semgroup v1.2.0
github.com/gitleaks/go-gitdiff v0.8.0
github.com/gitleaks/go-gitdiff v0.9.0
github.com/h2non/filetype v1.1.3
github.com/rs/zerolog v1.26.1
github.com/spf13/cobra v1.2.1
Expand Down
9 changes: 2 additions & 7 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ github.com/fatih/semgroup v1.2.0/go.mod h1:1KAD4iIYfXjE4U13B48VM4z9QUwV5Tt8O4rS8
github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/gitleaks/go-gitdiff v0.8.0 h1:7aExTZm+K/M/EQKOyYcub8rIAdWK6ONxPGuRzxmWW+0=
github.com/gitleaks/go-gitdiff v0.8.0/go.mod h1:pKz0X4YzCKZs30BL+weqBIG7mx0jl4tF1uXV9ZyNvrA=
github.com/gitleaks/go-gitdiff v0.9.0 h1:SHAU2l0ZBEo8g82EeFewhVy81sb7JCxW76oSPtR/Nqg=
github.com/gitleaks/go-gitdiff v0.9.0/go.mod h1:pKz0X4YzCKZs30BL+weqBIG7mx0jl4tF1uXV9ZyNvrA=
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
Expand Down Expand Up @@ -195,12 +195,10 @@ github.com/magiconair/properties v1.8.5 h1:b6kJs+EmPFMYGkow9GiUyCyOvIwYetYJ3fSaW
github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60=
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y=
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng=
github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU=
github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
Expand All @@ -219,7 +217,6 @@ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lN
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/muesli/reflow v0.2.1-0.20210115123740-9e1d0d53df68 h1:y1p/ycavWjGT9FnmSjdbWUlLGvcxrY0Rw3ATltrxOhk=
github.com/muesli/reflow v0.2.1-0.20210115123740-9e1d0d53df68/go.mod h1:Xk+z4oIWdQqJzsxyjgl3P22oYZnHdZ8FFTHAQQt5BMQ=
github.com/muesli/termenv v0.11.1-0.20220204035834-5ac8409525e0 h1:STjmj0uFfRryL9fzRA/OupNppeAID6QJYPMavTL7jtY=
github.com/muesli/termenv v0.11.1-0.20220204035834-5ac8409525e0/go.mod h1:Bd5NYQ7pd+SrtBSrSNoBBmXlcY8+Xj4BMJgh8qcZrvs=
github.com/muesli/termenv v0.15.1 h1:UzuTb/+hhlBugQz28rpzey4ZuKcZ03MeKsoG7IJZIxs=
github.com/muesli/termenv v0.15.1/go.mod h1:HeAQPTzpfs016yGtA4g00CsdYnVLJvxsS4ANqrZs2sQ=
Expand Down Expand Up @@ -440,8 +437,6 @@ golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211110154304-99a53858aa08 h1:WecRHqgE09JBkh/584XIE6PMz5KKE/vER4izNUi30AQ=
golang.org/x/sys v0.0.0-20211110154304-99a53858aa08/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
Expand Down

0 comments on commit 4526655

Please sign in to comment.