Skip to content

Commit

Permalink
feat: katana cpu + memory improvements and profiling (#1107)
Browse files Browse the repository at this point in the history
* feat: added pprof-server addition

* feat: disable wappalyzer as default + added tech-detect flag

* use default body size + bump go.mod

* go mod clean up

* dep update

* misc fix

* dep update

---------

Co-authored-by: sandeep <8293321+ehsandeep@users.noreply.github.com>
Co-authored-by: Sandeep Singh <sandeep@projectdiscovery.io>
  • Loading branch information
3 people authored Dec 2, 2024
1 parent 4c696d9 commit cf36fe1
Show file tree
Hide file tree
Showing 8 changed files with 76 additions and 23 deletions.
17 changes: 15 additions & 2 deletions cmd/katana/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package main

import (
"fmt"
"math"
"os"
"os/signal"
"path/filepath"
Expand All @@ -18,6 +17,7 @@ import (
errorutil "github.com/projectdiscovery/utils/errors"
fileutil "github.com/projectdiscovery/utils/file"
folderutil "github.com/projectdiscovery/utils/folder"
pprofutils "github.com/projectdiscovery/utils/pprof"
"github.com/rs/xid"
)

Expand Down Expand Up @@ -65,6 +65,15 @@ func main() {
}
}()

var pprofServer *pprofutils.PprofServer
if options.PprofServer {
pprofServer = pprofutils.NewPprofServer()
pprofServer.Start()
}
if pprofServer != nil {
defer pprofServer.Stop()
}

if err := katanaRunner.ExecuteCrawling(); err != nil {
gologger.Fatal().Msgf("could not execute crawling: %s", err)
}
Expand All @@ -83,6 +92,8 @@ func main() {

}

const defaultBodyReadSize = 4 * 1024 * 1024

func readFlags() (*goflags.FlagSet, error) {
flagSet := goflags.NewFlagSet()
flagSet.SetDescription(`Katana is a fast crawler focused on execution in automation
Expand All @@ -101,12 +112,13 @@ pipelines offering both headless and non-headless crawling.`)
flagSet.BoolVarP(&options.ScrapeJSLuiceResponses, "jsluice", "jsl", false, "enable jsluice parsing in javascript file (memory intensive)"),
flagSet.DurationVarP(&options.CrawlDuration, "crawl-duration", "ct", 0, "maximum duration to crawl the target for (s, m, h, d) (default s)"),
flagSet.StringVarP(&options.KnownFiles, "known-files", "kf", "", "enable crawling of known files (all,robotstxt,sitemapxml), a minimum depth of 3 is required to ensure all known files are properly crawled."),
flagSet.IntVarP(&options.BodyReadSize, "max-response-size", "mrs", math.MaxInt, "maximum response size to read"),
flagSet.IntVarP(&options.BodyReadSize, "max-response-size", "mrs", defaultBodyReadSize, "maximum response size to read"),
flagSet.IntVar(&options.Timeout, "timeout", 10, "time to wait for request in seconds"),
flagSet.BoolVarP(&options.AutomaticFormFill, "automatic-form-fill", "aff", false, "enable automatic form filling (experimental)"),
flagSet.BoolVarP(&options.FormExtraction, "form-extraction", "fx", false, "extract form, input, textarea & select elements in jsonl output"),
flagSet.IntVar(&options.Retries, "retry", 1, "number of times to retry the request"),
flagSet.StringVar(&options.Proxy, "proxy", "", "http/socks5 proxy to use"),
flagSet.BoolVarP(&options.TechDetect, "tech-detect", "td", false, "enable technology detection"),
flagSet.StringSliceVarP(&options.CustomHeaders, "headers", "H", nil, "custom header/cookie to include in all http request in header:value format (file)", goflags.FileStringSliceOptions),
flagSet.StringVar(&cfgFile, "config", "", "path to the katana configuration file"),
flagSet.StringVarP(&options.FormConfig, "form-config", "fc", "", "path to custom form configuration file"),
Expand All @@ -120,6 +132,7 @@ pipelines offering both headless and non-headless crawling.`)
flagSet.CreateGroup("debug", "Debug",
flagSet.BoolVarP(&options.HealthCheck, "hc", "health-check", false, "run diagnostic check up"),
flagSet.StringVarP(&options.ErrorLogFile, "error-log", "elog", "", "file to write sent requests error log"),
flagSet.BoolVar(&options.PprofServer, "pprof-server", false, "enable pprof server"),
)

flagSet.CreateGroup("headless", "Headless",
Expand Down
10 changes: 6 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ require (
github.com/projectdiscovery/dsl v0.3.4
github.com/projectdiscovery/fastdialer v0.2.10
github.com/projectdiscovery/goflags v0.1.64
github.com/projectdiscovery/gologger v1.1.32
github.com/projectdiscovery/hmap v0.0.67
github.com/projectdiscovery/gologger v1.1.33
github.com/projectdiscovery/hmap v0.0.68
github.com/projectdiscovery/mapcidr v1.1.34
github.com/projectdiscovery/ratelimit v0.0.61
github.com/projectdiscovery/retryablehttp-go v1.0.87
github.com/projectdiscovery/utils v0.2.20
github.com/projectdiscovery/retryablehttp-go v1.0.88
github.com/projectdiscovery/utils v0.3.1-0.20241201132332-cebafa101965
github.com/projectdiscovery/wappalyzergo v0.2.4
github.com/remeh/sizedwaitgroup v1.0.0
github.com/rs/xid v1.5.0
Expand Down Expand Up @@ -48,10 +48,12 @@ require (
github.com/dlclark/regexp2 v1.11.4 // indirect
github.com/docker/go-units v0.5.0 // indirect
github.com/fatih/color v1.15.0 // indirect
github.com/felixge/fgprof v0.9.5 // indirect
github.com/gaissmai/bart v0.9.5 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/google/go-github/v30 v30.1.0 // indirect
github.com/google/go-querystring v1.1.0 // indirect
github.com/google/pprof v0.0.0-20240227163752-401108e1b7e7 // indirect
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
github.com/google/uuid v1.3.1 // indirect
github.com/hashicorp/go-version v1.6.0 // indirect
Expand Down
35 changes: 27 additions & 8 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ github.com/charmbracelet/x/exp/golden v0.0.0-20240806155701-69247e0abc2a h1:G99k
github.com/charmbracelet/x/exp/golden v0.0.0-20240806155701-69247e0abc2a/go.mod h1:wDlXFlCrmJ8J+swcL/MnGUuYnqgQdW9rhSD61oNMb6U=
github.com/cheggaaa/pb/v3 v3.1.4 h1:DN8j4TVVdKu3WxVwcRKu0sG00IIU6FewoABZzXbRQeo=
github.com/cheggaaa/pb/v3 v3.1.4/go.mod h1:6wVjILNBaXMs8c21qRiaUM8BR82erfgau1DQ4iUXmSA=
github.com/chromedp/cdproto v0.0.0-20230802225258-3cf4e6d46a89/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs=
github.com/chromedp/chromedp v0.9.2/go.mod h1:LkSXJKONWTCHAfQasKFUZI+mxqS4tZqhmtGzzhLsnLs=
github.com/chromedp/sysutil v1.0.0/go.mod h1:kgWmDdq8fTzXYcKIBqIYvRRTnYb9aNS9moAV0xufSww=
github.com/chzyer/logex v1.2.1/go.mod h1:JLbx6lG2kDbNRFnfkgvh4eRJRPX1QCoOIWomwysCBrQ=
github.com/chzyer/readline v1.5.1/go.mod h1:Eh+b79XXUwfKfcPLepksvw2tcLE/Ct21YObkaSkeBlk=
github.com/chzyer/test v1.0.0/go.mod h1:2JlltgoNkt4TW/z9V/IzDdFaMTM2JPIi26O1pF38GC8=
github.com/cloudflare/circl v1.3.7 h1:qlCDlTPz2n9fu58M0Nh1J/JzcFpfgkFHHX3O35r5vcU=
github.com/cloudflare/circl v1.3.7/go.mod h1:sRTcRWXGLrKw6yIGJ+l7amYJFfAXbZG0kBSc8r4zxgA=
github.com/cnf/structhash v0.0.0-20201127153200-e1b16c1ebc08 h1:ox2F0PSMlrAAiAdknSRMDrAr8mfxPCfSZolH+/qQnyQ=
Expand All @@ -73,6 +79,8 @@ github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj6
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs=
github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw=
github.com/felixge/fgprof v0.9.5 h1:8+vR6yu2vvSKn08urWyEuxx75NWPEvybbkBirEpsbVY=
github.com/felixge/fgprof v0.9.5/go.mod h1:yKl+ERSa++RYOs32d8K6WEXCB4uXdLls4ZaZPpayhMM=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw=
Expand All @@ -82,6 +90,9 @@ github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-rod/rod v0.114.1 h1:osBWr88guzTXAIzwJWVmGZe3/utT9+lqKjkGSBsYMxw=
github.com/go-rod/rod v0.114.1/go.mod h1:aiedSEFg5DwG/fnNbUOTPMTTWX3MRj6vIs/a684Mthw=
github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM=
github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
github.com/gobwas/ws v1.2.1/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY=
github.com/gofrs/uuid v3.3.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
Expand All @@ -108,6 +119,8 @@ github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO
github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8=
github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/pprof v0.0.0-20240227163752-401108e1b7e7 h1:y3N7Bm7Y9/CtpiVkw/ZWj6lSlDF3F74SfKwfTCer72Q=
github.com/google/pprof v0.0.0-20240227163752-401108e1b7e7/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik=
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4=
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ=
github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4=
Expand All @@ -123,6 +136,8 @@ github.com/hdm/jarm-go v0.0.7/go.mod h1:kinGoS0+Sdn1Rr54OtanET5E5n7AlD6T6CrJAKDj
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/ianlancetaylor/demangle v0.0.0-20230524184225-eabc099b10ab/go.mod h1:gx7rwoVhcfuVKG5uya9Hs3Sxj7EIvldVofAWIUtGouw=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4dN7jwJOQ1U=
Expand All @@ -144,6 +159,7 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs=
github.com/logrusorgru/aurora v2.0.3+incompatible h1:tOpm7WcpBTn4fjmVfgpQq0EfczGlG91VSDkswnjF5A8=
github.com/logrusorgru/aurora v2.0.3+incompatible/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4=
github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
Expand All @@ -152,6 +168,7 @@ github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
github.com/lukasbob/srcset v0.0.0-20190730101422-86b742e617f3 h1:l1rIRmxNhzeQM+qA3D0CsDLo0Hx45q9JmK0BlCjt6Ks=
github.com/lukasbob/srcset v0.0.0-20190730101422-86b742e617f3/go.mod h1:j16TYl5p17+vBMyaL6Nu4ojlOnfX8lc2k2cfmw6m5TQ=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
Expand Down Expand Up @@ -195,6 +212,7 @@ github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1Cpa
github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk=
github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0=
github.com/pierrec/lz4/v4 v4.1.2 h1:qvY3YFXRQE/XB8MlLzJH7mSzBs74eA2gg52YTk6jUPM=
github.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
Expand All @@ -214,12 +232,12 @@ github.com/projectdiscovery/fastdialer v0.2.10 h1:5iciZXMPdynbk/9iuqkJT1gqMXwzgE
github.com/projectdiscovery/fastdialer v0.2.10/go.mod h1:21rwXMecVsPVdSvON8Up761/GgxC4OSc9Rvx5LNH5fY=
github.com/projectdiscovery/goflags v0.1.64 h1:FDfwdt9N97Hi8OuhbkDlKtVttpc/CRMIWQVa08VsHsI=
github.com/projectdiscovery/goflags v0.1.64/go.mod h1:3FyHIVQtnycNOc1LE3O1jj/XR5XuMdF9QfHd0ujhnX4=
github.com/projectdiscovery/gologger v1.1.32 h1:j2Y2cxypELi9zbj/7UxDTdv9UWQl7ALJrwc7wV5snuY=
github.com/projectdiscovery/gologger v1.1.32/go.mod h1:w62+CIcwygjSpSnV/3Xh+jj4bgv6lfL7kx2kA/Bl09U=
github.com/projectdiscovery/gologger v1.1.33 h1:wQxaQ8p/0Rx89lowBp0PnY2QSWiqf9QW1vGYAllsVJ4=
github.com/projectdiscovery/gologger v1.1.33/go.mod h1:P/WwqKstshQATJxN39V0KJ9ZuiGLOizmSqHIYrrz1T4=
github.com/projectdiscovery/gostruct v0.0.2 h1:s8gP8ApugGM4go1pA+sVlPDXaWqNP5BBDDSv7VEdG1M=
github.com/projectdiscovery/gostruct v0.0.2/go.mod h1:H86peL4HKwMXcQQtEa6lmC8FuD9XFt6gkNR0B/Mu5PE=
github.com/projectdiscovery/hmap v0.0.67 h1:PG09AyXH6mchdZCdxAS7WkZz0xxsOsIxJOmEixEmnzI=
github.com/projectdiscovery/hmap v0.0.67/go.mod h1:WxK8i2J+wcdimIXCgpYzfj9gKxCqRqOM4KENDRzGgAA=
github.com/projectdiscovery/hmap v0.0.68 h1:/z1Cz2wKYedTJc97UNzBBgdm744xkXi6j7125b7toqg=
github.com/projectdiscovery/hmap v0.0.68/go.mod h1:B37g7giW6i7+X1pJAeG0NPoKFpFJ7M26a18gfwfLeEc=
github.com/projectdiscovery/machineid v0.0.0-20240226150047-2e2c51e35983 h1:ZScLodGSezQVwsQDtBSMFp72WDq0nNN+KE/5DHKY5QE=
github.com/projectdiscovery/machineid v0.0.0-20240226150047-2e2c51e35983/go.mod h1:3G3BRKui7nMuDFAZKR/M2hiOLtaOmyukT20g88qRQjI=
github.com/projectdiscovery/mapcidr v1.1.34 h1:udr83vQ7oz3kEOwlsU6NC6o08leJzSDQtls1wmXN/kM=
Expand All @@ -230,10 +248,10 @@ github.com/projectdiscovery/ratelimit v0.0.61 h1:n9PD4Z4Y6cLeT2rn9IiOAA0I/kIZE/D
github.com/projectdiscovery/ratelimit v0.0.61/go.mod h1:u7DxBBcUzFg4Cb2s5yabmtCMJs+ojulNpNrSLtftoKg=
github.com/projectdiscovery/retryabledns v1.0.86 h1:8YMJGJ94lFBKKN3t7NOzJfbGsZoh9qNpi49xdfJcZVc=
github.com/projectdiscovery/retryabledns v1.0.86/go.mod h1:5PhXvlLkEFmlYOt9i4wiKA1eONLrNiZ6DQE88Ph9rgU=
github.com/projectdiscovery/retryablehttp-go v1.0.87 h1:OE4cRNo6Y7YAXibJT88o/gEx4idmbCbFeSv3DIdL5Rg=
github.com/projectdiscovery/retryablehttp-go v1.0.87/go.mod h1:MXo3aC7aoM91FAbq5SKjDPpHw2OKTYj5eBMM5Vpsroo=
github.com/projectdiscovery/utils v0.2.20 h1:TQkBie5eGG4PnFdSLaHdehXTBEPY5byGayEk1H+HlwE=
github.com/projectdiscovery/utils v0.2.20/go.mod h1:jH4EzJuoBS/ZUgt4McnH+NWpQfcZV7A1unfLntbXFCA=
github.com/projectdiscovery/retryablehttp-go v1.0.88 h1:uR6T+i8Sy1isfG1KClhhsXnOqkOR6E8MAvuyOFq3T10=
github.com/projectdiscovery/retryablehttp-go v1.0.88/go.mod h1:ktjiIKyej+plUeK9vksqRf3wGicqY3E1rW84V/O7p0M=
github.com/projectdiscovery/utils v0.3.1-0.20241201132332-cebafa101965 h1:RbLy85dy6RpTRDihIYdbKv47/475KJcpeUoGbguzB/8=
github.com/projectdiscovery/utils v0.3.1-0.20241201132332-cebafa101965/go.mod h1:k2XlmfaYO4k6T4vAyUa3Kn/0BxPTIlNiBFpM6nVCbz0=
github.com/projectdiscovery/wappalyzergo v0.2.4 h1:fVEBM4mTJgGDOIFw8ykGw/YgA4jt/gcs1g7UImPmmLw=
github.com/projectdiscovery/wappalyzergo v0.2.4/go.mod h1:1373aKrhlUGtX13KjYXH74lf9yl32StnWuwk1FWvTgo=
github.com/refraction-networking/utls v1.6.7 h1:zVJ7sP1dJx/WtVuITug3qYUq034cDq9B2MR1K67ULZM=
Expand Down Expand Up @@ -428,6 +446,7 @@ golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20210228012217-479acdf4ea46/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220310020820-b874c991c1a5/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
Expand Down
8 changes: 6 additions & 2 deletions pkg/engine/common/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,14 +156,18 @@ func (s *Shared) NewCrawlSessionWithURL(URL string) (*CrawlSession, error) {
httpclient, _, err := BuildHttpClient(s.Options.Dialer, s.Options.Options, func(resp *http.Response, depth int) {
body, _ := io.ReadAll(resp.Body)
reader, _ := goquery.NewDocumentFromReader(bytes.NewReader(body))
technologies := s.Options.Wappalyzer.Fingerprint(resp.Header, body)
var technologyKeys []string
if s.Options.Wappalyzer != nil {
technologies := s.Options.Wappalyzer.Fingerprint(resp.Header, body)
technologyKeys = mapsutil.GetKeys(technologies)
}
navigationResponse := &navigation.Response{
Depth: depth + 1,
RootHostname: hostname,
Resp: resp,
Body: string(body),
Reader: reader,
Technologies: mapsutil.GetKeys(technologies),
Technologies: technologyKeys,
StatusCode: resp.StatusCode,
Headers: utils.FlattenHeaders(resp.Header),
}
Expand Down
9 changes: 8 additions & 1 deletion pkg/engine/hybrid/crawl.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,14 @@ func (c *Crawler) navigateRequest(s *common.CrawlSession, request *navigation.Re
rawBytesResponse, _ = httputil.DumpResponse(httpresp, true)

bodyReader, _ := goquery.NewDocumentFromReader(bytes.NewReader(body))
technologies := c.Options.Wappalyzer.Fingerprint(headers, body)
var technologies map[string]interface{}
if c.Options.Wappalyzer != nil {
fingerprints := c.Options.Wappalyzer.Fingerprint(headers, body)
technologies = make(map[string]interface{}, len(fingerprints))
for k := range fingerprints {
technologies[k] = struct{}{}
}
}
resp := &navigation.Response{
Resp: httpresp,
Body: string(body),
Expand Down
6 changes: 4 additions & 2 deletions pkg/engine/standard/crawl.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,10 @@ func (c *Crawler) makeRequest(s *common.CrawlSession, request *navigation.Reques
return &navigation.Response{}, nil
}

technologies := c.Options.Wappalyzer.Fingerprint(resp.Header, data)
response.Technologies = mapsutil.GetKeys(technologies)
if c.Options.Wappalyzer != nil {
technologies := c.Options.Wappalyzer.Fingerprint(resp.Header, data)
response.Technologies = mapsutil.GetKeys(technologies)
}

resp.Body = io.NopCloser(strings.NewReader(string(data)))

Expand Down
10 changes: 6 additions & 4 deletions pkg/types/crawler_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,13 @@ func NewCrawlerOptions(options *Options) (*CrawlerOptions, error) {
crawlerOptions.RateLimit = ratelimit.New(context.Background(), uint(options.RateLimitMinute), time.Minute)
}

wappalyze, err := wappalyzer.New()
if err != nil {
return nil, err
if options.TechDetect {
wappalyze, err := wappalyzer.New()
if err != nil {
return nil, err
}
crawlerOptions.Wappalyzer = wappalyze
}
crawlerOptions.Wappalyzer = wappalyze

return crawlerOptions, nil
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/types/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ type Options struct {
Silent bool
// Verbose specifies showing verbose output
Verbose bool
// TechDetect enables technology detection
TechDetect bool
// Version enables showing of crawler version
Version bool
// ScrapeJSResponses enables scraping of relative endpoints from javascript
Expand Down Expand Up @@ -133,6 +135,8 @@ type Options struct {
XhrExtraction bool
// HealthCheck determines if a self-healthcheck should be performed
HealthCheck bool
// PprofServer enables pprof server
PprofServer bool
// ErrorLogFile specifies a file to write with the errors of all requests
ErrorLogFile string
// Resolvers contains custom resolvers
Expand Down

0 comments on commit cf36fe1

Please sign in to comment.