diff --git a/cmd/katana/main.go b/cmd/katana/main.go index 1be5933d..ec425716 100644 --- a/cmd/katana/main.go +++ b/cmd/katana/main.go @@ -2,7 +2,6 @@ package main import ( "fmt" - "math" "os" "os/signal" "path/filepath" @@ -18,6 +17,7 @@ import ( errorutil "github.com/projectdiscovery/utils/errors" fileutil "github.com/projectdiscovery/utils/file" folderutil "github.com/projectdiscovery/utils/folder" + pprofutils "github.com/projectdiscovery/utils/pprof" "github.com/rs/xid" ) @@ -65,6 +65,15 @@ func main() { } }() + var pprofServer *pprofutils.PprofServer + if options.PprofServer { + pprofServer = pprofutils.NewPprofServer() + pprofServer.Start() + } + if pprofServer != nil { + defer pprofServer.Stop() + } + if err := katanaRunner.ExecuteCrawling(); err != nil { gologger.Fatal().Msgf("could not execute crawling: %s", err) } @@ -83,6 +92,8 @@ func main() { } +const defaultBodyReadSize = 4 * 1024 * 1024 + func readFlags() (*goflags.FlagSet, error) { flagSet := goflags.NewFlagSet() flagSet.SetDescription(`Katana is a fast crawler focused on execution in automation @@ -101,12 +112,13 @@ pipelines offering both headless and non-headless crawling.`) flagSet.BoolVarP(&options.ScrapeJSLuiceResponses, "jsluice", "jsl", false, "enable jsluice parsing in javascript file (memory intensive)"), flagSet.DurationVarP(&options.CrawlDuration, "crawl-duration", "ct", 0, "maximum duration to crawl the target for (s, m, h, d) (default s)"), flagSet.StringVarP(&options.KnownFiles, "known-files", "kf", "", "enable crawling of known files (all,robotstxt,sitemapxml), a minimum depth of 3 is required to ensure all known files are properly crawled."), - flagSet.IntVarP(&options.BodyReadSize, "max-response-size", "mrs", math.MaxInt, "maximum response size to read"), + flagSet.IntVarP(&options.BodyReadSize, "max-response-size", "mrs", defaultBodyReadSize, "maximum response size to read"), flagSet.IntVar(&options.Timeout, "timeout", 10, "time to wait for request in seconds"), flagSet.BoolVarP(&options.AutomaticFormFill, "automatic-form-fill", "aff", false, "enable automatic form filling (experimental)"), flagSet.BoolVarP(&options.FormExtraction, "form-extraction", "fx", false, "extract form, input, textarea & select elements in jsonl output"), flagSet.IntVar(&options.Retries, "retry", 1, "number of times to retry the request"), flagSet.StringVar(&options.Proxy, "proxy", "", "http/socks5 proxy to use"), + flagSet.BoolVarP(&options.TechDetect, "tech-detect", "td", false, "enable technology detection"), flagSet.StringSliceVarP(&options.CustomHeaders, "headers", "H", nil, "custom header/cookie to include in all http request in header:value format (file)", goflags.FileStringSliceOptions), flagSet.StringVar(&cfgFile, "config", "", "path to the katana configuration file"), flagSet.StringVarP(&options.FormConfig, "form-config", "fc", "", "path to custom form configuration file"), @@ -120,6 +132,7 @@ pipelines offering both headless and non-headless crawling.`) flagSet.CreateGroup("debug", "Debug", flagSet.BoolVarP(&options.HealthCheck, "hc", "health-check", false, "run diagnostic check up"), flagSet.StringVarP(&options.ErrorLogFile, "error-log", "elog", "", "file to write sent requests error log"), + flagSet.BoolVar(&options.PprofServer, "pprof-server", false, "enable pprof server"), ) flagSet.CreateGroup("headless", "Headless", diff --git a/go.mod b/go.mod index 592165d7..c6fc8f08 100644 --- a/go.mod +++ b/go.mod @@ -14,12 +14,12 @@ require ( github.com/projectdiscovery/dsl v0.3.4 github.com/projectdiscovery/fastdialer v0.2.10 github.com/projectdiscovery/goflags v0.1.64 - github.com/projectdiscovery/gologger v1.1.32 - github.com/projectdiscovery/hmap v0.0.67 + github.com/projectdiscovery/gologger v1.1.33 + github.com/projectdiscovery/hmap v0.0.68 github.com/projectdiscovery/mapcidr v1.1.34 github.com/projectdiscovery/ratelimit v0.0.61 - github.com/projectdiscovery/retryablehttp-go v1.0.87 - github.com/projectdiscovery/utils v0.2.20 + github.com/projectdiscovery/retryablehttp-go v1.0.88 + github.com/projectdiscovery/utils v0.3.1-0.20241201132332-cebafa101965 github.com/projectdiscovery/wappalyzergo v0.2.4 github.com/remeh/sizedwaitgroup v1.0.0 github.com/rs/xid v1.5.0 @@ -48,10 +48,12 @@ require ( github.com/dlclark/regexp2 v1.11.4 // indirect github.com/docker/go-units v0.5.0 // indirect github.com/fatih/color v1.15.0 // indirect + github.com/felixge/fgprof v0.9.5 // indirect github.com/gaissmai/bart v0.9.5 // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/google/go-github/v30 v30.1.0 // indirect github.com/google/go-querystring v1.1.0 // indirect + github.com/google/pprof v0.0.0-20240227163752-401108e1b7e7 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/google/uuid v1.3.1 // indirect github.com/hashicorp/go-version v1.6.0 // indirect diff --git a/go.sum b/go.sum index ff4ef93e..e68c33c4 100644 --- a/go.sum +++ b/go.sum @@ -52,6 +52,12 @@ github.com/charmbracelet/x/exp/golden v0.0.0-20240806155701-69247e0abc2a h1:G99k github.com/charmbracelet/x/exp/golden v0.0.0-20240806155701-69247e0abc2a/go.mod h1:wDlXFlCrmJ8J+swcL/MnGUuYnqgQdW9rhSD61oNMb6U= github.com/cheggaaa/pb/v3 v3.1.4 h1:DN8j4TVVdKu3WxVwcRKu0sG00IIU6FewoABZzXbRQeo= github.com/cheggaaa/pb/v3 v3.1.4/go.mod h1:6wVjILNBaXMs8c21qRiaUM8BR82erfgau1DQ4iUXmSA= +github.com/chromedp/cdproto v0.0.0-20230802225258-3cf4e6d46a89/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs= +github.com/chromedp/chromedp v0.9.2/go.mod h1:LkSXJKONWTCHAfQasKFUZI+mxqS4tZqhmtGzzhLsnLs= +github.com/chromedp/sysutil v1.0.0/go.mod h1:kgWmDdq8fTzXYcKIBqIYvRRTnYb9aNS9moAV0xufSww= +github.com/chzyer/logex v1.2.1/go.mod h1:JLbx6lG2kDbNRFnfkgvh4eRJRPX1QCoOIWomwysCBrQ= +github.com/chzyer/readline v1.5.1/go.mod h1:Eh+b79XXUwfKfcPLepksvw2tcLE/Ct21YObkaSkeBlk= +github.com/chzyer/test v1.0.0/go.mod h1:2JlltgoNkt4TW/z9V/IzDdFaMTM2JPIi26O1pF38GC8= github.com/cloudflare/circl v1.3.7 h1:qlCDlTPz2n9fu58M0Nh1J/JzcFpfgkFHHX3O35r5vcU= github.com/cloudflare/circl v1.3.7/go.mod h1:sRTcRWXGLrKw6yIGJ+l7amYJFfAXbZG0kBSc8r4zxgA= github.com/cnf/structhash v0.0.0-20201127153200-e1b16c1ebc08 h1:ox2F0PSMlrAAiAdknSRMDrAr8mfxPCfSZolH+/qQnyQ= @@ -73,6 +79,8 @@ github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj6 github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs= github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= +github.com/felixge/fgprof v0.9.5 h1:8+vR6yu2vvSKn08urWyEuxx75NWPEvybbkBirEpsbVY= +github.com/felixge/fgprof v0.9.5/go.mod h1:yKl+ERSa++RYOs32d8K6WEXCB4uXdLls4ZaZPpayhMM= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= @@ -82,6 +90,9 @@ github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-rod/rod v0.114.1 h1:osBWr88guzTXAIzwJWVmGZe3/utT9+lqKjkGSBsYMxw= github.com/go-rod/rod v0.114.1/go.mod h1:aiedSEFg5DwG/fnNbUOTPMTTWX3MRj6vIs/a684Mthw= +github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM= +github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= +github.com/gobwas/ws v1.2.1/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY= github.com/gofrs/uuid v3.3.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -108,6 +119,8 @@ github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20240227163752-401108e1b7e7 h1:y3N7Bm7Y9/CtpiVkw/ZWj6lSlDF3F74SfKwfTCer72Q= +github.com/google/pprof v0.0.0-20240227163752-401108e1b7e7/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= @@ -123,6 +136,8 @@ github.com/hdm/jarm-go v0.0.7/go.mod h1:kinGoS0+Sdn1Rr54OtanET5E5n7AlD6T6CrJAKDj github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/ianlancetaylor/demangle v0.0.0-20230524184225-eabc099b10ab/go.mod h1:gx7rwoVhcfuVKG5uya9Hs3Sxj7EIvldVofAWIUtGouw= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4dN7jwJOQ1U= @@ -144,6 +159,7 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs= github.com/logrusorgru/aurora v2.0.3+incompatible h1:tOpm7WcpBTn4fjmVfgpQq0EfczGlG91VSDkswnjF5A8= github.com/logrusorgru/aurora v2.0.3+incompatible/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4= github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= @@ -152,6 +168,7 @@ github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= github.com/lukasbob/srcset v0.0.0-20190730101422-86b742e617f3 h1:l1rIRmxNhzeQM+qA3D0CsDLo0Hx45q9JmK0BlCjt6Ks= github.com/lukasbob/srcset v0.0.0-20190730101422-86b742e617f3/go.mod h1:j16TYl5p17+vBMyaL6Nu4ojlOnfX8lc2k2cfmw6m5TQ= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= @@ -195,6 +212,7 @@ github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1Cpa github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE= github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg= github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk= +github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0= github.com/pierrec/lz4/v4 v4.1.2 h1:qvY3YFXRQE/XB8MlLzJH7mSzBs74eA2gg52YTk6jUPM= github.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= @@ -214,12 +232,12 @@ github.com/projectdiscovery/fastdialer v0.2.10 h1:5iciZXMPdynbk/9iuqkJT1gqMXwzgE github.com/projectdiscovery/fastdialer v0.2.10/go.mod h1:21rwXMecVsPVdSvON8Up761/GgxC4OSc9Rvx5LNH5fY= github.com/projectdiscovery/goflags v0.1.64 h1:FDfwdt9N97Hi8OuhbkDlKtVttpc/CRMIWQVa08VsHsI= github.com/projectdiscovery/goflags v0.1.64/go.mod h1:3FyHIVQtnycNOc1LE3O1jj/XR5XuMdF9QfHd0ujhnX4= -github.com/projectdiscovery/gologger v1.1.32 h1:j2Y2cxypELi9zbj/7UxDTdv9UWQl7ALJrwc7wV5snuY= -github.com/projectdiscovery/gologger v1.1.32/go.mod h1:w62+CIcwygjSpSnV/3Xh+jj4bgv6lfL7kx2kA/Bl09U= +github.com/projectdiscovery/gologger v1.1.33 h1:wQxaQ8p/0Rx89lowBp0PnY2QSWiqf9QW1vGYAllsVJ4= +github.com/projectdiscovery/gologger v1.1.33/go.mod h1:P/WwqKstshQATJxN39V0KJ9ZuiGLOizmSqHIYrrz1T4= github.com/projectdiscovery/gostruct v0.0.2 h1:s8gP8ApugGM4go1pA+sVlPDXaWqNP5BBDDSv7VEdG1M= github.com/projectdiscovery/gostruct v0.0.2/go.mod h1:H86peL4HKwMXcQQtEa6lmC8FuD9XFt6gkNR0B/Mu5PE= -github.com/projectdiscovery/hmap v0.0.67 h1:PG09AyXH6mchdZCdxAS7WkZz0xxsOsIxJOmEixEmnzI= -github.com/projectdiscovery/hmap v0.0.67/go.mod h1:WxK8i2J+wcdimIXCgpYzfj9gKxCqRqOM4KENDRzGgAA= +github.com/projectdiscovery/hmap v0.0.68 h1:/z1Cz2wKYedTJc97UNzBBgdm744xkXi6j7125b7toqg= +github.com/projectdiscovery/hmap v0.0.68/go.mod h1:B37g7giW6i7+X1pJAeG0NPoKFpFJ7M26a18gfwfLeEc= github.com/projectdiscovery/machineid v0.0.0-20240226150047-2e2c51e35983 h1:ZScLodGSezQVwsQDtBSMFp72WDq0nNN+KE/5DHKY5QE= github.com/projectdiscovery/machineid v0.0.0-20240226150047-2e2c51e35983/go.mod h1:3G3BRKui7nMuDFAZKR/M2hiOLtaOmyukT20g88qRQjI= github.com/projectdiscovery/mapcidr v1.1.34 h1:udr83vQ7oz3kEOwlsU6NC6o08leJzSDQtls1wmXN/kM= @@ -230,10 +248,10 @@ github.com/projectdiscovery/ratelimit v0.0.61 h1:n9PD4Z4Y6cLeT2rn9IiOAA0I/kIZE/D github.com/projectdiscovery/ratelimit v0.0.61/go.mod h1:u7DxBBcUzFg4Cb2s5yabmtCMJs+ojulNpNrSLtftoKg= github.com/projectdiscovery/retryabledns v1.0.86 h1:8YMJGJ94lFBKKN3t7NOzJfbGsZoh9qNpi49xdfJcZVc= github.com/projectdiscovery/retryabledns v1.0.86/go.mod h1:5PhXvlLkEFmlYOt9i4wiKA1eONLrNiZ6DQE88Ph9rgU= -github.com/projectdiscovery/retryablehttp-go v1.0.87 h1:OE4cRNo6Y7YAXibJT88o/gEx4idmbCbFeSv3DIdL5Rg= -github.com/projectdiscovery/retryablehttp-go v1.0.87/go.mod h1:MXo3aC7aoM91FAbq5SKjDPpHw2OKTYj5eBMM5Vpsroo= -github.com/projectdiscovery/utils v0.2.20 h1:TQkBie5eGG4PnFdSLaHdehXTBEPY5byGayEk1H+HlwE= -github.com/projectdiscovery/utils v0.2.20/go.mod h1:jH4EzJuoBS/ZUgt4McnH+NWpQfcZV7A1unfLntbXFCA= +github.com/projectdiscovery/retryablehttp-go v1.0.88 h1:uR6T+i8Sy1isfG1KClhhsXnOqkOR6E8MAvuyOFq3T10= +github.com/projectdiscovery/retryablehttp-go v1.0.88/go.mod h1:ktjiIKyej+plUeK9vksqRf3wGicqY3E1rW84V/O7p0M= +github.com/projectdiscovery/utils v0.3.1-0.20241201132332-cebafa101965 h1:RbLy85dy6RpTRDihIYdbKv47/475KJcpeUoGbguzB/8= +github.com/projectdiscovery/utils v0.3.1-0.20241201132332-cebafa101965/go.mod h1:k2XlmfaYO4k6T4vAyUa3Kn/0BxPTIlNiBFpM6nVCbz0= github.com/projectdiscovery/wappalyzergo v0.2.4 h1:fVEBM4mTJgGDOIFw8ykGw/YgA4jt/gcs1g7UImPmmLw= github.com/projectdiscovery/wappalyzergo v0.2.4/go.mod h1:1373aKrhlUGtX13KjYXH74lf9yl32StnWuwk1FWvTgo= github.com/refraction-networking/utls v1.6.7 h1:zVJ7sP1dJx/WtVuITug3qYUq034cDq9B2MR1K67ULZM= @@ -428,6 +446,7 @@ golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210228012217-479acdf4ea46/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220310020820-b874c991c1a5/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/pkg/engine/common/base.go b/pkg/engine/common/base.go index 55576a0c..c35989f6 100644 --- a/pkg/engine/common/base.go +++ b/pkg/engine/common/base.go @@ -156,14 +156,18 @@ func (s *Shared) NewCrawlSessionWithURL(URL string) (*CrawlSession, error) { httpclient, _, err := BuildHttpClient(s.Options.Dialer, s.Options.Options, func(resp *http.Response, depth int) { body, _ := io.ReadAll(resp.Body) reader, _ := goquery.NewDocumentFromReader(bytes.NewReader(body)) - technologies := s.Options.Wappalyzer.Fingerprint(resp.Header, body) + var technologyKeys []string + if s.Options.Wappalyzer != nil { + technologies := s.Options.Wappalyzer.Fingerprint(resp.Header, body) + technologyKeys = mapsutil.GetKeys(technologies) + } navigationResponse := &navigation.Response{ Depth: depth + 1, RootHostname: hostname, Resp: resp, Body: string(body), Reader: reader, - Technologies: mapsutil.GetKeys(technologies), + Technologies: technologyKeys, StatusCode: resp.StatusCode, Headers: utils.FlattenHeaders(resp.Header), } diff --git a/pkg/engine/hybrid/crawl.go b/pkg/engine/hybrid/crawl.go index c64d26da..9e90f9e6 100644 --- a/pkg/engine/hybrid/crawl.go +++ b/pkg/engine/hybrid/crawl.go @@ -103,7 +103,14 @@ func (c *Crawler) navigateRequest(s *common.CrawlSession, request *navigation.Re rawBytesResponse, _ = httputil.DumpResponse(httpresp, true) bodyReader, _ := goquery.NewDocumentFromReader(bytes.NewReader(body)) - technologies := c.Options.Wappalyzer.Fingerprint(headers, body) + var technologies map[string]interface{} + if c.Options.Wappalyzer != nil { + fingerprints := c.Options.Wappalyzer.Fingerprint(headers, body) + technologies = make(map[string]interface{}, len(fingerprints)) + for k := range fingerprints { + technologies[k] = struct{}{} + } + } resp := &navigation.Response{ Resp: httpresp, Body: string(body), diff --git a/pkg/engine/standard/crawl.go b/pkg/engine/standard/crawl.go index 55be557d..932e7ba9 100644 --- a/pkg/engine/standard/crawl.go +++ b/pkg/engine/standard/crawl.go @@ -80,8 +80,10 @@ func (c *Crawler) makeRequest(s *common.CrawlSession, request *navigation.Reques return &navigation.Response{}, nil } - technologies := c.Options.Wappalyzer.Fingerprint(resp.Header, data) - response.Technologies = mapsutil.GetKeys(technologies) + if c.Options.Wappalyzer != nil { + technologies := c.Options.Wappalyzer.Fingerprint(resp.Header, data) + response.Technologies = mapsutil.GetKeys(technologies) + } resp.Body = io.NopCloser(strings.NewReader(string(data))) diff --git a/pkg/types/crawler_options.go b/pkg/types/crawler_options.go index 86cb6592..170c7a16 100644 --- a/pkg/types/crawler_options.go +++ b/pkg/types/crawler_options.go @@ -117,11 +117,13 @@ func NewCrawlerOptions(options *Options) (*CrawlerOptions, error) { crawlerOptions.RateLimit = ratelimit.New(context.Background(), uint(options.RateLimitMinute), time.Minute) } - wappalyze, err := wappalyzer.New() - if err != nil { - return nil, err + if options.TechDetect { + wappalyze, err := wappalyzer.New() + if err != nil { + return nil, err + } + crawlerOptions.Wappalyzer = wappalyze } - crawlerOptions.Wappalyzer = wappalyze return crawlerOptions, nil } diff --git a/pkg/types/options.go b/pkg/types/options.go index 43e6e8f3..cc536b75 100644 --- a/pkg/types/options.go +++ b/pkg/types/options.go @@ -85,6 +85,8 @@ type Options struct { Silent bool // Verbose specifies showing verbose output Verbose bool + // TechDetect enables technology detection + TechDetect bool // Version enables showing of crawler version Version bool // ScrapeJSResponses enables scraping of relative endpoints from javascript @@ -133,6 +135,8 @@ type Options struct { XhrExtraction bool // HealthCheck determines if a self-healthcheck should be performed HealthCheck bool + // PprofServer enables pprof server + PprofServer bool // ErrorLogFile specifies a file to write with the errors of all requests ErrorLogFile string // Resolvers contains custom resolvers