diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..c16bbae --- /dev/null +++ b/.editorconfig @@ -0,0 +1,11 @@ +root = true + +[*] +indent_style = space +indent_size = 2 +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true + +[Makefile] +indent_style = tab diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ef71177..9aeb3cc 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -16,8 +16,8 @@ jobs: #- windows-latest # depext issue for gnuplot/jemalloc? #- macos-latest # slow ocaml-compiler: - - 4.08.x - 4.12.x + - 5.0.x runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v2 diff --git a/.gitignore b/.gitignore index 61ae156..354d9c4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ .*.swp .*.swo +_opam _build *.native *.byte diff --git a/benchpress-server.opam b/benchpress-server.opam index 121a4c5..03c237d 100644 --- a/benchpress-server.opam +++ b/benchpress-server.opam @@ -23,7 +23,7 @@ depends: [ "tiny_httpd" { >= "0.12" & < "1.0" } "printbox" { >= "0.6" } "printbox-text" { >= "0.6" } - "ocaml" {>= "4.08" } + "ocaml" {>= "4.12" } "jemalloc" { >= "0.2" & < "0.3" } ] homepage: "https://github.com/sneeuwballen/benchpress/" diff --git a/benchpress-worker.opam b/benchpress-worker.opam index 37c4f1b..34ff98d 100644 --- a/benchpress-worker.opam +++ b/benchpress-worker.opam @@ -8,7 +8,7 @@ build: [ ["dune" "build" "@doc" "-p" name] {with-doc} ] depends: [ - "ocaml" {>= "4.08" } + "ocaml" {>= "4.12" } "dune" { >= "1.11" } "containers" { >= "3.0" & < "4.0" } "benchpress" { = version } diff --git a/benchpress.opam b/benchpress.opam index bf911c9..5df3525 100644 --- a/benchpress.opam +++ b/benchpress.opam @@ -28,7 +28,7 @@ depends: [ "sqlite3_utils" { >= "0.4" & < "0.6" } "printbox" { >= "0.6" } "printbox-text" { >= "0.6" } - "ocaml" {>= "4.08" } + "ocaml" {>= "4.12" } ] homepage: "https://github.com/sneeuwballen/benchpress/" dev-repo: "git+https://github.com/sneeuwballen/benchpress.git" diff --git a/src/bin/Show.ml b/src/bin/Show.ml index 7d2b1bd..a235e17 100644 --- a/src/bin/Show.ml +++ b/src/bin/Show.ml @@ -1,13 +1,14 @@ (* run tests, or compare results *) (* TODO: only load full results if needed *) -let main ?(check = true) ?(bad = true) ?csv ?summary (file : string) : unit = +let main ?(check = true) ?(bad = true) ~details ?csv ?summary (file : string) : + unit = Logs.debug (fun k -> k "loading file %S..." file); let res = Bin_utils.load_file file in Logs.debug (fun k -> k "loaded file %S" file); Bin_utils.dump_csv ~csv res; Bin_utils.dump_summary ~summary res; - Bin_utils.printbox_results res; + Bin_utils.printbox_results ~details res; if bad && not (Test_top_result.is_ok res) then Format.printf "@[<2>bad: %a@]@." Test_top_result.pp_bad res; if check then Bin_utils.check_res Notify.nil res; diff --git a/src/bin/benchpress_bin.ml b/src/bin/benchpress_bin.ml index 5c36aef..1f55696 100644 --- a/src/bin/benchpress_bin.ml +++ b/src/bin/benchpress_bin.ml @@ -382,16 +382,21 @@ module Show = struct value & opt (some string) None & info [ "summary" ] ~doc:"write summary in FILE") - and debug = Logs_cli.level () in - let aux check bad csv summary no_color debug file : bool = + and debug = Logs_cli.level () + and details = + Arg.(value & flag & info [ "details" ] ~doc:"show more details") + in + let aux check bad csv summary no_color debug details file : bool = catch_err @@ fun () -> Misc.setup_logs debug; if no_color then CCFormat.set_color_default false; - Show.main ~check ~bad ?csv ?summary file + Show.main ~check ~bad ~details ?csv ?summary file in let doc = "show benchmark results (see `list-files`)" in Cmd.v (Cmd.info ~doc "show") - Term.(const aux $ check $ bad $ csv $ summary $ no_color $ debug $ file) + Term.( + const aux $ check $ bad $ csv $ summary $ no_color $ debug $ details + $ file) end (** {2 plot results} *) diff --git a/src/core/Bin_utils.ml b/src/core/Bin_utils.ml index d9c09c6..70205e9 100644 --- a/src/core/Bin_utils.ml +++ b/src/core/Bin_utils.ml @@ -92,8 +92,8 @@ let check_res ?(no_failure = false) notify (results : Test_top_result.t) : unit let a = Test_top_result.analyze results in check_res_an ~no_failure notify a -let printbox_stat st : unit = - let box_st = Test_stat.to_printbox_l st in +let printbox_stat ~details st : unit = + let box_st = Test_stat.to_printbox_l ~details st in Printf.printf "STAT:\n%s\n%!" (PrintBox_text.to_string box_st); () @@ -103,13 +103,13 @@ let printbox_analyze a = () let printbox_compact_results (results : Test_compact_result.t) : unit = - printbox_stat results.cr_stat; + printbox_stat ~details:false results.cr_stat; printbox_analyze results.cr_analyze; () -let printbox_results (results : Test_top_result.t) : unit = +let printbox_results ~details (results : Test_top_result.t) : unit = (let st = Test_top_result.stat results in - printbox_stat st); + printbox_stat ~details st); (let a = Test_top_result.analyze results in printbox_analyze a); () diff --git a/src/core/Test_stat.ml b/src/core/Test_stat.ml index cca1599..913577b 100644 --- a/src/core/Test_stat.ml +++ b/src/core/Test_stat.ml @@ -2,16 +2,65 @@ open Common open Test module PB = PrintBox +(** Aggregate function computing the mean and the standard deviation + of data series using the Welford's algorithm. *) +module Stats = struct + type acc = { n: int; total: float; mean: float * float; s: float * float } + + let prev = fst + let curr = snd + let init = { n = 0; total = 0.; mean = 0., 0.; s = 0., 0. } + + let step ({ n; total; mean; s } as acc) x = + let x = + match x with + | Sqlite3.Data.FLOAT f -> f + | _ -> assert false + in + let n = n + 1 in + let total = total +. x in + if n = 1 then + { acc with n; total; mean = x, x } + else ( + let new_mean = prev mean +. ((x -. prev mean) /. Float.of_int n) in + let new_s = prev s +. ((x -. prev mean) *. (x -. new_mean)) in + { n; total; mean = curr mean, new_mean; s = curr s, new_s } + ) + + let total { total; _ } = total + + let mean { n; mean = _, mean; _ } = + if n = 0 then + 0. + else + mean + + let sd { n; s = _, s; _ } = + if n <= 1 then + 0. + else + Float.(sqrt (s /. of_int (n - 1))) + + let final acc = + let res = Fmt.asprintf "%f|%f|%f" (total acc) (mean acc) (sd acc) in + Sqlite3.Data.TEXT res + + let attach_aggregate db = + Sqlite3.Aggregate.create_fun1 db ~init ~step ~final "stats" +end + +type detail_stats = { n: int; total: float; mean: float; sd: float } + type t = { - unsat: int; - sat: int; + unsat: detail_stats; + sat: detail_stats; errors: int; - unknown: int; + unknown: detail_stats; timeout: int; memory: int; valid_proof: int; invalid_proof: int; - custom: (string * int) list; + custom: (string * detail_stats) list; total: int; total_time: float; (* for sat+unsat *) } @@ -24,11 +73,28 @@ let get_timeout r = r.timeout let get_valid_proof r = r.valid_proof let get_invalid_proof r = r.invalid_proof let get_memory r = r.memory -let get_custom s r = try List.assoc s r.custom with Not_found -> 0 + +let get_custom s r = + try List.assoc s r.custom + with Not_found -> { n = 0; total = 0.; mean = 0.; sd = 0. } + let get_total r = r.total let get_total_time r = r.total_time -let to_printbox_l ?to_link l : PB.t = +let pb_details_stats_color ~details c { n; total; mean; sd } = + let open PB in + if n = 0 || not details then + align ~h:`Left ~v:`Center (int n) + else + v_record + [ + "number", pb_int_color c n; + "total", text @@ Misc.human_duration total; + "mean", text @@ Misc.human_duration mean; + "deviation", text @@ Misc.human_duration sd; + ] + +let to_printbox_l ?(details = false) ?to_link l : PB.t = let mk_row ?res lbl get_r mk_box : PB.t list = match to_link with | Some f -> @@ -47,13 +113,16 @@ let to_printbox_l ?to_link l : PB.t = l | _ -> PB.text lbl :: List.map (fun (_p, r) -> mk_box (get_r r)) l and mk_row1 lbl get_r mk_box = - PB.text lbl :: List.map (fun (_, r) -> mk_box @@ get_r r) l + PB.(align ~h:`Left ~v:`Center (text lbl)) + :: List.map (fun (_, r) -> mk_box @@ get_r r) l in let r1 = [ - mk_row "sat" get_sat @@ pb_int_color PB.Style.(fg_color Green); - mk_row "unsat" get_unsat @@ pb_int_color PB.Style.(fg_color Green); - mk_row1 "sat+unsat" (fun r -> r.sat + r.unsat) + mk_row1 "sat" get_sat + @@ pb_details_stats_color ~details PB.Style.(fg_color Green); + mk_row1 "unsat" get_unsat + @@ pb_details_stats_color ~details PB.Style.(fg_color Green); + mk_row1 "sat+unsat" (fun r -> r.sat.n + r.unsat.n) @@ pb_int_color PB.Style.(fg_color Green); mk_row ~res:"error" "errors" get_errors @@ pb_int_color PB.Style.(fg_color Cyan); @@ -61,7 +130,8 @@ let to_printbox_l ?to_link l : PB.t = @@ pb_int_color PB.Style.(fg_color Green); mk_row1 "invalid_proof" get_invalid_proof @@ pb_int_color PB.Style.(fg_color Red); - mk_row "unknown" get_unknown PB.int; + mk_row1 "unknown" get_unknown + @@ pb_details_stats_color ~details PB.Style.(fg_color White); mk_row "timeout" get_timeout PB.int; ] and r2 = @@ -70,7 +140,8 @@ let to_printbox_l ?to_link l : PB.t = |> CCList.sort_uniq ~cmp:String.compare in CCList.map - (fun tag -> mk_row ~res:tag ("tag." ^ tag) (get_custom tag) PB.int) + (fun tag -> + mk_row ~res:tag ("tag." ^ tag) (fun t -> (get_custom tag t).n) PB.int) all_custom and r3 = [ @@ -88,12 +159,23 @@ let to_printbox_l ?to_link l : PB.t = let of_db_for ~(prover : Prover.name) (db : Db.t) : t = Error.guard (Error.wrapf "reading stat(%s) from DB" prover) @@ fun () -> let custom = Prover.tags_of_db db in + Stats.attach_aggregate db; + let convert n stats = + let extract_stats stats = + String.split_on_char '|' stats |> List.map Float.of_string |> function + | [ total; mean; sd ] -> + { n = CCOpt.get_or ~default:0 n; total; mean; sd } + | _ -> assert false + in + extract_stats stats + in let get_res r = Error.guard (Error.wrapf "get-res %S" r) @@ fun () -> Logs.debug (fun k -> k "get-res %S" r); - Db.exec db {| select count(*) from prover_res where prover=? and res=?; |} + Db.exec db + {| select count(*), stats(rtime) from prover_res where prover=? and res=?; |} prover r - ~ty:Db.Ty.(p2 text text, p1 (nullable int), CCOpt.get_or ~default:0) + ~ty:Db.Ty.(p2 text text, p2 (nullable int) text, convert) ~f:Db.Cursor.get_one_exn |> Misc.unwrap_db (fun () -> spf "problems with result %s" r) in @@ -119,15 +201,15 @@ let of_db_for ~(prover : Prover.name) (db : Db.t) : t = let sat = get_res "sat" in let unsat = get_res "unsat" in let unknown = get_res "unknown" in - let timeout = get_res "timeout" in - let memory = get_res "memory" in - let errors = get_res "error" in + let timeout = (get_res "timeout").n in + let memory = (get_res "memory").n in + let errors = (get_res "error").n in let valid_proof = get_proof_res "valid" in let invalid_proof = get_proof_res "invalid" in let custom = CCList.map (fun tag -> tag, get_res tag) custom in let total = - sat + unsat + unknown + timeout + memory + errors - + List.fold_left (fun n (_, i) -> n + i) 0 custom + sat.n + unsat.n + unknown.n + timeout + memory + errors + + List.fold_left (fun acc (_, { n; _ }) -> acc + n) 0 custom in let total_time = Db.exec db @@ -164,7 +246,7 @@ let pp out (s : t) : unit = "(@[:unsat %d@ :sat %d@ :solved %d@ :errors %d@ :unknown %d@ \ :valid-proof %d@ :invalid-proof %d@ :timeout %d@ :total %d@ :total_time \ %.2f@])" - s.unsat s.sat (s.sat + s.unsat) s.errors s.unknown s.valid_proof + s.unsat.n s.sat.n (s.sat.n + s.unsat.n) s.errors s.unknown.n s.valid_proof s.invalid_proof s.timeout - (s.unsat + s.sat + s.errors + s.unknown + s.timeout) + (s.unsat.n + s.sat.n + s.errors + s.unknown.n + s.timeout) s.total_time diff --git a/src/core/Test_stat.mli b/src/core/Test_stat.mli index 821d179..907c52f 100644 --- a/src/core/Test_stat.mli +++ b/src/core/Test_stat.mli @@ -4,21 +4,25 @@ open Common open Test module PB = PrintBox +type detail_stats = { n: int; total: float; mean: float; sd: float } + type t = { - unsat: int; - sat: int; + unsat: detail_stats; + sat: detail_stats; errors: int; - unknown: int; + unknown: detail_stats; timeout: int; memory: int; valid_proof: int; invalid_proof: int; - custom: (string * int) list; + custom: (string * detail_stats) list; total: int; total_time: float; (* for sat+unsat *) } -val to_printbox_l : ?to_link:prover_string_linker -> (string * t) list -> PB.t +val to_printbox_l : + ?details:bool -> ?to_link:prover_string_linker -> (string * t) list -> PB.t + val of_db_for : prover:Prover.name -> Db.t -> t val of_db : Db.t -> (Prover.name * t) list val pp : t Fmt.printer diff --git a/tests/fake.expected b/tests/fake.expected index dc05a70..f0f5751 100644 --- a/tests/fake.expected +++ b/tests/fake.expected @@ -3,9 +3,9 @@ STAT: provers │fake2│fake1 ─────────────┼─────┼───── -sat │1 │1 +sat │1 │1 ─────────────┼─────┼───── -unsat │1 │1 +unsat │1 │1 ─────────────┼─────┼───── sat+unsat │2 │2 ─────────────┼─────┼─────