Skip to content

Commit

Permalink
Merge pull request #73 from Halbaroth/statistics
Browse files Browse the repository at this point in the history
Add detailed statistics for the subcommand `show`
  • Loading branch information
c-cube authored Oct 26, 2023
2 parents 05664d1 + e452148 commit febf75b
Show file tree
Hide file tree
Showing 12 changed files with 147 additions and 43 deletions.
11 changes: 11 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
root = true

[*]
indent_style = space
indent_size = 2
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true

[Makefile]
indent_style = tab
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ jobs:
#- windows-latest # depext issue for gnuplot/jemalloc?
#- macos-latest # slow
ocaml-compiler:
- 4.08.x
- 4.12.x
- 5.0.x
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
.*.swp
.*.swo
_opam
_build
*.native
*.byte
Expand Down
2 changes: 1 addition & 1 deletion benchpress-server.opam
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ depends: [
"tiny_httpd" { >= "0.12" & < "1.0" }
"printbox" { >= "0.6" }
"printbox-text" { >= "0.6" }
"ocaml" {>= "4.08" }
"ocaml" {>= "4.12" }
"jemalloc" { >= "0.2" & < "0.3" }
]
homepage: "https://github.com/sneeuwballen/benchpress/"
Expand Down
2 changes: 1 addition & 1 deletion benchpress-worker.opam
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ build: [
["dune" "build" "@doc" "-p" name] {with-doc}
]
depends: [
"ocaml" {>= "4.08" }
"ocaml" {>= "4.12" }
"dune" { >= "1.11" }
"containers" { >= "3.0" & < "4.0" }
"benchpress" { = version }
Expand Down
2 changes: 1 addition & 1 deletion benchpress.opam
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ depends: [
"sqlite3_utils" { >= "0.4" & < "0.6" }
"printbox" { >= "0.6" }
"printbox-text" { >= "0.6" }
"ocaml" {>= "4.08" }
"ocaml" {>= "4.12" }
]
homepage: "https://github.com/sneeuwballen/benchpress/"
dev-repo: "git+https://github.com/sneeuwballen/benchpress.git"
Expand Down
5 changes: 3 additions & 2 deletions src/bin/Show.ml
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
(* run tests, or compare results *)

(* TODO: only load full results if needed *)
let main ?(check = true) ?(bad = true) ?csv ?summary (file : string) : unit =
let main ?(check = true) ?(bad = true) ~details ?csv ?summary (file : string) :
unit =
Logs.debug (fun k -> k "loading file %S..." file);
let res = Bin_utils.load_file file in
Logs.debug (fun k -> k "loaded file %S" file);
Bin_utils.dump_csv ~csv res;
Bin_utils.dump_summary ~summary res;
Bin_utils.printbox_results res;
Bin_utils.printbox_results ~details res;
if bad && not (Test_top_result.is_ok res) then
Format.printf "@[<2>bad: %a@]@." Test_top_result.pp_bad res;
if check then Bin_utils.check_res Notify.nil res;
Expand Down
13 changes: 9 additions & 4 deletions src/bin/benchpress_bin.ml
Original file line number Diff line number Diff line change
Expand Up @@ -382,16 +382,21 @@ module Show = struct
value
& opt (some string) None
& info [ "summary" ] ~doc:"write summary in FILE")
and debug = Logs_cli.level () in
let aux check bad csv summary no_color debug file : bool =
and debug = Logs_cli.level ()
and details =
Arg.(value & flag & info [ "details" ] ~doc:"show more details")
in
let aux check bad csv summary no_color debug details file : bool =
catch_err @@ fun () ->
Misc.setup_logs debug;
if no_color then CCFormat.set_color_default false;
Show.main ~check ~bad ?csv ?summary file
Show.main ~check ~bad ~details ?csv ?summary file
in
let doc = "show benchmark results (see `list-files`)" in
Cmd.v (Cmd.info ~doc "show")
Term.(const aux $ check $ bad $ csv $ summary $ no_color $ debug $ file)
Term.(
const aux $ check $ bad $ csv $ summary $ no_color $ debug $ details
$ file)
end

(** {2 plot results} *)
Expand Down
10 changes: 5 additions & 5 deletions src/core/Bin_utils.ml
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ let check_res ?(no_failure = false) notify (results : Test_top_result.t) : unit
let a = Test_top_result.analyze results in
check_res_an ~no_failure notify a

let printbox_stat st : unit =
let box_st = Test_stat.to_printbox_l st in
let printbox_stat ~details st : unit =
let box_st = Test_stat.to_printbox_l ~details st in
Printf.printf "STAT:\n%s\n%!" (PrintBox_text.to_string box_st);
()

Expand All @@ -103,13 +103,13 @@ let printbox_analyze a =
()

let printbox_compact_results (results : Test_compact_result.t) : unit =
printbox_stat results.cr_stat;
printbox_stat ~details:false results.cr_stat;
printbox_analyze results.cr_analyze;
()

let printbox_results (results : Test_top_result.t) : unit =
let printbox_results ~details (results : Test_top_result.t) : unit =
(let st = Test_top_result.stat results in
printbox_stat st);
printbox_stat ~details st);
(let a = Test_top_result.analyze results in
printbox_analyze a);
()
Expand Down
124 changes: 103 additions & 21 deletions src/core/Test_stat.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,65 @@ open Common
open Test
module PB = PrintBox

(** Aggregate function computing the mean and the standard deviation
of data series using the Welford's algorithm. *)
module Stats = struct
type acc = { n: int; total: float; mean: float * float; s: float * float }

let prev = fst
let curr = snd
let init = { n = 0; total = 0.; mean = 0., 0.; s = 0., 0. }

let step ({ n; total; mean; s } as acc) x =
let x =
match x with
| Sqlite3.Data.FLOAT f -> f
| _ -> assert false
in
let n = n + 1 in
let total = total +. x in
if n = 1 then
{ acc with n; total; mean = x, x }
else (
let new_mean = prev mean +. ((x -. prev mean) /. Float.of_int n) in
let new_s = prev s +. ((x -. prev mean) *. (x -. new_mean)) in
{ n; total; mean = curr mean, new_mean; s = curr s, new_s }
)

let total { total; _ } = total

let mean { n; mean = _, mean; _ } =
if n = 0 then
0.
else
mean

let sd { n; s = _, s; _ } =
if n <= 1 then
0.
else
Float.(sqrt (s /. of_int (n - 1)))

let final acc =
let res = Fmt.asprintf "%f|%f|%f" (total acc) (mean acc) (sd acc) in
Sqlite3.Data.TEXT res

let attach_aggregate db =
Sqlite3.Aggregate.create_fun1 db ~init ~step ~final "stats"
end

type detail_stats = { n: int; total: float; mean: float; sd: float }

type t = {
unsat: int;
sat: int;
unsat: detail_stats;
sat: detail_stats;
errors: int;
unknown: int;
unknown: detail_stats;
timeout: int;
memory: int;
valid_proof: int;
invalid_proof: int;
custom: (string * int) list;
custom: (string * detail_stats) list;
total: int;
total_time: float; (* for sat+unsat *)
}
Expand All @@ -24,11 +73,28 @@ let get_timeout r = r.timeout
let get_valid_proof r = r.valid_proof
let get_invalid_proof r = r.invalid_proof
let get_memory r = r.memory
let get_custom s r = try List.assoc s r.custom with Not_found -> 0

let get_custom s r =
try List.assoc s r.custom
with Not_found -> { n = 0; total = 0.; mean = 0.; sd = 0. }

let get_total r = r.total
let get_total_time r = r.total_time

let to_printbox_l ?to_link l : PB.t =
let pb_details_stats_color ~details c { n; total; mean; sd } =
let open PB in
if n = 0 || not details then
align ~h:`Left ~v:`Center (int n)
else
v_record
[
"number", pb_int_color c n;
"total", text @@ Misc.human_duration total;
"mean", text @@ Misc.human_duration mean;
"deviation", text @@ Misc.human_duration sd;
]

let to_printbox_l ?(details = false) ?to_link l : PB.t =
let mk_row ?res lbl get_r mk_box : PB.t list =
match to_link with
| Some f ->
Expand All @@ -47,21 +113,25 @@ let to_printbox_l ?to_link l : PB.t =
l
| _ -> PB.text lbl :: List.map (fun (_p, r) -> mk_box (get_r r)) l
and mk_row1 lbl get_r mk_box =
PB.text lbl :: List.map (fun (_, r) -> mk_box @@ get_r r) l
PB.(align ~h:`Left ~v:`Center (text lbl))
:: List.map (fun (_, r) -> mk_box @@ get_r r) l
in
let r1 =
[
mk_row "sat" get_sat @@ pb_int_color PB.Style.(fg_color Green);
mk_row "unsat" get_unsat @@ pb_int_color PB.Style.(fg_color Green);
mk_row1 "sat+unsat" (fun r -> r.sat + r.unsat)
mk_row1 "sat" get_sat
@@ pb_details_stats_color ~details PB.Style.(fg_color Green);
mk_row1 "unsat" get_unsat
@@ pb_details_stats_color ~details PB.Style.(fg_color Green);
mk_row1 "sat+unsat" (fun r -> r.sat.n + r.unsat.n)
@@ pb_int_color PB.Style.(fg_color Green);
mk_row ~res:"error" "errors" get_errors
@@ pb_int_color PB.Style.(fg_color Cyan);
mk_row1 "valid_proof" get_valid_proof
@@ pb_int_color PB.Style.(fg_color Green);
mk_row1 "invalid_proof" get_invalid_proof
@@ pb_int_color PB.Style.(fg_color Red);
mk_row "unknown" get_unknown PB.int;
mk_row1 "unknown" get_unknown
@@ pb_details_stats_color ~details PB.Style.(fg_color White);
mk_row "timeout" get_timeout PB.int;
]
and r2 =
Expand All @@ -70,7 +140,8 @@ let to_printbox_l ?to_link l : PB.t =
|> CCList.sort_uniq ~cmp:String.compare
in
CCList.map
(fun tag -> mk_row ~res:tag ("tag." ^ tag) (get_custom tag) PB.int)
(fun tag ->
mk_row ~res:tag ("tag." ^ tag) (fun t -> (get_custom tag t).n) PB.int)
all_custom
and r3 =
[
Expand All @@ -88,12 +159,23 @@ let to_printbox_l ?to_link l : PB.t =
let of_db_for ~(prover : Prover.name) (db : Db.t) : t =
Error.guard (Error.wrapf "reading stat(%s) from DB" prover) @@ fun () ->
let custom = Prover.tags_of_db db in
Stats.attach_aggregate db;
let convert n stats =
let extract_stats stats =
String.split_on_char '|' stats |> List.map Float.of_string |> function
| [ total; mean; sd ] ->
{ n = CCOpt.get_or ~default:0 n; total; mean; sd }
| _ -> assert false
in
extract_stats stats
in
let get_res r =
Error.guard (Error.wrapf "get-res %S" r) @@ fun () ->
Logs.debug (fun k -> k "get-res %S" r);
Db.exec db {| select count(*) from prover_res where prover=? and res=?; |}
Db.exec db
{| select count(*), stats(rtime) from prover_res where prover=? and res=?; |}
prover r
~ty:Db.Ty.(p2 text text, p1 (nullable int), CCOpt.get_or ~default:0)
~ty:Db.Ty.(p2 text text, p2 (nullable int) text, convert)
~f:Db.Cursor.get_one_exn
|> Misc.unwrap_db (fun () -> spf "problems with result %s" r)
in
Expand All @@ -119,15 +201,15 @@ let of_db_for ~(prover : Prover.name) (db : Db.t) : t =
let sat = get_res "sat" in
let unsat = get_res "unsat" in
let unknown = get_res "unknown" in
let timeout = get_res "timeout" in
let memory = get_res "memory" in
let errors = get_res "error" in
let timeout = (get_res "timeout").n in
let memory = (get_res "memory").n in
let errors = (get_res "error").n in
let valid_proof = get_proof_res "valid" in
let invalid_proof = get_proof_res "invalid" in
let custom = CCList.map (fun tag -> tag, get_res tag) custom in
let total =
sat + unsat + unknown + timeout + memory + errors
+ List.fold_left (fun n (_, i) -> n + i) 0 custom
sat.n + unsat.n + unknown.n + timeout + memory + errors
+ List.fold_left (fun acc (_, { n; _ }) -> acc + n) 0 custom
in
let total_time =
Db.exec db
Expand Down Expand Up @@ -164,7 +246,7 @@ let pp out (s : t) : unit =
"(@[<hv>:unsat %d@ :sat %d@ :solved %d@ :errors %d@ :unknown %d@ \
:valid-proof %d@ :invalid-proof %d@ :timeout %d@ :total %d@ :total_time \
%.2f@])"
s.unsat s.sat (s.sat + s.unsat) s.errors s.unknown s.valid_proof
s.unsat.n s.sat.n (s.sat.n + s.unsat.n) s.errors s.unknown.n s.valid_proof
s.invalid_proof s.timeout
(s.unsat + s.sat + s.errors + s.unknown + s.timeout)
(s.unsat.n + s.sat.n + s.errors + s.unknown.n + s.timeout)
s.total_time
14 changes: 9 additions & 5 deletions src/core/Test_stat.mli
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,25 @@ open Common
open Test
module PB = PrintBox

type detail_stats = { n: int; total: float; mean: float; sd: float }

type t = {
unsat: int;
sat: int;
unsat: detail_stats;
sat: detail_stats;
errors: int;
unknown: int;
unknown: detail_stats;
timeout: int;
memory: int;
valid_proof: int;
invalid_proof: int;
custom: (string * int) list;
custom: (string * detail_stats) list;
total: int;
total_time: float; (* for sat+unsat *)
}

val to_printbox_l : ?to_link:prover_string_linker -> (string * t) list -> PB.t
val to_printbox_l :
?details:bool -> ?to_link:prover_string_linker -> (string * t) list -> PB.t

val of_db_for : prover:Prover.name -> Db.t -> t
val of_db : Db.t -> (Prover.name * t) list
val pp : t Fmt.printer
4 changes: 2 additions & 2 deletions tests/fake.expected
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
STAT:
provers │fake2│fake1
─────────────┼─────┼─────
sat │11
sat │11
─────────────┼─────┼─────
unsat │11
unsat │11
─────────────┼─────┼─────
sat+unsat │2 │2
─────────────┼─────┼─────
Expand Down

0 comments on commit febf75b

Please sign in to comment.