Skip to content

Commit

Permalink
[optimization] do not store empty objects in the knowledge base (Bina…
Browse files Browse the repository at this point in the history
…ryAnalysisPlatform#1411)

Improves the performance by not storing the empty objects (objects
without values) in the knowledge base. When an object is created, we
just increase the id of the last created object instead of storing it
in the heap of objects. This improves performance and memory footprint
and creating a new object doesn't require any more memory in the
knowledge base. As a side effect, the object identifiers are never
reused, therefore there's no possiblility for unwanted aliasing. It
will also make much easier to implement KB garbage collector or tree
shaker if we will even need them. The optmization gets about 10%
improvement in both memory and time consumption, and overall
performance improvement since 2.4.0 is about 50%.

The knowledge base canonical representation is changed but in a
backward compatible manner, so that the old knowledge bases should be
read correctly and will be updated, if necessary in the new format.

This performance optimization is a tradeof between expending the
object space and overall performance of BAP. At the cost of using more
object identifiers (and we have plenty of them in the 2^60 space so
that we will run out of the space, both RAM or HDD long before we will
run out of the identifiers). The story might look different in the
OCaml 32-bit word, though.
  • Loading branch information
ivg authored Jan 25, 2022
1 parent c64a77f commit a4054d2
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 101 deletions.
153 changes: 54 additions & 99 deletions lib/knowledge/bap_knowledge.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2039,23 +2039,21 @@ module Knowledge = struct
type work = Done | Work of workers

type objects = {
last : Oid.t;
vals : Record.t Oid.Map.t;
comp : work Map.M(Name).t Oid.Map.t;
syms : fullname Oid.Map.t;
heap : cell Oid.Map.t;
data : Oid.t Cell.Map.t;
objs : Oid.t String.Map.t String.Map.t;
pubs : Oid.Set.t String.Map.t;
}

let empty_class = {
last = Oid.first_atom;
vals = Map.empty (module Oid);
comp = Map.empty (module Oid);
objs = Map.empty (module String);
syms = Map.empty (module Oid);
pubs = Map.empty (module String);
heap = Map.empty (module Oid);
data = Map.empty (module Cell);
}

type t = {
Expand Down Expand Up @@ -2303,17 +2301,9 @@ module Knowledge = struct
type +'a t = 'a obj
type 'a ord = Oid.comparator_witness

let with_new_object objs f = match Map.max_elt objs.Env.vals with
| None -> f Oid.first_atom {
objs
with vals = Map.singleton (module Oid) Oid.first_atom Record.empty
}
| Some (key,_) ->
let key = Oid.next key in
f key {
objs
with vals = Map.add_exn objs.vals ~key ~data:Record.empty
}
let with_new_object objs f =
let next = Oid.next objs.Env.last in
f next {objs with Env.last = next}

let create : ('a,_) cls -> 'a obj Knowledge.t = fun cls ->
objects cls >>= fun objs ->
Expand Down Expand Up @@ -2854,72 +2844,6 @@ module Knowledge = struct
let set_package name = update @@ fun s -> {s with package = name}
end


module Data : sig
type +'a t
type 'a ord

val atom : ('a,_) cls -> 'a obj -> 'a t knowledge
val cons : ('a,_) cls -> 'a t -> 'a t -> 'a t knowledge

val case : ('a,_) cls -> 'a t ->
null:'r knowledge ->
atom:('a obj -> 'r knowledge) ->
cons:('a t -> 'a t -> 'r knowledge) -> 'r knowledge


val id : 'a obj -> Int63.t


module type S = sig
type t [@@deriving sexp]
include Base.Comparable.S with type t := t
include Binable.S with type t := t
end

val derive : ('a,_) cls -> (module S
with type t = 'a t
and type comparator_witness = 'a ord)
end = struct
type +'a t = 'a obj
type 'a ord = Oid.comparator_witness

let atom _ x = Knowledge.return x

let add_cell {Class.name} objects oid cell =
let {Env.data; heap} = objects in
let data = Map.add_exn data ~key:cell ~data:oid in
let heap = Map.add_exn heap ~key:oid ~data:cell in
update (fun s -> {
s with classes = Map.set s.classes name {
objects with data; heap
}}) >>| fun () ->
oid

let cons cls car cdr =
let cell = {car; cdr} in
objects cls >>= function {data; heap} as s ->
match Map.find data cell with
| Some id -> Knowledge.return id
| None -> match Map.max_elt heap with
| None ->
add_cell cls s Oid.first_cell cell
| Some (id,_) ->
add_cell cls s (Oid.next id) cell

let case cls x ~null ~atom ~cons =
if Oid.is_null x then null else
if Oid.is_atom x || Oid.is_number x then atom x
else objects cls >>= fun {Env.heap} ->
let cell = Map.find_exn heap x in
cons cell.car cell.cdr

let id = Object.id

module type S = Object.S
let derive = Object.derive
end

module Syntax = struct
include Knowledge.Syntax
include Knowledge.Let
Expand Down Expand Up @@ -3040,7 +2964,7 @@ module Knowledge = struct
Format.fprintf ppf "@]";

module Io = struct
type version = V1 [@@deriving bin_io]
type version = V1 | V2 [@@deriving bin_io]

module List = Base.List

Expand All @@ -3051,12 +2975,14 @@ module Knowledge = struct
comp : Name.t list;
} [@@deriving bin_io]

type objects = data list [@@deriving bin_io]
type payload = (Name.t * objects) list [@@deriving bin_io]
type v1 = data list [@@deriving bin_io]
type v2 = Oid.t * v1 [@@deriving bin_io]
type 'a objects = 'a [@@deriving bin_io]
type 'a payload = (Name.t * 'a) list [@@deriving bin_io]

type canonical = {
type 'a canonical = {
version : version;
payload : payload;
payload : 'a payload;
} [@@deriving bin_io]

let magic = "CMU:KB"
Expand Down Expand Up @@ -3129,37 +3055,66 @@ module Knowledge = struct
| None -> []
| Some works -> Map.keys works


let to_canonical {Env.classes} =
let to_canonical {Env.classes} : v2 canonical =
let payload =
Map.to_alist classes |>
List.map ~f:(fun (cid, {Env.vals; syms; comp}) ->
cid,
Map.to_alist vals |> List.filter_map ~f:(fun (oid,value) ->
List.map ~f:(fun (cid, {Env.vals; syms; comp; last}) ->
let data = Map.to_alist vals |> List.filter_map ~f:(fun (oid,value) ->
let data = serialize_record value in
let sym = Map.find syms oid in
let comp = collect_comps comp oid in
if Array.is_empty data && Option.is_none sym
then None
else Some {key=oid; sym; data; comp})) in {
else Some {key=oid; sym; data; comp}) in
cid,(last,data)) in {
version = V1;
payload;
}

let of_canonical {payload} =
let init_last : state -> state = fun state -> {
state with
classes = Map.map state.classes ~f:(fun cls -> {
cls with
last = match Map.max_elt cls.vals with
| None -> cls.last
| Some (k,_) -> Oid.next k
})
}

let of_canonical_v1 {payload} =
let init = Map.empty (module Name) in
let classes =
List.fold payload ~init ~f:(fun state (cid,objs) ->
Map.add_exn state ~key:cid
~data:(List.fold objs ~f:add_object
~init:Env.empty_class)) in
init_last {empty with classes}

let of_canonical_v2 {payload} =
let init = Map.empty (module Name) in
let classes =
List.fold payload ~init ~f:(fun state (cid,(last,objs)) ->
let init = {
Env.empty_class with last
} in
Map.add_exn state ~key:cid
~data:(List.fold objs ~f:add_object
~init)) in
{empty with classes}


let of_bigstring data =
let pos_ref = ref (check_magic data) in
let V1 = bin_read_version data ~pos_ref in
let payload = bin_read_payload data ~pos_ref in
of_canonical {version=V1; payload}
let version = bin_read_version data ~pos_ref in
match version with
| V1 -> of_canonical_v1 {
version;
payload = bin_read_payload bin_read_v1 data ~pos_ref
}
| V2 -> of_canonical_v2 {
version;
payload = bin_read_payload bin_read_v2 data ~pos_ref
}

let load path =
let fd = Unix.openfile path Unix.[O_RDONLY] 0o400 in
Expand All @@ -3177,21 +3132,21 @@ module Knowledge = struct
let blit_canonical_to_bigstring repr buf =
Bigstring.From_string.blito ~src:magic ~dst:buf ();
let pos = String.length magic in
let _p = bin_write_canonical ~pos buf repr in
let _p = bin_write_canonical bin_write_v2 ~pos buf repr in
()

let to_bigstring state =
let repr = to_canonical state in
let size = String.length magic +
bin_size_canonical repr in
bin_size_canonical bin_size_v2 repr in
let data = Bigstring.create size in
blit_canonical_to_bigstring repr data;
data

let save state path =
let repr = to_canonical state in
let size = String.length magic +
bin_size_canonical repr in
bin_size_canonical bin_size_v2 repr in
let fd = Unix.openfile path Unix.[O_RDWR; O_CREAT; O_TRUNC] 0o660 in
try
let dim = [|size |]in
Expand Down
3 changes: 1 addition & 2 deletions plugins/bil/bil_lifter.ml
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,7 @@ module Brancher = struct
let goto dst = ret Theory.Effect.Sort.jump dst

let jmp _ =
KB.Object.create Theory.Program.cls >>= fun dst ->
ret Theory.Effect.Sort.jump dst
ret Theory.Effect.Sort.jump Theory.Label.null

let seq x y =
x >>= fun x ->
Expand Down

0 comments on commit a4054d2

Please sign in to comment.