Skip to content
This repository has been archived by the owner on Nov 9, 2017. It is now read-only.

Commit

Permalink
Add string-specific memory pool
Browse files Browse the repository at this point in the history
Intern strings so they can be compared by address and stored without
wasting space.

This library uses the macros in the obj_pool.h and trp.h to create a
memory pool for strings and expose an API for handling them.

[rr: added API docs]
[jn: with some API simplifications, new documentation and tests]

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information
barrbrain authored and gitster committed Aug 15, 2010
1 parent 951f316 commit 1d73b52
Show file tree
Hide file tree
Showing 7 changed files with 210 additions and 3 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@
/test-run-command
/test-sha1
/test-sigchain
/test-string-pool
/test-treap
/common-cmds.h
*.tar.gz
Expand Down
9 changes: 6 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,7 @@ TEST_PROGRAMS_NEED_X += test-path-utils
TEST_PROGRAMS_NEED_X += test-run-command
TEST_PROGRAMS_NEED_X += test-sha1
TEST_PROGRAMS_NEED_X += test-sigchain
TEST_PROGRAMS_NEED_X += test-string-pool
TEST_PROGRAMS_NEED_X += test-treap
TEST_PROGRAMS_NEED_X += test-index-version

Expand Down Expand Up @@ -1742,7 +1743,7 @@ ifndef NO_CURL
endif
XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
xdiff/xmerge.o xdiff/xpatience.o
VCSSVN_OBJS =
VCSSVN_OBJS = vcs-svn/string_pool.o
OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS)

dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d)
Expand Down Expand Up @@ -1867,7 +1868,7 @@ xdiff-interface.o $(XDIFF_OBJS): \
xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h

$(VCSSVN_OBJS): \
vcs-svn/obj_pool.h vcs-svn/trp.h
vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h
endif

exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \
Expand Down Expand Up @@ -2018,10 +2019,12 @@ test-delta$X: diff-delta.o patch-delta.o

test-parse-options$X: parse-options.o

test-string-pool$X: vcs-svn/lib.a

.PRECIOUS: $(TEST_OBJS)

test-%$X: test-%.o $(GITLIBS)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(filter %.a,$^) $(LIBS)

check-sha1:: test-sha1$X
./test-sha1.sh
Expand Down
16 changes: 16 additions & 0 deletions t/t0080-vcs-svn.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,22 @@ test_expect_success 'obj pool: high-water mark' '
test_cmp expected actual
'

test_expect_success 'string pool' '
echo a does not equal b >expected.differ &&
echo a equals a >expected.match &&
echo equals equals equals >expected.matchmore &&
test-string-pool "a,--b" >actual.differ &&
test-string-pool "a,a" >actual.match &&
test-string-pool "equals-equals" >actual.matchmore &&
test_must_fail test-string-pool a,a,a &&
test_must_fail test-string-pool a &&
test_cmp expected.differ actual.differ &&
test_cmp expected.match actual.match &&
test_cmp expected.matchmore actual.matchmore
'

test_expect_success 'treap sort' '
cat <<-\EOF >unsorted &&
68
Expand Down
31 changes: 31 additions & 0 deletions test-string-pool.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* test-string-pool.c: code to exercise the svn importer's string pool
*/

#include "git-compat-util.h"
#include "vcs-svn/string_pool.h"

int main(int argc, char *argv[])
{
const uint32_t unequal = pool_intern("does not equal");
const uint32_t equal = pool_intern("equals");
uint32_t buf[3];
uint32_t n;

if (argc != 2)
usage("test-string-pool <string>,<string>");

n = pool_tok_seq(3, buf, ",-", argv[1]);
if (n >= 3)
die("too many strings");
if (n <= 1)
die("too few strings");

buf[2] = buf[1];
buf[1] = (buf[0] == buf[2]) ? equal : unequal;
pool_print_seq(3, buf, ' ', stdout);
fputc('\n', stdout);

pool_reset();
return 0;
}
102 changes: 102 additions & 0 deletions vcs-svn/string_pool.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*
* Licensed under a two-clause BSD-style license.
* See LICENSE for details.
*/

#include "git-compat-util.h"
#include "trp.h"
#include "obj_pool.h"
#include "string_pool.h"

static struct trp_root tree = { ~0 };

struct node {
uint32_t offset;
struct trp_node children;
};

/* Two memory pools: one for struct node, and another for strings */
obj_pool_gen(node, struct node, 4096)
obj_pool_gen(string, char, 4096)

static char *node_value(struct node *node)
{
return node ? string_pointer(node->offset) : NULL;
}

static int node_cmp(struct node *a, struct node *b)
{
return strcmp(node_value(a), node_value(b));
}

/* Build a Treap from the node structure (a trp_node w/ offset) */
trp_gen(static, tree_, struct node, children, node, node_cmp);

const char *pool_fetch(uint32_t entry)
{
return node_value(node_pointer(entry));
}

uint32_t pool_intern(const char *key)
{
/* Canonicalize key */
struct node *match = NULL, *node;
uint32_t key_len;
if (key == NULL)
return ~0;
key_len = strlen(key) + 1;
node = node_pointer(node_alloc(1));
node->offset = string_alloc(key_len);
strcpy(node_value(node), key);
match = tree_search(&tree, node);
if (!match) {
tree_insert(&tree, node);
} else {
node_free(1);
string_free(key_len);
node = match;
}
return node_offset(node);
}

uint32_t pool_tok_r(char *str, const char *delim, char **saveptr)
{
char *token = strtok_r(str, delim, saveptr);
return token ? pool_intern(token) : ~0;
}

void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream)
{
uint32_t i;
for (i = 0; i < len && ~seq[i]; i++) {
fputs(pool_fetch(seq[i]), stream);
if (i < len - 1 && ~seq[i + 1])
fputc(delim, stream);
}
}

uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str)
{
char *context = NULL;
uint32_t token = ~0;
uint32_t length;

if (sz == 0)
return ~0;
if (str)
token = pool_tok_r(str, delim, &context);
for (length = 0; length < sz; length++) {
seq[length] = token;
if (token == ~0)
return length;
token = pool_tok_r(NULL, delim, &context);
}
seq[sz - 1] = ~0;
return sz;
}

void pool_reset(void)
{
node_reset();
string_reset();
}
11 changes: 11 additions & 0 deletions vcs-svn/string_pool.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#ifndef STRING_POOL_H_
#define STRING_POOL_H_

uint32_t pool_intern(const char *key);
const char *pool_fetch(uint32_t entry);
uint32_t pool_tok_r(char *str, const char *delim, char **saveptr);
void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream);
uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str);
void pool_reset(void);

#endif
43 changes: 43 additions & 0 deletions vcs-svn/string_pool.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
string_pool API
===============

The string_pool API provides facilities for replacing strings
with integer keys that can be more easily compared and stored.
The facilities are designed so that one could teach Git without
too much trouble to store the information needed for these keys to
remain valid over multiple executions.

Functions
---------

pool_intern::
Include a string in the string pool and get its key.
If that string is already in the pool, retrieves its
existing key.

pool_fetch::
Retrieve the string associated to a given key.

pool_tok_r::
Extract the key of the next token from a string.
Interface mimics strtok_r.

pool_print_seq::
Print a sequence of strings named by key to a file, using the
specified delimiter to separate them.

If NULL (key ~0) appears in the sequence, the sequence ends
early.

pool_tok_seq::
Split a string into tokens, storing the keys of segments
into a caller-provided array.

Unless sz is 0, the array will always be ~0-terminated.
If there is not enough room for all the tokens, the
array holds as many tokens as fit in the entries before
the terminating ~0. Return value is the index after the
last token, or sz if the tokens did not fit.

pool_reset::
Deallocate storage for the string pool.

0 comments on commit 1d73b52

Please sign in to comment.