This repository has been archived by the owner on Nov 9, 2017. It is now read-only.
forked from git-for-windows/git
-
Notifications
You must be signed in to change notification settings - Fork 316
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Intern strings so they can be compared by address and stored without wasting space. This library uses the macros in the obj_pool.h and trp.h to create a memory pool for strings and expose an API for handling them. [rr: added API docs] [jn: with some API simplifications, new documentation and tests] Signed-off-by: David Barr <david.barr@cordelta.com> Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
- Loading branch information
Showing
7 changed files
with
210 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -173,6 +173,7 @@ | |
/test-run-command | ||
/test-sha1 | ||
/test-sigchain | ||
/test-string-pool | ||
/test-treap | ||
/common-cmds.h | ||
*.tar.gz | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
/* | ||
* test-string-pool.c: code to exercise the svn importer's string pool | ||
*/ | ||
|
||
#include "git-compat-util.h" | ||
#include "vcs-svn/string_pool.h" | ||
|
||
int main(int argc, char *argv[]) | ||
{ | ||
const uint32_t unequal = pool_intern("does not equal"); | ||
const uint32_t equal = pool_intern("equals"); | ||
uint32_t buf[3]; | ||
uint32_t n; | ||
|
||
if (argc != 2) | ||
usage("test-string-pool <string>,<string>"); | ||
|
||
n = pool_tok_seq(3, buf, ",-", argv[1]); | ||
if (n >= 3) | ||
die("too many strings"); | ||
if (n <= 1) | ||
die("too few strings"); | ||
|
||
buf[2] = buf[1]; | ||
buf[1] = (buf[0] == buf[2]) ? equal : unequal; | ||
pool_print_seq(3, buf, ' ', stdout); | ||
fputc('\n', stdout); | ||
|
||
pool_reset(); | ||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
/* | ||
* Licensed under a two-clause BSD-style license. | ||
* See LICENSE for details. | ||
*/ | ||
|
||
#include "git-compat-util.h" | ||
#include "trp.h" | ||
#include "obj_pool.h" | ||
#include "string_pool.h" | ||
|
||
static struct trp_root tree = { ~0 }; | ||
|
||
struct node { | ||
uint32_t offset; | ||
struct trp_node children; | ||
}; | ||
|
||
/* Two memory pools: one for struct node, and another for strings */ | ||
obj_pool_gen(node, struct node, 4096) | ||
obj_pool_gen(string, char, 4096) | ||
|
||
static char *node_value(struct node *node) | ||
{ | ||
return node ? string_pointer(node->offset) : NULL; | ||
} | ||
|
||
static int node_cmp(struct node *a, struct node *b) | ||
{ | ||
return strcmp(node_value(a), node_value(b)); | ||
} | ||
|
||
/* Build a Treap from the node structure (a trp_node w/ offset) */ | ||
trp_gen(static, tree_, struct node, children, node, node_cmp); | ||
|
||
const char *pool_fetch(uint32_t entry) | ||
{ | ||
return node_value(node_pointer(entry)); | ||
} | ||
|
||
uint32_t pool_intern(const char *key) | ||
{ | ||
/* Canonicalize key */ | ||
struct node *match = NULL, *node; | ||
uint32_t key_len; | ||
if (key == NULL) | ||
return ~0; | ||
key_len = strlen(key) + 1; | ||
node = node_pointer(node_alloc(1)); | ||
node->offset = string_alloc(key_len); | ||
strcpy(node_value(node), key); | ||
match = tree_search(&tree, node); | ||
if (!match) { | ||
tree_insert(&tree, node); | ||
} else { | ||
node_free(1); | ||
string_free(key_len); | ||
node = match; | ||
} | ||
return node_offset(node); | ||
} | ||
|
||
uint32_t pool_tok_r(char *str, const char *delim, char **saveptr) | ||
{ | ||
char *token = strtok_r(str, delim, saveptr); | ||
return token ? pool_intern(token) : ~0; | ||
} | ||
|
||
void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream) | ||
{ | ||
uint32_t i; | ||
for (i = 0; i < len && ~seq[i]; i++) { | ||
fputs(pool_fetch(seq[i]), stream); | ||
if (i < len - 1 && ~seq[i + 1]) | ||
fputc(delim, stream); | ||
} | ||
} | ||
|
||
uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str) | ||
{ | ||
char *context = NULL; | ||
uint32_t token = ~0; | ||
uint32_t length; | ||
|
||
if (sz == 0) | ||
return ~0; | ||
if (str) | ||
token = pool_tok_r(str, delim, &context); | ||
for (length = 0; length < sz; length++) { | ||
seq[length] = token; | ||
if (token == ~0) | ||
return length; | ||
token = pool_tok_r(NULL, delim, &context); | ||
} | ||
seq[sz - 1] = ~0; | ||
return sz; | ||
} | ||
|
||
void pool_reset(void) | ||
{ | ||
node_reset(); | ||
string_reset(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#ifndef STRING_POOL_H_ | ||
#define STRING_POOL_H_ | ||
|
||
uint32_t pool_intern(const char *key); | ||
const char *pool_fetch(uint32_t entry); | ||
uint32_t pool_tok_r(char *str, const char *delim, char **saveptr); | ||
void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream); | ||
uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str); | ||
void pool_reset(void); | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
string_pool API | ||
=============== | ||
|
||
The string_pool API provides facilities for replacing strings | ||
with integer keys that can be more easily compared and stored. | ||
The facilities are designed so that one could teach Git without | ||
too much trouble to store the information needed for these keys to | ||
remain valid over multiple executions. | ||
|
||
Functions | ||
--------- | ||
|
||
pool_intern:: | ||
Include a string in the string pool and get its key. | ||
If that string is already in the pool, retrieves its | ||
existing key. | ||
|
||
pool_fetch:: | ||
Retrieve the string associated to a given key. | ||
|
||
pool_tok_r:: | ||
Extract the key of the next token from a string. | ||
Interface mimics strtok_r. | ||
|
||
pool_print_seq:: | ||
Print a sequence of strings named by key to a file, using the | ||
specified delimiter to separate them. | ||
|
||
If NULL (key ~0) appears in the sequence, the sequence ends | ||
early. | ||
|
||
pool_tok_seq:: | ||
Split a string into tokens, storing the keys of segments | ||
into a caller-provided array. | ||
|
||
Unless sz is 0, the array will always be ~0-terminated. | ||
If there is not enough room for all the tokens, the | ||
array holds as many tokens as fit in the entries before | ||
the terminating ~0. Return value is the index after the | ||
last token, or sz if the tokens did not fit. | ||
|
||
pool_reset:: | ||
Deallocate storage for the string pool. |