Skip to content

Commit

Permalink
ctags: Add quick path for looking up too long strings in the keyword …
Browse files Browse the repository at this point in the history
…table

Parser code like

vString *str = vStringNew();
while (someCondition)
{
    int c = getcFromInputFile();
    vStringPut(str, c);
    if (lookupCaseKeyword (vStringValue(str), some_lang))
    {
        do_stuff();
        vStringClear(str);
    }
}

is prone to quadratic complexity because when someCondition isn't
satisfied in the parsed file, the string str grows by one in each
iteration and in each iteration lookupCaseKeyword() has to go
through strlen(str) characters to compute the hash.

Since we know the maximum length of the strings inside the keyword
hash table, we can store this value and if the queried string is
longer than this value, we can be sure it isn't present in the hash
table and return quickly without having to compute the full hash of the
string.
  • Loading branch information
techee committed Mar 21, 2023
1 parent 40d1db8 commit dea43ba
Showing 1 changed file with 25 additions and 4 deletions.
29 changes: 25 additions & 4 deletions ctags/main/keyword.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ typedef struct sHashEntry {
*/
static const unsigned int TableSize = 2039; /* prime */
static hashEntry **HashTable = NULL;
static unsigned int MaxEntryLen = 0;

/*
* FUNCTION DEFINITIONS
Expand Down Expand Up @@ -70,7 +71,8 @@ static hashEntry *getHashTableEntry (unsigned long hashedValue)
return entry;
}

static unsigned int hashValue (const char *const string, langType language)
static unsigned int hashValue (const char *const string, langType language,
unsigned int maxLen, bool *maxLenReached)
{
const signed char *p;
unsigned int h = 5381;
Expand All @@ -79,11 +81,19 @@ static unsigned int hashValue (const char *const string, langType language)

/* "djb" hash as used in g_str_hash() in glib */
for (p = (const signed char *)string; *p != '\0'; p++)
{
h = (h << 5) + h + tolower (*p);
if (p - (const signed char *)string > maxLen)
{
*maxLenReached = true;
return 0;
}
}

/* consider language as an extra "character" and add it to the hash */
h = (h << 5) + h + language;

*maxLenReached = false;
return h;
}

Expand All @@ -107,8 +117,13 @@ static hashEntry *newEntry (
*/
extern void addKeyword (const char *const string, langType language, int value)
{
const unsigned int index = hashValue (string, language) % TableSize;
bool dummy;
const unsigned int index = hashValue (string, language, 1000, &dummy) % TableSize;
hashEntry *entry = getHashTableEntry (index);
size_t len = strlen (string);

if (len > MaxEntryLen)
MaxEntryLen = len;

if (entry == NULL)
{
Expand Down Expand Up @@ -139,10 +154,16 @@ extern void addKeyword (const char *const string, langType language, int value)

static int lookupKeywordFull (const char *const string, bool caseSensitive, langType language)
{
const unsigned int index = hashValue (string, language) % TableSize;
hashEntry *entry = getHashTableEntry (index);
bool maxLenReached;
const unsigned int index = hashValue (string, language, MaxEntryLen, &maxLenReached) % TableSize;
hashEntry *entry;
int result = KEYWORD_NONE;

if (maxLenReached)
return KEYWORD_NONE;

entry = getHashTableEntry (index);

while (entry != NULL)
{
if (language == entry->language &&
Expand Down

0 comments on commit dea43ba

Please sign in to comment.