Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ctags: Add quick path for looking up too long strings in the keyword table #3433

Merged
merged 1 commit into from
Mar 24, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
ctags: Add quick path for looking up too long strings in the keyword …
…table

Parser code like

vString *str = vStringNew();
while (someCondition)
{
    int c = getcFromInputFile();
    vStringPut(str, c);
    if (lookupCaseKeyword (vStringValue(str), some_lang))
    {
        do_stuff();
        vStringClear(str);
    }
}

is prone to quadratic complexity because when someCondition isn't
satisfied in the parsed file, the string str grows by one in each
iteration and in each iteration lookupCaseKeyword() has to go
through strlen(str) characters to compute the hash.

Since we know the maximum length of the strings inside the keyword
hash table, we can store this value and if the queried string is
longer than this value, we can be sure it isn't present in the hash
table and return quickly without having to compute the full hash of the
string.
  • Loading branch information
techee committed Mar 21, 2023
commit dea43baf477ab71650cdfc54fa976714def9c170
29 changes: 25 additions & 4 deletions ctags/main/keyword.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ typedef struct sHashEntry {
*/
static const unsigned int TableSize = 2039; /* prime */
static hashEntry **HashTable = NULL;
static unsigned int MaxEntryLen = 0;

/*
* FUNCTION DEFINITIONS
Expand Down Expand Up @@ -70,7 +71,8 @@ static hashEntry *getHashTableEntry (unsigned long hashedValue)
return entry;
}

static unsigned int hashValue (const char *const string, langType language)
static unsigned int hashValue (const char *const string, langType language,
unsigned int maxLen, bool *maxLenReached)
{
const signed char *p;
unsigned int h = 5381;
Expand All @@ -79,11 +81,19 @@ static unsigned int hashValue (const char *const string, langType language)

/* "djb" hash as used in g_str_hash() in glib */
for (p = (const signed char *)string; *p != '\0'; p++)
{
h = (h << 5) + h + tolower (*p);
if (p - (const signed char *)string > maxLen)
{
*maxLenReached = true;
return 0;
}
}

/* consider language as an extra "character" and add it to the hash */
h = (h << 5) + h + language;

*maxLenReached = false;
return h;
}

Expand All @@ -107,8 +117,13 @@ static hashEntry *newEntry (
*/
extern void addKeyword (const char *const string, langType language, int value)
{
const unsigned int index = hashValue (string, language) % TableSize;
bool dummy;
const unsigned int index = hashValue (string, language, 1000, &dummy) % TableSize;
hashEntry *entry = getHashTableEntry (index);
size_t len = strlen (string);

if (len > MaxEntryLen)
MaxEntryLen = len;

if (entry == NULL)
{
Expand Down Expand Up @@ -139,10 +154,16 @@ extern void addKeyword (const char *const string, langType language, int value)

static int lookupKeywordFull (const char *const string, bool caseSensitive, langType language)
{
const unsigned int index = hashValue (string, language) % TableSize;
hashEntry *entry = getHashTableEntry (index);
bool maxLenReached;
const unsigned int index = hashValue (string, language, MaxEntryLen, &maxLenReached) % TableSize;
hashEntry *entry;
int result = KEYWORD_NONE;

if (maxLenReached)
return KEYWORD_NONE;

entry = getHashTableEntry (index);

while (entry != NULL)
{
if (language == entry->language &&
Expand Down