Skip to content

Commit

Permalink
added support for RLP stopwords
Browse files — browse the repository at this point in the history
git-svn-id: http://sphinxsearch.googlecode.com/svn/trunk@4437 8b96e2b9-35c5-2c16-bc47-5122d61876d4
  • Loading branch information
glook committed Dec 24, 2013
1 parent ec99c30 commit 9497356
Showing 1 changed file with 16 additions and 4 deletions.
20 changes: 16 additions & 4 deletions src/sphinx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2935,6 +2935,8 @@ class CSphRLPTokenizer : public CSphTokenFilter

BYTE * GetNextTokenRLP()
{
static const char * RPL_SPECIAL_STOPWORD = "rlpspecialstopword";

if ( !m_pTokenIterator )
return NULL;

Expand All @@ -2945,8 +2947,13 @@ class CSphRLPTokenizer : public CSphTokenFilter
else
{
const BT_Char16 * pToken = BT_RLP_TokenIterator_GetCompoundComponent ( m_pTokenIterator, m_iNextCompoundComponent++ );
assert ( pToken );
bt_xutf16toutf8 ( (char*)m_dUTF8Buffer, pToken, sizeof(m_dUTF8Buffer) );
if ( BT_RLP_TokenIterator_IsStopword ( m_pTokenIterator ) )
strncpy ( (char*)m_dUTF8Buffer, RPL_SPECIAL_STOPWORD, MAX_TOKEN_LEN );
else
{
assert ( pToken );
bt_xutf16toutf8 ( (char*)m_dUTF8Buffer, pToken, sizeof(m_dUTF8Buffer) );
}

return &(m_dUTF8Buffer[0]);
}
Expand All @@ -2968,8 +2975,13 @@ class CSphRLPTokenizer : public CSphTokenFilter
} else
pToken = BT_RLP_TokenIterator_GetToken ( m_pTokenIterator );

assert ( pToken );
bt_xutf16toutf8 ( (char*)m_dUTF8Buffer, pToken, sizeof(m_dUTF8Buffer) );
if ( BT_RLP_TokenIterator_IsStopword ( m_pTokenIterator ) )
strncpy ( (char*)m_dUTF8Buffer, RPL_SPECIAL_STOPWORD, MAX_TOKEN_LEN );
else
{
assert ( pToken );
bt_xutf16toutf8 ( (char*)m_dUTF8Buffer, pToken, sizeof(m_dUTF8Buffer) );
}

return &(m_dUTF8Buffer[0]);
}
Expand Down

0 comments on commit 9497356

Please sign in to comment.