Skip to content

Commit

Permalink
Changed default to not include retweeted and quoted content
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexandra DeLucia committed Oct 16, 2020
1 parent a633b87 commit 4d8a14c
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions littlebird/tweet_tokenizer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -35,7 +35,7 @@ def __init__(self, lang: str):

# Define base tokenizer
class BaseTweetTokenizer:
def __init__(self, include_retweeted_and_quoted_content: bool = True):
def __init__(self, include_retweeted_and_quoted_content: bool):
self.include_retweeted_and_quoted_content = include_retweeted_and_quoted_content

def tokenize(self, tweet: str) -> List[str]:
Expand Down Expand Up @@ -202,7 +202,7 @@ def __init__(
lowercase: bool = True,
expand_contractions: bool = False,
remove_lone_digits: bool = True,
include_retweeted_and_quoted_content: bool = True,
include_retweeted_and_quoted_content: bool = False,
replace_usernames_with: str = " ",
replace_urls_with: str = " "
):
Expand Down Expand Up @@ -303,7 +303,7 @@ class GloVeTweetTokenizer(BaseTweetTokenizer):
Tokenizer that tokenizes like the GloVe pre-processor.
Original Ruby script here: https://nlp.stanford.edu/projects/glove/preprocess-twitter.rb
"""
def __init__(self, include_retweeted_and_quoted_content: bool = True):
def __init__(self, include_retweeted_and_quoted_content: bool = False):
# Initialize base class
super().__init__(include_retweeted_and_quoted_content)

Expand Down Expand Up @@ -389,7 +389,7 @@ class BERTweetTokenizer(BaseTweetTokenizer):
Full tokenizer here: https://github.com/VinAIResearch/BERTweet/blob/master/TweetNormalizer.py
"""
def __init__(self, include_retweeted_and_quoted_content: bool = True):
def __init__(self, include_retweeted_and_quoted_content: bool = False):
# Initialize base class
super().__init__(include_retweeted_and_quoted_content)
self.nltk_tokenizer = NLTKTweetTokenizer()
Expand Down

0 comments on commit 4d8a14c

Please sign in to comment.