{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Save tweets matching a hashtag phrase to CSV\n",
    "\n",
    "Searches Twitter through Tweepy for English-language, non-retweet tweets matching a hashtag phrase and writes one row per tweet to a CSV file named after the hashtags.\n",
    "\n",
    "Written for the Python 2 kernel recorded in this notebook's metadata (`raw_input`, binary-mode `csv`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import csv\n",
    "import getpass\n",
    "import json\n",
    "import re\n",
    "\n",
    "import tweepy"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Search and export helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def _csv_basename(hashtag_phrase, default='tweets'):\n",
    "    \"\"\"Return the output file's base name: the phrase's hashtags joined by '_'.\n",
    "\n",
    "    Falls back to `default` when the phrase contains no '#tag', so the\n",
    "    output never ends up in a hidden file literally named '.csv'.\n",
    "    \"\"\"\n",
    "    hashtags = re.findall(r\"#(\\w+)\", hashtag_phrase)\n",
    "    return '_'.join(hashtags) or default\n",
    "\n",
    "\n",
    "def search_for_hashtags(consumer_key, consumer_secret, access_token, access_token_secret,\n",
    "                        hashtag_phrase, max_tweets=100):\n",
    "    \"\"\"Search Twitter for a hashtag phrase and save the matching tweets to a CSV file.\n",
    "\n",
    "    INPUTS:\n",
    "        consumer_key, consumer_secret, access_token, access_token_secret: codes\n",
    "            telling twitter that we are authorized to access this data\n",
    "        hashtag_phrase: the combination of hashtags to search for\n",
    "        max_tweets: upper bound on the number of tweets written (default 100)\n",
    "    OUTPUTS:\n",
    "        none, simply save the tweet info to a spreadsheet in the current\n",
    "        working directory, named after the hashtags ('#a #b' -> 'a_b.csv')\n",
    "    \"\"\"\n",
    "    # create authentication for accessing Twitter\n",
    "    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)\n",
    "    auth.set_access_token(access_token, access_token_secret)\n",
    "\n",
    "    # initialize Tweepy API\n",
    "    api = tweepy.API(auth)\n",
    "\n",
    "    # 'wb' is the mode the Python 2 csv module expects; text fields are UTF-8\n",
    "    # encoded by hand below because that csv module does not support unicode input\n",
    "    with open('%s.csv' % _csv_basename(hashtag_phrase), 'wb') as csv_file:\n",
    "        writer = csv.writer(csv_file)\n",
    "\n",
    "        # write header row to spreadsheet\n",
    "        writer.writerow(['timestamp', 'tweet_text', 'username', 'all_hashtags', 'followers_count'])\n",
    "\n",
    "        # English-language tweets matching the phrase, with retweets filtered out\n",
    "        matching_tweets = tweepy.Cursor(\n",
    "            api.search,\n",
    "            q=hashtag_phrase + ' -filter:retweets',\n",
    "            lang=\"en\",\n",
    "            tweet_mode='extended'\n",
    "        ).items(max_tweets)\n",
    "\n",
    "        # for each tweet matching our hashtags, write relevant info to the spreadsheet\n",
    "        for tweet in matching_tweets:\n",
    "            writer.writerow([\n",
    "                tweet.created_at,\n",
    "                # newlines are flattened so each tweet's text stays on one line\n",
    "                tweet.full_text.replace('\\n', ' ').encode('utf-8'),\n",
    "                tweet.user.screen_name.encode('utf-8'),\n",
    "                # a list of hashtag texts; the csv writer stores its string form\n",
    "                [e['text'] for e in tweet._json['entities']['hashtags']],\n",
    "                tweet.user.followers_count\n",
    "            ])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run\n",
    "\n",
    "The four credentials are read with `getpass` so they are not echoed into the notebook's saved output."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "if __name__ == '__main__':\n",
    "    # prompt inside the guard so importing this code never blocks on input\n",
    "    consumer_key = getpass.getpass('Consumer Key ')\n",
    "    consumer_secret = getpass.getpass('Consumer Secret ')\n",
    "    access_token = getpass.getpass('Access Token ')\n",
    "    access_token_secret = getpass.getpass('Access Token Secret ')\n",
    "\n",
    "    hashtag_phrase = raw_input('Hashtag Phrase ')\n",
    "\n",
    "    search_for_hashtags(consumer_key, consumer_secret, access_token, access_token_secret, hashtag_phrase)"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [Root]",
   "language": "python",
   "name": "Python [Root]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}