# ! pip install tweepy
# Import package
import tweepy
# my twitter API key, you are free to use! since my website doesn't have many visitors.
# Saya young
# NOTE(security): hard-coded API credentials. The author's comment above says
# these are deliberately public, but as a rule secrets belong in environment
# variables or a config file excluded from version control — anyone with these
# four values can post as this account.
access_token = "1330365234-xDjSixFZfSeboDSkHS0WgNvOu5zZw4HeUL8ijVq"
access_token_secret = "QuhhHxIMSxVC2QhVqaxtdgZtc4pyJBWVg2C6D5IHCH9ph"
consumer_key = "JES0pDVJW2WCscy1LhFFMxz4A"
consumer_secret = "uOoW3PCx8nI0kIfsifXfCibYwaeMrHh73TrV2TyuILL9vR9Bdx"
# Pass OAuth details to tweepy's OAuth handler (consumer pair identifies the
# app; the access pair identifies the user account).
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# Smoke-test snippet kept for reference (posting a tweet directly via the API):
# api = tweepy.API(auth)
# api.update_status('tweepy + oauth!')
# A Tweet listener that creates a file called 'tweets.txt',
# collects streaming tweets as JSON documents and writes them to 'tweets.txt' one per line.
# Once 1000 tweets have been streamed, the listener closes the file and stops listening.
# class use json
import json
class MyStreamListener(tweepy.StreamListener):
    """Stream listener that appends each tweet to 'tweets.txt' as one JSON
    document per line and disconnects after 1000 tweets.

    Returning False from on_status tells tweepy to stop the stream.
    """

    def __init__(self, api=None):
        # inherit class attributes
        super(MyStreamListener, self).__init__()
        self.num_tweets = 0  # tweets written so far
        self.file = open("tweets.txt", "w+")

    def on_status(self, status):
        """Write one tweet; keep streaming until the 1000-tweet limit."""
        tweet = status._json  # raw tweet payload as a dict
        self.file.write(json.dumps(tweet) + '\n')
        self.num_tweets += 1
        if self.num_tweets < 1000:
            return True
        # Bug fix: the original placed self.file.close() AFTER both return
        # statements, making it unreachable and leaking the file handle.
        # Close the file before telling tweepy to disconnect.
        self.file.close()
        return False

    def on_error(self, status):
        # Print the HTTP error code (e.g. 420 = rate limited) and keep going.
        print(status)
# Initialize Stream listener
# Initialize Stream listener
l = MyStreamListener()
# Create your Stream object with authentication
stream = tweepy.Stream(auth, l)
# Filter Twitter Streams to capture data by the keywords below.
# NOTE(review): `async` became a reserved keyword in Python 3.7, and newer
# tweepy versions renamed this argument to `is_async` — this call only works
# on Python 2 / old tweepy as written. async=True streams in a background thread.
stream.filter(track=['Trump stupid','Trump Hillary','Hillary stupid','Trump daughter'], async=True)
# Import package
import json
# Initialize empty list to store tweets: tweets_data
tweets_data = []
# Open connection to file
h=open('tweets.txt','r')
# Read in tweets and store in list: tweets_data
for i in h:
try:
print 'O',
tmp=json.loads(i)
tweets_data.append(tmp)
except:
print 'X',
h.close()
import pandas as pd
# Quick peek at the raw columns; in a plain script (not a notebook) the
# result of this bare expression is discarded.
pd.DataFrame(tweets_data).head(1)
# Build DataFrame of tweet texts and languages (missing keys become NaN)
df = pd.DataFrame(tweets_data, columns=['text', 'lang'])
print df.shape
# Print head of DataFrame (again: only displayed in a notebook cell)
df.head(3)
import re
def word_in_text(word, tweet):
    """Return True if regex pattern `word` matches anywhere in `tweet`,
    case-insensitively.

    Both arguments are lower-cased before matching, so callers do not need
    to pre-lowercase. Note `word` is used as a regular expression, not a
    literal string.
    """
    word = word.lower()
    text = tweet.lower()
    # Bug fix: the original searched the un-lowered `tweet`, leaving the
    # lower-cased `text` unused — so matching was NOT case-insensitive as
    # intended. Search the lowered copy instead.
    return re.search(word, text) is not None
# Initialize the four mention counters via list-unpacking assignment
[Trump, stupid, girl, hillary] = [0, 0, 0, 0]
# Iterate through df, counting the number of tweets in which
# each candidate is mentioned
for index, row in df.iterrows():
    # word_in_text returns a bool; `+=` counts True as 1, False as 0.
    # The .lower() here is redundant (word_in_text lowers again) but harmless.
    Trump += word_in_text('trump', row['text'].lower())
    stupid += word_in_text('stupid', row['text'].lower())
    girl += word_in_text('girl', row['text'].lower())
    hillary += word_in_text('hillary', row['text'].lower())
print Trump, stupid, girl, hillary
# Regular-expression alternative to the manual counting loop above:
case=False  # leftover from experimenting with the `case` argument below; this variable is unused
# pd.Series.str.contains?  (notebook help lookup, kept for reference)
# Count rows whose text contains 'hillary', case-insensitively via the kwarg:
df['text'].str.contains('hillary',case=False).sum()
# Same idea using a regex character class instead of case=False:
df['text'].str.contains('[Tt]rump').sum()
#override tweepy.StreamListener to add logic to on_status
class test(tweepy.StreamListener):
def __init__(self):
# inherit class attributes
super(test, self).__init__()
# tweepy.StreamListener.__init__(self)
self.num=0
def on_status(self, status):
self.num+=1
print self.num
print(status.text)
if self.num==10:
#returning False in on_data disconnects the stream
return False
def on_error(self, status):
print(status)
# Initialize Stream listener
l = test()
# Create your Stream object with authentication
stream = tweepy.Stream(auth, l)
# Filter Twitter Streams to capture data by the keywords below.
# NOTE(review): `async` is a reserved keyword from Python 3.7, and newer
# tweepy renamed this argument to `is_async` — as written this is
# Python 2 / old-tweepy only.
stream.filter(track=['Trump stupid','Trump Hillary','Hillary','Trump daughter'], async=True)
# Stray notebook demo of str.capitalize(); the result is discarded.
'capitalize string'.capitalize()
# Import packages
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Set seaborn style
sns.set(color_codes=True)
# Create a list of labels: cd
cd = ['hillary', 'trump', 'stupid', 'girl']
# Plot a bar chart of the mention counts (a bar chart, not a histogram:
# the values were already counted above).
# Fix: pass x/y as keywords — seaborn >= 0.12 rejects positional data
# arguments to barplot, and the keywords work on older versions too.
ax = sns.barplot(x=cd, y=[hillary, Trump, stupid, girl], alpha=.6)
ax.set(ylabel="count")
plt.show()