YWBAT
- apply Tweepy's StreamListener to gather Twitter data on a continuous basis
- store data as csv format and save as file
import json # used to read twitter data
import pandas as pd
import numpy as np
import tweepy as tw
from pprint import pprint
from pymongo import MongoClient
import matplotlib.pyplot as plt
# Load the Twitter credentials from a JSON file. Opening the file with
# `with` closes it automatically once the block is left.
# NOTE(review): the path is an empty string, so open() raises
# FileNotFoundError until a real credentials file path is filled in.
with open("") as f:
    d = json.load(f)

# Show which fields were loaded.
print(d.keys())
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-1-a6c6df099b6f> in <module>
1 # use 'with' to open file because it automatically closes files for you
----> 2 with open("") as f:
3 d = json.load(f)
4 print(d.keys())
FileNotFoundError: [Errno 2] No such file or directory: ''
# Create the auth handler from the consumer key and consumer secret.
auth = tw.OAuthHandler(
    consumer_key=d["consumer_key"],
    consumer_secret=d["consumer_secret"],
)
# Pass the access token and its secret to the auth handler.
auth.set_access_token(
    key=d["access_token"],
    secret=d["access_token_secret"],
)
# Create the API object (the thing we use to interact with Twitter)
# by passing in the auth handler.
api = tw.API(auth_handler=auth)

# Check the connection with a basic search (uncomment the lines below to run).
# for res in api.search("Drake", tweet_mode='extended'):
#     pprint(res._json)
#     break
# Build our StreamListener.
# StreamListener expects us to populate its methods, so we define a child
# class and override on_status.
class MyStreamListener(tw.StreamListener):
    """Stream listener that prints the text of every status it receives."""

    def on_status(self, status):
        """Print the ``text`` attribute of the incoming ``status``."""
        print(status.text)
# Instantiate the listener and hand it to a Stream, reusing the auth
# object held by `api`.
myStreamListener = MyStreamListener()
myStream = tw.Stream(
    auth=api.auth,
    listener=myStreamListener,
    tweet_mode='extended',
)
# Uncomment to start the stream, tracking these terms.
# myStream.filter(track=["drake", "Drake", "#carepackage", "#drake", "#Drake"])
# Connect to MongoDB on localhost, port 27017.
client = MongoClient(host='localhost', port=27017)

# Store the names of the databases in a list.
db_names_list = client.list_database_names()

# Mongo is built on JSON formatting, so databases and collections are
# accessed like dictionary entries.
music_tweets = client["music_tweets"]

# New collection (table) called "drake".
drake = music_tweets["drake"]

# The "drake" collection does not show up in this listing yet: a collection
# needs a document in order to get built.
music_tweets.list_collection_names()
['kendrickLamar']
# MongoDB-backed version of our StreamListener child class.
class MyStreamListener(tw.StreamListener):
    """Stream listener that saves every incoming status to the ``drake`` collection."""

    def on_status(self, status):
        """Insert the status's ``_json`` payload into ``drake`` and print its text."""
        tweet = status._json
        drake.insert_one(tweet)
        print(f'inserted tweet: {tweet["text"]}')
# Re-create the listener and the stream so that they use the
# MyStreamListener class as it is currently defined.
myStreamListener = MyStreamListener()
myStream = tw.Stream(
    auth=api.auth,
    listener=myStreamListener,
    tweet_mode='extended',
)
# Start the stream, tracking these terms.
myStream.filter(
    track=["drake", "Drake", "#carepackage", "#drake", "#Drake"],
)
# List the collections that now exist in music_tweets.
music_tweets.list_collection_names()
['drake', 'kendrickLamar']
# An empty filter matches every document, so this counts the whole
# collection (roughly SQL's 'select count(*) from drake').
drake.count_documents(filter={})
2630
# Let's view these tweets: print the first document returned, then stop.
for res in drake.find(filter={}):
    print(res)
    break
# excuse the language
{'_id': ObjectId('5d44694751483225fdac9acc'), 'created_at': 'Fri Aug 02 16:48:02 +0000 2019', 'id': 1157332236661866500, 'id_str': '1157332236661866500', 'text': 'RT @KenTheRuthless: This nigga Drake has me hyped over songs Iβve heard before. πͺ I hate this nigga.', 'source': '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>', 'truncated': False, 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 744747281547595776, 'id_str': '744747281547595776', 'name': 'Cash Marqtier π·π΅πΆ', 'screen_name': 'marqthemilkman', 'location': 'Sandy Springs, GA', 'url': 'http://soundcloud.com/augustmarquis', 'description': 'its like conquistador but Marquis instead πFather of Fashion. Musical artist', 'translator_type': 'none', 'protected': False, 'verified': False, 'followers_count': 136, 'friends_count': 356, 'listed_count': 0, 'favourites_count': 5229, 'statuses_count': 1543, 'created_at': 'Mon Jun 20 04:22:43 +0000 2016', 'utc_offset': None, 'time_zone': None, 'geo_enabled': False, 'lang': None, 'contributors_enabled': False, 'is_translator': False, 'profile_background_color': 'F5F8FA', 'profile_background_image_url': '', 'profile_background_image_url_https': '', 'profile_background_tile': False, 'profile_link_color': '1DA1F2', 'profile_sidebar_border_color': 'C0DEED', 'profile_sidebar_fill_color': 'DDEEF6', 'profile_text_color': '333333', 'profile_use_background_image': True, 'profile_image_url': 'http://pbs.twimg.com/profile_images/1118458235818790913/ze3l5TG-_normal.jpg', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/1118458235818790913/ze3l5TG-_normal.jpg', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/744747281547595776/1545879537', 'default_profile': True, 'default_profile_image': False, 'following': None, 'follow_request_sent': None, 'notifications': None}, 'geo': None, 'coordinates': 
None, 'place': None, 'contributors': None, 'retweeted_status': {'created_at': 'Thu Aug 01 21:56:49 +0000 2019', 'id': 1157047557819949057, 'id_str': '1157047557819949057', 'text': 'This nigga Drake has me hyped over songs Iβve heard before. πͺ I hate this nigga.', 'source': '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>', 'truncated': False, 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 326281322, 'id_str': '326281322', 'name': 'ππ―π¦πͺ ππ₯π¦π―π±π’π’π«', 'screen_name': 'KenTheRuthless', 'location': 'Atlanta', 'url': None, 'description': 'β οΈ 1397 β οΈ', 'translator_type': 'none', 'protected': False, 'verified': False, 'followers_count': 2089, 'friends_count': 815, 'listed_count': 6, 'favourites_count': 3087, 'statuses_count': 89349, 'created_at': 'Wed Jun 29 17:26:57 +0000 2011', 'utc_offset': None, 'time_zone': None, 'geo_enabled': True, 'lang': None, 'contributors_enabled': False, 'is_translator': False, 'profile_background_color': 'C0DEED', 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_tile': True, 'profile_link_color': 'DD2E44', 'profile_sidebar_border_color': 'FFFFFF', 'profile_sidebar_fill_color': '210CE3', 'profile_text_color': '03FF8E', 'profile_use_background_image': True, 'profile_image_url': 'http://pbs.twimg.com/profile_images/1147530958704402432/sNtJ2whH_normal.jpg', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/1147530958704402432/sNtJ2whH_normal.jpg', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/326281322/1554696484', 'default_profile': False, 'default_profile_image': False, 'following': None, 'follow_request_sent': None, 'notifications': None}, 'geo': None, 'coordinates': None, 'place': None, 
'contributors': None, 'is_quote_status': False, 'quote_count': 616, 'reply_count': 51, 'retweet_count': 10483, 'favorite_count': 30512, 'entities': {'hashtags': [], 'urls': [], 'user_mentions': [], 'symbols': []}, 'favorited': False, 'retweeted': False, 'filter_level': 'low', 'lang': 'en'}, 'is_quote_status': False, 'quote_count': 0, 'reply_count': 0, 'retweet_count': 0, 'favorite_count': 0, 'entities': {'hashtags': [], 'urls': [], 'user_mentions': [{'screen_name': 'KenTheRuthless', 'name': 'ππ―π¦πͺ ππ₯π¦π―π±π’π’π«', 'id': 326281322, 'id_str': '326281322', 'indices': [3, 18]}], 'symbols': []}, 'favorited': False, 'retweeted': False, 'filter_level': 'low', 'lang': 'en', 'timestamp_ms': '1564764482070'}
# Pull every document in the collection into a list of JSON-like documents...
document_list = list(drake.find(filter={}))
# ...and build a DataFrame from that list.
df = pd.DataFrame(document_list)
# Preview the first rows.
df.head()
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
_id | contributors | coordinates | created_at | display_text_range | entities | extended_entities | extended_tweet | favorite_count | favorited | ... | quoted_status_permalink | reply_count | retweet_count | retweeted | retweeted_status | source | text | timestamp_ms | truncated | user | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 5d44694751483225fdac9acc | None | None | Fri Aug 02 16:48:02 +0000 2019 | NaN | {'hashtags': [], 'urls': [], 'user_mentions': ... | NaN | NaN | 0 | False | ... | NaN | 0 | 0 | False | {'created_at': 'Thu Aug 01 21:56:49 +0000 2019... | <a href="http://twitter.com/download/iphone" r... | RT @KenTheRuthless: This nigga Drake has me hy... | 1564764482070 | False | {'id': 744747281547595776, 'id_str': '74474728... |
1 | 5d44694751483225fdac9acd | None | None | Fri Aug 02 16:48:02 +0000 2019 | NaN | {'hashtags': [], 'urls': [], 'user_mentions': ... | NaN | NaN | 0 | False | ... | NaN | 0 | 0 | False | {'created_at': 'Thu Aug 01 23:33:13 +0000 2019... | <a href="http://twitter.com/download/iphone" r... | RT @Jersey_Jinx: I'll also say that Drake's mu... | 1564764482017 | False | {'id': 2847895388, 'id_str': '2847895388', 'na... |
2 | 5d44694751483225fdac9ace | None | None | Fri Aug 02 16:48:02 +0000 2019 | NaN | {'hashtags': [], 'urls': [], 'user_mentions': ... | {'media': [{'id': 1157042637473325056, 'id_str... | NaN | 0 | False | ... | NaN | 0 | 0 | False | {'created_at': 'Fri Aug 02 13:46:54 +0000 2019... | <a href="http://twitter.com/download/iphone" r... | RT @willis_cj: bringing this gem back to the T... | 1564764482155 | False | {'id': 543577551, 'id_str': '543577551', 'name... |
3 | 5d44694751483225fdac9acf | None | None | Fri Aug 02 16:48:02 +0000 2019 | NaN | {'hashtags': [], 'urls': [], 'user_mentions': ... | NaN | NaN | 0 | False | ... | NaN | 0 | 0 | False | {'created_at': 'Fri Aug 02 04:18:54 +0000 2019... | <a href="http://twitter.com/download/android" ... | RT @big_business_: Drake was giving us toxic s... | 1564764482544 | False | {'id': 113987040, 'id_str': '113987040', 'name... |
4 | 5d44694751483225fdac9ad0 | None | None | Fri Aug 02 16:48:02 +0000 2019 | NaN | {'hashtags': [], 'urls': [], 'user_mentions': ... | NaN | NaN | 0 | False | ... | NaN | 0 | 0 | False | {'created_at': 'Fri Aug 02 06:07:28 +0000 2019... | <a href="http://twitter.com/download/iphone" r... | RT @zephaniiiah: this old drake hitting harder... | 1564764482555 | False | {'id': 2516793480, 'id_str': '2516793480', 'na... |
5 rows × 37 columns