Giter VIP home page Giter VIP logo

twitter_analytics's Issues

Tweet analyzing error

I have applied your codes I could collect tweets data but for visualization when I applied data in Python 3.5.2 I changed the code for Python 2 to Python 3.The code is following
`'''
Author: Adil Moujahid
Description: Script for analyzing tweets to compare the popularity of 3 programming languages: Python, Javascript and ruby
Reference: http://adilmoujahid.com/posts/2014/07/twitter-analytics/
'''

import json
import pandas as pd
import matplotlib.pyplot as plt
import re

def word_in_text(word, text):
word = word.lower()
text = text.lower()
match = re.search(word, text)
if match:
return True
return False

def extract_link(text):
regex = r'https?://[^\s<>"]+|www.[^\s<>"]+'
match = re.search(regex, text)
if match:
return match.group()
return ''

def main():

#Reading Tweets
print('Reading Tweets\n')
tweets_data_path = 'd:\\twitter.txt'

tweets_data = []
tweets_file = open(tweets_data_path, "r")
for line in tweets_file:
    try:
        tweet = json.loads(line)
        tweets_data.append(tweet)
    except:
        continue


#Structuring Tweets
print('Structuring Tweets\n')
tweets = pd.DataFrame()
tweets['text'] = map(lambda tweet: tweet['text'], tweets_data)
tweets['lang'] = map(lambda tweet: tweet['lang'], tweets_data)
tweets['country'] = map(lambda tweet: tweet['place']['country'] if tweet['place'] != None else None, tweets_data)


#Analyzing Tweets by Language
print('Analyzing tweets by language\n')
tweets_by_lang = tweets['lang'].value_counts()
fig, ax = plt.subplots()
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=10)
ax.set_xlabel('Languages', fontsize=15)
ax.set_ylabel('Number of tweets' , fontsize=10)
ax.set_title('Top 5 languages', fontsize=15, fontweight='bold')
tweets_by_lang[:5].plot(ax=ax, kind='bar', color='red')
plt.show('tweet_by_lang')


#Analyzing Tweets by Country
print('Analyzing tweets by country\n')
tweets_by_country = tweets['country'].value_counts()
fig, ax = plt.subplots()
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=10)
ax.set_xlabel('Countries', fontsize=15)
ax.set_ylabel('Number of tweets' , fontsize=10)
ax.set_title('Top 5 countries', fontsize=15, fontweight='bold')
tweets_by_country[:5].plot(ax=ax, kind='bar', color='blue')
plt.show('tweet_by_country')


#Adding programming languages columns to the tweets DataFrame
print('Adding programming languages tags to the data\n')
tweets['python'] = tweets['text'].apply(lambda tweet: word_in_text('python', tweet))
tweets['javascript'] = tweets['text'].apply(lambda tweet: word_in_text('javascript', tweet))
tweets['ruby'] = tweets['text'].apply(lambda tweet: word_in_text('ruby', tweet))


#Analyzing Tweets by programming language: First attempt
print('Analyzing tweets by programming language: First attempt\n')
prg_langs = ['python', 'javascript', 'ruby']
tweets_by_prg_lang = [tweets['python'].value_counts()[True], tweets['javascript'].value_counts()[True], tweets['ruby'].value_counts()[True]]
x_pos = list(range(len(prg_langs)))
width = 0.8
fig, ax = plt.subplots()
plt.bar(x_pos, tweets_by_prg_lang, width, alpha=1, color='g')
ax.set_ylabel('Number of tweets', fontsize=15)
ax.set_title('Ranking: python vs. javascript vs. ruby (Raw data)', fontsize=10, fontweight='bold')
ax.set_xticks([p + 0.4 * width for p in x_pos])
ax.set_xticklabels(prg_langs)
plt.grid()
plt.show('tweet_by_prg_language_1')


#Targeting relevant tweets
print('Targeting relevant tweets\n')
tweets['programming'] = tweets['text'].apply(lambda tweet: word_in_text('programming', tweet))
tweets['tutorial'] = tweets['text'].apply(lambda tweet: word_in_text('tutorial', tweet))
tweets['relevant'] = tweets['text'].apply(lambda tweet: word_in_text('programming', tweet) or word_in_text('tutorial', tweet))


#Analyzing Tweets by programming language: Second attempt
print('Analyzing tweets by programming language: First attempt\n')
tweets_by_prg_lang = [tweets[tweets['relevant'] == True]['python'].value_counts()[True], 
                  tweets[tweets['relevant'] == True]['javascript'].value_counts()[True], 
                  tweets[tweets['relevant'] == True]['ruby'].value_counts()[True]]
x_pos = list(range(len(prg_langs)))
width = 0.8
fig, ax = plt.subplots()
plt.bar(x_pos, tweets_by_prg_lang, width,alpha=1,color='g')
ax.set_ylabel('Number of tweets', fontsize=15)
ax.set_title('Ranking: python vs. javascript vs. ruby (Relevant data)', fontsize=10, fontweight='bold')
ax.set_xticks([p + 0.4 * width for p in x_pos])
ax.set_xticklabels(prg_langs)
plt.grid()
plt.show('tweet_by_prg_language_2')


#Extracting Links
tweets['link'] = tweets['text'].apply(lambda tweet: extract_link(tweet))
tweets_relevant = tweets[tweets['relevant'] == True]
tweets_relevant_with_link = tweets_relevant[tweets_relevant['link'] != '']

print('\nBelow are some Python links that we extracted\n')
print(tweets_relevant_with_link[tweets_relevant_with_link['python'] == True]['link'].head())

if name=='main':
main()

But it shows these graphs ![figure_1](https://cloud.githubusercontent.com/assets/23469906/22321042/d33498f0-e3bb-11e6-8f2c-104e3708e88b.png) ![figure_1-1](https://cloud.githubusercontent.com/assets/23469906/22321043/d3370270-e3bb-11e6-9c3a-bf72f0fc0227.png) and shows following error============== RESTART: C:\Users\User\Desktop\analyze_tweets.py ==============
Reading Tweets

Structuring Tweets

Analyzing tweets by language

Analyzing tweets by country

Adding programming languages tags to the data

Traceback (most recent call last):
File "C:\Users\User\Desktop\analyze_tweets.py", line 137, in
main()
File "C:\Users\User\Desktop\analyze_tweets.py", line 83, in main
tweets['python'] = tweets['text'].apply(lambda tweet: word_in_text('python', tweet))
File "E:\WinPython-64bit-3.5.2.3Qt5\python-3.5.2.amd64\lib\site-packages\pandas\core\series.py", line 2292, in apply
mapped = lib.map_infer(values, f, convert=convert_dtype)
File "pandas\src\inference.pyx", line 1207, in pandas.lib.map_infer (pandas\lib.c:66116)
File "C:\Users\User\Desktop\analyze_tweets.py", line 83, in
tweets['python'] = tweets['text'].apply(lambda tweet: word_in_text('python', tweet))
File "C:\Users\User\Desktop\analyze_tweets.py", line 15, in word_in_text
text = text.lower()
AttributeError: 'map' object has no attribute 'lower'

`

Error

Reading Tweets

Structuring Tweets

Traceback (most recent call last):
File "read_tweets.py", line 131, in
main()
File "read_tweets.py", line 44, in main
tweets['text'] = map(lambda tweet: tweet['text'], tweets_data)
File "read_tweets.py", line 44, in
tweets['text'] = map(lambda tweet: tweet['text'], tweets_data)
KeyError: 'text'

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    ๐Ÿ–– Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. ๐Ÿ“Š๐Ÿ“ˆ๐ŸŽ‰

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google โค๏ธ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.