notebook.community

Edit and run



In [1]:

    
import pandas as pd
import json,csv,re,os,sys,glob
import matplotlib.pyplot as plt
%matplotlib inline



In [2]:

    
import numpy as np



In [55]:

    
times=[]
content=[]
topics=[]

for f in glob.glob('../data/2014-06/Data*json')[0:50]:
    for line in open(f,'r').read().decode('utf-8').split('\n'):
        tweet=json.loads(line)
        
        try:
            times.append(tweet['interaction']['created_at'])
        except:
            times.append('NaN')
            
        try:
            content.append(tweet['interaction']['content'].encode('utf-8'))
        except:
            content.append('NaN')
            
        try:
            topics.append(tweet['interaction']['tag_tree']['topic'][0])
        except:
            topics.append('NaN')



In [5]:

    
outFile=csv.writer(open('times_file.csv','w'),delimiter='\t')
for a,b,c in zip(times,content,topics):
    outFile.writerow([a,b,c])



In [6]:

    
df=pd.read_csv('times_file.csv',parse_dates=True,header=False,index_col=0,delimiter='\t',names=['content','topics'])



In [91]:

    
series=pd.Series(data=topics,index=times)
#df=pd.DataFrame(data={'topic':topics},index=pd.to_datetime(times))



In [7]:

    
df.head()









    Out[7]:






  
    
      
      content
      topics
    
  
  
    
      2014-06-08 00:28:35
       7 Reasons America Should Succeed On Climate Ch...
                 General
    
    
      2014-06-08 02:03:23
       9 Political #Cartoons That Put #ClimateChange ...
                     NaN
    
    
      2014-06-08 00:11:24
       RT @CFAware: There is a concern that people wi...
       PoliticsBehaviour
    
    
      2014-06-08 05:20:05
       RT @washingtonpost: This map from 1893 explain...
                 General
    
    
      2014-06-08 01:50:59
       RT @RealBobbyHooper: Who wants to drink, smoke...
                  Energy



In [41]:

    
for a,b in df.groupby('topics'):
    b.resample('D',how='count')['content'].plot(label=a,legend=True,figsize=(20,10),logy=False)
#    print type(b)



In [13]:

    
b.resample('D',how='count')['content']
type(b.resample('D',how='count')['content'])









    Out[13]:





pandas.core.series.Series



In [17]:

    
b.resample('D',how='count')['content']+b.resample('D',how='count')['content']
#print '----------'
#b.resample('D',how='count')['content']









    Out[17]:





2014-06-05    14
2014-06-06     0
2014-06-07     0
2014-06-08    16
2014-06-09     0
2014-06-10     0
2014-06-11    22
2014-06-12     0
2014-06-13     0
2014-06-14     2
2014-06-15     0
2014-06-16     0
2014-06-17    12
Freq: D, Name: content, dtype: int64



In [39]:

    
bSeries=b.resample('D',how='count')['content']
#bSeries.combine(bSeries,func=lambda x,y:x+y,fill_value=0)
bSeries.add(bSeries)
#bSeries









    Out[39]:





2014-06-05    14
2014-06-06     0
2014-06-07     0
2014-06-08    16
2014-06-09     0
2014-06-10     0
2014-06-11    22
2014-06-12     0
2014-06-13     0
2014-06-14     2
2014-06-15     0
2014-06-16     0
2014-06-17    12
Freq: D, Name: content, dtype: int64



In [25]:

    
?bSeries.combine



In [44]:

    
a=df.groupby('topics')



In [53]:

    
pd.set_option('display.max_columns',1000)
pd.set_option('display.width',1000)
pd.set_option('display.max_colwidth',800)



In [52]:

    
?pd.set_option



In [54]:

    
a.describe()









    Out[54]:






  
    
      
      
      content
    
    
      topics
      
      
    
  
  
    
      AgricultureForesty
      count
                                                                                                                                                                  28
    
    
      unique
                                                                                                                                                                  28
    
    
      top
                     RT @InWinC: great discount http://t.co/usxfoRIfMs #gifts #giveaway #sundayfunday #environment #eco #birthdays #CHRISTMAS http://t.co/CCHbowf6Zw
    
    
      freq
                                                                                                                                                                   1
    
    
      Arctic
      count
                                                                                                                                                                  35
    
    
      unique
                                                                                                                                                                  34
    
    
      top
              RT @NASA_EO: Ponds predict Arctic sea-ice melt http://t.co/P2iCJWo12w http://t.co/a5LicRSe9w Background: http://t.co/Xwn4ssRNaH http://t.co/4frU2DmpE6
    
    
      freq
                                                                                                                                                                   2
    
    
      Carbon
      count
                                                                                                                                                                 266
    
    
      unique
                                                                                                                                                                 232
    
    
      top
                          RT @richardbranson: How can we reduce carbon footprint? Renewable jet fuel commercialisation http://t.co/bU5BSvTjvv http://t.co/fAoWuVvzGU
    
    
      freq
                                                                                                                                                                   8
    
    
      Disaster
      count
                                                                                                                                                                  43
    
    
      unique
                                                                                                                                                                  42
    
    
      top
                 RT @Omniclimate: Afghanistan-Flood Kills More Than 100 In Afghanistan #auspol #climateimpacts #climate #flood #Afghanistan - http://t.co/SneSJjaRxz
    
    
      freq
                                                                                                                                                                   2
    
    
      EconomicsBusiness
      count
                                                                                                                                                                  65
    
    
      unique
                                                                                                                                                                  58
    
    
      top
                                             RT @iMariaJohnsen: Avoid Glorifying Sycophant People in the Work Environment http://t.co/OoGg5GcDoK #HR #work #business
    
    
      freq
                                                                                                                                                                   5
    
    
      Energy
      count
                                                                                                                                                                  55
    
    
      unique
                                                                                                                                                                  52
    
    
      top
                     RT @NASA: Our next launch takes on CO2. #OCO2 briefing 6/12 2pET. http://t.co/ANi9i2W5Uz http://t.co/I7JZEJZHfG #climate http://t.co/SnE6Qsu4ea
    
    
      freq
                                                                                                                                                                   3
    
    
      General
      count
                                                                                                                                                                2066
    
    
      unique
                                                                                                                                                                1770
    
    
      top
                                                                                           RT @Fascinatingpics: Please Stop Global Warming... http://t.co/BGfWKv9tI8
    
    
      freq
                                                                                                                                                                  58
    
    
      Oceans/Water
      count
                                                                                                                                                                  21
    
    
      unique
                                                                                                                                                                  21
    
    
      top
                                                                                                         Micro-Climate\nDowntown SF : Sunny \nOcean Beach SF : Foggy
    
    
      freq
                                                                                                                                                                   1
    
    
      PoliticsBehaviour
      count
                                                                                                                                                                1192
    
    
      unique
                                                                                                                                                                1024
    
    
      top
                                   RT @DalaiLama: We need to consider how our actions affect wildlife and the environment, and how they are likely to affect others.
    
    
      freq
                                                                                                                                                                  52
    
    
      Risk
      count
                                                                                                                                                                 226
    
    
      unique
                                                                                                                                                                 206
    
    
      top
       RT @davidplouffe: As economy heals, wars end, bold climate steps are taken and millions have health care, grateful to all those who helped elect @BarackObama
    
    
      freq
                                                                                                                                                                   7
    
    
      Weather
      count
                                                                                                                                                                  33
    
    
      unique
                                                                                                                                                                  33
    
    
      top
                        @wattsupwiththat @bushido02 was a climate victim 2day - dethatched my lawn in 87 degrees with 73% humidity - melted into a lump on the floor
    
    
      freq
                                                                                                                                                                   1



In [1]:

    
from IPython.core.display import HTML
styles = open("../css/custom.css", "r").read()
HTML(styles)









    Out[1]:



In [ ]:

	content	topics
2014-06-08 00:28:35	7 Reasons America Should Succeed On Climate Ch...	General
2014-06-08 02:03:23	9 Political #Cartoons That Put #ClimateChange ...	NaN
2014-06-08 00:11:24	RT @CFAware: There is a concern that people wi...	PoliticsBehaviour
2014-06-08 05:20:05	RT @washingtonpost: This map from 1893 explain...	General
2014-06-08 01:50:59	RT @RealBobbyHooper: Who wants to drink, smoke...	Energy

		content
topics
AgricultureForesty	count	28
	unique	28
	top	RT @InWinC: great discount http://t.co/usxfoRIfMs #gifts #giveaway #sundayfunday #environment #eco #birthdays #CHRISTMAS http://t.co/CCHbowf6Zw
	freq	1
Arctic	count	35
	unique	34
	top	RT @NASA_EO: Ponds predict Arctic sea-ice melt http://t.co/P2iCJWo12w http://t.co/a5LicRSe9w Background: http://t.co/Xwn4ssRNaH http://t.co/4frU2DmpE6
	freq	2
Carbon	count	266
	unique	232
	top	RT @richardbranson: How can we reduce carbon footprint? Renewable jet fuel commercialisation http://t.co/bU5BSvTjvv http://t.co/fAoWuVvzGU
	freq	8
Disaster	count	43
	unique	42
	top	RT @Omniclimate: Afghanistan-Flood Kills More Than 100 In Afghanistan #auspol #climateimpacts #climate #flood #Afghanistan - http://t.co/SneSJjaRxz
	freq	2
EconomicsBusiness	count	65
	unique	58
	top	RT @iMariaJohnsen: Avoid Glorifying Sycophant People in the Work Environment http://t.co/OoGg5GcDoK #HR #work #business
	freq	5
Energy	count	55
	unique	52
	top	RT @NASA: Our next launch takes on CO2. #OCO2 briefing 6/12 2pET. http://t.co/ANi9i2W5Uz http://t.co/I7JZEJZHfG #climate http://t.co/SnE6Qsu4ea
	freq	3
General	count	2066
	unique	1770
	top	RT @Fascinatingpics: Please Stop Global Warming... http://t.co/BGfWKv9tI8
	freq	58
Oceans/Water	count	21
	unique	21
	top	Micro-Climate\nDowntown SF : Sunny \nOcean Beach SF : Foggy
	freq	1
PoliticsBehaviour	count	1192
	unique	1024
	top	RT @DalaiLama: We need to consider how our actions affect wildlife and the environment, and how they are likely to affect others.
	freq	52
Risk	count	226
	unique	206
	top	RT @davidplouffe: As economy heals, wars end, bold climate steps are taken and millions have health care, grateful to all those who helped elect @BarackObama
	freq	7
Weather	count	33
	unique	33
	top	@wattsupwiththat @bushido02 was a climate victim 2day - dethatched my lawn in 87 degrees with 73% humidity - melted into a lump on the floor
	freq	1