notebook.community

Edit and run



In [1]:

    
%matplotlib notebook
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_recall_fscore_support
from sklearn.linear_model import LinearRegression
import matplotlib.pylab as plt
import pandas as pd
import numpy as np
from pandas.tools.plotting import scatter_matrix



In [4]:

    
import json
from pprint import pprint

# Load the result file: one JSON object per line, parsed into a list of
# records and then turned into a DataFrame in a single step.
# NOTE(review): the path below is absolute and machine-specific — consider
# reading it from a configurable data directory.
records = []

with open('/Users/kershad1/Downloads/twitter-logit-geo') as data_file:
    for line in data_file:
        records.append(json.loads(line))

# The 'conf' column is not needed in this frame. `axis` is passed by keyword
# because the positional form `drop('conf', 1)` is rejected by newer pandas.
df = pd.DataFrame(records).drop('conf', axis=1)



In [17]:

    
df.head()









    Out[17]:






  
    
      
      combination
      observation_level
      result
      target
    
  
  
    
      0
      all
      196
      0.571429
      popularity_class
    
    
      1
      time
      196
      0.428571
      popularity_class
    
    
      2
      community
      196
      0.571429
      popularity_class
    
    
      3
      basic
      196
      0.714286
      popularity_class
    
    
      4
      cascades
      196
      0.714286
      popularity_class



In [19]:

    
%matplotlib inline
men = pd.pivot_table(df[(df["target"] == "user_popularity_class")], columns=["combination"], values="result", index=["observation_level"], aggfunc=np.mean)
std = pd.pivot_table(df[(df["target"] == "user_popularity_class")], columns=["combination"], values="result", index=["observation_level"], aggfunc=np.std)
men.plot(subplots=True, figsize=(20,20),yerr=std)









    Out[19]:





array([<matplotlib.axes._subplots.AxesSubplot object at 0x10ae0ad50>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x10d97d2d0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x10e53c1d0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x10e5a0090>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x10e6223d0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x10e936b10>], dtype=object)



In [39]:

    
%matplotlib inline
men = pd.pivot_table(df[(df["target"] == "popularity_class")], columns=["combination"], values="result", index=["observation_level"], aggfunc=np.mean)
std = pd.pivot_table(df[(df["target"] == "popularity_class")], columns=["combination"], values="result", index=["observation_level"], aggfunc=np.std)
men[:50].plot(subplots=True, figsize=(20,20),yerr=std[:50])









    Out[39]:





array([<matplotlib.axes._subplots.AxesSubplot object at 0x1184b07d0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x1154f5c10>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x118a09e50>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x118a6da90>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x118aefdd0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x118b48490>], dtype=object)



In [21]:

    
# Maps each combination name to the ordered list of measure names it contains.
# NOTE(review): "activation_entorpy" is listed twice under "community" (and so
# twice under "all") — confirm whether a different measure was intended.
combinations = {
    "time": [
        "time_step_mean",
        "time_step_cv",
    ],
    "basic": [
        "surface",
        "number_activated_users",
        "number_activations",
    ],
    "community": [
        "inffected_communities_normalised",
        "activation_entorpy",
        "activation_entorpy",
        "usage_dominace",
        "user_usage_dominance",
    ],
    "exposure": [
        "user_exposure_mean",
        "activateion_exposure_mean",
    ],
    "cascades": [
        "wiener_index_avrage",
        "number_of_trees",
    ],
}

# "all" is the concatenation of every group above, in this fixed order.
combinations["all"] = [
    measure
    for group in ("time", "basic", "community", "exposure", "cascades")
    for measure in combinations[group]
]



In [32]:

    
import json
from pprint import pprint

# Re-read the result file and flatten it: every JSON line yields one row per
# (value, measure name) pair, pairing the first entry of its "conf" list with
# the measure names registered for the line's combination.
# NOTE(review): zip() stops at the shorter sequence, so a length mismatch
# between "conf"[0] and the name list is silently truncated — verify lengths.
cofdf = []

with open('/Users/kershad1/Downloads/twitter-logit-geo') as data_file:
    for raw_line in data_file:
        run = json.loads(raw_line)
        values = run["conf"][0]
        measure_names = combinations[run["combination"]]
        for value, measure in zip(values, measure_names):
            # Start from the run's own fields, minus the two that do not
            # belong in a per-measure row.
            row = dict(run)
            row.pop("result")
            row.pop("conf")
            row["messure"] = measure
            row["value"] = value
            cofdf.append(row)
cofdf = pd.DataFrame(cofdf)



In [33]:

    
cofdf.head()









    Out[33]:






  
    
      
      combination
      messure
      observation_level
      target
      value
    
  
  
    
      0
      all
      time_step_mean
      196
      popularity_class
      -3.789833e-07
    
    
      1
      all
      time_step_cv
      196
      popularity_class
      7.629221e-08
    
    
      2
      all
      surface
      196
      popularity_class
      7.986281e-05
    
    
      3
      all
      number_activated_users
      196
      popularity_class
      5.856044e-06
    
    
      4
      all
      number_activations
      196
      popularity_class
      7.508608e-06



In [34]:

    
%matplotlib inline

t = cofdf[(cofdf["combination"] == "all") & (cofdf["target"] == "user_popularity_class")]
men = pd.pivot_table(t, columns=["messure"],index=["observation_level"], values=["value"], aggfunc=np.mean)
std = pd.pivot_table(t, columns=["messure"],index=["observation_level"], values=["value"], aggfunc=np.std)



In [38]:

    
# Plot the first 50 rows of `men`, one subplot per column, using the same
# rows of `std` as error bars.
first_rows = slice(None, 50)
men[first_rows].plot(subplots=True, figsize=(20, 20), yerr=std[first_rows])









    Out[38]:





array([<matplotlib.axes._subplots.AxesSubplot object at 0x1177946d0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x1177d0510>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x1108b8a50>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x1117d4b90>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x111851ed0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x1177f3e10>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x117df9710>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x117e7d550>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11684db90>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x1165d1cd0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x11635d450>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x1160e0910>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x117eed350>], dtype=object)



In [ ]:

	combination	observation_level	result	target
0	all	196	0.571429	popularity_class
1	time	196	0.428571	popularity_class
2	community	196	0.571429	popularity_class
3	basic	196	0.714286	popularity_class
4	cascades	196	0.714286	popularity_class

	combination	messure	observation_level	target	value
0	all	time_step_mean	196	popularity_class	-3.789833e-07
1	all	time_step_cv	196	popularity_class	7.629221e-08
2	all	surface	196	popularity_class	7.986281e-05
3	all	number_activated_users	196	popularity_class	5.856044e-06
4	all	number_activations	196	popularity_class	7.508608e-06