In [1]:
# Data source: https://www.kaggle.com/mit/pantheon-project

import numpy as np
import sklearn as skl
import pandas as pd
from matplotlib import pyplot as plt

In [2]:
# Location of the dataset CSV on disk. Kept in a single named constant so the
# notebook can be pointed at another copy of the file by editing one line.
DATA_PATH = "D:/kaggle/database.csv"

# Load the full CSV into the DataFrame that the cells below inspect.
df = pd.read_csv(DATA_PATH)

In [4]:
# List the column labels of the loaded DataFrame to see which fields are available.
df.columns


Out[4]:
Index(['article_id', 'full_name', 'sex', 'birth_year', 'city', 'state',
       'country', 'continent', 'latitude', 'longitude', 'occupation',
       'industry', 'domain', 'article_languages', 'page_views',
       'average_views', 'historical_popularity_index'],
      dtype='object')

In [8]:
# Display the latitude column. Bracket indexing selects the column by its
# label explicitly; the rendered output shows that some rows are NaN.
df["latitude"]


Out[8]:
0        40.33333
1        37.96667
2        32.50000
3        37.96667
4        40.80000
5        43.78333
6             NaN
7        41.90000
8        38.41861
9        37.75000
10       37.08333
11            NaN
12       21.41667
13       33.70000
14       48.25833
15       47.80000
16            NaN
17       52.19000
18       43.63333
19       41.90000
20       41.92670
21       52.80986
22       48.40000
23       44.41111
24       50.97611
25       43.78333
26       50.73399
27       27.53333
28       31.20000
29       51.51667
           ...   
11311   -26.31667
11312         NaN
11313   -26.20444
11314    33.76833
11315         NaN
11316    26.27056
11317    24.86000
11318    40.39528
11319    20.63972
11320    34.98000
11321   -14.46667
11322    34.78333
11323    40.40000
11324    17.98333
11325    14.61333
11326    55.75000
11327     6.66667
11328     2.03333
11329   -27.46667
11330    50.15556
11331    52.63300
11332         NaN
11333    30.25000
11334    40.26022
11335    51.91667
11336    52.41290
11337    29.16670
11338    54.90000
11339    48.14389
11340    34.15611
Name: latitude, Length: 11341, dtype: float64

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: