In [35]:
import pandas as pd
In [36]:
df = pd.read_json("https://data.cityofnewyork.us/resource/batu-qkuq.json?$limit=100")
In [37]:
df = df[["boro", "cuisine_description"]]
In [38]:
foo = df.groupby("boro")
In [39]:
df.groupby("boro").cuisine_description.count()
Out[39]:
boro
BRONX 13
BROOKLYN 52
MANHATTAN 9
QUEENS 26
Name: cuisine_description, dtype: int64
In [40]:
df["cuisine_description"].unique()
Out[40]:
array(['Bakery', 'Hamburgers', 'Irish', 'American', 'Delicatessen',
'Ice Cream, Gelato, Yogurt, Ices'], dtype=object)
In [44]:
df["Stupid"] = 1
df.groupby(["boro", "cuisine_description"]).count()
Out[44]:
Stupid
boro
cuisine_description
BRONX
American
1
Bakery
12
BROOKLYN
American
6
Delicatessen
26
Hamburgers
13
Ice Cream, Gelato, Yogurt, Ices
7
MANHATTAN
Irish
9
QUEENS
American
26
In [42]:
df
Out[42]:
boro
cuisine_description
Stupid
0
BRONX
Bakery
1
1
BRONX
Bakery
1
2
BRONX
Bakery
1
3
BRONX
Bakery
1
4
BRONX
Bakery
1
5
BRONX
Bakery
1
6
BRONX
Bakery
1
7
BRONX
Bakery
1
8
BRONX
Bakery
1
9
BRONX
Bakery
1
10
BRONX
Bakery
1
11
BRONX
Bakery
1
12
BROOKLYN
Hamburgers
1
13
BROOKLYN
Hamburgers
1
14
BROOKLYN
Hamburgers
1
15
BROOKLYN
Hamburgers
1
16
BROOKLYN
Hamburgers
1
17
BROOKLYN
Hamburgers
1
18
BROOKLYN
Hamburgers
1
19
BROOKLYN
Hamburgers
1
20
BROOKLYN
Hamburgers
1
21
BROOKLYN
Hamburgers
1
22
BROOKLYN
Hamburgers
1
23
BROOKLYN
Hamburgers
1
24
BROOKLYN
Hamburgers
1
25
MANHATTAN
Irish
1
26
MANHATTAN
Irish
1
27
MANHATTAN
Irish
1
28
MANHATTAN
Irish
1
29
MANHATTAN
Irish
1
...
...
...
...
70
BROOKLYN
Delicatessen
1
71
BROOKLYN
Delicatessen
1
72
BROOKLYN
Delicatessen
1
73
BROOKLYN
Delicatessen
1
74
BROOKLYN
Delicatessen
1
75
BROOKLYN
Delicatessen
1
76
BROOKLYN
Delicatessen
1
77
BROOKLYN
Delicatessen
1
78
BROOKLYN
Delicatessen
1
79
BROOKLYN
Delicatessen
1
80
BROOKLYN
Delicatessen
1
81
BROOKLYN
Delicatessen
1
82
BROOKLYN
Delicatessen
1
83
BROOKLYN
Delicatessen
1
84
BROOKLYN
Delicatessen
1
85
BROOKLYN
Delicatessen
1
86
BROOKLYN
Delicatessen
1
87
BROOKLYN
Delicatessen
1
88
BROOKLYN
Delicatessen
1
89
BROOKLYN
Delicatessen
1
90
BROOKLYN
Delicatessen
1
91
BROOKLYN
Delicatessen
1
92
BROOKLYN
Ice Cream, Gelato, Yogurt, Ices
1
93
BROOKLYN
Ice Cream, Gelato, Yogurt, Ices
1
94
BROOKLYN
Ice Cream, Gelato, Yogurt, Ices
1
95
BROOKLYN
Ice Cream, Gelato, Yogurt, Ices
1
96
BROOKLYN
Ice Cream, Gelato, Yogurt, Ices
1
97
BROOKLYN
Ice Cream, Gelato, Yogurt, Ices
1
98
BROOKLYN
Ice Cream, Gelato, Yogurt, Ices
1
99
BRONX
American
1
100 rows × 3 columns
In [29]:
df.pivot(index="boro", columns="cuisine_description", values="Stupid")
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-29-04cff8eddb9a> in <module>()
----> 1 df.pivot(index="boro", columns="cuisine_description", values="Stupid")
C:\Users\dbackus\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\frame.py in pivot(self, index, columns, values)
3825 """
3826 from pandas.core.reshape import pivot
-> 3827 return pivot(self, index=index, columns=columns, values=values)
3828
3829 def stack(self, level=-1, dropna=True):
C:\Users\dbackus\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\reshape.py in pivot(self, index, columns, values)
328 indexed = Series(self[values].values,
329 index=MultiIndex.from_arrays([index, self[columns]]))
--> 330 return indexed.unstack(columns)
331
332
C:\Users\dbackus\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\series.py in unstack(self, level, fill_value)
2059 """
2060 from pandas.core.reshape import unstack
-> 2061 return unstack(self, level, fill_value)
2062
2063 # ----------------------------------------------------------------------
C:\Users\dbackus\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\reshape.py in unstack(obj, level, fill_value)
403 else:
404 unstacker = _Unstacker(obj.values, obj.index, level=level,
--> 405 fill_value=fill_value)
406 return unstacker.get_result()
407
C:\Users\dbackus\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\reshape.py in __init__(self, values, index, level, value_columns, fill_value)
97
98 self._make_sorted_values_labels()
---> 99 self._make_selectors()
100
101 def _make_sorted_values_labels(self):
C:\Users\dbackus\AppData\Local\Continuum\Anaconda3\lib\site-packages\pandas\core\reshape.py in _make_selectors(self)
135
136 if mask.sum() < len(self.index):
--> 137 raise ValueError('Index contains duplicate entries, '
138 'cannot reshape')
139
ValueError: Index contains duplicate entries, cannot reshape
In [ ]:
Content source: DaveBackus/Data_Bootcamp
Similar notebooks: