In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [5]:
df_price = pd.read_csv("data/Weed_Price.csv")
In [6]:
df_price.head()
Out[6]:
State
HighQ
HighQN
MedQ
MedQN
LowQ
LowQN
date
0
Alabama
339.06
1042
198.64
933
149.49
123
2014-01-01
1
Alaska
288.75
252
260.60
297
388.58
26
2014-01-01
2
Arizona
303.31
1941
209.35
1625
189.45
222
2014-01-01
3
Arkansas
361.85
576
185.62
544
125.87
112
2014-01-01
4
California
248.78
12096
193.56
12812
192.92
778
2014-01-01
In [7]:
df_price.tail()
Out[7]:
State
HighQ
HighQN
MedQ
MedQN
LowQ
LowQN
date
22894
Virginia
364.98
3513
293.12
3079
NaN
284
2014-12-31
22895
Washington
233.05
3337
189.92
3562
NaN
160
2014-12-31
22896
West Virginia
359.35
551
224.03
545
NaN
60
2014-12-31
22897
Wisconsin
350.52
2244
272.71
2221
NaN
167
2014-12-31
22898
Wyoming
322.27
131
351.86
197
NaN
12
2014-12-31
In [8]:
df_price.describe()
Out[8]:
HighQ
HighQN
MedQ
MedQN
LowQ
LowQN
count
22899.000000
22899.000000
22899.000000
22899.000000
12342.000000
22899.000000
mean
329.759854
2274.743657
247.618306
2183.737805
203.747847
202.804489
std
41.173167
2641.936586
44.276015
2789.902626
105.480774
220.531987
min
202.020000
93.000000
144.850000
134.000000
63.700000
11.000000
25%
303.780000
597.000000
215.775000
548.000000
147.117500
51.000000
50%
342.310000
1420.000000
245.800000
1320.000000
186.760000
139.000000
75%
356.550000
2958.000000
274.155000
2673.000000
221.360000
263.000000
max
415.700000
18492.000000
379.000000
22027.000000
734.650000
1287.000000
In [9]:
50*365
Out[9]:
18250
In [10]:
df_demo = pd.read_csv("data/Demographics_State.csv")
In [11]:
df_demo.describe()
Out[11]:
total_population
percent_white
percent_black
percent_asian
percent_hispanic
per_capita_income
median_rent
median_age
count
51.000000
51.000000
51.000000
51.000000
51.000000
51.000000
51.000000
51.000000
mean
6108560.666667
70.254902
10.823529
3.725490
10.803922
28053.803922
719.490196
37.639216
std
6904016.387730
16.116877
10.867761
5.355664
9.996038
4659.378182
189.820375
2.352367
min
570134.000000
23.000000
0.000000
1.000000
1.000000
20618.000000
448.000000
29.600000
25%
1712494.500000
59.500000
3.000000
1.000000
4.500000
24908.500000
566.000000
36.300000
50%
4361333.000000
74.000000
7.000000
2.000000
8.000000
26824.000000
664.000000
37.600000
75%
6712318.500000
82.500000
14.500000
4.000000
12.500000
30144.000000
839.000000
38.950000
max
37659181.000000
94.000000
49.000000
37.000000
47.000000
45290.000000
1220.000000
43.200000
In [12]:
df_pop = pd.read_csv("data/Population_State.csv")
In [13]:
df_pop.describe
Out[13]:
<bound method DataFrame.describe of region value
0 alabama 4777326
1 alaska 711139
2 arizona 6410979
3 arkansas 2916372
4 california 37325068
5 colorado 5042853
6 connecticut 3572213
7 delaware 900131
8 district of columbia 605759
9 florida 18885152
10 georgia 9714569
11 hawaii 1362730
12 idaho 1567803
13 illinois 12823860
14 indiana 6485530
15 iowa 3047646
16 kansas 2851183
17 kentucky 4340167
18 louisiana 4529605
19 maine 1329084
20 maryland 5785496
21 massachusetts 6560595
22 michigan 9897264
23 minnesota 5313081
24 mississippi 2967620
25 missouri 5982413
26 montana 990785
27 nebraska 1827306
28 nevada 2704204
29 new hampshire 1317474
30 new jersey 8793888
31 new mexico 2055287
32 new york 19398125
33 north carolina 9544249
34 north dakota 676253
35 ohio 11533561
36 oklahoma 3749005
37 oregon 3836628
38 pennsylvania 12699589
39 rhode island 1052471
40 south carolina 4630351
41 south dakota 815871
42 tennessee 6353226
43 texas 25208897
44 utah 2766233
45 vermont 625498
46 virginia 8014955
47 washington 6738714
48 west virginia 1850481
49 wisconsin 5687219
50 wyoming 562803>
In [ ]:
Content source: kracekumar/weed
Similar notebooks: