notebook.community

Edit and run



In [1]:

    
%matplotlib inline



In [2]:

    
import os, re
import pandas as pd
import numpy as np



In [3]:

    
wdir = "./weather/"
hash_date = {}
for fn in os.listdir(wdir):
    if re.search(".csv", fn):
        wfn = wdir+fn
        stat_date = fn.split(".")[0]
        stat = stat_date.split("-")[0]
        mdate = "-".join(stat_date.split("-")[1:])
        
        # check 是否是有 date 
        if mdate in hash_date:
            1
        else:
            hash_date[mdate] = {}
        
        # check 是否是有 stat 
        if stat in hash_date[mdate]:
            1
        else:
            hash_date[mdate][stat] = pd.read_csv(wfn, encoding="UTF8")



In [4]:

    
print ", ".join(hash_date.keys())









    



2014-09, 2014-08, 2014-05, 2014-04, 2014-07, 2014-06, 2014-01, 2014-03, 2014-02, 2013-04, 2013-08, 2013-09, 2015-10, 2015-11, 2013-05, 2013-06, 2013-07, 2013-01, 2013-02, 2013-03, 2015-12, 2016-10, 2016-11, 2016-12, 2015-06, 2015-07, 2015-04, 2015-05, 2015-02, 2015-03, 2015-01, 2014-12, 2014-10, 2014-11, 2015-08, 2015-09, 2013-12, 2013-11, 2013-10, 2016-07, 2016-06, 2016-05, 2016-04, 2016-03, 2016-02, 2016-01, 2016-00, 2016-09, 2016-08



In [5]:

    
no_stat = [ '467620', '467300', '467550', '466950', 'C0S730','C0H9C0' ]

df_weather = pd.DataFrame(columns=('date', 'avg_max_temp', 'avg_min_temp', 'avg_rain', 'max_wind'))
idx = 0

for month in hash_date.keys()[:]:
    for dd in hash_date[month][ hash_date[month].keys()[0] ].index:
        
        max_wind = 0
        avg_max_temp = []
        avg_min_temp = []
        avg_rain = []
        for stat in hash_date[month].keys()[:]:
            
            if stat in no_stat:
                continue 
                
            mpd = hash_date[month][stat]
            if dd in mpd.index:
                rows = [ x if not x == u'0.0' else 0. for x in mpd.loc[dd]]
                
                if rows[17] > max_wind:
                    max_wind = rows[17]
                    if max_wind>10:
                        print stat, max_wind
                avg_max_temp.append( rows[9])
                avg_min_temp.append( rows[11])
                avg_rain.append( rows[22])
        df_weather.loc[idx] = ["%s-%s"%(month, dd), np.average(avg_max_temp), np.average(avg_min_temp), np.average(avg_rain), max_wind]
        idx = idx + 1









    



/opt/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:20: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal






    



467050 10.3
467050 10.3
467770 10.1
467050 10.6
467050 12.6
467050 10.7
467050 10.1
467050 11.4
467590 11.6
467050 11.0
467050 11.7
467050 11.5
467050 10.2
467050 11.9
467770 10.1
467050 11.0
467050 10.3
467050 10.1
467050 10.3
466910 10.8
467050 11.5
467050 10.4
467050 10.1
467050 10.3
467050 11.0
467050 11.5
467050 10.3
467050 10.2
467050 10.1
467050 11.2
467050 11.0
467050 10.5
467050 11.7
467050 11.6
467770 11.1
467050 11.0
467770 11.2
467050 11.5
467350 11.1
467060 16.3
467770 10.2
C0M710 10.1
467060 13.2
467050 14.6
467770 14.7
467050 10.6
C0D560 10.1
467050 10.2
467660 11.7
C0X310 12.0
467050 11.3
467050 10.3
467050 10.2
467050 11.0
467050 11.2
467050 11.2
C0K291 10.1
467050 10.9
467770 11.9
467050 10.2



In [6]:

    
df_weather['avg_max_temp'].plot()









    Out[6]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f992bfe7ed0>



In [7]:

    
df_weather.to_csv("testing_weather.csv")



In [8]:

    
df_weather = pd.read_csv("testing_weather.csv")



In [9]:

    
ax = df_weather[['avg_max_temp', 'avg_min_temp']].plot()
ax.set_xticklabels(df_weather.date)









    Out[9]:





[<matplotlib.text.Text at 0x7f9928fc5650>,
 <matplotlib.text.Text at 0x7f9928f6de50>,
 <matplotlib.text.Text at 0x7f9928f67110>,
 <matplotlib.text.Text at 0x7f9928ef4a90>,
 <matplotlib.text.Text at 0x7f9928f06210>,
 <matplotlib.text.Text at 0x7f9928f06950>,
 <matplotlib.text.Text at 0x7f9928f0e0d0>,
 <matplotlib.text.Text at 0x7f9928f0e810>]



In [10]:

    
df_weather['avg_max_temp'].describe()









    Out[10]:





count    1312.000000
mean       23.253370
std         4.979950
min         6.702500
25%        19.318690
50%        24.093119
75%        27.524567
max        31.835833
Name: avg_max_temp, dtype: float64



In [11]:

    
df_weather['avg_min_temp'].describe()









    Out[11]:





count    1312.000000
mean       16.604408
std         4.490012
min         3.461345
25%        12.635548
50%        17.508903
75%        20.493597
max        23.673333
Name: avg_min_temp, dtype: float64



In [12]:

    
df_weather['avg_rain'].describe()









    Out[12]:





count    1312.000000
mean        6.025007
std        13.125653
min         0.000000
25%         0.393332
50%         1.760844
75%         6.229387
max       172.504348
Name: avg_rain, dtype: float64



In [13]:

    
df_weather['max_wind'].describe()









    Out[13]:





count    1312.000000
mean        5.865930
std         2.250408
min         2.100000
25%         4.000000
50%         5.500000
75%         7.500000
max        16.300000
Name: max_wind, dtype: float64



In [14]:

    
def label_max_temp(atemp):
    low_bound = df_weather['avg_max_temp'].describe()['25%']
    high_bound = df_weather['avg_max_temp'].describe()['75%']
    
    if atemp < low_bound:
        return "L"
    if atemp >= low_bound and atemp < high_bound:
        return "M"
    if atemp > high_bound:
        return "H"



In [15]:

    
def label_min_temp(atemp):
    low_bound = df_weather['avg_min_temp'].describe()['25%']
    high_bound = df_weather['avg_min_temp'].describe()['75%']
    
    if atemp < low_bound:
        return "L"
    if atemp >= low_bound and atemp < high_bound:
        return "M"
    if atemp > high_bound:
        return "H"



In [16]:

    
def label_avg_rain(atemp):
    low_bound = df_weather['avg_rain'].describe()['25%']
    high_bound = df_weather['avg_rain'].describe()['75%']
    
    if atemp < low_bound:
        return "L"
    if atemp >= low_bound and atemp < high_bound:
        return "M"
    if atemp > high_bound:
        return "H"



In [17]:

    
def label_max_wind(atemp):
    low_bound = df_weather['max_wind'].describe()['25%']
    high_bound = df_weather['max_wind'].describe()['75%']
    
    if atemp < low_bound:
        return "L"
    if atemp >= low_bound and atemp < high_bound:
        return "M"
    if atemp > high_bound:
        return "H"



In [18]:

    
df_weather['lbl_max_temp'] = [label_max_temp(x) for x in df_weather['avg_max_temp']]



In [19]:

    
df_weather['lbl_min_temp'] = [label_min_temp(x) for x in df_weather['avg_min_temp']]



In [20]:

    
df_weather['lbl_avg_rain'] = [label_avg_rain(x) for x in df_weather['avg_rain']]



In [21]:

    
df_weather['lbl_max_wind'] = [label_max_wind(x) for x in df_weather['max_wind']]



In [22]:

    
df_weather['max_wind'].describe()









    Out[22]:





count    1312.000000
mean        5.865930
std         2.250408
min         2.100000
25%         4.000000
50%         5.500000
75%         7.500000
max        16.300000
Name: max_wind, dtype: float64



In [23]:

    
df_weather['lbl_max_wind'].value_counts()









    Out[23]:





M    653
L    327
H    319
Name: lbl_max_wind, dtype: int64



In [24]:

    
df_weather.sort_values(['date'])









    Out[24]:






  
    
      
      Unnamed: 0
      date
      avg_max_temp
      avg_min_temp
      avg_rain
      max_wind
      lbl_max_temp
      lbl_min_temp
      lbl_avg_rain
      lbl_max_wind
    
  
  
    
      517
      517
      2013-01-0
      11.743158
      7.882105
      3.627368
      3.8
      L
      L
      M
      L
    
    
      518
      518
      2013-01-1
      15.709474
      10.197895
      0.774737
      5.5
      L
      L
      M
      M
    
    
      527
      527
      2013-01-10
      15.695789
      9.425263
      2.901053
      6.2
      L
      L
      M
      M
    
    
      528
      528
      2013-01-11
      14.658947
      11.333684
      10.854737
      8.3
      L
      L
      H
      H
    
    
      529
      529
      2013-01-12
      14.546316
      10.003158
      11.464211
      6.6
      L
      L
      H
      M
    
    
      530
      530
      2013-01-13
      13.172632
      8.042105
      0.397895
      8.3
      L
      L
      M
      H
    
    
      531
      531
      2013-01-14
      16.256842
      7.766316
      0.005263
      5.2
      L
      L
      L
      M
    
    
      532
      532
      2013-01-15
      16.474737
      8.344211
      2.792632
      5.0
      L
      L
      M
      M
    
    
      533
      533
      2013-01-16
      12.858947
      7.944211
      1.929474
      8.7
      L
      L
      M
      H
    
    
      534
      534
      2013-01-17
      11.618947
      7.433684
      0.176842
      7.9
      L
      L
      L
      H
    
    
      535
      535
      2013-01-18
      15.727368
      7.592632
      0.138947
      5.8
      L
      L
      L
      M
    
    
      536
      536
      2013-01-19
      17.133684
      9.269474
      2.295789
      4.1
      L
      L
      M
      M
    
    
      519
      519
      2013-01-2
      13.447368
      10.011579
      9.847368
      9.0
      L
      L
      H
      H
    
    
      537
      537
      2013-01-20
      18.596842
      10.765263
      1.484211
      3.4
      L
      L
      M
      L
    
    
      538
      538
      2013-01-21
      17.998947
      11.635789
      0.366316
      5.5
      L
      L
      L
      M
    
    
      539
      539
      2013-01-22
      15.248421
      11.616842
      1.245263
      7.3
      L
      L
      M
      M
    
    
      540
      540
      2013-01-23
      16.349474
      10.982105
      0.385263
      5.4
      L
      L
      L
      M
    
    
      541
      541
      2013-01-24
      13.811579
      10.100000
      1.717895
      8.7
      L
      L
      M
      H
    
    
      542
      542
      2013-01-25
      14.873684
      9.676842
      1.147368
      6.0
      L
      L
      M
      M
    
    
      543
      543
      2013-01-26
      13.496842
      9.575789
      1.889474
      7.8
      L
      L
      M
      H
    
    
      544
      544
      2013-01-27
      14.854737
      8.892632
      0.281053
      6.3
      L
      L
      L
      M
    
    
      545
      545
      2013-01-28
      16.565263
      8.896842
      0.042105
      3.7
      L
      L
      L
      L
    
    
      546
      546
      2013-01-29
      17.130526
      9.069474
      0.226316
      4.9
      L
      L
      L
      M
    
    
      520
      520
      2013-01-3
      14.110526
      9.501053
      5.434737
      7.2
      L
      L
      M
      M
    
    
      547
      547
      2013-01-30
      18.287368
      9.113684
      0.000000
      4.3
      L
      L
      L
      M
    
    
      521
      521
      2013-01-4
      14.681053
      10.364211
      4.203158
      6.9
      L
      L
      M
      M
    
    
      522
      522
      2013-01-5
      16.835789
      11.356842
      2.687368
      6.0
      L
      L
      M
      M
    
    
      523
      523
      2013-01-6
      16.027368
      11.806316
      1.596842
      7.0
      L
      L
      M
      M
    
    
      524
      524
      2013-01-7
      14.078947
      11.498947
      7.029474
      6.3
      L
      L
      H
      M
    
    
      525
      525
      2013-01-8
      12.255789
      9.985263
      7.702105
      8.3
      L
      L
      H
      H
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      1108
      1108
      2016-07-13
      30.635000
      23.353333
      1.634167
      7.3
      H
      H
      M
      M
    
    
      1109
      1109
      2016-07-14
      30.307500
      23.150833
      0.095833
      8.9
      H
      H
      L
      H
    
    
      1110
      1110
      2016-07-15
      30.099167
      22.730000
      0.600000
      8.5
      H
      H
      M
      H
    
    
      1111
      1111
      2016-07-16
      30.790000
      23.463333
      2.955833
      5.1
      H
      H
      M
      M
    
    
      1112
      1112
      2016-07-17
      30.262500
      22.252500
      10.691667
      4.2
      H
      H
      H
      M
    
    
      1113
      1113
      2016-07-18
      29.883333
      21.750000
      7.045833
      5.8
      H
      H
      H
      M
    
    
      1114
      1114
      2016-07-19
      29.780833
      21.763333
      1.669167
      6.9
      H
      H
      M
      M
    
    
      1097
      1097
      2016-07-2
      30.057500
      21.993333
      8.795833
      6.1
      H
      H
      H
      M
    
    
      1115
      1115
      2016-07-20
      29.335000
      21.722500
      0.889167
      6.2
      H
      H
      M
      M
    
    
      1116
      1116
      2016-07-21
      31.013333
      23.029167
      1.050000
      5.3
      H
      H
      M
      M
    
    
      1117
      1117
      2016-07-22
      31.088333
      22.627500
      0.191667
      4.1
      H
      H
      L
      M
    
    
      1118
      1118
      2016-07-23
      30.424167
      22.732500
      0.270833
      2.9
      H
      H
      L
      L
    
    
      1119
      1119
      2016-07-24
      29.905833
      22.707500
      0.729167
      3.9
      H
      H
      M
      L
    
    
      1120
      1120
      2016-07-25
      30.083333
      22.594167
      0.508333
      3.1
      H
      H
      M
      L
    
    
      1121
      1121
      2016-07-26
      31.258333
      22.956667
      0.583333
      6.4
      H
      H
      M
      M
    
    
      1122
      1122
      2016-07-27
      31.710000
      22.810000
      1.312500
      5.3
      H
      H
      M
      M
    
    
      1123
      1123
      2016-07-28
      31.499167
      22.705000
      3.995833
      3.0
      H
      H
      M
      L
    
    
      1124
      1124
      2016-07-29
      30.570000
      22.405000
      2.965833
      4.4
      H
      H
      M
      M
    
    
      1098
      1098
      2016-07-3
      30.404167
      22.035000
      4.891667
      4.8
      H
      H
      M
      M
    
    
      1125
      1125
      2016-07-30
      29.334167
      21.688333
      5.228333
      7.3
      H
      H
      M
      M
    
    
      1099
      1099
      2016-07-4
      30.235000
      22.248333
      0.186667
      5.3
      H
      H
      L
      M
    
    
      1100
      1100
      2016-07-5
      31.521667
      23.673333
      3.024167
      4.6
      H
      H
      M
      M
    
    
      1101
      1101
      2016-07-6
      29.758333
      22.907500
      4.900000
      7.8
      H
      H
      M
      H
    
    
      1102
      1102
      2016-07-7
      25.826667
      21.942500
      73.234167
      12.0
      M
      H
      H
      H
    
    
      1103
      1103
      2016-07-8
      28.195000
      22.473333
      41.610833
      7.0
      H
      H
      H
      M
    
    
      1104
      1104
      2016-07-9
      28.353333
      22.444167
      27.856667
      5.6
      H
      H
      H
      M
    
    
      1308
      1308
      2016-08-0
      28.482500
      22.454167
      22.309167
      7.0
      H
      H
      H
      M
    
    
      1309
      1309
      2016-08-1
      30.759167
      22.625000
      9.970833
      5.1
      H
      H
      H
      M
    
    
      1310
      1310
      2016-08-2
      30.290833
      22.627500
      1.808333
      3.9
      H
      H
      M
      L
    
    
      1311
      1311
      2016-08-3
      30.984167
      23.248333
      0.645833
      3.9
      H
      H
      M
      L
    
  

1312 rows × 10 columns



In [ ]:

	Unnamed: 0	date	avg_max_temp	avg_min_temp	avg_rain	max_wind	lbl_max_temp	lbl_min_temp	lbl_avg_rain	lbl_max_wind
517	517	2013-01-0	11.743158	7.882105	3.627368	3.8	L	L	M	L
518	518	2013-01-1	15.709474	10.197895	0.774737	5.5	L	L	M	M
527	527	2013-01-10	15.695789	9.425263	2.901053	6.2	L	L	M	M
528	528	2013-01-11	14.658947	11.333684	10.854737	8.3	L	L	H	H
529	529	2013-01-12	14.546316	10.003158	11.464211	6.6	L	L	H	M
530	530	2013-01-13	13.172632	8.042105	0.397895	8.3	L	L	M	H
531	531	2013-01-14	16.256842	7.766316	0.005263	5.2	L	L	L	M
532	532	2013-01-15	16.474737	8.344211	2.792632	5.0	L	L	M	M
533	533	2013-01-16	12.858947	7.944211	1.929474	8.7	L	L	M	H
534	534	2013-01-17	11.618947	7.433684	0.176842	7.9	L	L	L	H
535	535	2013-01-18	15.727368	7.592632	0.138947	5.8	L	L	L	M
536	536	2013-01-19	17.133684	9.269474	2.295789	4.1	L	L	M	M
519	519	2013-01-2	13.447368	10.011579	9.847368	9.0	L	L	H	H
537	537	2013-01-20	18.596842	10.765263	1.484211	3.4	L	L	M	L
538	538	2013-01-21	17.998947	11.635789	0.366316	5.5	L	L	L	M
539	539	2013-01-22	15.248421	11.616842	1.245263	7.3	L	L	M	M
540	540	2013-01-23	16.349474	10.982105	0.385263	5.4	L	L	L	M
541	541	2013-01-24	13.811579	10.100000	1.717895	8.7	L	L	M	H
542	542	2013-01-25	14.873684	9.676842	1.147368	6.0	L	L	M	M
543	543	2013-01-26	13.496842	9.575789	1.889474	7.8	L	L	M	H
544	544	2013-01-27	14.854737	8.892632	0.281053	6.3	L	L	L	M
545	545	2013-01-28	16.565263	8.896842	0.042105	3.7	L	L	L	L
546	546	2013-01-29	17.130526	9.069474	0.226316	4.9	L	L	L	M
520	520	2013-01-3	14.110526	9.501053	5.434737	7.2	L	L	M	M
547	547	2013-01-30	18.287368	9.113684	0.000000	4.3	L	L	L	M
521	521	2013-01-4	14.681053	10.364211	4.203158	6.9	L	L	M	M
522	522	2013-01-5	16.835789	11.356842	2.687368	6.0	L	L	M	M
523	523	2013-01-6	16.027368	11.806316	1.596842	7.0	L	L	M	M
524	524	2013-01-7	14.078947	11.498947	7.029474	6.3	L	L	H	M
525	525	2013-01-8	12.255789	9.985263	7.702105	8.3	L	L	H	H
...	...	...	...	...	...	...	...	...	...	...
1108	1108	2016-07-13	30.635000	23.353333	1.634167	7.3	H	H	M	M
1109	1109	2016-07-14	30.307500	23.150833	0.095833	8.9	H	H	L	H
1110	1110	2016-07-15	30.099167	22.730000	0.600000	8.5	H	H	M	H
1111	1111	2016-07-16	30.790000	23.463333	2.955833	5.1	H	H	M	M
1112	1112	2016-07-17	30.262500	22.252500	10.691667	4.2	H	H	H	M
1113	1113	2016-07-18	29.883333	21.750000	7.045833	5.8	H	H	H	M
1114	1114	2016-07-19	29.780833	21.763333	1.669167	6.9	H	H	M	M
1097	1097	2016-07-2	30.057500	21.993333	8.795833	6.1	H	H	H	M
1115	1115	2016-07-20	29.335000	21.722500	0.889167	6.2	H	H	M	M
1116	1116	2016-07-21	31.013333	23.029167	1.050000	5.3	H	H	M	M
1117	1117	2016-07-22	31.088333	22.627500	0.191667	4.1	H	H	L	M
1118	1118	2016-07-23	30.424167	22.732500	0.270833	2.9	H	H	L	L
1119	1119	2016-07-24	29.905833	22.707500	0.729167	3.9	H	H	M	L
1120	1120	2016-07-25	30.083333	22.594167	0.508333	3.1	H	H	M	L
1121	1121	2016-07-26	31.258333	22.956667	0.583333	6.4	H	H	M	M
1122	1122	2016-07-27	31.710000	22.810000	1.312500	5.3	H	H	M	M
1123	1123	2016-07-28	31.499167	22.705000	3.995833	3.0	H	H	M	L
1124	1124	2016-07-29	30.570000	22.405000	2.965833	4.4	H	H	M	M
1098	1098	2016-07-3	30.404167	22.035000	4.891667	4.8	H	H	M	M
1125	1125	2016-07-30	29.334167	21.688333	5.228333	7.3	H	H	M	M
1099	1099	2016-07-4	30.235000	22.248333	0.186667	5.3	H	H	L	M
1100	1100	2016-07-5	31.521667	23.673333	3.024167	4.6	H	H	M	M
1101	1101	2016-07-6	29.758333	22.907500	4.900000	7.8	H	H	M	H
1102	1102	2016-07-7	25.826667	21.942500	73.234167	12.0	M	H	H	H
1103	1103	2016-07-8	28.195000	22.473333	41.610833	7.0	H	H	H	M
1104	1104	2016-07-9	28.353333	22.444167	27.856667	5.6	H	H	H	M
1308	1308	2016-08-0	28.482500	22.454167	22.309167	7.0	H	H	H	M
1309	1309	2016-08-1	30.759167	22.625000	9.970833	5.1	H	H	H	M
1310	1310	2016-08-2	30.290833	22.627500	1.808333	3.9	H	H	M	L
1311	1311	2016-08-3	30.984167	23.248333	0.645833	3.9	H	H	M	L