In [2]:
## https://chrisalbon.com/python/basics/strings_to_datetime/
#Step 1:
#Write a program to download stock data by calling this URL and save it inside a pandas dataframe
#https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=MSFT&apikey=8WAC2EQWT6CHVMKY&datatype=csv

## Step2:
#Filter the dataframe to contain only the rows where the open price is highest open price during that week. 
#A week is defined as Monday - Friday

Save this filtered dataframe to an output file.
import pandas as pd

df =  pd.read_csv("https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=MSFT&apikey=8WAC2EQWT6CHVMKY&datatype=csv")

In [3]:
df.head()


Out[3]:
timestamp open high low close volume
0 2018-02-21 92.980 93.3595 91.49 91.49 26663828
1 2018-02-20 91.475 93.0600 91.01 92.72 30107057
2 2018-02-16 92.450 93.5000 91.80 92.00 30188535
3 2018-02-15 91.210 92.7200 90.62 92.66 27407914
4 2018-02-14 88.510 90.9900 88.41 90.81 34314101

In [13]:
df.shape


Out[13]:
(100, 6)

In [14]:
df.dtypes


Out[14]:
timestamp     object
open         float64
high         float64
low          float64
close        float64
volume         int64
dtype: object

In [18]:
df['datetime'] = pd.to_datetime(df.timestamp)

In [19]:
df.head()


Out[19]:
timestamp open high low close volume datetime
0 2018-02-21 92.980 93.3595 91.49 91.49 26663828 2018-02-21
1 2018-02-20 91.475 93.0600 91.01 92.72 30107057 2018-02-20
2 2018-02-16 92.450 93.5000 91.80 92.00 30188535 2018-02-16
3 2018-02-15 91.210 92.7200 90.62 92.66 27407914 2018-02-15
4 2018-02-14 88.510 90.9900 88.41 90.81 34314101 2018-02-14

In [20]:
df.dtypes


Out[20]:
timestamp            object
open                float64
high                float64
low                 float64
close               float64
volume                int64
datetime     datetime64[ns]
dtype: object

In [22]:
df.drop('datetime',axis=1,inplace=True)

In [23]:
df.head()


Out[23]:
timestamp open high low close volume
0 2018-02-21 92.980 93.3595 91.49 91.49 26663828
1 2018-02-20 91.475 93.0600 91.01 92.72 30107057
2 2018-02-16 92.450 93.5000 91.80 92.00 30188535
3 2018-02-15 91.210 92.7200 90.62 92.66 27407914
4 2018-02-14 88.510 90.9900 88.41 90.81 34314101

In [24]:
df['timestamp'] = pd.to_datetime(df.timestamp)

In [25]:
df.head()


Out[25]:
timestamp open high low close volume
0 2018-02-21 92.980 93.3595 91.49 91.49 26663828
1 2018-02-20 91.475 93.0600 91.01 92.72 30107057
2 2018-02-16 92.450 93.5000 91.80 92.00 30188535
3 2018-02-15 91.210 92.7200 90.62 92.66 27407914
4 2018-02-14 88.510 90.9900 88.41 90.81 34314101

In [26]:
df.dtypes


Out[26]:
timestamp    datetime64[ns]
open                float64
high                float64
low                 float64
close               float64
volume                int64
dtype: object

In [29]:
col1 = df.iloc[:,0]

In [31]:
df['weekOfYear'] = df['timestamp'].dt.weekofyear

In [32]:
df.head()


Out[32]:
timestamp open high low close volume weekOfYear
0 2018-02-21 92.980 93.3595 91.49 91.49 26663828 8
1 2018-02-20 91.475 93.0600 91.01 92.72 30107057 8
2 2018-02-16 92.450 93.5000 91.80 92.00 30188535 7
3 2018-02-15 91.210 92.7200 90.62 92.66 27407914 7
4 2018-02-14 88.510 90.9900 88.41 90.81 34314101 7

In [33]:
## http://hamelg.blogspot.in/2015/11/python-for-data-analysis-part-17.html
df['year'] = df['timestamp'].dt.year
df['month'] = df['timestamp'].dt.month

In [34]:
df.head()


Out[34]:
timestamp open high low close volume weekOfYear year month
0 2018-02-21 92.980 93.3595 91.49 91.49 26663828 8 2018 2
1 2018-02-20 91.475 93.0600 91.01 92.72 30107057 8 2018 2
2 2018-02-16 92.450 93.5000 91.80 92.00 30188535 7 2018 2
3 2018-02-15 91.210 92.7200 90.62 92.66 27407914 7 2018 2
4 2018-02-14 88.510 90.9900 88.41 90.81 34314101 7 2018 2

In [46]:
dfmaxOpen = df.groupby("weekOfYear").open.max()

In [47]:
dfmaxOpen


Out[47]:
weekOfYear
1     87.66
2     88.67
3     90.14
4     93.12
5     95.14
6     90.56
7     92.45
8     92.98
39    73.94
40    75.67
41    77.59
42    78.32
43    84.37
44    84.36
45    84.77
46    83.66
47    83.83
48    84.71
49    84.42
50    85.74
51    87.12
52    85.90
Name: open, dtype: float64

In [ ]:
##  http://hamelg.blogspot.in/2015/11/python-for-data-analysis-part-18.html

In [42]:
df.describe()


Out[42]:
open high low close volume weekOfYear year month
count 100.000000 100.000000 100.000000 100.00000 1.000000e+02 100.000000 100.000000 100.000000
mean 84.702650 85.387772 84.044334 84.74660 2.433133e+07 31.210000 2017.350000 7.580000
std 5.309266 5.526110 5.078314 5.26433 1.148191e+07 20.098392 0.479372 4.621382
min 73.540000 73.970000 73.310000 73.87000 7.425503e+06 1.000000 2017.000000 1.000000
25% 82.690000 83.135000 82.180000 82.52000 1.705897e+07 6.000000 2017.000000 2.000000
50% 84.565000 85.215000 83.975000 84.62500 2.142949e+07 42.000000 2017.000000 10.000000
75% 88.545000 89.780000 87.653450 88.23500 2.737084e+07 47.000000 2018.000000 11.000000
max 95.140000 96.070000 93.720000 95.01000 7.087735e+07 52.000000 2018.000000 12.000000

In [43]:
df.dtypes


Out[43]:
timestamp     datetime64[ns]
open                 float64
high                 float64
low                  float64
close                float64
volume                 int64
weekOfYear             int64
year                   int64
month                  int64
dtype: object

In [44]:
df.describe(include=['int64'])


Out[44]:
volume weekOfYear year month
count 1.000000e+02 100.000000 100.000000 100.000000
mean 2.433133e+07 31.210000 2017.350000 7.580000
std 1.148191e+07 20.098392 0.479372 4.621382
min 7.425503e+06 1.000000 2017.000000 1.000000
25% 1.705897e+07 6.000000 2017.000000 2.000000
50% 2.142949e+07 42.000000 2017.000000 10.000000
75% 2.737084e+07 47.000000 2018.000000 11.000000
max 7.087735e+07 52.000000 2018.000000 12.000000

In [45]:
df.head()


Out[45]:
timestamp open high low close volume weekOfYear year month
0 2018-02-21 92.980 93.3595 91.49 91.49 26663828 8 2018 2
1 2018-02-20 91.475 93.0600 91.01 92.72 30107057 8 2018 2
2 2018-02-16 92.450 93.5000 91.80 92.00 30188535 7 2018 2
3 2018-02-15 91.210 92.7200 90.62 92.66 27407914 7 2018 2
4 2018-02-14 88.510 90.9900 88.41 90.81 34314101 7 2018 2

In [48]:
########################################################################################################
########################################################################################################

In [49]:
drinks = pd.read_csv('http://bit.ly/drinksbycountry')

In [50]:
drinks.head()


Out[50]:
country beer_servings spirit_servings wine_servings total_litres_of_pure_alcohol continent
0 Afghanistan 0 0 0 0.0 Asia
1 Albania 89 132 54 4.9 Europe
2 Algeria 25 0 14 0.7 Africa
3 Andorra 245 138 312 12.4 Europe
4 Angola 217 57 45 5.9 Africa

In [51]:
drinks.beer_servings.mean()    # accross the entire dataset


Out[51]:
106.16062176165804

In [53]:
## beer_serving mean by continent
drinks.groupby('continent').beer_servings.mean()  ##  Gives for each continent, the avg beer_servings


Out[53]:
continent
Africa            61.471698
Asia              37.045455
Europe           193.777778
North America    145.434783
Oceania           89.687500
South America    175.083333
Name: beer_servings, dtype: float64

In [55]:
##  lets fileter the dataset by Africa
drinks[drinks.continent=='Africa'].beer_servings.mean()


Out[55]:
61.471698113207545

In [56]:
drinks[drinks.continent=='Europe'].beer_servings.mean()


Out[56]:
193.77777777777777

In [57]:
drinks.groupby('continent').beer_servings.max()


Out[57]:
continent
Africa           376
Asia             247
Europe           361
North America    285
Oceania          306
South America    333
Name: beer_servings, dtype: int64

In [59]:
## We can use multiple aggregate functions
drinks.groupby('continent').beer_servings.agg(['count','min','max','mean'])   
## count gives the no of records in a particular continent


Out[59]:
count min max mean
continent
Africa 53 0 376 61.471698
Asia 44 0 247 37.045455
Europe 45 0 361 193.777778
North America 23 1 285 145.434783
Oceania 16 0 306 89.687500
South America 12 93 333 175.083333

In [60]:
drinks.groupby('continent').mean()  ## mean of all numeric cols by continent


Out[60]:
beer_servings spirit_servings wine_servings total_litres_of_pure_alcohol
continent
Africa 61.471698 16.339623 16.264151 3.007547
Asia 37.045455 60.840909 9.068182 2.170455
Europe 193.777778 132.555556 142.222222 8.617778
North America 145.434783 165.739130 24.521739 5.995652
Oceania 89.687500 58.437500 35.625000 3.381250
South America 175.083333 114.750000 62.416667 6.308333

In [61]:
%matplotlib inline

In [62]:
drinks.groupby('continent').mean().plot(kind='bar')


Out[62]:
<matplotlib.axes._subplots.AxesSubplot at 0x29d08c08b70>

In [ ]: