notebook.community

Edit and run



In [2]:

    
## https://chrisalbon.com/python/basics/strings_to_datetime/
#Step 1:
#Write a program to download stock data by calling this URL and save it inside a pandas dataframe
#https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=MSFT&apikey=8WAC2EQWT6CHVMKY&datatype=csv

## Step2:
#Filter the dataframe to contain only the rows where the open price is highest open price during that week. 
#A week is defined as Monday - Friday

Save this filtered dataframe to an output file.
import pandas as pd

df =  pd.read_csv("https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=MSFT&apikey=8WAC2EQWT6CHVMKY&datatype=csv")



In [3]:

    
df.head()



In [13]:

    
df.shape









    Out[13]:





(100, 6)



In [14]:

    
df.dtypes









    Out[14]:





timestamp     object
open         float64
high         float64
low          float64
close        float64
volume         int64
dtype: object



In [18]:

    
df['datetime'] = pd.to_datetime(df.timestamp)



In [19]:

    
df.head()



In [20]:

    
df.dtypes









    Out[20]:





timestamp            object
open                float64
high                float64
low                 float64
close               float64
volume                int64
datetime     datetime64[ns]
dtype: object



In [22]:

    
df.drop('datetime',axis=1,inplace=True)



In [23]:

    
df.head()



In [24]:

    
df['timestamp'] = pd.to_datetime(df.timestamp)



In [25]:

    
df.head()



In [26]:

    
df.dtypes









    Out[26]:





timestamp    datetime64[ns]
open                float64
high                float64
low                 float64
close               float64
volume                int64
dtype: object



In [29]:

    
col1 = df.iloc[:,0]



In [31]:

    
df['weekOfYear'] = df['timestamp'].dt.weekofyear



In [32]:

    
df.head()



In [33]:

    
## http://hamelg.blogspot.in/2015/11/python-for-data-analysis-part-17.html
df['year'] = df['timestamp'].dt.year
df['month'] = df['timestamp'].dt.month



In [34]:

    
df.head()



In [46]:

    
dfmaxOpen = df.groupby("weekOfYear").open.max()



In [47]:

    
dfmaxOpen









    Out[47]:





weekOfYear
1     87.66
2     88.67
3     90.14
4     93.12
5     95.14
6     90.56
7     92.45
8     92.98
39    73.94
40    75.67
41    77.59
42    78.32
43    84.37
44    84.36
45    84.77
46    83.66
47    83.83
48    84.71
49    84.42
50    85.74
51    87.12
52    85.90
Name: open, dtype: float64



In [ ]:

    
##  http://hamelg.blogspot.in/2015/11/python-for-data-analysis-part-18.html



In [42]:

    
df.describe()









    Out[42]:







  
    
      
      open
      high
      low
      close
      volume
      weekOfYear
      year
      month
    
  
  
    
      count
      100.000000
      100.000000
      100.000000
      100.00000
      1.000000e+02
      100.000000
      100.000000
      100.000000
    
    
      mean
      84.702650
      85.387772
      84.044334
      84.74660
      2.433133e+07
      31.210000
      2017.350000
      7.580000
    
    
      std
      5.309266
      5.526110
      5.078314
      5.26433
      1.148191e+07
      20.098392
      0.479372
      4.621382
    
    
      min
      73.540000
      73.970000
      73.310000
      73.87000
      7.425503e+06
      1.000000
      2017.000000
      1.000000
    
    
      25%
      82.690000
      83.135000
      82.180000
      82.52000
      1.705897e+07
      6.000000
      2017.000000
      2.000000
    
    
      50%
      84.565000
      85.215000
      83.975000
      84.62500
      2.142949e+07
      42.000000
      2017.000000
      10.000000
    
    
      75%
      88.545000
      89.780000
      87.653450
      88.23500
      2.737084e+07
      47.000000
      2018.000000
      11.000000
    
    
      max
      95.140000
      96.070000
      93.720000
      95.01000
      7.087735e+07
      52.000000
      2018.000000
      12.000000



In [43]:

    
df.dtypes









    Out[43]:





timestamp     datetime64[ns]
open                 float64
high                 float64
low                  float64
close                float64
volume                 int64
weekOfYear             int64
year                   int64
month                  int64
dtype: object



In [44]:

    
df.describe(include=['int64'])









    Out[44]:







  
    
      
      volume
      weekOfYear
      year
      month
    
  
  
    
      count
      1.000000e+02
      100.000000
      100.000000
      100.000000
    
    
      mean
      2.433133e+07
      31.210000
      2017.350000
      7.580000
    
    
      std
      1.148191e+07
      20.098392
      0.479372
      4.621382
    
    
      min
      7.425503e+06
      1.000000
      2017.000000
      1.000000
    
    
      25%
      1.705897e+07
      6.000000
      2017.000000
      2.000000
    
    
      50%
      2.142949e+07
      42.000000
      2017.000000
      10.000000
    
    
      75%
      2.737084e+07
      47.000000
      2018.000000
      11.000000
    
    
      max
      7.087735e+07
      52.000000
      2018.000000
      12.000000



In [45]:

    
df.head()



In [48]:

    
########################################################################################################
########################################################################################################



In [49]:

    
drinks = pd.read_csv('http://bit.ly/drinksbycountry')



In [50]:

    
drinks.head()









    Out[50]:







  
    
      
      country
      beer_servings
      spirit_servings
      wine_servings
      total_litres_of_pure_alcohol
      continent
    
  
  
    
      0
      Afghanistan
      0
      0
      0
      0.0
      Asia
    
    
      1
      Albania
      89
      132
      54
      4.9
      Europe
    
    
      2
      Algeria
      25
      0
      14
      0.7
      Africa
    
    
      3
      Andorra
      245
      138
      312
      12.4
      Europe
    
    
      4
      Angola
      217
      57
      45
      5.9
      Africa



In [51]:

    
drinks.beer_servings.mean()    # accross the entire dataset









    Out[51]:





106.16062176165804



In [53]:

    
## beer_serving mean by continent
drinks.groupby('continent').beer_servings.mean()  ##  Gives for each continent, the avg beer_servings









    Out[53]:





continent
Africa            61.471698
Asia              37.045455
Europe           193.777778
North America    145.434783
Oceania           89.687500
South America    175.083333
Name: beer_servings, dtype: float64



In [55]:

    
##  lets fileter the dataset by Africa
drinks[drinks.continent=='Africa'].beer_servings.mean()









    Out[55]:





61.471698113207545



In [56]:

    
drinks[drinks.continent=='Europe'].beer_servings.mean()









    Out[56]:





193.77777777777777



In [57]:

    
drinks.groupby('continent').beer_servings.max()









    Out[57]:





continent
Africa           376
Asia             247
Europe           361
North America    285
Oceania          306
South America    333
Name: beer_servings, dtype: int64



In [59]:

    
## We can use multiple aggregate functions
drinks.groupby('continent').beer_servings.agg(['count','min','max','mean'])   
## count gives the no of records in a particular continent









    Out[59]:







  
    
      
      count
      min
      max
      mean
    
    
      continent
      
      
      
      
    
  
  
    
      Africa
      53
      0
      376
      61.471698
    
    
      Asia
      44
      0
      247
      37.045455
    
    
      Europe
      45
      0
      361
      193.777778
    
    
      North America
      23
      1
      285
      145.434783
    
    
      Oceania
      16
      0
      306
      89.687500
    
    
      South America
      12
      93
      333
      175.083333



In [60]:

    
drinks.groupby('continent').mean()  ## mean of all numeric cols by continent









    Out[60]:







  
    
      
      beer_servings
      spirit_servings
      wine_servings
      total_litres_of_pure_alcohol
    
    
      continent
      
      
      
      
    
  
  
    
      Africa
      61.471698
      16.339623
      16.264151
      3.007547
    
    
      Asia
      37.045455
      60.840909
      9.068182
      2.170455
    
    
      Europe
      193.777778
      132.555556
      142.222222
      8.617778
    
    
      North America
      145.434783
      165.739130
      24.521739
      5.995652
    
    
      Oceania
      89.687500
      58.437500
      35.625000
      3.381250
    
    
      South America
      175.083333
      114.750000
      62.416667
      6.308333



In [61]:

    
%matplotlib inline



In [62]:

    
drinks.groupby('continent').mean().plot(kind='bar')









    Out[62]:





<matplotlib.axes._subplots.AxesSubplot at 0x29d08c08b70>



In [ ]:

	timestamp	open	high	low	close	volume
0	2018-02-21	92.980	93.3595	91.49	91.49	26663828
1	2018-02-20	91.475	93.0600	91.01	92.72	30107057
2	2018-02-16	92.450	93.5000	91.80	92.00	30188535
3	2018-02-15	91.210	92.7200	90.62	92.66	27407914
4	2018-02-14	88.510	90.9900	88.41	90.81	34314101

	timestamp	open	high	low	close	volume	datetime
0	2018-02-21	92.980	93.3595	91.49	91.49	26663828	2018-02-21
1	2018-02-20	91.475	93.0600	91.01	92.72	30107057	2018-02-20
2	2018-02-16	92.450	93.5000	91.80	92.00	30188535	2018-02-16
3	2018-02-15	91.210	92.7200	90.62	92.66	27407914	2018-02-15
4	2018-02-14	88.510	90.9900	88.41	90.81	34314101	2018-02-14

	timestamp	open	high	low	close	volume
0	2018-02-21	92.980	93.3595	91.49	91.49	26663828
1	2018-02-20	91.475	93.0600	91.01	92.72	30107057
2	2018-02-16	92.450	93.5000	91.80	92.00	30188535
3	2018-02-15	91.210	92.7200	90.62	92.66	27407914
4	2018-02-14	88.510	90.9900	88.41	90.81	34314101

	timestamp	open	high	low	close	volume
0	2018-02-21	92.980	93.3595	91.49	91.49	26663828
1	2018-02-20	91.475	93.0600	91.01	92.72	30107057
2	2018-02-16	92.450	93.5000	91.80	92.00	30188535
3	2018-02-15	91.210	92.7200	90.62	92.66	27407914
4	2018-02-14	88.510	90.9900	88.41	90.81	34314101

	timestamp	open	high	low	close	volume	weekOfYear
0	2018-02-21	92.980	93.3595	91.49	91.49	26663828	8
1	2018-02-20	91.475	93.0600	91.01	92.72	30107057	8
2	2018-02-16	92.450	93.5000	91.80	92.00	30188535	7
3	2018-02-15	91.210	92.7200	90.62	92.66	27407914	7
4	2018-02-14	88.510	90.9900	88.41	90.81	34314101	7

	open	high	low	close	volume	weekOfYear	year	month
count	100.000000	100.000000	100.000000	100.00000	1.000000e+02	100.000000	100.000000	100.000000
mean	84.702650	85.387772	84.044334	84.74660	2.433133e+07	31.210000	2017.350000	7.580000
std	5.309266	5.526110	5.078314	5.26433	1.148191e+07	20.098392	0.479372	4.621382
min	73.540000	73.970000	73.310000	73.87000	7.425503e+06	1.000000	2017.000000	1.000000
25%	82.690000	83.135000	82.180000	82.52000	1.705897e+07	6.000000	2017.000000	2.000000
50%	84.565000	85.215000	83.975000	84.62500	2.142949e+07	42.000000	2017.000000	10.000000
75%	88.545000	89.780000	87.653450	88.23500	2.737084e+07	47.000000	2018.000000	11.000000
max	95.140000	96.070000	93.720000	95.01000	7.087735e+07	52.000000	2018.000000	12.000000

	country	beer_servings	spirit_servings	wine_servings	total_litres_of_pure_alcohol	continent
0	Afghanistan	0	0	0	0.0	Asia
1	Albania	89	132	54	4.9	Europe
2	Algeria	25	0	14	0.7	Africa
3	Andorra	245	138	312	12.4	Europe
4	Angola	217	57	45	5.9	Africa

	count	min	max	mean
continent
Africa	53	0	376	61.471698
Asia	44	0	247	37.045455
Europe	45	0	361	193.777778
North America	23	1	285	145.434783
Oceania	16	0	306	89.687500
South America	12	93	333	175.083333