In [1]:
from pandas import DataFrame, Series
def create_dataframe():
'''
Create a pandas dataframe called 'olympic_medal_counts_df' containing
the data from the table of 2014 Sochi winter olympics medal counts.
The columns for this dataframe should be called
'country_name', 'gold', 'silver', and 'bronze'.
There is no need to specify row indexes for this dataframe
(in this case, the rows will automatically be assigned numbered indexes).
'''
countries = ['Russian Fed.', 'Norway', 'Canada', 'United States',
'Netherlands', 'Germany', 'Switzerland', 'Belarus',
'Austria', 'France', 'Poland', 'China', 'Korea',
'Sweden', 'Czech Republic', 'Slovenia', 'Japan',
'Finland', 'Great Britain', 'Ukraine', 'Slovakia',
'Italy', 'Latvia', 'Australia', 'Croatia', 'Kazakhstan']
gold = [13, 11, 10, 9, 8, 8, 6, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
silver = [11, 5, 10, 7, 7, 6, 3, 0, 8, 4, 1, 4, 3, 7, 4, 2, 4, 3, 1, 0, 0, 2, 2, 2, 1, 0]
bronze = [9, 10, 5, 12, 9, 5, 2, 1, 5, 7, 1, 2, 2, 6, 2, 4, 3, 1, 2, 1, 0, 6, 2, 1, 0, 1]
olympic_medal_counts_df = DataFrame({'country_name' : Series(countries),
'gold' : Series(gold),
'silver' : Series(silver),
'bronze' : Series(bronze)})
return olympic_medal_counts_df
In [ ]:
from pandas import DataFrame, Series
import numpy as np
def avg_medal_count():
#Compute the average number of bronze medals earned by countries who
#earned at least one gold medal.
countries = ['Russian Fed.', 'Norway', 'Canada', 'United States',
'Netherlands', 'Germany', 'Switzerland', 'Belarus',
'Austria', 'France', 'Poland', 'China', 'Korea',
'Sweden', 'Czech Republic', 'Slovenia', 'Japan',
'Finland', 'Great Britain', 'Ukraine', 'Slovakia',
'Italy', 'Latvia', 'Australia', 'Croatia', 'Kazakhstan']
gold = [13, 11, 10, 9, 8, 8, 6, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
silver = [11, 5, 10, 7, 7, 6, 3, 0, 8, 4, 1, 4, 3, 7, 4, 2, 4, 3, 1, 0, 0, 2, 2, 2, 1, 0]
bronze = [9, 10, 5, 12, 9, 5, 2, 1, 5, 7, 1, 2, 2, 6, 2, 4, 3, 1, 2, 1, 0, 6, 2, 1, 0, 1]
df = DataFrame({'countries' : Series(countries),
'gold' : Series(gold),
'silver' : Series(silver),
'bronze' : Series(bronze)})
goldwinners = df[df['gold'].map(lambda x: x >= 1)]
avg_bronze_at_least_one_gold = goldwinners['bronze'].mean()
return avg_bronze_at_least_one_gold
In [ ]:
import numpy
from pandas import DataFrame, Series
def avg_medal_count():
'''
Using the dataframe's apply method, create a new Series called
avg_medal_count that indicates the average number of gold, silver,
and bronze medals earned amongst countries who earned at
least one medal at the 2014 Sochi olympics.
'''
countries = ['Russian Fed.', 'Norway', 'Canada', 'United States',
'Netherlands', 'Germany', 'Switzerland', 'Belarus',
'Austria', 'France', 'Poland', 'China', 'Korea',
'Sweden', 'Czech Republic', 'Slovenia', 'Japan',
'Finland', 'Great Britain', 'Ukraine', 'Slovakia',
'Italy', 'Latvia', 'Australia', 'Croatia', 'Kazakhstan']
gold = [13, 11, 10, 9, 8, 8, 6, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
silver = [11, 5, 10, 7, 7, 6, 3, 0, 8, 4, 1, 4, 3, 7, 4, 2, 4, 3, 1, 0, 0, 2, 2, 2, 1, 0]
bronze = [9, 10, 5, 12, 9, 5, 2, 1, 5, 7, 1, 2, 2, 6, 2, 4, 3, 1, 2, 1, 0, 6, 2, 1, 0, 1]
df = DataFrame({'countries' : Series(countries),
'gold' : Series(gold),
'silver' : Series(silver),
'bronze' : Series(bronze)})
at_least_one_medal = df[(df['gold'] > 0) | (df['silver'] > 0) | (df['bronze'] > 0)]
avg_medal_count = numpy.mean(at_least_one_medal)
return avg_medal_count
In [ ]:
import numpy as np
from pandas import DataFrame, Series
def numpy_dot():
'''
Imagine a point system in which each country is awarded 4 points for each
gold medal, 2 points for each silver medal, and one point for each
bronze medal.
Using the numpy.dot function, create a new dataframe called
'olympic_points_df' that includes:
a) a column called 'country_name' with the country name
b) a column called 'points' with the total number of points the country
earned at the Sochi olympics.
'''
countries = ['Russian Fed.', 'Norway', 'Canada', 'United States',
'Netherlands', 'Germany', 'Switzerland', 'Belarus',
'Austria', 'France', 'Poland', 'China', 'Korea',
'Sweden', 'Czech Republic', 'Slovenia', 'Japan',
'Finland', 'Great Britain', 'Ukraine', 'Slovakia',
'Italy', 'Latvia', 'Australia', 'Croatia', 'Kazakhstan']
gold = [13, 11, 10, 9, 8, 8, 6, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
silver = [11, 5, 10, 7, 7, 6, 3, 0, 8, 4, 1, 4, 3, 7, 4, 2, 4, 3, 1, 0, 0, 2, 2, 2, 1, 0]
bronze = [9, 10, 5, 12, 9, 5, 2, 1, 5, 7, 1, 2, 2, 6, 2, 4, 3, 1, 2, 1, 0, 6, 2, 1, 0, 1]
medals = np.vstack((gold, silver, bronze))
weights = np.array([4, 2, 1])
points = weights.dot(medals)
olympic_points_df = DataFrame({'country_name' : countries,
'points' : points})
return olympic_points_df
In [ ]:
import pandas as pd
def add_full_name(path_to_csv, path_to_new_csv):
#Assume you will be reading in a csv file with the same columns that the
#Lahman baseball data set has -- most importantly, there are columns
#called 'nameFirst' and 'nameLast'.
#1) Write a function that reads a csv
#located at "path_to_csv" into a pandas dataframe and adds a new column
#called 'nameFull' with a player's full name.
#
#For example:
# for Hank Aaron, nameFull would be 'Hank Aaron',
#
#2) Write the data in the pandas dataFrame to a new csv file located at
#path_to_new_csv
df = pd.read_csv(path_to_csv)
df['nameFull'] = df['nameFirst'] + " " + df['nameLast']
df.to_csv(path_to_new_csv)