In [1]:
# Source: https://ist256.github.io/spring2020/readings/Data-Analysis-With-Pandas
# "Data Analysis with Pandas" by Michael Fudge
In [2]:
import warnings
from io import StringIO

import pandas as pd

# this turns off warning messages
warnings.filterwarnings('ignore')
In [3]:
# A named integer Series holding six grades; the default index is 0..5.
grade_values = [100, 80, 100, 90, 80, 70]
grades = pd.Series(grade_values, dtype='int', name="Grades")
grades
Out[3]:
In [4]:
# Positional access: .iloc[0] is the first element and .iloc[-1] the last,
# whatever the index labels or the length of the Series happen to be.
# (Plain grades[5] is a label lookup and hard-codes the Series length.)
print("first grade:", grades.iloc[0])
print("last grade:", grades.iloc[-1])
In [5]:
# Three parallel columns of student data, one Series per column.
names = pd.Series(['Allen', 'Bob', 'Chris', 'Dave', 'Ed', 'Frank', 'Gus'])
gpas = pd.Series([4.0, 3.0, 3.4, 2.8, 2.5, 3.8, 3.0])
years = pd.Series(['So', 'Fr', 'Fr', 'Jr', 'Sr', 'Sr', 'Fr'])

# Each key of the dict becomes the column label of the Series it maps to.
series_dict = dict(Name=names, GPA=gpas, Year=years)
students = pd.DataFrame(data=series_dict)
students
Out[5]:
In [6]:
# Rebuild the frame from series_dict, this time stating the column order explicitly.
column_order = ['Name', 'GPA', 'Year']
students = pd.DataFrame(data=series_dict, columns=column_order)
students
Out[6]:
In [7]:
# Dean's list: students with a GPA of at least 3.4, keeping only Name and GPA.
# One .loc[rows, columns] call filters rows and picks columns in a single step,
# instead of chaining two [] lookups through an intermediate frame.
deans_list = students.loc[students['GPA'] >= 3.4, ['Name', 'GPA']]
deans_list
Out[7]:
In [8]:
# Students whose GPA lies between 2.5 and 3.0, both ends included.
students[students['GPA'].between(2.5, 3.0)]
Out[8]:
In [9]:
# Recompute the dean's list (GPA of at least 3.4) with a single .loc lookup
# rather than two chained [] lookups, then show just the names of the first two.
deans_list = students.loc[students['GPA'] >= 3.4, ['Name', 'GPA']]
deans_list[['Name']].head(2)
Out[9]:
In [10]:
# Add a 'Deans List' column that starts out as 'No' for every student.
# The column name contains a space, so it is handed to assign() through a dict.
students = students.assign(**{'Deans List': 'No'})
students
Out[10]:
In [11]:
# Flag every student whose GPA is 3.4 or higher.
# This must be one .loc[rows, column] assignment. The chained form
# students['Deans List'][mask] = ... assigns into an intermediate object: it
# triggers SettingWithCopyWarning and, with pandas Copy-on-Write enabled,
# leaves `students` unchanged.
students.loc[students['GPA'] >= 3.4, 'Deans List'] = 'Yes'
students
Out[11]:
In [12]:
# Read the customers CSV over HTTPS and preview the first rows.
customers_url = 'https://raw.githubusercontent.com/mafudge/datasets/master/customers/customers.csv'
customers = pd.read_csv(customers_url)
customers.head()
Out[12]:
In [14]:
# Read the Syracuse, NY weather CSV over HTTPS and preview the first rows.
weather_url = 'https://raw.githubusercontent.com/mafudge/datasets/master/weather/syracuse-ny.csv'
weather = pd.read_csv(weather_url)
weather.head()
Out[14]:
In [15]:
# Temperature columns for the row(s) whose 'EST' value equals '2015-7-4'.
# NOTE(review): the comparison is against a string, so this presumably relies on
# 'EST' holding unparsed date text in exactly this format; confirm against the CSV.
# A single .loc call selects rows and columns together instead of chaining [] lookups.
temperature_columns = ['EST', 'Min TemperatureF', 'Mean TemperatureF', 'Max TemperatureF']
weather.loc[weather['EST'] == '2015-7-4', temperature_columns]
Out[15]:
In [16]:
# Pull the HTML table with id="classSchedule" from the classes page.
# read_html returns a list of DataFrames; the first one is taken as the schedule.
classes_url = 'https://ischool.syr.edu/classes/'
data = pd.read_html(classes_url, attrs={'id': 'classSchedule'})
schedule = data[0]
schedule.head()
Out[16]:
In [17]:
# Parse a JSON array of objects into a DataFrame, one row per object.
json_data = '[{"GPA":3.0,"Name":"Bob"},{"GPA":3.7,"Name":"Sue"},{"GPA":2.4,"Name":"Tom"}]'
# Wrap the literal in StringIO: read_json treats a plain str as a path or URL,
# and passing raw JSON text directly is deprecated (pandas 2.1 onwards).
# NOTE(review): despite its name, `tweets` holds student records; the name is
# kept so that anything already using it keeps working.
tweets = pd.read_json(StringIO(json_data), orient='records')
tweets.head()
Out[17]:
In [18]:
# A list of per-student dicts; each dict becomes one row and each key a column.
students_list_of_dict = [
    {'Name': student_name, 'GPA': student_gpa}
    for student_name, student_gpa in (('Bob', 3.0), ('Sue', 3.7), ('Tom', 2.4))
]
students_df = pd.DataFrame(data=students_list_of_dict)
students_df
Out[18]:
In [19]:
# Convert the frame back into a list of dicts, one dict per row.
# Stored under its own name: rebinding `students` (a DataFrame in the earlier
# cells) to a list would make those cells fail if they were run again.
student_records = students_df.to_dict(orient='records')
student_records
Out[19]:
In [20]:
from IPython.display import display
# Let the reader choose which two customer rows to preview.
customers = pd.read_csv('https://raw.githubusercontent.com/mafudge/datasets/master/customers/customers.csv')
# strip() so that stray spaces around the answer (e.g. "first ") still match.
choice = input("Customers: Would you like to see the first 2, the last 2, or 2 at random? [first, last, random]?").strip().lower()
if choice == 'first':
    display(customers.head(2))
elif choice == 'last':
    display(customers.tail(2))
else:
    # Any answer other than 'first' or 'last' is treated as 'random'.
    display(customers.sample(2))
print("And we're done here!")
In [21]:
# Show the emails of five randomly chosen customers, then look one up by email.
customers = pd.read_csv('https://raw.githubusercontent.com/mafudge/datasets/master/customers/customers.csv')
customer_list = customers.sample(5)
print("CUSTOMER INQUIRY")
display(customer_list[['Email']])
# Normalise the typed value (trim and lower-case it) ...
email = input("Enter a customer email to retrieve their complete record: ").strip().lower()
print("CUSTOMER DETAILS FOR " + email )
# ... and lower-case the stored addresses too: lower-casing only the input
# could never match a stored email that contains capital letters.
display(customer_list[customer_list['Email'].str.lower() == email])
In [ ]: