In [1]:
# https://ist256.github.io/spring2020/readings/Data-Analysis-With-Pandas
# Data Analysis with Pandas By Michael Fudge

In [2]:
import warnings
from io import StringIO

import pandas as pd

# this turns off warning messages
warnings.filterwarnings('ignore')

In [3]:
# A Series is a single labelled column of values; this one holds six integer grades.
grade_values = [100, 80, 100, 90, 80, 70]
grades = pd.Series(grade_values, dtype='int', name="Grades")
grades


Out[3]:
0    100
1     80
2    100
3     90
4     80
5     70
Name: Grades, dtype: int64

In [4]:
# Look the values up by index label first, then report them.
first_grade, last_grade = grades[0], grades[5]
print("first grade:", first_grade)
print("last grade:", last_grade)


first grade: 100
last grade: 70

In [5]:
# One Series per column of the table; all three have the same length (7).
names = pd.Series(['Allen', 'Bob', 'Chris', 'Dave', 'Ed', 'Frank', 'Gus'])
gpas = pd.Series([4.0, 3.0, 3.4, 2.8, 2.5, 3.8, 3.0])
years = pd.Series(['So', 'Fr', 'Fr', 'Jr', 'Sr', 'Sr', 'Fr'])

# dict of Series: each key becomes the column label for its Series
series_dict = dict(Name=names, GPA=gpas, Year=years)
students = pd.DataFrame(data=series_dict)
students


Out[5]:
Name GPA Year
0 Allen 4.0 So
1 Bob 3.0 Fr
2 Chris 3.4 Fr
3 Dave 2.8 Jr
4 Ed 2.5 Sr
5 Frank 3.8 Sr
6 Gus 3.0 Fr

In [6]:
# Same frame as before, but with the column order stated explicitly via columns=.
column_order = ['Name', 'GPA', 'Year']
students = pd.DataFrame(data=series_dict, columns=column_order)
students


Out[6]:
Name GPA Year
0 Allen 4.0 So
1 Bob 3.0 Fr
2 Chris 3.4 Fr
3 Dave 2.8 Jr
4 Ed 2.5 Sr
5 Frank 3.8 Sr
6 Gus 3.0 Fr

In [7]:
# Dean's list: Name and GPA columns only, for students with a GPA of 3.4 or higher.
# Rows (boolean mask) and columns are selected together in one .loc lookup.
on_deans_list = students['GPA'] >= 3.4
deans_list = students.loc[on_deans_list, ['Name', 'GPA']]
deans_list


Out[7]:
Name GPA
0 Allen 4.0
2 Chris 3.4
5 Frank 3.8

In [8]:
# Students with 2.5 <= GPA <= 3.0; Series.between includes both endpoints by default.
gpa_in_range = students['GPA'].between(2.5, 3.0)
students[gpa_in_range]


Out[8]:
Name GPA Year
1 Bob 3.0 Fr
3 Dave 2.8 Jr
4 Ed 2.5 Sr
6 Gus 3.0 Fr

In [9]:
# Rebuild the dean's list (GPA >= 3.4), then show just the Name column for its first two rows.
deans_list = students.loc[students['GPA'] >= 3.4, ['Name', 'GPA']]
deans_list.loc[:, ['Name']].head(2)


Out[9]:
Name
0 Allen
2 Chris

In [10]:
# Add a 'Deans List' column to the frame; assigning a single scalar
# sets that same value ('No') on every row.
students['Deans List'] = 'No'
students


Out[10]:
Name GPA Year Deans List
0 Allen 4.0 So No
1 Bob 3.0 Fr No
2 Chris 3.4 Fr No
3 Dave 2.8 Jr No
4 Ed 2.5 Sr No
5 Frank 3.8 Sr No
6 Gus 3.0 Fr No

In [11]:
# Mark every student whose GPA is at least 3.4 as being on the Dean's List.
# A single .loc[row_mask, column] assignment writes directly into `students`.
# The chained form `students['Deans List'][mask] = 'Yes'` assigns through an
# intermediate Series instead: pandas flags it with SettingWithCopyWarning, and with
# Copy-on-Write enabled it leaves `students` unchanged.
students.loc[students['GPA'] >= 3.4, 'Deans List'] = 'Yes'
students


Out[11]:
Name GPA Year Deans List
0 Allen 4.0 So Yes
1 Bob 3.0 Fr No
2 Chris 3.4 Fr Yes
3 Dave 2.8 Jr No
4 Ed 2.5 Sr No
5 Frank 3.8 Sr Yes
6 Gus 3.0 Fr No

In [12]:
# Read the customers CSV straight from its URL and preview the first five rows.
customers_csv_url = 'https://raw.githubusercontent.com/mafudge/datasets/master/customers/customers.csv'
customers = pd.read_csv(customers_csv_url)
customers.head()


Out[12]:
First Last Email Gender Last IP Address City State Total Orders Total Purchased Months Customer
0 Al Fresco afresco@dayrep.com M 74.111.18.161 Syracuse NY 1 45 1
1 Abby Kuss akuss@rhyta.com F 23.80.125.101 Phoenix AZ 1 25 2
2 Arial Photo aphoto@dayrep.com F 24.0.14.56 Newark NJ 1 680 1
3 Bette Alott balott@rhyta.com F 56.216.127.219 Raleigh NC 6 560 18
4 Barb Barion bbarion@superrito.com F 38.68.15.223 Dallas TX 4 1590 1

In [14]:
# Read the Syracuse, NY weather CSV straight from its URL and preview the first five rows.
weather_csv_url = 'https://raw.githubusercontent.com/mafudge/datasets/master/weather/syracuse-ny.csv'
weather = pd.read_csv(weather_csv_url)
weather.head()


Out[14]:
EST Max TemperatureF Mean TemperatureF Min TemperatureF Max Dew PointF MeanDew PointF Min DewpointF Max Humidity Mean Humidity Min Humidity ... Max VisibilityMiles Mean VisibilityMiles Min VisibilityMiles Max Wind SpeedMPH Mean Wind SpeedMPH Max Gust SpeedMPH PrecipitationIn CloudCover Events WindDirDegrees
0 1997-1-1 27 12.0 -2 22 4 -8 92 74 59 ... 10 9 1 14 5 NaN 0.05 6 Snow 89
1 1997-1-2 34 28.0 23 33 29 21 100 96 88 ... 9 2 0 8 4 NaN 0.08 8 Fog-Rain-Snow 82
2 1997-1-3 44 40.0 36 44 38 34 100 96 89 ... 10 4 0 15 6 NaN 0.09 8 Fog-Rain 273
3 1997-1-4 48 40.0 34 44 36 33 96 90 83 ... 10 10 8 13 4 NaN 0.00 8 Rain 80
4 1997-1-5 55 46.0 37 50 43 29 89 81 73 ... 10 10 10 21 11 30.0 0.13 8 Rain 199

5 rows × 23 columns


In [15]:
# Temperatures for one day: match the date string in the EST column, then keep
# only the date and the min / mean / max temperature columns.
is_july_4th = weather['EST'] == '2015-7-4'
temperature_columns = ['EST', 'Min TemperatureF', 'Mean TemperatureF', 'Max TemperatureF']
weather.loc[is_july_4th, temperature_columns]


Out[15]:
EST Min TemperatureF Mean TemperatureF Max TemperatureF
6664 2015-7-4 60 69.0 78

In [16]:
# read_html returns a list of DataFrames, one per matching <table>; attrs limits
# the match to the table whose id attribute is "classSchedule".
data = pd.read_html(
    'https://ischool.syr.edu/classes/',
    attrs={'id': 'classSchedule'},
)
schedule = data[0]  # first (here: the only requested) matching table
schedule.head()


Out[16]:
Course Section Class Credits Title Instructor(s) Time Day Room(s)
0 IDS401 U800 72532 3.0 What's the Big Idea? Bruce Kingma 12:00am - 2:30pm MTuWThF Online Online
1 IDS403 M800 71739 3.0 Startup Sandbox NaN 12:00am - 12:00am NaN NaN
2 IST256 M800 71740 3.0 Appl.Prog.For Information Syst Laurie A Ferger 12:00am - 12:00am NaN Online Online
3 IST344 M001 72263 3.0 Info Reporting & Presentation Bruce Kingma 12:00am - 2:30pm MTuWThF Online Online
4 IST359 M800 71741 3.0 Intro to Data Base Mgmt Systs Blythe Scherrer 12:00am - 12:00am NaN Online Online

In [17]:
# A JSON array of objects ("records" orientation): one object per row.
json_data = '[{"GPA":3.0,"Name":"Bob"},{"GPA":3.7,"Name":"Sue"},{"GPA":2.4,"Name":"Tom"}]'

# pd.read_json expects a path, URL, or file-like object. Passing the raw JSON text
# itself is deprecated (pandas >= 2.1), so wrap the string in StringIO.
# NOTE(review): the frame holds student records, not tweets; the name `tweets` is
# kept so anything that already refers to it keeps working.
tweets = pd.read_json(StringIO(json_data), orient='records')
tweets.head()


Out[17]:
GPA Name
0 3.0 Bob
1 3.7 Sue
2 2.4 Tom

In [18]:
# One dict per student (row); the dict keys become the DataFrame's column labels.
students_list_of_dict = [
    {'Name': name, 'GPA': gpa}
    for name, gpa in [('Bob', 3.0), ('Sue', 3.7), ('Tom', 2.4)]
]

students_df = pd.DataFrame(data=students_list_of_dict)
students_df


Out[18]:
Name GPA
0 Bob 3.0
1 Sue 3.7
2 Tom 2.4

In [19]:
# Convert the frame back to a list with one dict per row ('records' orientation).
# Note: this rebinds `students`, which earlier cells used for a DataFrame, to a plain list.
students = students_df.to_dict('records')
students


Out[19]:
[{'Name': 'Bob', 'GPA': 3.0},
 {'Name': 'Sue', 'GPA': 3.7},
 {'Name': 'Tom', 'GPA': 2.4}]

In [20]:
from IPython.display import display

customers = pd.read_csv('https://raw.githubusercontent.com/mafudge/datasets/master/customers/customers.csv')

choice = input("Customers: Would you like to see the first 2, the last 2, or 2 at random? [first, last, random]?").lower()

# Map each recognised answer to the DataFrame method that selects the rows.
# Any other answer (including 'random') falls back to a random sample.
row_pickers = {'first': customers.head, 'last': customers.tail}
pick_rows = row_pickers.get(choice, customers.sample)
display(pick_rows(2))
print("And we're done here!")


Customers: Would you like to see the first 2, the last 2, or 2 at random? [first, last, random]?df
First Last Email Gender Last IP Address City State Total Orders Total Purchased Months Customer
14 Jean Poole jpoole@dayrep.com F 23.182.25.40 Kingston NY 7 185 12
8 Carol Ling cling@superrito.com F 23.180.242.66 Syracuse NY 2 440 6
And we're done here!

In [21]:
customers = pd.read_csv('https://raw.githubusercontent.com/mafudge/datasets/master/customers/customers.csv')

# Work with five randomly chosen customers so the list of emails stays short.
customer_list = customers.sample(n=5)

print("CUSTOMER INQUIRY")
display(customer_list[['Email']])
# The typed email is lower-cased before it is compared with the Email column.
email = input("Enter a customer email to retrieve their complete record: ").lower()

print("CUSTOMER DETAILS FOR " + email )
is_requested = customer_list['Email'].eq(email)
display(customer_list.loc[is_requested])


CUSTOMER INQUIRY
Email
3 balott@rhyta.com
1 akuss@rhyta.com
23 sbellum@superrito.com
16 lkarfurless@dayrep.com
12 etasomthin@superrito.com
Enter a customer email to retrieve their complete record: lkarfurless@dayrep.com
CUSTOMER DETAILS FOR lkarfurless@dayrep.com
First Last Email Gender Last IP Address City State Total Orders Total Purchased Months Customer
16 Lisa Karfurless lkarfurless@dayrep.com F 172.189.252.8 Fairfax VA 6 250 27

In [ ]: