notebook.community

Edit and run



In [1]:

    
# https://ist256.github.io/spring2020/readings/Data-Analysis-With-Pandas
# Data Analysis with Pandas By Michael Fudge



In [2]:

    
import warnings
from io import StringIO

import pandas as pd

# this turns off warning messages
warnings.filterwarnings('ignore')



In [3]:

    
# A small integer Series of grades, labelled "Grades"; the default 0..5 index is used.
grades = pd.Series(
    [100, 80, 100, 90, 80, 70],
    dtype='int',
    name="Grades",
)
grades









    Out[3]:





0    100
1     80
2    100
3     90
4     80
5     70
Name: Grades, dtype: int64



In [4]:

    
# Use positional access so the "last" lookup does not depend on a hard-coded
# label: .iloc[0] is the first entry and .iloc[-1] the last, whatever the
# length of the Series.
print("first grade:", grades.iloc[0])
print("last grade:", grades.iloc[-1])









    



first grade: 100
last grade: 70



In [5]:

    
# Three parallel Series, one per attribute; row i of each describes the same student.
names = pd.Series(['Allen', 'Bob', 'Chris', 'Dave', 'Ed', 'Frank', 'Gus'])
gpas = pd.Series([4.0, 3.0, 3.4, 2.8, 2.5, 3.8, 3.0])
years = pd.Series(['So', 'Fr', 'Fr', 'Jr', 'Sr', 'Sr', 'Fr'])

# The dictionary keys become the column names of the DataFrame.
series_dict = dict(Name=names, GPA=gpas, Year=years)
students = pd.DataFrame(series_dict)
students



In [6]:

    
# Rebuild the frame from the same dict of Series, this time stating the
# column order explicitly instead of relying on the dict's key order.
students = pd.DataFrame(
    series_dict,
    columns=['Name', 'GPA', 'Year'],
)
students



In [7]:

    
# Rows with a GPA of at least 3.4, restricted to the Name and GPA columns,
# selected in a single .loc[rows, columns] step.
deans_list = students.loc[students['GPA'] >= 3.4, ['Name', 'GPA']]
deans_list



In [8]:

    
# Students whose GPA lies in the closed range [2.5, 3.0]; each comparison is
# parenthesised because & binds tighter than >= and <=.
students.loc[(students['GPA'] >= 2.5) & (students['GPA'] <= 3.0)]



In [9]:

    
# Same dean's-list selection as before (GPA >= 3.4, Name and GPA columns) ...
deans_list = students.loc[students['GPA'] >= 3.4, ['Name', 'GPA']]
# ... then keep only the Name column (as a one-column DataFrame) and show the first two rows.
deans_list[['Name']].head(2)



In [10]:

    
# Add (or overwrite) a 'Deans List' column on the existing `students` frame,
# broadcasting the single value 'No' to every row. This modifies `students`
# in place rather than producing a new frame.
students['Deans List'] = 'No'
students



In [11]:

    
# Flag every student with a GPA of at least 3.4 as being on the dean's list.
# The row mask and the column label go into ONE .loc call: the chained form
# students['Deans List'][mask] = ... assigns into an intermediate object,
# which triggers SettingWithCopyWarning and leaves `students` unchanged when
# pandas Copy-on-Write is active.
students.loc[students['GPA'] >= 3.4, 'Deans List'] = 'Yes'
students



In [12]:

    
# Load the customers CSV straight from its GitHub raw URL and preview the first rows.
customers = pd.read_csv(
    'https://raw.githubusercontent.com/mafudge/datasets/master/customers/customers.csv'
)
customers.head()









    Out[12]:







  
    
      
      First
      Last
      Email
      Gender
      Last IP Address
      City
      State
      Total Orders
      Total Purchased
      Months Customer
    
  
  
    
      0
      Al
      Fresco
      afresco@dayrep.com
      M
      74.111.18.161
      Syracuse
      NY
      1
      45
      1
    
    
      1
      Abby
      Kuss
      akuss@rhyta.com
      F
      23.80.125.101
      Phoenix
      AZ
      1
      25
      2
    
    
      2
      Arial
      Photo
      aphoto@dayrep.com
      F
      24.0.14.56
      Newark
      NJ
      1
      680
      1
    
    
      3
      Bette
      Alott
      balott@rhyta.com
      F
      56.216.127.219
      Raleigh
      NC
      6
      560
      18
    
    
      4
      Barb
      Barion
      bbarion@superrito.com
      F
      38.68.15.223
      Dallas
      TX
      4
      1590
      1



In [14]:

    
# Load the Syracuse, NY weather CSV from its GitHub raw URL and preview the first rows.
weather = pd.read_csv(
    'https://raw.githubusercontent.com/mafudge/datasets/master/weather/syracuse-ny.csv'
)
weather.head()









    Out[14]:







  
    
      
      EST
      Max TemperatureF
      Mean TemperatureF
      Min TemperatureF
      Max Dew PointF
      MeanDew PointF
      Min DewpointF
      Max Humidity
      Mean Humidity
      Min Humidity
      ...
      Max VisibilityMiles
      Mean VisibilityMiles
      Min VisibilityMiles
      Max Wind SpeedMPH
      Mean Wind SpeedMPH
      Max Gust SpeedMPH
      PrecipitationIn
      CloudCover
      Events
      WindDirDegrees
    
  
  
    
      0
      1997-1-1
      27
      12.0
      -2
      22
      4
      -8
      92
      74
      59
      ...
      10
      9
      1
      14
      5
      NaN
      0.05
      6
      Snow
      89
    
    
      1
      1997-1-2
      34
      28.0
      23
      33
      29
      21
      100
      96
      88
      ...
      9
      2
      0
      8
      4
      NaN
      0.08
      8
      Fog-Rain-Snow
      82
    
    
      2
      1997-1-3
      44
      40.0
      36
      44
      38
      34
      100
      96
      89
      ...
      10
      4
      0
      15
      6
      NaN
      0.09
      8
      Fog-Rain
      273
    
    
      3
      1997-1-4
      48
      40.0
      34
      44
      36
      33
      96
      90
      83
      ...
      10
      10
      8
      13
      4
      NaN
      0.00
      8
      Rain
      80
    
    
      4
      1997-1-5
      55
      46.0
      37
      50
      43
      29
      89
      81
      73
      ...
      10
      10
      10
      21
      11
      30.0
      0.13
      8
      Rain
      199
    
  

5 rows × 23 columns



In [15]:

    
# Temperature summary for one day. 'EST' is compared as text here, so the
# literal must match the file's own date spelling ('2015-7-4', no zero padding).
weather.loc[
    weather['EST'] == '2015-7-4',
    ['EST', 'Min TemperatureF', 'Mean TemperatureF', 'Max TemperatureF'],
]









    Out[15]:







  
    
      
      EST
      Min TemperatureF
      Mean TemperatureF
      Max TemperatureF
    
  
  
    
      6664
      2015-7-4
      60
      69.0
      78



In [16]:

    
# read_html returns a list of DataFrames, one per matching <table>; the attrs
# filter narrows the match to the table whose id is 'classSchedule'.
data = pd.read_html(
    'https://ischool.syr.edu/classes/',
    attrs={'id': 'classSchedule'},
)
# Take the first (index 0) table from the returned list.
schedule = data[0]
schedule.head()









    Out[16]:







  
    
      
      Course
      Section
      Class
      Credits
      Title
      Instructor(s)
      Time
      Day
      Room(s)
    
  
  
    
      0
      IDS401
      U800
      72532
      3.0
      What's the Big Idea?
      Bruce Kingma
      12:00am - 2:30pm
      MTuWThF
      Online  Online
    
    
      1
      IDS403
      M800
      71739
      3.0
      Startup Sandbox
      NaN
      12:00am - 12:00am
      NaN
      NaN
    
    
      2
      IST256
      M800
      71740
      3.0
      Appl.Prog.For Information Syst
      Laurie A Ferger
      12:00am - 12:00am
      NaN
      Online  Online
    
    
      3
      IST344
      M001
      72263
      3.0
      Info Reporting & Presentation
      Bruce Kingma
      12:00am - 2:30pm
      MTuWThF
      Online  Online
    
    
      4
      IST359
      M800
      71741
      3.0
      Intro to Data Base Mgmt Systs
      Blythe Scherrer
      12:00am - 12:00am
      NaN
      Online  Online



In [17]:

    
# A JSON array of records: each object becomes one row, each key one column.
json_data = '[{"GPA":3.0,"Name":"Bob"},{"GPA":3.7,"Name":"Sue"},{"GPA":2.4,"Name":"Tom"}]'

# read_json expects a path, URL or file-like object; handing it the JSON text
# itself is deprecated, so the string is wrapped in an in-memory text buffer.
# NOTE(review): the variable is called `tweets` but holds student GPA records;
# the name is kept unchanged so nothing that refers to it breaks.
tweets = pd.read_json(StringIO(json_data), orient='records')
tweets.head()



In [18]:

    
# One dict per student; the shared keys become the DataFrame's columns.
students_list_of_dict = [
    {'Name': student_name, 'GPA': grade_point_average}
    for student_name, grade_point_average in (('Bob', 3.0), ('Sue', 3.7), ('Tom', 2.4))
]

students_df = pd.DataFrame(data=students_list_of_dict)
students_df



In [19]:

    
# Convert the DataFrame back into a list of per-row dicts (orient='records'),
# the inverse of building a DataFrame from a list of dicts.
# NOTE(review): this rebinds `students`, which earlier cells use as a
# DataFrame, to a plain list — re-running those earlier filter cells after
# this one would fail. Consider a distinct name if cells are run out of order.
students = students_df.to_dict(orient='records')
students









    Out[19]:





[{'Name': 'Bob', 'GPA': 3.0},
 {'Name': 'Sue', 'GPA': 3.7},
 {'Name': 'Tom', 'GPA': 2.4}]



In [20]:

    
from IPython.display import display

customers = pd.read_csv('https://raw.githubusercontent.com/mafudge/datasets/master/customers/customers.csv')

choice = input("Customers: Would you like to see the first 2, the last 2, or 2 at random? [first, last, random]?").lower()

# 'first' and 'last' select the two ends of the frame; any other answer
# (including 'random' or a typo) falls back to a random draw of two rows.
row_pickers = {'first': customers.head, 'last': customers.tail}
display(row_pickers.get(choice, customers.sample)(2))
print("And we're done here!")









    



Customers: Would you like to see the first 2, the last 2, or 2 at random? [first, last, random]?df






    







  
    
      
      First
      Last
      Email
      Gender
      Last IP Address
      City
      State
      Total Orders
      Total Purchased
      Months Customer
    
  
  
    
      14
      Jean
      Poole
      jpoole@dayrep.com
      F
      23.182.25.40
      Kingston
      NY
      7
      185
      12
    
    
      8
      Carol
      Ling
      cling@superrito.com
      F
      23.180.242.66
      Syracuse
      NY
      2
      440
      6
    
  








    



And we're done here!



In [21]:

    
customers = pd.read_csv('https://raw.githubusercontent.com/mafudge/datasets/master/customers/customers.csv')

# Offer a random handful of customers to choose from.
customer_list = customers.sample(5)

print("CUSTOMER INQUIRY")
display(customer_list[['Email']])
# Normalise what the user typed: drop stray whitespace and ignore letter case.
email = input("Enter a customer email to retrieve their complete record: ").strip().lower()

print("CUSTOMER DETAILS FOR " + email )
# Normalise the column the same way as the input, so the match does not rely
# on the file storing every address in lower case without padding.
email_matches = customer_list['Email'].str.strip().str.lower() == email
display(customer_list[email_matches])









    



CUSTOMER INQUIRY






    







  
    
      
      Email
    
  
  
    
      3
      balott@rhyta.com
    
    
      1
      akuss@rhyta.com
    
    
      23
      sbellum@superrito.com
    
    
      16
      lkarfurless@dayrep.com
    
    
      12
      etasomthin@superrito.com
    
  








    



Enter a customer email to retrieve their complete record: lkarfurless@dayrep.com
CUSTOMER DETAILS FOR lkarfurless@dayrep.com






    







  
    
      
      First
      Last
      Email
      Gender
      Last IP Address
      City
      State
      Total Orders
      Total Purchased
      Months Customer
    
  
  
    
      16
      Lisa
      Karfurless
      lkarfurless@dayrep.com
      F
      172.189.252.8
      Fairfax
      VA
      6
      250
      27



In [ ]:

	Name	GPA	Year
0	Allen	4.0	So
1	Bob	3.0	Fr
2	Chris	3.4	Fr
3	Dave	2.8	Jr
4	Ed	2.5	Sr
5	Frank	3.8	Sr
6	Gus	3.0	Fr

	First	Last	Email	Gender	Last IP Address	City	State	Total Orders	Total Purchased	Months Customer
0	Al	Fresco	afresco@dayrep.com	M	74.111.18.161	Syracuse	NY	1	45	1
1	Abby	Kuss	akuss@rhyta.com	F	23.80.125.101	Phoenix	AZ	1	25	2
2	Arial	Photo	aphoto@dayrep.com	F	24.0.14.56	Newark	NJ	1	680	1
3	Bette	Alott	balott@rhyta.com	F	56.216.127.219	Raleigh	NC	6	560	18
4	Barb	Barion	bbarion@superrito.com	F	38.68.15.223	Dallas	TX	4	1590	1

	EST	Max TemperatureF	Mean TemperatureF	Min TemperatureF	Max Dew PointF	MeanDew PointF	Min DewpointF	Max Humidity	Mean Humidity	Min Humidity	...	Max VisibilityMiles	Mean VisibilityMiles	Min VisibilityMiles	Max Wind SpeedMPH	Mean Wind SpeedMPH	Max Gust SpeedMPH	PrecipitationIn	CloudCover	Events	WindDirDegrees
0	1997-1-1	27	12.0	-2	22	4	-8	92	74	59	...	10	9	1	14	5	NaN	0.05	6	Snow	89
1	1997-1-2	34	28.0	23	33	29	21	100	96	88	...	9	2	0	8	4	NaN	0.08	8	Fog-Rain-Snow	82
2	1997-1-3	44	40.0	36	44	38	34	100	96	89	...	10	4	0	15	6	NaN	0.09	8	Fog-Rain	273
3	1997-1-4	48	40.0	34	44	36	33	96	90	83	...	10	10	8	13	4	NaN	0.00	8	Rain	80
4	1997-1-5	55	46.0	37	50	43	29	89	81	73	...	10	10	10	21	11	30.0	0.13	8	Rain	199

	Course	Section	Class	Credits	Title	Instructor(s)	Time	Day	Room(s)
0	IDS401	U800	72532	3.0	What's the Big Idea?	Bruce Kingma	12:00am - 2:30pm	MTuWThF	Online Online
1	IDS403	M800	71739	3.0	Startup Sandbox	NaN	12:00am - 12:00am	NaN	NaN
2	IST256	M800	71740	3.0	Appl.Prog.For Information Syst	Laurie A Ferger	12:00am - 12:00am	NaN	Online Online
3	IST344	M001	72263	3.0	Info Reporting & Presentation	Bruce Kingma	12:00am - 2:30pm	MTuWThF	Online Online
4	IST359	M800	71741	3.0	Intro to Data Base Mgmt Systs	Blythe Scherrer	12:00am - 12:00am	NaN	Online Online

	First	Last	Email	Gender	Last IP Address	City	State	Total Orders	Total Purchased	Months Customer
14	Jean	Poole	jpoole@dayrep.com	F	23.182.25.40	Kingston	NY	7	185	12
8	Carol	Ling	cling@superrito.com	F	23.180.242.66	Syracuse	NY	2	440	6

	Email
3	balott@rhyta.com
1	akuss@rhyta.com
23	sbellum@superrito.com
16	lkarfurless@dayrep.com
12	etasomthin@superrito.com