In [ ]:
'''
LESSON 2
- Reading from TXT
- Exporting to TXT
- Selecting top/bottom records
- Descriptive statistics
- Grouping/sorting data
'''
In [2]:
# Import all libraries needed for the tutorial
import pandas as pd
from numpy import random
import matplotlib.pyplot as plt
import sys #only needed to determine Python version number
import matplotlib #only needed to determine Matplotlib version number
# Enable inline plotting
%matplotlib inline
In [3]:
# Report the interpreter and library versions this notebook runs against.
for label, version in (('Python', sys.version),
                       ('Pandas', pd.__version__),
                       ('Matplotlib', matplotlib.__version__)):
    print('{} version {}'.format(label, version))
In [109]:
'''
Create Data
'''
# Initial list of baby names that the random sample is drawn from
names = [
    'Bob',
    'Jessica',
    'Mary',
    'John',
    'Mel',
]
In [110]:
'''
We are going to randomly generate 1000 babies
The following functions are used:
+ seed
+ randint
+ len
+ range
+ zip
'''
Out[110]:
In [5]:
# Show the docstring for numpy's random.seed (IPython `?` help syntax).
# Seeding the generator ensures the random samples below can be reproduced:
# with the same seed, the random samples will always be identical.
random.seed?
In [13]:
# Show the docstring for the built-in range (IPython `?` help syntax).
range?
In [111]:
# Seed the generator so the sample below is the same on every run
random.seed(500)
# Draw 1000 names, each chosen through a random index into `names`
random_names = [
    names[random.randint(low=0, high=len(names))]
    for _ in range(1000)
]
len(random_names)
Out[111]:
In [112]:
# Birth counts for the year 1880: one random draw per record, 1000 records,
# each draw taken from randint(low=0, high=1000)
births = [
    random.randint(low=0, high=1000)
    for _ in range(1000)
]
births[:10]
Out[112]:
In [113]:
# The number of births per name for the year 1880
# NOTE(review): this cell repeats the previous cell's statements verbatim, so
# `births` is drawn a second time and the first draw is discarded. It looks
# like an accidental copy-paste -- confirm before removing, because dropping
# it changes which values the seeded generator hands out afterwards.
births = [random.randint(low=0,high=1000) for i in range(1000)]
births[:10]
Out[113]:
In [114]:
# Pair each sampled name with its birth count -> list of (name, births) tuples
BabyDataSet = [(baby, count) for baby, count in zip(random_names, births)]
BabyDataSet[:10]
Out[114]:
In [115]:
# Load the (name, births) pairs into a two-column DataFrame and preview it
df = pd.DataFrame(BabyDataSet, columns=['Names', 'Births'])
df.head(10)
Out[115]:
In [80]:
# Show the docstring for DataFrame.to_csv (IPython `?` help syntax).
df.to_csv?
In [116]:
# Export the frame as comma-separated text, leaving out both the header row
# and the index column
df.to_csv(
    'births1880.txt',
    header=False,
    index=False,
)
In [117]:
'''
Get Data
'''
# Show the docstring for pandas.read_csv (IPython `?` help syntax).
pd.read_csv?
In [118]:
# Path of the text file to read back. The export cell writes
# 'births1880.txt' relative to the working directory, so the same relative
# path is used here instead of a machine-specific absolute path; this keeps
# the notebook runnable on any machine and from any checkout location.
Location = 'births1880.txt'
# Read with default options. The file was written without a header row, so
# pandas takes the first record as the column names here; a later cell
# re-reads the file with header=None to correct that.
df = pd.read_csv(Location)
In [85]:
# Print a concise summary of the frame (columns, dtypes, non-null counts).
df.info()
In [119]:
# Preview the first rows of the frame.
# NOTE(review): presumably `df` still comes from the plain read_csv(Location)
# call at this point, in which case the first data record appears as the
# column names -- confirm against the rendered output.
df.head()
Out[119]:
In [120]:
# Re-read the file, this time stating that it has no header row and
# supplying the column names explicitly
df = pd.read_csv(
    Location,
    names=['Names', 'Births'],
    header=None,
)
df.head()
Out[120]:
In [121]:
import os

# Delete the exported text file now that it has been loaded into `df`.
# The existence check makes this cell safe to re-run: a second run after the
# file is gone would otherwise raise FileNotFoundError.
if os.path.exists(Location):
    os.remove(Location)
In [122]:
'''
Prepare Data
'''
# Method 1: list the distinct values found in the Names column
df.loc[:, 'Names'].unique()
Out[122]:
In [123]:
# If you actually want to print the unique values, one per line
# (the loop body must be indented, otherwise Python raises IndentationError):
for x in df['Names'].unique():
    print(x)
In [96]:
# Method 2: print pandas' descriptive summary of the Names column
names_summary = df['Names'].describe()
print(names_summary)
In [97]:
# Show the docstring for DataFrame.groupby (IPython `?` help syntax).
df.groupby?
In [124]:
# Bucket the records by the value in the Names column
name = df.groupby(by='Names')
# Total each bucket; note that `df` is rebound to this aggregated frame,
# so the per-record frame is no longer reachable under that name
df = name.sum()
df
Out[124]:
In [125]:
'''
Analyze Data
'''
# Method 1: order the rows by Births, largest first, and show the top row
Sorted = df.sort_values(by='Births', ascending=False)
Sorted.iloc[:1]
Out[125]:
In [131]:
# Method 2: take the maximum of the Births column directly. Unlike Method 1
# this yields only the value, not the row (and name) it belongs to.
df['Births'].max()
Out[131]:
In [132]:
'''
Present Data
'''
# Bar chart of the Births column, one bar per row of `df`
df['Births'].plot(kind='bar')
print("The most popular name")
# Full table ordered by Births, highest first, so the top row is the answer
df.sort_values(by=['Births'], ascending=False)
Out[132]:
In [ ]:
In [ ]:
In [ ]: