Python provides a native module to perform CSV file operations.
Official documentation: CSV module in Python 2.7
In [3]:
# For reading/writing CSV files
import csv
# For listing system file folders
from subprocess import check_output
# Use with open to ensure file is closed when block ends
# The wb flag opens file for writing
with open('data/fileops/vehicles.csv', 'wb') as csv_file:
# Prepare csv writer
wtr = csv.writer(csv_file, delimiter=',', quotechar='"',
quoting=csv.QUOTE_MINIMAL)
# Write CSV header row
wtr.writerow(['type', 'wheels', 'speed', 'weight', 'invented'])
# Write CSV data rows
wtr.writerow(['Scooter', 2, 150, 109.78, 1817])
wtr.writerow(['Car', 4, 250, 1818.45, 1885])
wtr.writerow(['Plane', 10, 850, 270000, 1903])
# Check file created
print(check_output(["ls", "data/fileops"]).decode("utf8"))
In [4]:
# The rb flag opens file for reading
with open('data/fileops/vehicles.csv', 'rb') as csv_file:
rdr = csv.reader(csv_file, delimiter=',', quotechar='"')
for row in rdr:
print '\t'.join(row)
In [5]:
# Dictionary data structures can be used to represent rows
game1_scores = {'Game':'Quarter', 'Team A': 45, 'Team B': 90}
game2_scores = {'Game':'Semi', 'Team A': 80, 'Team B': 32}
game3_scores = {'Game':'Final', 'Team A': 70, 'Team B': 68}
headers = ['Game', 'Team A', 'Team B']
# Create CSV from dictionaries
with open('data/fileops/game-scores.csv', 'wb') as df:
dict_wtr = csv.DictWriter(df, fieldnames=headers)
dict_wtr.writeheader()
dict_wtr.writerow(game1_scores)
dict_wtr.writerow(game2_scores)
dict_wtr.writerow(game3_scores)
print(check_output(["ls", "data/fileops"]).decode("utf8"))
In [6]:
# Read CSV into dictionary data structure
with open('data/fileops/game-scores.csv', 'rb') as df:
dict_rdr = csv.DictReader(df)
for row in dict_rdr:
print('\t'.join([row['Game'], row['Team A'], row['Team B']]))
print('\t'.join(row.keys()))
Pandas goal is to become the most powerful and flexible open source data analysis / manipulation tool available in any language. Pandas includes file operations capabilities for CSV, among other formats.
CSV operations in Pandas are much faster than in native Python.
In [12]:
import pandas as pd
# Create a DataFrame
df = pd.DataFrame({
'Name' : ['Josh', 'Eli', 'Ram', 'Bil'],
'Sales' : [34.32, 12.1, 4.77, 31.63],
'Region' : ['North', 'South', 'West', 'East'],
'Product' : ['PC', 'Phone', 'SW', 'Cloud']})
df
Out[12]:
In [31]:
# DataFrame to CSV
df.to_csv('data/fileops/sales.csv', index=False)
print(check_output(["ls", "data/fileops"]).decode("utf8"))
In [28]:
# CSV to DataFrame
df2 = pd.read_csv('data/fileops/sales.csv')
df2
Out[28]:
In [30]:
# DataFrame to XLSX Excel file
df.to_excel('data/fileops/sales.xlsx', index=False)
print(check_output(["ls", "data/fileops"]).decode("utf8"))
In [29]:
# Excel to DataFrame
df3 = pd.read_excel('data/fileops/sales.xlsx')
df3
Out[29]: