In [2]:
import csv
csv.reader(
csvfile, dialect='excel', **fmtparams
)
Returns a reader
object which will iterate over lines in the given csvfile.
If csvfile is a file object, it should be opened with newline=''.
csv.writer(
csvfile, dialect='excel', **fmtparams
)
Returns a writer
object responsible for converting the user's data into delimited strings on the given file-like object.
If csvfile is a file object, it should be opened with newline=''.
A dialect groups the parameters that control how a CSV file is parsed.
In [5]:
# List the names of every dialect currently registered with the csv module.
csv.list_dialects()
Out[5]:
In [ ]:
# %load data/testdata.pipes
"Title 1"|"Title 2"|"Title 3"
1|"first line
second line"|08/18/07
In [6]:
# Register a dialect named 'pipes' whose only customisation is the '|'
# field delimiter.
csv.register_dialect('pipes', delimiter='|')

# Open with newline='' as the csv documentation recommends: the reader then
# receives line endings untranslated, so a quoted field that contains an
# embedded newline (as the sample data row does) is parsed correctly.
with open("data/testdata.pipes", 'r', newline='') as f:
    reader = csv.reader(f, dialect='pipes')
    # Each row comes back as a list of strings.
    for row in reader:
        print(row)
In [15]:
# Drop the 'pipes' dialect from the csv module's dialect registry.
csv.unregister_dialect("pipes")
In [9]:
# Look up the 'pipes' dialect object.
# csv.get_dialect() raises csv.Error for an unknown name, which is what
# happens when this cell runs after 'pipes' has been unregistered (as it is
# when the notebook is executed top to bottom). Register it temporarily in
# that case and leave the registry exactly as it was found; the returned
# dialect object stays usable after the name is removed.
pipes_was_registered = "pipes" in csv.list_dialects()
if not pipes_was_registered:
    csv.register_dialect("pipes", delimiter="|")
try:
    dialect_pipes = csv.get_dialect("pipes")
finally:
    if not pipes_was_registered:
        csv.unregister_dialect("pipes")
In [11]:
# Show the field delimiter and the doublequote flag of the looked-up dialect.
dialect_pipes.delimiter, dialect_pipes.doublequote
Out[11]:
Attribute | Default | Meaning |
---|---|---|
delimiter | , | Field separator (one character) |
doublequote | True | Flag controlling whether quotechar instances are doubled |
escapechar | None | Character used to indicate an escape sequence |
lineterminator | \r\n | String used by writer to terminate a line |
quotechar | " | String to surround fields containing special values (one character) |
quoting | QUOTE_MINIMAL | Controls quoting behavior described earlier |
skipinitialspace | False | Ignore whitespace after the field delimiter |
Examples:
In [16]:
import csv
import sys
# Add two custom dialects to the registry next to the built-in ones.
csv.register_dialect(
    'escaped',
    escapechar='\\',
    doublequote=False,
    quoting=csv.QUOTE_NONE,
)
csv.register_dialect(
    'singlequote',
    quotechar="'",
    quoting=csv.QUOTE_ALL,
)

# Reverse lookup table: value of each csv.QUOTE_* constant -> its name.
quoting_modes = dict(
    (getattr(csv, n), n)
    for n in dir(csv)
    if n.startswith('QUOTE_')
)

# Report layout for one dialect; filled in with str.format() below.
TEMPLATE = (
    'Dialect: "{name}"\n'
    'delimiter = {dl!r:<6} skipinitialspace = {si!r}\n'
    'doublequote = {dq!r:<6} quoting = {qu}\n'
    'quotechar = {qc!r:<6} lineterminator = {lt!r}\n'
    'escapechar = {ec!r:<6}\n'
)

# For every registered dialect, in name order: print its settings, then
# write one sample row to stdout using that dialect.
for name in sorted(csv.list_dialects()):
    dialect = csv.get_dialect(name)

    settings = {
        'name': name,
        'dl': dialect.delimiter,
        'si': dialect.skipinitialspace,
        'dq': dialect.doublequote,
        'qu': quoting_modes[dialect.quoting],
        'qc': dialect.quotechar,
        'lt': dialect.lineterminator,
        'ec': dialect.escapechar,
    }
    print(TEMPLATE.format(**settings))

    # The last field embeds both quote characters and the dialect's own
    # delimiter so the quoting/escaping rules become visible in the output.
    tricky_field = 'Special chars: " \' {} to parse'.format(dialect.delimiter)
    writer = csv.writer(sys.stdout, dialect=dialect)
    writer.writerow(('col1', 1, '10/01/2010', tricky_field))
    print()
For data where the dialect parameters are unknown, the Sniffer
class can be used to make an educated guess. The Sniffer
class takes a sample of the input data and an optional argument giving the possible delimiter characters.
In [17]:
import csv
from io import StringIO
import textwrap
# Make sure the two custom dialects exist alongside the built-in ones.
csv.register_dialect(
    'escaped',
    escapechar='\\',
    doublequote=False,
    quoting=csv.QUOTE_NONE,
)
csv.register_dialect(
    'singlequote',
    quotechar="'",
    quoting=csv.QUOTE_ALL,
)

# Generate sample data for all known dialects
samples = []
for name in sorted(csv.list_dialects()):
    dialect = csv.get_dialect(name)
    buffer = StringIO()
    writer = csv.writer(buffer, dialect=dialect)
    # The last field embeds quote characters and the dialect's delimiter.
    tricky_field = 'Special chars " \' {} to parse'.format(dialect.delimiter)
    writer.writerow(('col1', 1, '10/01/2010', tricky_field))
    samples.append((name, dialect, buffer.getvalue()))

# Guess the dialect for a given sample, and then use the results
# to parse the data.
sniffer = csv.Sniffer()
for name, expected, sample in samples:
    print('Dialect: "{}"'.format(name))
    print('In: {}'.format(sample.rstrip()))
    # Only comma and tab are offered to the sniffer as candidate delimiters.
    dialect = sniffer.sniff(sample, delimiters=',\t')
    reader = csv.reader(StringIO(sample), dialect=dialect)
    first_row = next(reader)
    print('Parsed:\n {}\n'.format('\n '.join(map(repr, first_row))))
Use `csv.DictReader` and `csv.DictWriter` to translate rows to dictionaries instead of lists.
In [21]:
# Read the CSV file as dictionaries rather than lists.
# newline='' follows the csv documentation's recommendation for file
# objects passed to the csv module, so quoted fields that contain line
# breaks are handled by the parser rather than by newline translation.
with open("data/csv_data.csv", 'r', newline='') as f:
    # No fieldnames are passed, so the keys come from the file's first row.
    reader = csv.DictReader(f)
    for row in reader:
        print(row)
Note: The writer must be given a list of field names so it knows how to order the columns in the output.
In [22]:
# Write three records to a CSV file from dictionaries.
# newline='' is required by the csv documentation for files handed to a
# writer: the writer emits its own '\r\n' line terminator, and without
# newline='' platforms that translate '\n' on write (Windows) would turn it
# into '\r\r\n', producing a blank line after every row.
with open('data/csv_dict_writer.csv', 'w', newline='') as csvfile:
    # The field name list fixes the column order of the output.
    fieldnames = ['first_name', 'last_name']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    writer.writerow({'first_name': 'Baked', 'last_name': 'Beans'})
    writer.writerow({'first_name': 'Lovely', 'last_name': 'Spam'})
    writer.writerow({'first_name': 'Wonderful', 'last_name': 'Spam'})
In [ ]:
# %load data/csv_dict_writer.csv
first_name,last_name
Baked,Beans
Lovely,Spam
Wonderful,Spam
There are four different quoting options, defined as constants in the csv module.
In [ ]: