In [2]:

    
import csv

reader

csv.reader(
csvfile, dialect='excel', **fmtparams
)

return a reader object which will iterate over lines in the give csvfile

if csvfile is a file object, it should be opened with newline=''

writer

csv.writer(
csvfile, dialect='excel', **fmtparams
)

return a writer object responsible for converting the user's data into delimited strings on the give file-like object.

if csvfile is a file object, it should be opened with newline=''

Dialect

Group parameters that control how to parse a csv file.

Get list of existing `Dialect`



In [5]:

    
csv.list_dialects()









    Out[5]:





['excel', 'excel-tab', 'unix']

excel: microsoft excel, libreoffice
unix: quotes field with double-quotes and use \n as record separator

Register/Unregister a dialect

csv.register_dialect(name[, dialect[, **fmtparams]])

csv.unregister_dialect(name)



In [ ]:

    
# %load data/testdata.pipes
"Title 1"|"Title 2"|"Title 3"
1|"first line
second line"|08/18/07



In [6]:

    
csv.register_dialect('pipes', delimiter='|')
with open("data/testdata.pipes",'r') as f:
    reader = csv.reader(f,dialect='pipes')
    for row in reader:
        print(row)









    



['Title 1', 'Title 2', 'Title 3']
['1', 'first line\nsecond line', '08/18/07']



In [15]:

    
csv.unregister_dialect("pipes")

get_dialect

csv.get_dialect(name)



In [9]:

    
dialect_pipes = csv.get_dialect("pipes")



In [11]:

    
dialect_pipes.delimiter, dialect_pipes.doublequote









    Out[11]:





('|', 1)

Dialect Paramters

CSV Dialect Parameters¶
Attribute	Default	Meaning
delimiter	`,`	Field separator (one character)
doublequote	True	Flag controlling whether quotechar instances are doubled
escapechar	None	Character used to indicate an escape sequence
lineterminator	`\r\n`	String used by writer to terminate a line
quotechar	`"`	String to surround fields containing special values (one character)
quoting	`QUOTE_MINIMAL`	Controls quoting behavior described earlier
skipinitialspace	False	Ignore whitespace after the field delimiter

Examples:



In [16]:

    
import csv
import sys

csv.register_dialect('escaped',
                     escapechar='\\',
                     doublequote=False,
                     quoting=csv.QUOTE_NONE,
                     )
csv.register_dialect('singlequote',
                     quotechar="'",
                     quoting=csv.QUOTE_ALL,
                     )

quoting_modes = {
    getattr(csv, n): n
    for n in dir(csv)
    if n.startswith('QUOTE_')
}

TEMPLATE = '''\
Dialect: "{name}"

  delimiter   = {dl!r:<6}    skipinitialspace = {si!r}
  doublequote = {dq!r:<6}    quoting          = {qu}
  quotechar   = {qc!r:<6}    lineterminator   = {lt!r}
  escapechar  = {ec!r:<6}
'''

for name in sorted(csv.list_dialects()):
    dialect = csv.get_dialect(name)

    print(TEMPLATE.format(
        name=name,
        dl=dialect.delimiter,
        si=dialect.skipinitialspace,
        dq=dialect.doublequote,
        qu=quoting_modes[dialect.quoting],
        qc=dialect.quotechar,
        lt=dialect.lineterminator,
        ec=dialect.escapechar,
    ))

    writer = csv.writer(sys.stdout, dialect=dialect)
    writer.writerow(
        ('col1', 1, '10/01/2010',
         'Special chars: " \' {} to parse'.format(
             dialect.delimiter))
    )
    print()









    



Dialect: "escaped"

  delimiter   = ','       skipinitialspace = 0
  doublequote = 0         quoting          = QUOTE_NONE
  quotechar   = '"'       lineterminator   = '\r\n'
  escapechar  = '\\'  

col1,1,10/01/2010,Special chars: \" ' \, to parse

Dialect: "excel"

  delimiter   = ','       skipinitialspace = 0
  doublequote = 1         quoting          = QUOTE_MINIMAL
  quotechar   = '"'       lineterminator   = '\r\n'
  escapechar  = None  

col1,1,10/01/2010,"Special chars: "" ' , to parse"

Dialect: "excel-tab"

  delimiter   = '\t'      skipinitialspace = 0
  doublequote = 1         quoting          = QUOTE_MINIMAL
  quotechar   = '"'       lineterminator   = '\r\n'
  escapechar  = None  

col1	1	10/01/2010	"Special chars: "" ' 	 to parse"

Dialect: "singlequote"

  delimiter   = ','       skipinitialspace = 0
  doublequote = 1         quoting          = QUOTE_ALL
  quotechar   = "'"       lineterminator   = '\r\n'
  escapechar  = None  

'col1','1','10/01/2010','Special chars: " '' , to parse'

Dialect: "unix"

  delimiter   = ','       skipinitialspace = 0
  doublequote = 1         quoting          = QUOTE_ALL
  quotechar   = '"'       lineterminator   = '\n'
  escapechar  = None  

"col1","1","10/01/2010","Special chars: "" ' , to parse"

Automatically Detect Dialects

for data where the dialect parameters are unknown, the Sniffer class can be used to make an educated guess. Sniffer class takes a sample of the input data and an optional argument giving the possible delimiter characters.



In [17]:

    
import csv
from io import StringIO
import textwrap

csv.register_dialect('escaped',
                     escapechar='\\',
                     doublequote=False,
                     quoting=csv.QUOTE_NONE)
csv.register_dialect('singlequote',
                     quotechar="'",
                     quoting=csv.QUOTE_ALL)

# Generate sample data for all known dialects
samples = []
for name in sorted(csv.list_dialects()):
    buffer = StringIO()
    dialect = csv.get_dialect(name)
    writer = csv.writer(buffer, dialect=dialect)
    writer.writerow(
        ('col1', 1, '10/01/2010',
         'Special chars " \' {} to parse'.format(
             dialect.delimiter))
    )
    samples.append((name, dialect, buffer.getvalue()))

# Guess the dialect for a given sample, and then use the results
# to parse the data.
sniffer = csv.Sniffer()
for name, expected, sample in samples:
    print('Dialect: "{}"'.format(name))
    print('In: {}'.format(sample.rstrip()))
    dialect = sniffer.sniff(sample, delimiters=',\t')
    reader = csv.reader(StringIO(sample), dialect=dialect)
    print('Parsed:\n  {}\n'.format(
          '\n  '.join(repr(r) for r in next(reader))))









    



Dialect: "escaped"
In: col1,1,10/01/2010,Special chars \" ' \, to parse
Parsed:
  'col1'
  '1'
  '10/01/2010'
  'Special chars \\" \' \\'
  ' to parse'

Dialect: "excel"
In: col1,1,10/01/2010,"Special chars "" ' , to parse"
Parsed:
  'col1'
  '1'
  '10/01/2010'
  'Special chars " \' , to parse'

Dialect: "excel-tab"
In: col1	1	10/01/2010	"Special chars "" ' 	 to parse"
Parsed:
  'col1'
  '1'
  '10/01/2010'
  'Special chars " \' \t to parse'

Dialect: "singlequote"
In: 'col1','1','10/01/2010','Special chars " '' , to parse'
Parsed:
  'col1'
  '1'
  '10/01/2010'
  'Special chars " \' , to parse'

Dialect: "unix"
In: "col1","1","10/01/2010","Special chars "" ' , to parse"
Parsed:
  'col1'
  '1'
  '10/01/2010'
  'Special chars " \' , to parse'

Using field name: DictReader/DictWriter

translate rows to dictionaries instead of lists

DictReader

csv.DictReader(csvfile, fieldnames=None, restkey=None, restval=None, dialect=’excel’, *args, **kwds)



In [21]:

    
with open("data/csv_data.csv",'r') as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(row)









    



OrderedDict([('title', 'a'), ('number', '2'), ('price', '5.0')])
OrderedDict([('title', 'b'), ('number', '89'), ('price', '9.9')])
OrderedDict([('title', 'c'), ('number', '78'), ('price', '12.0')])

DictWriter

csv.DictWriter(csvfile, fieldnames, restval=”, extrasaction=’raise’, dialect=’excel’, *args, **kwds)

Note: The writer must be given a list of field names so it knows how to order the columns in the output.



In [22]:

    
with open('data/csv_dict_writer.csv', 'w') as csvfile:
    fieldnames = ['first_name', 'last_name']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    writer.writerow({'first_name': 'Baked', 'last_name': 'Beans'})
    writer.writerow({'first_name': 'Lovely', 'last_name': 'Spam'})
    writer.writerow({'first_name': 'Wonderful', 'last_name': 'Spam'})



In [ ]:

    
# %load data/csv_dict_writer.csv
first_name,last_name
Baked,Beans
Lovely,Spam
Wonderful,Spam

Quoting

There are four different quoting options, defined as constants in the csv module.

QUOTE_ALL Quote everything, regardless of type.
QUOTE_MINIMAL Quote fields with special characters (anything that would confuse a parser configured with the same dialect and options). This is the default
QUOTE_NONNUMERIC Quote all fields that are not integers or floats. When used with the reader, input fields that are not quoted are converted to floats.
QUOTE_NONE Do not quote anything on output. When used with the reader, quote characters are included in the field values (normally, they are treated as delimiters and stripped).



In [ ]: