In [15]:
import pandas as pd

pd.read_csv(r'../../buschmais-spring-petclinic/git.log', names=["data"]).head()


Out[15]:
data
0 \t\t\t56098dd\t1488121379\tDirk Mahler
1 \t\t\t5dade92\t1487800842\tMarkus Harrer
2 1\t1\tdocs/README.md
3 \t\t\t9f07e8c\t1487800618\tMarkus Harrer
4 76\t0\tdocs/README.md

In [5]:
pd.read_csv("../../linux/git_linux.bz2", sep="#", compression="bz2")


---------------------------------------------------------------------------
UnicodeDecodeError                        Traceback (most recent call last)
pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._convert_tokens (pandas\_libs\parsers.c:14858)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._convert_with_dtype (pandas\_libs\parsers.c:17119)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._string_convert (pandas\_libs\parsers.c:17347)()

pandas\_libs\parsers.pyx in pandas._libs.parsers._string_box_utf8 (pandas\_libs\parsers.c:23041)()

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe9 in position 2: invalid continuation byte

During handling of the above exception, another exception occurred:

UnicodeDecodeError                        Traceback (most recent call last)
<ipython-input-5-fbd25ade74db> in <module>()
----> 1 pd.read_csv("../../linux/git_linux.bz2", sep="#", compression="bz2")

C:\dev\apps\Anaconda3\lib\site-packages\pandas\io\parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
    653                     skip_blank_lines=skip_blank_lines)
    654 
--> 655         return _read(filepath_or_buffer, kwds)
    656 
    657     parser_f.__name__ = name

C:\dev\apps\Anaconda3\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
    409 
    410     try:
--> 411         data = parser.read(nrows)
    412     finally:
    413         parser.close()

C:\dev\apps\Anaconda3\lib\site-packages\pandas\io\parsers.py in read(self, nrows)
    980                 raise ValueError('skipfooter not supported for iteration')
    981 
--> 982         ret = self._engine.read(nrows)
    983 
    984         if self.options.get('as_recarray'):

C:\dev\apps\Anaconda3\lib\site-packages\pandas\io\parsers.py in read(self, nrows)
   1717     def read(self, nrows=None):
   1718         try:
-> 1719             data = self._reader.read(nrows)
   1720         except StopIteration:
   1721             if self._first_chunk:

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader.read (pandas\_libs\parsers.c:10862)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._read_low_memory (pandas\_libs\parsers.c:11138)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._read_rows (pandas\_libs\parsers.c:12175)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._convert_column_data (pandas\_libs\parsers.c:14136)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._convert_tokens (pandas\_libs\parsers.c:14972)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._convert_with_dtype (pandas\_libs\parsers.c:17119)()

pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._string_convert (pandas\_libs\parsers.c:17347)()

pandas\_libs\parsers.pyx in pandas._libs.parsers._string_box_utf8 (pandas\_libs\parsers.c:23041)()

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe9 in position 2: invalid continuation byte

In [35]:
pd.read_csv(r"C:\Temp\git.log", encoding="latin-1", sep="#", header=None)


Out[35]:
0
0 1504030581 Linus Torvalds
1 1504030432 Linus Torvalds
2 1504023066 Linus Torvalds
3 1504021018 Tejun Heo
4 1504010526 Christoph Hellwig
5 1503963940 Linus Torvalds
6 1503957087 Tejun Heo
7 1503957838 Alexey Brodkin
8 1503520620 Helge Deller
9 1503944146 Linus Torvalds
10 1503901688 Christoph Hellwig
11 1503879640 Linus Torvalds
12 1503879034 Linus Torvalds
13 1503878917 Linus Torvalds
14 1503878613 Linus Torvalds
15 1503878514 Linus Torvalds
16 1503876309 Linus Torvalds
17 1503867312 Linus Torvalds
18 1503861145 Linus Torvalds
19 1503776909 Linus Torvalds
20 1503776774 Linus Torvalds
21 1503763588 Linus Torvalds
22 1503763338 Linus Torvalds
23 1503763190 Linus Torvalds
24 1503709347 Linus Torvalds
25 1503708383 Linus Torvalds
26 1503708003 Linus Torvalds
27 1503707555 Linus Torvalds
28 1503707246 Linus Torvalds
29 1503706953 Linus Torvalds
... ...
693293 1113690257 Paul Mackerras
693294 1113690257 Paul Mackerras
693295 1113690256 Paul Mackerras
693296 1113690255 Eugene Surovegin
693297 1113690254 Paul Mackerras
693298 1113690253 James Morris
693299 1113690251 Jean Tourrilhes
693300 1113690250 Herbert Xu
693301 1113690249 Arnaldo Carvalho de Melo
693302 1113690249 David S. Miller
693303 1113690248 Martin Hicks
693304 1113690247 akpm@osdl.org
693305 1113690246 akpm@osdl.org
693306 1113690245 Andrea Arcangeli
693307 1113690245 Jeff Moyer
693308 1113690244 Dave Airlie
693309 1113690243 James Morris
693310 1113690242 akpm@osdl.org
693311 1113690241 David S. Miller
693312 1113690241 Stas Sergeev
693313 1113690240 akpm@osdl.org
693314 1113690239 James Bottomley
693315 1113690238 Artem B. Bityuckiy
693316 1113690237 akpm@osdl.org
693317 1113690237 akpm@osdl.org
693318 1113690236 akpm@osdl.org
693319 1113690235 akpm@osdl.org
693320 1113690234 Neil Brown
693321 1113690233 Christoph Lameter
693322 1113690036 Linus Torvalds

693323 rows × 1 columns


In [31]:
pd.read_csv("../../linux/git.log", encoding="latin-1", sep="#")


Out[31]:
40c6d1b9e2fc Tue Aug 15 12:49:43 2017 -0700 1502826583 Linus Torvalds
0 fcd07350007b Mon Aug 14 13:09:59 2017 -0700 1502741399 Linus Torvalds
1 6b9d1c24e051 Mon Aug 14 11:35:56 2017 -0700 1502735756 Linus Torvalds
2 ef954844c7ac Sun Aug 13 16:01:32 2017 -0700 1502665292 Linus Torvalds
3 b2298fc900f8 Sun Aug 13 15:34:28 2017 -0700 1502663668 Linus Torvalds
4 c9dc281d91ae Sun Aug 13 12:44:18 2017 -0700 1502653458 Linus Torvalds
5 ce7ba95cf078 Sun Aug 13 12:41:58 2017 -0700 1502653318 Linus Torvalds
6 438630ef5b3c Sun Aug 13 12:33:35 2017 -0700 1502652815 Linus Torvalds
7 dd95f1860768 Sun Aug 13 12:30:17 2017 -0700 1502652617 Linus Torvalds
8 10cec917d000 Sun Aug 13 12:27:42 2017 -0700 1502652462 Linus Torvalds
9 89a55278dee4 Sat Aug 12 16:19:43 2017 -0700 1502579983 Linus Torvalds
10 9a51544774a5 Wed Aug 2 18:03:05 2017 +0530 1501677185 Abhishek Sahu
11 a99bcdce8395 Sat Aug 12 12:08:59 2017 -0700 1502564939 Linus Torvalds
12 043cd07c555f Sat Aug 12 09:01:36 2017 -0700 1502553696 Linus Torvalds
13 afc1f55ca44e Fri Aug 11 20:34:45 2017 -0700 1502508885 Shaohua Li
14 216e4a1def29 Fri Aug 11 13:54:09 2017 -0700 1502484849 Linus Torvalds
15 e0d0e045b862 Fri Aug 11 12:26:49 2017 -0700 1502479609 Linus Torvalds
16 0993133bb8e7 Fri Aug 11 11:56:54 2017 -0700 1502477814 Linus Torvalds
17 7eb97ba611f5 Fri Aug 11 11:44:18 2017 -0700 1502477058 Linus Torvalds
18 2bfc37cdef08 Fri Aug 11 11:20:48 2017 -0700 1502475648 Linus Torvalds
19 7d7a827ba92c Fri Aug 11 11:15:51 2017 -0700 1502475351 Linus Torvalds
20 8a9d6e964d31 Sat Aug 5 10:59:14 2017 +0200 1501923554 Christoph Hellwig
21 8001a975f955 Fri Aug 11 08:56:01 2017 -0700 1502466961 Linus Torvalds
22 622b2fbe625b Wed Aug 9 15:59:10 2017 -0600 1502315950 Shuah Khan
23 a7990c647b35 Tue Aug 8 12:26:02 2017 +0200 1502187962 Artem Savkov
24 020db9d3c1dc Sun Jul 30 00:59:57 2017 +0800 1501347597 Liu Shuo
25 529871bb3c06 Fri Jul 28 16:53:55 2017 +0200 1501253635 Juergen Gross
26 4a8b53be6404 Fri Aug 11 08:07:19 2017 -0600 1502460439 Jens Axboe
27 4ca83dcf4e3b Fri Jul 28 12:23:14 2017 +0200 1501237394 Juergen Gross
28 10231f69eb03 Fri Jul 28 12:23:13 2017 +0200 1501237393 Juergen Gross
29 c138d81163d8 Fri Jul 28 12:23:12 2017 +0200 1501237392 Juergen Gross
... ... ... ... ...
692854 6c26e03b2db4 Sat Apr 16 15:24:17 2005 -0700 1113690257 Paul Mackerras
692855 e378cc16b0d3 Sat Apr 16 15:24:17 2005 -0700 1113690257 Paul Mackerras
692856 6460b4cceba0 Sat Apr 16 15:24:16 2005 -0700 1113690256 Paul Mackerras
692857 35b535d9cc8d Sat Apr 16 15:24:15 2005 -0700 1113690255 Eugene Surovegin
692858 16acbc624e2b Sat Apr 16 15:24:14 2005 -0700 1113690254 Paul Mackerras
692859 0c9b79429c83 Sat Apr 16 15:24:13 2005 -0700 1113690253 James Morris
692860 7e5c6bc0a600 Sat Apr 16 15:24:11 2005 -0700 1113690251 Jean Tourrilhes
692861 6775cab98b89 Sat Apr 16 15:24:10 2005 -0700 1113690250 Herbert Xu
692862 2a27805127ae Sat Apr 16 15:24:09 2005 -0700 1113690249 Arnaldo Carvalho de Melo
692863 9f3786dc8b1d Sat Apr 16 15:24:09 2005 -0700 1113690249 David S. Miller
692864 4c4c402d6cab Sat Apr 16 15:24:08 2005 -0700 1113690248 Martin Hicks
692865 76c3073a888a Sat Apr 16 15:24:07 2005 -0700 1113690247 akpm@osdl.org
692866 323aca6c0bda Sat Apr 16 15:24:06 2005 -0700 1113690246 akpm@osdl.org
692867 79befd0c08c4 Sat Apr 16 15:24:05 2005 -0700 1113690245 Andrea Arcangeli
692868 d345734267db Sat Apr 16 15:24:05 2005 -0700 1113690245 Jeff Moyer
692869 41aac24f8fb5 Sat Apr 16 15:24:04 2005 -0700 1113690244 Dave Airlie
692870 388c69789a2a Sat Apr 16 15:24:03 2005 -0700 1113690243 James Morris
692871 1db7fc75a410 Sat Apr 16 15:24:02 2005 -0700 1113690242 akpm@osdl.org
692872 51410d3c53d8 Sat Apr 16 15:24:01 2005 -0700 1113690241 David S. Miller
692873 5df240826c90 Sat Apr 16 15:24:01 2005 -0700 1113690241 Stas Sergeev
692874 e493073d8d05 Sat Apr 16 15:24:00 2005 -0700 1113690240 akpm@osdl.org
692875 81ddef77bb77 Sat Apr 16 15:23:59 2005 -0700 1113690239 James Bottomley
692876 9ffb7146f0aa Sat Apr 16 15:23:58 2005 -0700 1113690238 Artem B. Bityuckiy
692877 d42ce812b8a3 Sat Apr 16 15:23:57 2005 -0700 1113690237 akpm@osdl.org
692878 7a228aaa879c Sat Apr 16 15:23:57 2005 -0700 1113690237 akpm@osdl.org
692879 7aa52f5128b0 Sat Apr 16 15:23:56 2005 -0700 1113690236 akpm@osdl.org
692880 2d137c24e9f4 Sat Apr 16 15:23:55 2005 -0700 1113690235 akpm@osdl.org
692881 baaa2c512dc1 Sat Apr 16 15:23:54 2005 -0700 1113690234 Neil Brown
692882 8d38eadb7a97 Sat Apr 16 15:23:53 2005 -0700 1113690233 Christoph Lameter
692883 1da177e4c3f4 Sat Apr 16 15:20:36 2005 -0700 1113690036 Linus Torvalds

692884 rows × 4 columns


In [30]:
with open("../../linux/git.log", encoding="latin-1") as log_file:
    log_file.read()
    for i, line in enumerate(log_file.readline()):
        print(i,line)