In [1]:
from PyPDF2 import PdfFileReader

In [2]:
# Source PDF: box-office report for films released in December 2016.
PDF_PATH = "../2016/12/2016年12月上映影片全國票房資訊_20170203.pdf"
# Use the built-in open() rather than file(): file() exists only in Python 2
# and was removed in Python 3, while open() behaves the same here on both.
# The handle is kept in a named variable because the reader keeps reading
# from the stream after construction (e.g. when pages are fetched), so it
# must stay open while pdfDec is in use; call pdf_stream.close() when done.
pdf_stream = open(PDF_PATH, "rb")
pdfDec = PdfFileReader(pdf_stream)

In [3]:
# Show the PDF's document-information dictionary (author, creator, dates, ...).
# getDocumentInfo() is the accessor behind the read-only documentInfo property.
pdfDec.getDocumentInfo()


Out[3]:
{'/Author': u'shiao',
 '/CreationDate': u"D:20170203152918+08'00'",
 '/Creator': u'Microsoft\xae Excel\xae 2013',
 '/ModDate': u"D:20170203152918+08'00'",
 '/Producer': u'Microsoft\xae Excel\xae 2013'}

In [5]:
# Total number of pages in the PDF; numPages is the read-only property
# counterpart of getNumPages().
pdfDec.numPages


Out[5]:
7

In [40]:
page1 = pdfDec.getPage(pageNumber = 1)
print page1.extractText().encode("ascii", "ignore")


.65/A&1\oY
5

&!&"
5/A#
556
DZsb
!$i
555 

LYb_$

LYb_$
ayg
22
56
+)
2016/11/18

TOEI COMPANY, LTD.
25
13
3,005
692,766
23
56
POKE'MONTHE
<


2016/11/18


32
14
12,499
2,844,260
24
-2iT
 9V4j#
2016/11/18


LA PIEDRA FILMS, S.A.
42
11
1,447
239,500
25
S;&
/kf5c
2016/11/18


47
27
10,206
2,247,718
26
S;&
]ghP
2016/11/18


#

SJ/Y>&fC [

50
37
23,918
5,294,955
27
R&
8E$%
2016/11/18

MIRAMAX UK LIMITED
55
34
6,589
1,316,001
28
?[<

2016/11/21
#"-5/"'ay6

10
2
79
17,320
29
iA=

2016/11/25

5Is




10
1
269
60,180
30
S;&
OO'v
2016/11/25



13
1
679
150,495
31
S;&
#f
2016/11/25


14
3
877
181,080
32


2016/11/25

ARRI MEDIA GMBH
14
2
577
113,930
33
/_&

2016/11/25

BETA  CINEMA GMBH
20
6
1,327
268,740
34
S;&
:J?bD
2016/11/25





21
11
1,825
388,800
35
56

55 Y5
2016/11/25

FUJI CREATIVE CORPORATION
23
6
5,476
1,295,762
36
<}&
PZf1
2016/11/25

EUROPA CORP
26
45
23,472
5,273,898
37
<}&
55_W6
2016/11/25

ELLE DRIVER
27
7
2,390
502,085
g



In [ ]: