In [1]:
from wand.image import Image as WImage
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
from PIL import Image
%matplotlib inline

In [2]:
# Open the source PDF (path is relative to the notebook's working directory)
# as a Wand image; WImage is wand.image.Image, imported in the first cell.
chandon_log = WImage(filename='../data/Chandon1_WCR.pdf')

In [3]:
# Write the Wand image out as a JPEG in the working directory, then reopen that
# file with PIL so matplotlib can display it.
# NOTE(review): the original comment said "first page", but the code saves the
# whole Wand image and never selects a page explicitly -- confirm the PDF is
# single-page, otherwise 'temp.jpg' may not be what gets written.
chandon_log.save(filename="./temp.jpg")
chandon_jpg = Image.open('temp.jpg')
# Last expression of the cell: shows the page inline and yields the AxesImage repr.
plt.imshow(chandon_jpg)


Out[3]:
<matplotlib.image.AxesImage at 0x7fb51f6d15c0>

In [4]:
import os

# Tell the Google Cloud client libraries which service-account key file to use.
# setdefault() keeps a value that is already exported in the kernel's
# environment, so the notebook also runs on machines where the key lives
# elsewhere; the absolute path below is only the fallback for the original host.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    "/home/ubuntu/gkey/raslas-859d594053e6.json",
)

In [5]:
import io
import os

# Imports the Google Cloud client library
from google.cloud import vision
from google.cloud.vision import types

# Instantiates a Vision API client; no credentials are passed explicitly here.
# NOTE(review): presumably it authenticates through the
# GOOGLE_APPLICATION_CREDENTIALS variable set in the previous cell -- confirm.
client = vision.ImageAnnotatorClient()

In [6]:
# The name of the image file to annotate
Chandonjpg = 'temp.jpg'

# Loads the image into memory (raw bytes are sent inline with the request)
with io.open(Chandonjpg, 'rb') as image_file:
    content = image_file.read()

image = types.Image(content=content)

# Performs label detection on the image file
response = client.label_detection(image=image)

# A failed annotation can come back as a populated `error` field instead of an
# exception; without this check the cell would just print an empty label list.
if response.error.message:
    raise RuntimeError(
        'Vision API label detection failed: {}'.format(response.error.message))

labels = response.label_annotations

print('Labels:')
for label in labels:
    print(label.description)


Labels:
text
line
area
font
angle
diagram
point
plot
parallel

In [7]:
def detect_text(path, client=None, verbose=True):
    """Run Vision API text detection on an image file.

    Args:
        path: Path of the image file. It is read in binary mode and its bytes
            are sent inline with the request.
        client: Optional ``vision.ImageAnnotatorClient`` to reuse. When omitted
            a new client is created for this call (the original behaviour).
        verbose: When true (the default, matching the original behaviour),
            print every detected text together with its bounds.

    Returns:
        A ``(ltext, lvs)`` tuple of two parallel lists: ``ltext[i]`` is the
        description of the i-th text annotation and ``lvs[i]`` is its bounding
        polygon as a list of ``'(x,y)'`` strings, one per vertex. In the
        recorded run of this notebook the first annotation is the full text of
        the page and the remaining ones are individual words.

    Raises:
        RuntimeError: If the response carries an error message.
    """
    if client is None:
        client = vision.ImageAnnotatorClient()

    with io.open(path, 'rb') as image_file:
        content = image_file.read()

    image = types.Image(content=content)

    response = client.text_detection(image=image)
    # A failed annotation can come back as a populated `error` field instead of
    # an exception; surface it rather than returning two empty lists.
    if response.error.message:
        raise RuntimeError(
            'Vision API text detection failed: {}'.format(response.error.message))

    texts = response.text_annotations
    if verbose:
        print('Texts:')
    ltext = []
    lvs = []
    for text in texts:
        # Vertices are kept as '(x,y)' strings because callers parse that form.
        vertices = ['({},{})'.format(vertex.x, vertex.y)
                    for vertex in text.bounding_poly.vertices]
        ltext.append(text.description)
        lvs.append(vertices)
        if verbose:
            print('\n"{}"'.format(text.description))
            print('bounds: {}'.format(','.join(vertices)))

    return ltext, lvs

In [8]:
# Run text detection on the JPEG: lt receives the detected strings and lv the
# matching bounding-polygon vertex lists (same order, same length).
lt, lv = detect_text(Chandonjpg)


Texts:

"Evaluation Summary Plot
Well: CHANDON-1
LATITUDE: 019 34' 32.210" S
LONGITUDE: 114 07' 41.250" E
X COORDINATE:-
Y COORDINATE:
HORIZONTAL UNITS: METRES
MEASUREMENT REF. (KB/RT):-
TD DRILLER: 3124.0
VERTICAL UNITS: METRES
DATE LOGGED: 30-Jun-2006
VERTICAL SCALE: 1:500
DATE PLOTTED: 24-Jan-2007
GeneralDepth ResistvityPorosity&
Uithelegy
Core &
Test Dot0
Formation Pore Volume
Permeability Pay Summery Daio
Date
Break Down SaturationsMobilayand Intervols
2775
2875
2925
2950
> 10cc
2975
3025
3050
3075
RLO.ER Cer
panst nv(sireux corected〕
Evaluation Summary Plot
Well: CHANDON-1
"MUNGAROO FM UPPER"
A SAND BASE
Evaluated by
"
bounds: (39,56),(799,56),(799,3145),(39,3145)

"Evaluation"
bounds: (268,57),(409,57),(409,81),(268,81)

"Summary"
bounds: (420,57),(547,57),(547,85),(420,85)

"Plot"
bounds: (557,56),(610,56),(610,80),(557,80)

"Well:"
bounds: (271,123),(349,123),(349,149),(271,149)

"CHANDON-1"
bounds: (362,123),(576,123),(576,149),(362,149)

"LATITUDE:"
bounds: (90,192),(146,192),(146,202),(90,202)

"019"
bounds: (155,192),(178,192),(178,202),(155,202)

"34'"
bounds: (188,192),(205,192),(205,202),(188,202)

"32.210""
bounds: (214,192),(262,192),(262,202),(214,202)

"S"
bounds: (271,192),(277,192),(277,202),(271,202)

"LONGITUDE:"
bounds: (90,221),(158,221),(158,233),(90,233)

"114"
bounds: (164,221),(189,221),(189,233),(164,233)

"07'"
bounds: (197,221),(217,221),(217,233),(197,233)

"41.250""
bounds: (222,221),(273,221),(273,233),(222,233)

"E"
bounds: (278,221),(287,221),(287,233),(278,233)

"X"
bounds: (90,249),(96,249),(96,258),(90,258)

"COORDINATE:-"
bounds: (105,249),(200,249),(200,259),(105,259)

"Y"
bounds: (90,277),(96,277),(96,286),(90,286)

"COORDINATE"
bounds: (105,277),(180,277),(180,287),(105,287)

":"
bounds: (181,280),(182,280),(182,287),(181,287)

"HORIZONTAL"
bounds: (90,305),(163,305),(163,315),(90,315)

"UNITS:"
bounds: (173,305),(209,305),(209,315),(173,315)

"METRES"
bounds: (219,305),(266,305),(266,315),(219,315)

"MEASUREMENT"
bounds: (445,192),(534,192),(534,204),(445,204)

"REF."
bounds: (540,192),(566,192),(566,204),(540,204)

"(KB/RT):-"
bounds: (575,192),(647,192),(647,204),(575,204)

"TD"
bounds: (445,221),(457,221),(457,231),(445,231)

"DRILLER:"
bounds: (465,221),(516,221),(516,231),(465,231)

"3124.0"
bounds: (526,221),(568,221),(568,231),(526,231)

"VERTICAL"
bounds: (445,249),(499,249),(499,259),(445,259)

"UNITS:"
bounds: (507,249),(544,249),(544,259),(507,259)

"METRES"
bounds: (554,249),(601,249),(601,259),(554,259)

"DATE"
bounds: (445,277),(471,277),(471,287),(445,287)

"LOGGED:"
bounds: (481,277),(531,277),(531,287),(481,287)

"30-Jun-2006"
bounds: (540,277),(629,277),(629,287),(540,287)

"VERTICAL"
bounds: (445,305),(499,305),(499,315),(445,315)

"SCALE:"
bounds: (507,305),(547,305),(547,315),(507,315)

"1:500"
bounds: (557,305),(589,305),(589,315),(557,315)

"DATE"
bounds: (443,333),(471,333),(471,343),(443,343)

"PLOTTED:"
bounds: (479,333),(535,333),(535,343),(479,343)

"24-Jan-2007"
bounds: (543,333),(633,333),(633,343),(543,343)

"GeneralDepth"
bounds: (39,376),(136,376),(136,386),(39,386)

"ResistvityPorosity&"
bounds: (148,376),(300,376),(300,386),(148,386)

"Uithelegy"
bounds: (253,387),(297,389),(297,397),(253,395)

"Core"
bounds: (341,376),(361,376),(361,384),(341,384)

"&"
bounds: (371,376),(376,376),(376,384),(371,384)

"Test"
bounds: (336,387),(353,387),(353,396),(336,396)

"Dot0"
bounds: (362,387),(383,387),(383,396),(362,396)

"Formation"
bounds: (421,376),(469,376),(469,385),(421,385)

"Pore"
bounds: (479,376),(516,376),(516,385),(479,385)

"Volume"
bounds: (525,376),(561,376),(561,385),(525,385)

"Permeability"
bounds: (648,376),(706,377),(706,385),(648,384)

"Pay"
bounds: (716,377),(730,377),(730,385),(716,385)

"Summery"
bounds: (736,377),(774,378),(774,386),(736,385)

"Daio"
bounds: (782,379),(798,379),(798,387),(782,387)

"Date"
bounds: (47,387),(68,387),(68,396),(47,396)

"Break"
bounds: (498,385),(522,385),(522,394),(498,394)

"Down"
bounds: (531,385),(555,385),(555,394),(531,394)

"SaturationsMobilayand"
bounds: (567,385),(743,387),(743,396),(567,394)

"Intervols"
bounds: (749,388),(782,388),(782,397),(749,397)

"2775"
bounds: (115,863),(138,863),(138,873),(115,873)

"2875"
bounds: (114,1429),(138,1430),(138,1439),(114,1438)

"2925"
bounds: (114,1713),(139,1713),(139,1723),(114,1723)

"2950"
bounds: (114,1855),(137,1855),(137,1864),(114,1864)

">"
bounds: (347,1887),(350,1887),(349,1896),(346,1896)

"10cc"
bounds: (362,1890),(384,1892),(383,1898),(361,1896)

"2975"
bounds: (114,1996),(136,1998),(135,2006),(113,2004)

"3025"
bounds: (115,2280),(138,2280),(138,2289),(115,2289)

"3050"
bounds: (115,2421),(136,2421),(136,2431),(115,2431)

"3075"
bounds: (114,2564),(137,2564),(137,2573),(114,2573)

"RLO.ER"
bounds: (285,3020),(327,3020),(327,3026),(285,3026)

"Cer"
bounds: (335,3018),(347,3018),(347,3025),(335,3025)

"panst"
bounds: (358,3020),(389,3020),(389,3026),(358,3026)

"nv"
bounds: (394,3018),(406,3018),(406,3027),(394,3027)

"("
bounds: (413,3018),(414,3018),(414,3027),(413,3027)

"sireux"
bounds: (416,3018),(441,3018),(441,3027),(416,3027)

"corected"
bounds: (446,3020),(485,3020),(485,3026),(446,3026)

"〕"
bounds: (487,3018),(488,3018),(488,3027),(487,3027)

"Evaluation"
bounds: (590,3045),(654,3045),(654,3055),(590,3055)

"Summary"
bounds: (659,3045),(718,3045),(718,3057),(659,3057)

"Plot"
bounds: (725,3045),(748,3045),(748,3055),(725,3055)

"Well:"
bounds: (597,3067),(631,3067),(631,3079),(597,3079)

"CHANDON-1"
bounds: (637,3067),(732,3067),(732,3079),(637,3079)

""MUNGAROO"
bounds: (587,3086),(668,3086),(668,3095),(587,3095)

"FM"
bounds: (673,3086),(690,3086),(690,3095),(673,3095)

"UPPER""
bounds: (695,3086),(740,3086),(740,3095),(695,3095)

"A"
bounds: (622,3111),(628,3111),(628,3121),(622,3121)

"SAND"
bounds: (633,3111),(662,3111),(662,3121),(633,3121)

"BASE"
bounds: (672,3111),(701,3111),(701,3121),(672,3121)

"Evaluated"
bounds: (525,3133),(573,3133),(573,3145),(525,3145)

"by"
bounds: (579,3133),(589,3133),(589,3145),(579,3145)

In [9]:
# Parse the first vertex of the first annotation from its '(x,y)' string form
# into a two-element list of coordinate strings.
lv[0][0].strip('(').strip(')').split(',')


Out[9]:
['39', '56']

In [10]:
# First annotation: all text found on the page as one string, with detected
# lines separated by '\n'.
lt[0]


Out[10]:
'Evaluation Summary Plot\nWell: CHANDON-1\nLATITUDE: 019 34\' 32.210" S\nLONGITUDE: 114 07\' 41.250" E\nX COORDINATE:-\nY COORDINATE:\nHORIZONTAL UNITS: METRES\nMEASUREMENT REF. (KB/RT):-\nTD DRILLER: 3124.0\nVERTICAL UNITS: METRES\nDATE LOGGED: 30-Jun-2006\nVERTICAL SCALE: 1:500\nDATE PLOTTED: 24-Jan-2007\nGeneralDepth ResistvityPorosity&\nUithelegy\nCore &\nTest Dot0\nFormation Pore Volume\nPermeability Pay Summery Daio\nDate\nBreak Down SaturationsMobilayand Intervols\n2775\n2875\n2925\n2950\n> 10cc\n2975\n3025\n3050\n3075\nRLO.ER Cer\npanst nv(sireux corected〕\nEvaluation Summary Plot\nWell: CHANDON-1\n"MUNGAROO FM UPPER"\nA SAND BASE\nEvaluated by\n'

In [11]:
# Draw every detected word in red on top of the page image, anchored at the
# first vertex of its bounding polygon, and save the result to disk.
fig = plt.figure()
ax = fig.add_subplot(111)
img = plt.imread("temp.jpg")
ax.imshow(img)

# Entry 0 is the whole-page text block, so only entries 1.. are drawn.
for word, corners in zip(lt[1:], lv[1:]):
    xy = corners[0].strip('(').strip(')').split(',')
    ax.text(float(xy[0]), float(xy[1]), word, color="r")

fig.set_size_inches(14, 10)
fig.savefig("finalfull.jpg")



In [ ]: