In [ ]:
'''
Written by Jesse Pisel 2016/2017 jessepisel@gmail.com
This version is written for Python 3; the required packages are imported in the cell below.
'''
In [5]:
import requests, bs4, re, time, os, wget
import numpy as np
from semantic.numbers import NumberService
import pandas as pd
In [17]:
'''
To start, save your query results from the COGCC website to a local drive as an HTML file.
Once you have saved it, point the open() call below at the path of that file.
This cell opens that HTML file and builds a list of the well links it contains; the well API numbers are parsed from those links in the next cell and used to download each well's documents.
'''
# Read the saved COGCC query page. The path is a placeholder: replace it with
# the location of your own saved LiveQuery.html. It is written as a raw string
# so the Windows backslashes stay literal ("\e", "\w", "\y", "\s" and "\L" are
# invalid escape sequences in a normal string and trigger warnings).
with open(r'...\example\wherever\you\saved\LiveQuery.html', 'r') as f:
    wells = f.read()

soup = bs4.BeautifulSoup(wells, "lxml")

# href of every <a> element in document order (None for anchors without an href)
links = [link.get('href') for link in soup.find_all('a')]

# Keep every other link, starting with the first one.
# NOTE(review): presumably each well contributes two anchors and only the first
# is the well link -- confirm against the saved page.
linked = links[::2]
number_of_wells = len(linked)
In [3]:
# Parallel result lists: entry i of each list belongs to the same well.
directional_link = []
las_link = []
well_id_list = []
fail = []  # indices into `linked` of wells that could not be added

for k in range(number_of_wells):
    well_id = re.findall(r'\d+', linked[k])  # pulls well API number from the well index
    # scrapes the individual well docs page
    response1 = requests.get('http://cogcc.state.co.us/weblink/results.aspx?id=' + str(well_id[0]))
    soup2 = bs4.BeautifulSoup(response1.text, "lxml")
    searching = soup2.find_all('tr')  # selects the rows from the scraped data
    try:
        # rows containing the text 'DIRECTIONAL DATA' / 'LAS'
        well_page = [t for t in searching if t.find_all(text=re.compile('DIRECTIONAL DATA'))]
        well_page_las = [t for t in searching if t.find_all(text=re.compile('LAS'))]
        # Resolve BOTH links before storing anything: if either lookup fails the
        # well is skipped as a whole, so the three lists can never get out of step.
        las_href = well_page_las[0].find_all('a')[2].get('href')  # link for the LAS logging while drilling
        directional_href = well_page[1].find_all('a')[2].get('href')  # link for the directional data
    except (IndexError, AttributeError):
        # expected row or anchor is missing on this well's page
        fail.append(k)
    else:
        las_link.append(las_href)
        directional_link.append(directional_href)
        well_id_list.append(well_id[0])
    time.sleep(5)  # pause between requests to the server

# Share of requested wells that were actually added; guarded so that an empty
# query (no wells) or a run with no failures does not raise.
if number_of_wells:
    success_pct = 100.0 * len(well_id_list) / number_of_wells
else:
    success_pct = 0.0
print(str(success_pct) + '% of the requested wells were sucessfully added to the list')
Out[3]:
In [ ]:
# Set the path below to where you want all the logs saved. One folder per well
# is created inside it, named with the well API number as stored in well_id_list.
for well in well_id_list:
    newpath = r'creates path to .../well logs/%s' % (well,)
    # exist_ok=True makes the cell safe to re-run and avoids the
    # check-then-create race of a separate os.path.exists() test
    os.makedirs(newpath, exist_ok=True)
In [ ]:
# Download the directional survey and the LAS file of every well into that
# well's own folder. The three lists are parallel: entry i of each belongs to
# the same well.
for well_id, directional_href, las_href in zip(well_id_list, directional_link, las_link):
    out_dir = 'path to .../well logs/' + str(well_id)
    # Make sure the folder exists so both files are saved inside it; if it is
    # missing, wget uses `out` as a file name instead of a directory.
    os.makedirs(out_dir, exist_ok=True)
    wget.download('http://cogcc.state.co.us/weblink/' + str(directional_href),
                  out=out_dir)
    wget.download('http://cogcc.state.co.us/weblink/' + str(las_href),
                  out=out_dir)
    time.sleep(10)  # do not change this line or you'll get banned