In [ ]:
'''
Written by Jesse Pisel 2016/2017 jessepisel@gmail.com

This version is written in Python 3; the packages it requires are imported in the cell below.

'''

In [5]:
import requests, bs4, re, time, os, wget
import numpy as np
from semantic.numbers import NumberService
import pandas as pd

In [17]:
# Before running this cell, save your query results page from the COGCC
# website to a local drive, then point the path below at that saved file.
# The cell parses the saved HTML, collects the href of every anchor tag, and
# keeps every second href (starting with the first) as the list of well links
# used by the following cells.
#
# The path is a raw string: Windows-style paths contain backslashes, and
# sequences such as "\e" or "\s" are invalid escapes that newer Python
# versions warn about (and will eventually reject) in a normal string literal.
with open(r'...\example\wherever\you\saved\LiveQuery.html', 'r') as f:
    wells=f.read()
soup=bs4.BeautifulSoup(wells, "lxml")
# href of every <a> tag, in document order (None for anchors without an href)
links=[link.get('href') for link in soup.find_all('a')]
# Only every other anchor is kept -- presumably each well row carries two
# links and the first is the well index link; TODO confirm against the page.
linked=links[::2]
number_of_wells=len(linked)

In [3]:
# Visit each well's document page and collect the download links for its
# directional data and its LAS log.
#
# directional_link, las_link and well_id_list are parallel lists: entry i of
# each belongs to the same well. A well is only added when BOTH links were
# found, so the lists can never drift out of step. The positions of wells
# that could not be processed are recorded in `fail`.
directional_link=[]
las_link=[]
well_id_list=[]
fail=[]
for k in range(number_of_wells):
    try:
        well_id=re.findall(r'\d+',linked[k]) #pulls well API number from the well index
        # timeout (seconds) so that a stalled connection cannot hang the whole run
        response1 = requests.get('http://cogcc.state.co.us/weblink/results.aspx?id='+str(well_id[0]),
                                 timeout=30) #scrapes the individual well docs page
        soup2=bs4.BeautifulSoup(response1.text, "lxml")
        searching=soup2.find_all('tr') #selects the rows from the scraped data
        well_page = [t for t in searching if t.find_all(text=re.compile('DIRECTIONAL DATA'))] #rows mentioning directional data
        well_page_las = [t for t in searching if t.find_all(text=re.compile('LAS'))] #rows mentioning LAS
        # Resolve both links before touching the result lists; if either
        # lookup fails, nothing is appended for this well.
        las_href = well_page_las[0].find_all('a')[2].get('href') #chooses the link for the LAS logging while drilling
        directional_href = well_page[1].find_all('a')[2].get('href') #chooses the link for the directional data
    except Exception:
        # Best effort: a well with a missing row/link, an unusable index link
        # or a failed request is skipped and recorded. KeyboardInterrupt is
        # deliberately not caught, so the run can still be stopped by hand.
        fail.append(k)
    else:
        las_link.append(las_href)
        directional_link.append(directional_href)
        well_id_list.append(well_id[0])
    time.sleep(5) # pause between requests to stay polite to the server
if number_of_wells:
    # success rate is based on HOW MANY wells failed, not on the highest failing index
    print(str((1-(len(fail)/float(number_of_wells)))*100)+'% of the requested wells were sucessfully added to the list')
else:
    print('No wells were found in the saved query, so nothing was requested')


Out[3]:
884

In [ ]:
# Set the template below to the location where all the logs should be saved;
# one new folder per well is created inside the "well logs" folder.
well_folder_template = r'creates path to .../well logs/%s'
for api_number in well_id_list:
    well_folder = well_folder_template % (api_number)
    if os.path.exists(well_folder):
        continue  # folder is already there, nothing to create
    # folder is named with the well API number, missing the 50 on the front
    os.makedirs(well_folder)

In [ ]:
# Download the directional data file and the LAS file of every collected well
# into that well's folder.
weblink_root = 'http://cogcc.state.co.us/weblink/'
for idx, api_number in enumerate(well_id_list):
    destination = 'path to .../well logs/'+str(api_number)
    wget.download(weblink_root+str(directional_link[idx]), out=destination)
    wget.download(weblink_root+str(las_link[idx]), out=destination)
    # Keep this pause between wells: the original author warns that changing
    # it will get you banned from the site.
    time.sleep(10)