In [69]:
import pandas as pd
import bs4 as bs
In [108]:
dfs=pd.read_html('https://en.wikipedia.org/wiki/Research_stations_in_Antarctica#List_of_research_stations')
In [109]:
dfr=pd.read_html('https://en.wikipedia.org/wiki/Antarctic_field_camps')
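pd.read_html returns every table it finds on a page, so it is worth confirming which index holds the station list before relying on dfs[1] below. A quick check (not part of the original run, assuming dfs from the cell above):
# List the shape of each parsed table to confirm dfs[1] is the station list
for idx, table in enumerate(dfs):
    print(idx, table.shape)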
In [110]:
df=dfs[1][1:]
In [111]:
df.columns=dfs[1].loc[0].values
In [112]:
df.to_excel('bases.xlsx')
In [113]:
import requests
In [114]:
url='https://en.wikipedia.org/wiki/Research_stations_in_Antarctica'
f=requests.get(url).content
soup = bs.BeautifulSoup(f, 'lxml')
parsed_table = soup.find_all('table')[1]
# read_html drops the hyperlinks, so the table is re-parsed with BeautifulSoup,
# keeping each station's article link by joining cell text and href with '#'
data = [[''.join(td.strings) + '#' + td.a['href'] if td.find('a') else
         ''.join(td.strings)
         for td in row.find_all('td')]
        for row in parsed_table.find_all('tr')]
headers = [''.join(row.strings)
           for row in parsed_table.find_all('th')]
df = pd.DataFrame(data[1:], columns=headers)
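Each linked cell now carries both the visible text and the article href, joined by '#'. An illustration of pulling them apart again (the exact content of row 0 depends on the page at scrape time):
# Illustration only: a linked cell looks roughly like 'Some Station#/wiki/Some_Station'
cell = df.iloc[0, 0]
name, href = cell.split('#')[0], cell.split('#')[1]
print(name.strip(), 'https://en.wikipedia.org' + href)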
In [279]:
stations = []
for i in df.T.items():                     # each column of df.T is one station row
    helper = {}
    # station name and article link (joined earlier with '#')
    dummy = i[1][0].split('#')
    dummy0 = dummy[0].split('[')[0].replace('\n', ' ')
    helper['name'] = dummy0
    helper['link'] = 'https://en.wikipedia.org' + dummy[1]
    # station type: normalize the various "summer"/"permanent" wordings
    dummy = i[1][2].replace('\n', ' ')
    if 'ummer since' in dummy: dummy = 'Permanent'
    dummy = dummy.split('[')[0]
    if 'emporary summer' in dummy: dummy = 'Summer'
    if 'intermittently Summer' in dummy: dummy = 'Summer'
    helper['type'] = dummy
    # operating country; multiple countries are joined with '|'
    dummy = i[1][3].split('#')[0].replace('\n', ' |').replace(']', '').replace('| |', '|')[1:]
    if dummy == '': dummy = 'Greenpeace'   # empty country cell belongs to the Greenpeace station
    helper['country'] = dummy
    # opening year: keep only the first token, patching one non-year cell
    dummy = i[1][4].replace('\n', ' ').split(' ')[0]
    if 'eteo' in dummy: dummy = '1958'
    helper['opened'] = dummy
    # operating program and location
    dummy = i[1][5].split('#')[0].replace('\n', ' | ').replace('| and |', '|').split('[')[0].replace('.', '')
    helper['program'] = dummy
    dummy = i[1][6].split('#')[0].replace('\n', ', ').replace('| and |', '|').split('[')[0].replace('.', '')
    helper['location'] = dummy
    # status: map the free-text status column to 'Active' or a closing year
    dummy = i[1][7].replace('\n', ' ')
    if ' ' in dummy:
        if 'Active' in dummy: dummy = 'Active'
        elif 'Relocated to Union Glacier' in dummy: dummy = '2014'
        elif 'Unmanned activity' in dummy: dummy = 'Active'
        elif 'Abandoned and lost' in dummy: dummy = '1999'
        elif 'Dismantled 1992' in dummy: dummy = '1992'
        elif 'Temporary abandoned since March 2017' in dummy: dummy = 'Active'
        elif 'Reopened 23 November 2017' in dummy: dummy = 'Active'
        else: dummy = dummy.split(' ')[1]
    if dummy == 'Active':
        helper['active'] = True
        helper['closed'] = '9999'
    else:
        helper['active'] = False
        helper['closed'] = dummy
    if dummy == 'Closed':                  # rows whose status is just 'Closed' are kept as active here
        helper['active'] = True
        helper['closed'] = '9999'
    # coordinates: the decimal 'lat; lon' pair sits after the second '/' of the
    # Wikipedia coordinate string
    dummy = i[1][8].replace('\n', ', ').split('/')[2].split('(')[0].split('#')[0].split(',')[0].split('Coor')[0].split(u'\ufeff')[0].split(';')
    helper['latitude'] = dummy[0][1:]
    helper['longitude'] = dummy[1][1:]
    stations.append(helper)
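The coordinate parsing assumes the usual Wikipedia {{coord}} output, where the decimal "lat; lon" pair follows the second '/'. A small sketch on a made-up sample string of that shape:
# Made-up sample in the assumed Wikipedia coordinate format
sample = "69°00′S 39°35′E / 69.000°S 39.583°E / -69.000; 39.583"
decimal = sample.split('/')[2]             # ' -69.000; 39.583'
lat, lon = [p.strip() for p in decimal.split(';')]
print(float(lat), float(lon))              # -69.0 39.583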
In [280]:
dta=pd.DataFrame(stations)
dta.to_excel('stations.xlsx')
In [281]:
import cesiumpy
In [282]:
dta
Out[282]:
In [283]:
iso2=pd.read_html('https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2')[2]
In [284]:
iso22=iso2[1:].set_index(1)[[0]]
In [285]:
def cc(c):
    # map a country string to an ISO 3166-1 alpha-2 code; the hard-coded cases
    # cover names that differ from (or are missing in) the scraped ISO table
    d = c.split('|')[0].strip()
    if d == 'Czech Republic': return 'CZ'
    elif d == 'Greenpeace': return 'AQ'
    elif d == 'Soviet Union': return 'RU'
    elif d == 'Russia': return 'RU'
    elif d == 'United States': return 'US'
    elif d == 'East Germany': return 'DE'
    elif d == 'United Kingdom': return 'GB'
    elif d == 'South Korea': return 'KR'
    else: return iso22.loc[d][0]

flags = []
for i in dta['country']:
    flags.append('flags/glass2/' + cc(i).lower() + '.png')
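A hedged sanity check (not in the original): any country missing from both the hard-coded cases and the ISO table raises a KeyError, so it can help to list offenders up front.
# List any country strings that do not resolve to an ISO code
for c in dta['country'].unique():
    try:
        cc(c)
    except KeyError:
        print('no ISO code found for', c)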
In [286]:
dta['flag']=flags
In [287]:
dta[['name','link','active','type']].to_excel('links.xlsx')
The summer and winter population figures were filled in manually in pop.xlsx.
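The assignments below align pop.xlsx with dta by row position, so the assumed layout is one row per station, in dta's order, with 'summer' and 'winter' columns. A minimal check of that assumption (not in the original run):
# Verify the assumed pop.xlsx layout before merging it into dta
pop_check = pd.read_excel('pop.xlsx')
assert len(pop_check) == len(dta)
assert {'summer', 'winter'} <= set(pop_check.columns)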
In [292]:
pop=pd.read_excel('pop.xlsx')
In [293]:
dta['summer']=pop['summer']
dta['winter']=pop['winter']
In [294]:
dta.to_excel('alldata.xlsx')
In [430]:
dta.set_index('name').T.to_json('antarctica.json')
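Calling to_json on the transposed frame keys the file by station name. A quick round-trip sketch (not in the original run):
# Each top-level key in antarctica.json is a station name mapping to its fields
import json
with open('antarctica.json') as fh:
    antarctica = json.load(fh)
print(len(antarctica), 'stations written to antarctica.json')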
In [431]:
import codecs

v = cesiumpy.Viewer(animation=False, baseLayerPicker=True, fullscreenButton=True,
                    geocoder=False, homeButton=False, infoBox=True, sceneModePicker=True,
                    selectionIndicator=True, navigationHelpButton=False,
                    timeline=False, navigationInstructionsInitiallyVisible=True)
x = dta[dta['active']]
for i, row in x.iterrows():
    r = 0.7              # exponent that compresses the population scale
    t = 10000            # metres of cylinder length per (population**r) unit
    lon = float(row['longitude'])
    lat = float(row['latitude'])
    # grey base cylinder (unit height), centred at half its length so it sits on the ground
    l0 = float(1**r) * t
    cyl = cesiumpy.Cylinder(position=[lon, lat, l0 / 2.], length=l0,
                            topRadius=2.5e4, bottomRadius=2.5e4, material='grey',
                            name=row['name'])
    v.entities.add(cyl)
    # crimson cylinder scaled by summer population
    l1 = (float(row['summer'])**r) * t
    cyl = cesiumpy.Cylinder(position=[lon, lat, l1 / 2.], length=l1 * 1.1,
                            topRadius=3e4, bottomRadius=3e4, material='crimson',
                            name=row['name'])
    v.entities.add(cyl)
    # royal blue cylinder scaled by winter population
    l2 = float(row['winter'])**r * t
    cyl = cesiumpy.Cylinder(position=[lon, lat, l2 / 2.], length=l2 * 1.2,
                            topRadius=6e4, bottomRadius=6e4, material='royalBlue',
                            name=row['name'])
    v.entities.add(cyl)
    pin = cesiumpy.Pin.fromText(row['name'], color=cesiumpy.color.GREEN)  # created but not added to the scene
    # flag billboard and station label floating above the summer cylinder
    b = cesiumpy.Billboard(position=[lon, lat, l1 * 1.1 + 70000],
                           image=row['flag'], scale=0.6,
                           name=row['name'], pixelOffset=(0, 0))
    v.entities.add(b)
    label = cesiumpy.Label(position=[lon, lat, l1 * 1.1 + 70000],
                           text=row['name'], scale=0.6, name=row['name'],
                           pixelOffset=(0, 22))
    v.entities.add(label)

with codecs.open("index.html", "w", encoding="utf-8") as f:
    f.write(v.to_html())
v
Out[431]: