In [1]:
# -*- coding: UTF-8 -*-
# Render our plots inline
%matplotlib inline
import os
import shutil

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn
# Never truncate wide DataFrames: show every column when a frame is displayed
pd.options.display.max_columns = None
# Reference for color palettes: http://web.stanford.edu/~mwaskom/software/seaborn/tutorial/color_palettes.html
# Use Source Sans Pro as the font family for matplotlib figures
matplotlib.rcParams['font.family'] = 'Source Sans Pro'
In [2]:
from makerlabs import fablabs_io
from makerlabs import diybio_org
from makerlabs import hackerspaces_org
In [3]:
flio = fablabs_io.get_labs(format="pandas")
In [4]:
# Add the OpenCage API Key here, from https://geocoder.opencagedata.com/api
diybio = diybio_org.get_labs(format="pandas", open_cage_api_key="xxxxxxx")
In [5]:
# Add the OpenCage API Key here, from https://geocoder.opencagedata.com/api
hackerspaces = hackerspaces_org.get_labs(format="pandas", open_cage_api_key="xxxxxxx")
In [6]:
flio.head()
Out[6]:
In [7]:
diybio.head()
Out[7]:
In [8]:
hackerspaces.head()
Out[8]:
In [9]:
flio.to_csv("flio.csv", encoding='utf-8')
In [10]:
hackerspaces.to_csv("hs.csv", encoding='utf-8')
In [11]:
diybio.to_csv("diybio.csv", encoding='utf-8')
In [12]:
flio["country_code"].unique()
Out[12]:
In [13]:
flio["country_code"].value_counts()
Out[13]:
In [14]:
flio["country_code"].value_counts().plot(kind="barh", figsize=(20,30))
Out[14]:
In [15]:
flio["country"].value_counts().plot(kind="barh", figsize=(20,30))
Out[15]:
In [16]:
flio["city"].value_counts().plot(kind="barh", figsize=(20,30))
Out[16]:
In [17]:
flio["continent"].value_counts().plot(kind="barh", figsize=(20,10))
Out[17]:
In [18]:
# Concatenate the three DataFrames, in order to handle global data
total_data = pd.concat([flio, diybio, hackerspaces])
In [19]:
# Number of each type of labs
number_of_labs = total_data["lab_type"].value_counts()
number_of_labs.plot(kind="barh", figsize=(10,5))
Out[19]:
In [20]:
# number_of_labs is a Series, convert it into a DataFrame with a cluster column and save it
csv0 = pd.DataFrame()
csv0["count"] = number_of_labs
csv0["lab_type"] = number_of_labs.index
csv0.insert(0, "cluster", "total")
csv0.set_index('cluster', inplace=True)
csv0 = csv0[["lab_type", "count"]]
In [21]:
csv0
Out[21]:
In [22]:
csv0.to_csv("00.csv", encoding='utf-8')
In [23]:
# Number of each type of labs per continent
csv1 = total_data.groupby(['continent', 'lab_type']).size().to_frame('count')
csv1.plot(kind="barh", figsize=(10,5))
Out[23]:
In [24]:
csv1
Out[24]:
In [25]:
csv1.to_csv("01.csv", encoding='utf-8', header=True)
In [26]:
# Number of each type of labs per country
csv2 = total_data.groupby(['country', 'lab_type']).size().to_frame('count')
csv2.plot(kind="barh", figsize=(10,5))
Out[26]:
In [27]:
csv2.head()
Out[27]:
In [28]:
csv2.to_csv("02. csv", encoding='utf-8', header=True)
In [29]:
# Number of each type of labs per city
csv3 = total_data.groupby(['city', 'lab_type']).size().to_frame('count')
In [30]:
csv3.tail()
Out[30]:
In [31]:
csv3.sort_values(by="count").head()
Out[31]:
In [32]:
csv3.to_csv("03.csv", encoding='utf-8', header=True)
In [33]:
# Count labs
csv4 = flio.groupby(["country", "country_code"])['country'].count().reset_index(name="count").set_index("country")
In [34]:
# Reorder columns for plotly
csv4 = csv4[["count", "country_code"]]
In [35]:
# Check the data
csv4.head()
Out[35]:
In [36]:
# Save file
csv4.to_csv("04.csv", encoding='utf-8', header=True)
In [37]:
# Count labs
csv5 = hackerspaces.groupby(["country", "country_code"])['country'].count().reset_index(name="count").set_index("country")
In [38]:
# Reorder columns for plotly
csv5 = csv5[["count", "country_code"]]
In [39]:
# Check the data
csv5.head()
Out[39]:
In [40]:
# Save file
csv5.to_csv("05.csv", encoding='utf-8', header=True)
In [41]:
# Count labs
csv6 = diybio.groupby(["country", "country_code"])['country'].count().reset_index(name="count").set_index("country")
In [42]:
# Reorder columns for plotly
csv6 = csv6[["count", "country_code"]]
In [43]:
# Check the data
csv6.head()
Out[43]:
In [44]:
# Save file
csv6.to_csv("06.csv", encoding='utf-8', header=True)
In [ ]: