Table Conversions



In [1]:

    
import requests



In [2]:

    
from io import BytesIO



In [3]:

    
import pandas as pd



In [4]:

    
spreadsheet_url = "https://docs.google.com/spreadsheets/d/1WbYov7KrliIvh9Ei485zxPF27Wx7-CYFZliNj3hZ9WE"



In [5]:

    
stream = requests.get("{0}/export?format=xlsx".format(spreadsheet_url))



In [6]:

    
full_table = pd.read_excel(BytesIO(stream.content), sheetname="Project-DB")



In [7]:

    
full_table.head()









    Out[7]:






  
    
      
      url
      title
      kind
      area
      status
      description
      logo
      hashtags
      categories
      orgacontact_name
      ...
      code_repository
      programming_languages
      languages
      organization_type
      organization_name
      code_license
      releasedate
      entrydate
      software_development_needs
      random_generated_key
    
  
  
    
      0
      http://afeefa.de/
      Afeefa
      website
      Dresden
      NaN
      commented map for Dresden, including lots of d...
      NaN
      NaN
      information, map, event, donation, material
      NaN
      ...
      NaN
      NaN
      German, English, Arabic, Farsi, French, Serbia...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      1
      http://asyl-in-moenchengladbach.de
      Asyl in Mönchengladbach
      website
      Mönchengladbach
      NaN
      Vernetzungsportal für Hilfsangebote und Hilfes...
      NaN
      NaN
      coordination, donation, volunteer, information
      NaN
      ...
      NaN
      NaN
      Google Translate
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      2
      http://familylinks.icrc.org/europe/en/Pages/Ho...
      Trace the face
      website
      international
      NaN
      A number of National Red Cross Societies in Eu...
      NaN
      NaN
      coordination, family reunion,
      NaN
      ...
      NaN
      NaN
      English, Arabic, French, Spanish
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      3
      http://germany-says-welcome.de/
      Germany Says Welcome
      Website, App,
      Germany
      NaN
      Helping refugees by providing a map with all a...
      NaN
      NaN
      information, map, volunteer, refugee
      NaN
      ...
      NaN
      Weblanguages, Android Framework, Java (so far)
      English, German (Translation part is Arabic too)
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      4
      http://github.com/finngaida/refugees
      Moin Refugees!
      App
      Hamburg so far
      NaN
      Help refugees and helpers communicate via the ...
      NaN
      NaN
      instant help
      NaN
      ...
      NaN
      Objective-C, Java EE
      English, german
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
  

5 rows × 32 columns



In [8]:

    
curated = full_table[:38]



In [9]:

    
curated.columns









    Out[9]:





Index(['url', 'title', 'kind', 'area', 'status', 'description', 'logo',
       'hashtags', 'categories', 'orgacontact_name', 'orgacontact_email',
       'orgacontact_language', 'contact_email', 'contact_phone',
       'contact_socialmedia_fb', 'contact_socialmedia_twitter',
       'contact_adress_street', 'contact_adress_housenr',
       'contact_adress_postalcode', 'contact_adress_city',
       'contact_adress_state', 'contact_adress_country', 'code_repository',
       'programming_languages', 'languages', 'organization_type',
       'organization_name', 'code_license', 'releasedate', 'entrydate',
       'software_development_needs', 'random_generated_key'],
      dtype='object')

The following columns are ignored for later processing:

kind, is a category
area, is geospatial information
status, has categories
hashtags, become project tags
categories, need to be merged with our list
orgacontact_*, will become part of contact information
contact_*, will become part of contact information
programming_languages, are categories
languages, part of contact details
organization_type, is a category
entrydate, automatic DB entry
software_development_needs, is now a Boolean 'dev_support_needed' combined with categories of platform, etc.
random_generated_key, should be part of the user information who is entitled to modifying the DB

Create Primary 'projects' Table



In [10]:

    
matching = {
    "url": "web_url",
    "title": "title",
    "description": "description_content",
    "logo": "logo_url",
    "code_repository": "repository_url",
    "organization_name": "organization_name",
    "code_license": "code_license",
    "releasedate": "publishedAt"
}



In [11]:

    
projects_columns = ["id", "user_id", "title", "description_tech", "description_content",
              "code_license", "data_license", "logo_url", "web_url", "repository_url",
              "organization_name", "dev_support_needed", "general_support_needed",
              "publishedAt", "createddAt", "updatedAt"]



In [12]:

    
projects_export = curated[list(matching.keys())].copy()
projects_export.columns = [matching[key] for key in projects_export.columns]
projects_export["id"] = projects_export.index + 1



In [13]:

    
for key in projects_columns:
    if key not in projects_export.columns:
        projects_export[key] = None



In [14]:

    
projects_export.head()









    Out[14]:






  
    
      
      repository_url
      web_url
      code_license
      title
      logo_url
      publishedAt
      organization_name
      description_content
      id
      user_id
      description_tech
      data_license
      dev_support_needed
      general_support_needed
      createddAt
      updatedAt
    
  
  
    
      0
      NaN
      http://afeefa.de/
      NaN
      Afeefa
      NaN
      NaN
      NaN
      commented map for Dresden, including lots of d...
      1
      None
      None
      None
      None
      None
      None
      None
    
    
      1
      NaN
      http://asyl-in-moenchengladbach.de
      NaN
      Asyl in Mönchengladbach
      NaN
      NaN
      NaN
      Vernetzungsportal für Hilfsangebote und Hilfes...
      2
      None
      None
      None
      None
      None
      None
      None
    
    
      2
      NaN
      http://familylinks.icrc.org/europe/en/Pages/Ho...
      NaN
      Trace the face
      NaN
      NaN
      NaN
      A number of National Red Cross Societies in Eu...
      3
      None
      None
      None
      None
      None
      None
      None
    
    
      3
      NaN
      http://germany-says-welcome.de/
      NaN
      Germany Says Welcome
      NaN
      NaN
      NaN
      Helping refugees by providing a map with all a...
      4
      None
      None
      None
      None
      None
      None
      None
    
    
      4
      NaN
      http://github.com/finngaida/refugees
      NaN
      Moin Refugees!
      NaN
      NaN
      NaN
      Help refugees and helpers communicate via the ...
      5
      None
      None
      None
      None
      None
      None
      None



In [15]:

    
projects_export.to_csv("projects.csv", index=False, header=False, columns=projects_columns)

Create 'contact_informations'



In [ ]:

Create 'projects_categories' Table



In [16]:

    
s = pd.DataFrame(curated["categories"].str.split(',').tolist()).stack()
print(len(s))
s.head()









    



89






    Out[16]:





0  0    information
   1            map
   2          event
   3       donation
   4       material
dtype: object



In [17]:

    
s.index = s.index.droplevel(-1)



In [18]:

    
s.name = 'category'



In [19]:

    
s.head()









    Out[19]:





0    information
0            map
0          event
0       donation
0       material
Name: category, dtype: object



In [27]:

    
category_join = projects_export.join(s)



In [28]:

    
category_join.head()









    Out[28]:






  
    
      
      repository_url
      web_url
      code_license
      title
      logo_url
      publishedAt
      organization_name
      description_content
      id
      user_id
      description_tech
      data_license
      dev_support_needed
      general_support_needed
      createddAt
      updatedAt
      category
    
  
  
    
      0
      NaN
      http://afeefa.de/
      NaN
      Afeefa
      NaN
      NaN
      NaN
      commented map for Dresden, including lots of d...
      1
      None
      None
      None
      None
      None
      None
      None
      information
    
    
      0
      NaN
      http://afeefa.de/
      NaN
      Afeefa
      NaN
      NaN
      NaN
      commented map for Dresden, including lots of d...
      1
      None
      None
      None
      None
      None
      None
      None
      map
    
    
      0
      NaN
      http://afeefa.de/
      NaN
      Afeefa
      NaN
      NaN
      NaN
      commented map for Dresden, including lots of d...
      1
      None
      None
      None
      None
      None
      None
      None
      event
    
    
      0
      NaN
      http://afeefa.de/
      NaN
      Afeefa
      NaN
      NaN
      NaN
      commented map for Dresden, including lots of d...
      1
      None
      None
      None
      None
      None
      None
      None
      donation
    
    
      0
      NaN
      http://afeefa.de/
      NaN
      Afeefa
      NaN
      NaN
      NaN
      commented map for Dresden, including lots of d...
      1
      None
      None
      None
      None
      None
      None
      None
      material

Load categories. Create dict to ID. Columns:

project_id
category_node_id



In [22]:

    
category_nodes_translations_columns = ["id", "category_node_id", "title", "locale"]



In [23]:

    
categories_german = pd.read_csv("category_nodes_translations.csv", header=None, names=category_nodes_translations_columns)



In [24]:

    
categories_german.head()









    Out[24]:






  
    
      
      id
      category_node_id
      title
      locale
    
  
  
    
      0
      1
      1
      ...
      de
    
    
      1
      2
      2
      Abschiebung
      de
    
    
      2
      3
      3
      Ankunft
      de
    
    
      3
      4
      4
      Arbeit
      de
    
    
      4
      5
      5
      Art d. V.
      de



In [25]:

    
title2id = dict(categories_german[["title", "category_node_id"]].itertuples(index=False))

Try to find categories from project table in the existing list. Will probably introduce a lot of missing values. A real string search (using different languages) would yield better results.



In [29]:

    
category_join["category_node_id"] = [title2id.get(category) for category in category_join["category"]]



In [32]:

    
project_categories_export = category_join[["id", "category_node_id", "category"]].copy()



In [34]:

    
project_categories_export.rename(columns={"id": "project_id"}, inplace=True)



In [36]:

    
project_categories_export.dropna(subset=["category_node_id"])









    Out[36]:






  
    
      
      project_id
      category_node_id
      category

Empty table.

	url	title	kind	area	status	description	logo	hashtags	categories	orgacontact_name	...	code_repository	programming_languages	languages	organization_type	organization_name	code_license	releasedate	entrydate	software_development_needs	random_generated_key
0	http://afeefa.de/	Afeefa	website	Dresden	NaN	commented map for Dresden, including lots of d...	NaN	NaN	information, map, event, donation, material	NaN	...	NaN	NaN	German, English, Arabic, Farsi, French, Serbia...	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	http://asyl-in-moenchengladbach.de	Asyl in Mönchengladbach	website	Mönchengladbach	NaN	Vernetzungsportal für Hilfsangebote und Hilfes...	NaN	NaN	coordination, donation, volunteer, information	NaN	...	NaN	NaN	Google Translate	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	http://familylinks.icrc.org/europe/en/Pages/Ho...	Trace the face	website	international	NaN	A number of National Red Cross Societies in Eu...	NaN	NaN	coordination, family reunion,	NaN	...	NaN	NaN	English, Arabic, French, Spanish	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	http://germany-says-welcome.de/	Germany Says Welcome	Website, App,	Germany	NaN	Helping refugees by providing a map with all a...	NaN	NaN	information, map, volunteer, refugee	NaN	...	NaN	Weblanguages, Android Framework, Java (so far)	English, German (Translation part is Arabic too)	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	http://github.com/finngaida/refugees	Moin Refugees!	App	Hamburg so far	NaN	Help refugees and helpers communicate via the ...	NaN	NaN	instant help	NaN	...	NaN	Objective-C, Java EE	English, german	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	id	category_node_id	title	locale
0	1	1	...	de
1	2	2	Abschiebung	de
2	3	3	Ankunft	de
3	4	4	Arbeit	de
4	5	5	Art d. V.	de