This file covers German power plants. It downloads the power plant list from the German Federal Network Agency (BNetzA) and augments it with more information.
In [ ]:
# Import all functions from external file
# (shared helpers: getbnetzalist, getubalist, getmatchinglist, logger, pandas/numpy aliases, ...)
from download_and_process_DE_functions import *
# Jupyter functions
%matplotlib inline
The original data can either be downloaded from the original data sources as specified below or from the opsd-Server. Default option is to download from the original sources as the aim of the project is to stay as close to original sources as possible. However, if problems with downloads e.g. due to changing urls occur, you can still run the script with the original data from the opsd_server.
In [ ]:
# Source selection: download from the original agencies (default) or fall
# back to the snapshot on the OPSD data server.
download_from = 'original_sources'
#download_from = 'opsd_server'
In [ ]:
if download_from == 'original_sources':
    # BNetzA power plant list (CSV export from the BNetzA website)
    url_bnetza = ('http://www.bundesnetzagentur.de/SharedDocs/Downloads/DE/'
                  'Sachgebiete/Energie/Unternehmen_Institutionen/Versorgungssicherheit/'
                  'Erzeugungskapazitaeten/Kraftwerksliste/Kraftwerksliste_CSV.csv'
                  '?__blob=publicationFile&v=16')
    # UBA power plant list (plants >= 100 MW, XLS)
    url_uba = ('https://www.umweltbundesamt.de/sites/default/files/medien/372/'
               'dokumente/kraftwerke_de_ab_100_mw_0.xls')
In [ ]:
if download_from == 'opsd_server':
    # Fallback: fetch the archived original input files from the OPSD data
    # server (useful when the agencies change or break their URLs).
    opsd_base = 'http://data.open-power-system-data.org/conventional_power_plants/'
    opsd_version = '2018-12-20'
    # BNetzA power plant list
    url_bnetza = opsd_base + opsd_version + '/original_data/Kraftwerksliste_CSV.csv'
    # UBA power plant list
    url_uba = opsd_base + opsd_version + '/original_data/kraftwerke_de_ab_100_mw_0.xls'
Functions used multiple times within this script are now located in a separate file called download_and_process_DE_functions.py
This section downloads the BNetzA power plant list and converts it to a pandas data frame
In [ ]:
# Download the BNetzA power plant list and load it into a DataFrame.
plantlist = getbnetzalist(url_bnetza)
# Drop artefact columns that come from empty CSV headers ('Unnamed: ...').
unnamed_cols = [col for col in plantlist.columns if 'Unnamed:' in col]
plantlist.drop(unnamed_cols, axis=1, inplace=True)
plantlist.head()
This section downloads the power plant list from the German Federal Environment Agency (UBA) and converts it to a pandas data frame.
In [ ]:
# Download the UBA power plant list (plants >= 100 MW) into a DataFrame.
plantlist_uba = getubalist(url_uba)
plantlist_uba.head()
A dictionary mapping the original column names to the new column names is created. This dictionary is used to translate the column names.
In [ ]:
# Translation of the German BNetzA/UBA column headers to English identifiers.
# Several historical variants of the status column header all map to 'status'.
dict_columns = {
    'Kraftwerksnummer Bundesnetzagentur':
        'id',
    'Unternehmen':
        'company',
    'Kraftwerksname':
        'name',
    'PLZ\n(Standort Kraftwerk)':
        'postcode',
    'Ort\n(Standort Kraftwerk)':
        'city',
    'Straße und Hausnummer (Standort Kraftwerk)':
        'street',
    'Bundesland':
        'state',
    'Blockname':
        'block',
    ('Aufnahme der kommerziellen Stromerzeugung der derzeit in Betrieb '
     'befindlichen Erzeugungseinheit\n(Datum/Jahr)'):
        'commissioned',
    ('Kraftwerksstatus \n(in Betrieb/\nvorläufig stillgelegt/\nsaisonale '
     'Konservierung\nReservekraftwerk/\nSonderfall)'):
        'status',
    ('Kraftwerksstatus \n(in Betrieb/\nvorläufig stillgelegt/\nsaisonale '
     'Konservierung\nGesetzlich an Stilllegung gehindert/\nSonderfall)'):
        'status',
    ('Kraftwerksstatus \n(in Betrieb/\nvorläufig stillgelegt/\nsaisonale '
     'Konservierung\nNetzreserve/ Sicherheitsbereitschaft/\nSonderfall)'):
        'status',
    'Energieträger':
        'fuel_basis',
    ('Spezifizierung "Mehrere Energieträger" und "Sonstige Energieträger" - '
     'Hauptbrennstoff'): 'fuel_multiple1',
    'Spezifizierung "Mehrere Energieträger" - Zusatz- / Ersatzbrennstoffe':
        'fuel_multiple2',
    ('Auswertung\nEnergieträger (Zuordnung zu einem Hauptenergieträger bei '
     'Mehreren Energieträgern)'):
        'fuel',
    'Vergütungsfähig nach EEG\n(ja/nein)':
        'eeg',
    'Wärmeauskopplung (KWK)\n(ja/nein)':
        'chp',
    'Netto-Nennleistung (elektrische Wirkleistung) in MW':
        'capacity',
    ('Bezeichnung Verknüpfungspunkt (Schaltanlage) mit dem Stromnetz der '
     'Allgemeinen Versorgung gemäß Netzbetreiber'):
        'network_node',
    'Netz- oder Umspannebene des Anschlusses in kV':
        'voltage',
    'Name Stromnetzbetreiber':
        'network_operator',
    'Kraftwerksname / Standort':
        'uba_name',
    'Betreiber ':
        'uba_company',
    'Standort-PLZ':
        'uba_postcode',
    'Kraftwerksstandort':
        'uba_city',
    'Elektrische Bruttoleistung (MW)':
        'uba_capacity',
    'Fernwärme-leistung (MW)':
        'uba_chp_capacity',
    'Inbetriebnahme (ggf. Ertüchtigung)':
        'uba_commissioned',
    'Anlagenart':
        'uba_technology',
    'Primärenergieträger':
        'uba_fuel',
}
plantlist.rename(columns=dict_columns, inplace=True)
# Check if all columns have been translated
# (a commented-out duplicate of the condition was removed here)
for columnname in plantlist.columns:
    if columnname not in dict_columns.values():
        logger.error("Untranslated column: " + columnname)
In [ ]:
# Remove line breaks from the fuel names before translating them.
plantlist['fuel'] = plantlist['fuel'].str.replace('\n', ' ')
# Delete entries without fuel and name
plantlist = plantlist.dropna(subset=['fuel', 'name'])
# German fuel names -> English fuel categories. Note the variant with a
# trailing space ('Sonstige Energieträger (nicht erneuerbar) ') occurs in
# the source data and is kept deliberately.
dict_fuels = {
    'Steinkohle': 'Hard coal',
    'Erdgas': 'Natural gas',
    'Braunkohle': 'Lignite',
    'Kernenergie': 'Nuclear',
    'Pumpspeicher': 'Hydro PSP',
    'Biomasse': 'Biomass and biogas',
    'Mineralölprodukte': 'Oil',
    'Laufwasser': 'Hydro',
    'Sonstige Energieträger (nicht erneuerbar) ': 'Other fuels',
    'Abfall': 'Waste',
    'Speicherwasser (ohne Pumpspeicher)': 'Hydro reservoir',
    'Unbekannter Energieträger (nicht erneuerbar)': 'Other fuels',
    'Sonstige Energieträger (nicht erneuerbar)': 'Other fuels',
    'Mehrere Energieträger (nicht erneuerbar)': 'Mixed fossil fuels',
    'Deponiegas': 'Sewage and landfill gas',
    'Windenergie (Onshore-Anlage)': 'Onshore',
    'Windenergie (Onshore-Anlage)neu': 'Onshore',
    'Windenergie (Offshore-Anlage)': 'Offshore',
    'Solare Strahlungsenergie': 'Solar',
    'Klärgas': 'Sewage and landfill gas',
    'Geothermie': 'Geothermal',
    'Grubengas': 'Other fossil fuels',
    'Sonstige Speichertechnologien': 'Storage Technologies'
}
plantlist["fuel"].replace(dict_fuels, inplace=True)
# Check if all fuels have been translated
# (a leftover debug print of dict_fuels.values() was removed here)
for fuelname in plantlist["fuel"].unique():
    if fuelname not in dict_fuels.values():
        logger.error("Untranslated fuel: " + fuelname)
In [ ]:
# German plant status strings (including capitalisation variants and
# '(mit/ohne StA)' suffixes) -> normalised English status codes.
dict_plantstatus = {
    'in Betrieb': 'operating',
    'In Betrieb': 'operating',
    'vorläufig stillgelegt': 'shutdown_temporary',
    'Vorläufig stillgelegt': 'shutdown_temporary',
    'Vorläufig Stillgelegt': 'shutdown_temporary',
    'Sonderfall': 'special_case',
    'saisonale Konservierung': 'seasonal_conservation',
    'Saisonale Konservierung': 'seasonal_conservation',
    'Reservekraftwerk': 'reserve',
    'Endgültig Stillgelegt 2011': 'shutdown_2011',
    'Endgültig Stillgelegt 2012': 'shutdown_2012',
    'Endgültig Stillgelegt 2013': 'shutdown_2013',
    'Endgültig Stillgelegt 2014': 'shutdown_2014',
    'Endgültig Stillgelegt 2015': 'shutdown_2015',
    'Endgültig stillgelegt 2015': 'shutdown_2015',
    'Endgültig Stillgelegt 2016': 'shutdown_2016',
    'Gesetzlich an Stilllegung gehindert': 'operating',
    'Endgültig Stillgelegt 2011 (ohne StA)': 'shutdown_2011',
    'Endgültig Stillgelegt 2012 (ohne StA)': 'shutdown_2012',
    'Endgültig Stillgelegt 2013 (mit StA)': 'shutdown_2013',
    'Endgültig Stillgelegt 2013 (ohne StA)': 'shutdown_2013',
    'Endgültig Stillgelegt 2014 (mit StA)': 'shutdown_2014',
    'Endgültig Stillgelegt 2014 (ohne StA)': 'shutdown_2014',
    'Endgültig Stillgelegt 2015 (mit StA)': 'shutdown_2015',
    'Endgültig Stillgelegt 2015 (ohne StA)': 'shutdown_2015',
    'Endgültig Stillgelegt 2016 (mit StA)': 'shutdown_2016',
    'Sicherheitsbereitschaft': 'reserve',
    'Vorläufig Stillgelegt (mit StA)': 'shutdown_temporary',
    'Vorläufig Stillgelegt (ohne StA)': 'shutdown_temporary',
    'Endgültig Stillgelegt 2016 (ohne StA)': 'shutdown_2016',
    'Endgültig Stillgelegt 2017 (mit StA)': 'shutdown_2017',
    'Endgültig Stillgelegt 2017 (ohne StA)': 'shutdown_2017',
    'Endgültig Stillgelegt 2018 (mit StA)': 'shutdown_2018',
    'Endgültig Stillgelegt 2018 (ohne StA)': 'shutdown_2018',
    'gesetzlich an Stilllegung gehindert': 'operating',
    'Netzreserve': 'reserve',
}
plantlist['status'].replace(dict_plantstatus, inplace=True)
# Check if all status values have been translated
# (the original comment said "fuels" here - a copy-paste slip)
for status_name in plantlist['status'].unique():
    if status_name not in dict_plantstatus.values():
        logger.error('Untranslated plant status: ' + status_name)
In [ ]:
# yes/no normalisation, shared by the CHP flag here and the EEG flag below.
dict_yesno = {
    'Nein': 'no',
    'nein': 'no',
    'Ja': 'yes',
    'ja': 'yes',
}
plantlist['chp'].replace(dict_yesno, inplace=True)
# Check if all CHP flags have been translated (NaN is allowed)
for chp_flag in plantlist['chp'].unique():
    if (chp_flag not in dict_yesno.values()) & (str(chp_flag) != 'nan'):
        logger.error('Untranslated chp capability: ' + str(chp_flag))
In [ ]:
# Normalise the EEG flag with the same yes/no dictionary.
plantlist['eeg'].replace(dict_yesno, inplace=True)
# Check if all EEG flags have been translated (NaN is allowed)
for eeg_flag in plantlist['eeg'].unique():
    if (eeg_flag not in dict_yesno.values()) & (str(eeg_flag) != 'nan'):
        logger.error('Untranslated EEG type: ' + str(eeg_flag))
Translate the UBA Column names
In [ ]:
# Translate the UBA column headers to English identifiers.
dict_uba_columns = {
    'Kraftwerksname / Standort': 'uba_name',
    'Betreiber ': 'uba_company',
    'Standort-PLZ': 'uba_postcode',
    'Kraftwerksstandort': 'uba_city',
    'Elektrische Bruttoleistung (MW)': 'uba_capacity',
    'Fernwärme-leistung (MW)': 'uba_chp_capacity',
    'Inbetriebnahme (ggf. Ertüchtigung)': 'uba_commissioned',
    'Anlagenart': 'uba_technology',
    'Primärenergieträger': 'uba_fuel',
    'Bundesland': 'uba_state',
}
plantlist_uba.rename(columns=dict_uba_columns, inplace=True)
# Check if all columns have been translated
for column_name in plantlist_uba.columns:
    if column_name not in dict_uba_columns.values():
        logger.error('Untranslated column: ' + column_name)
# Prepare for matching: build the key '<uba_name>_<uba_fuel>'
plantlist_uba['uba_id_string'] = (plantlist_uba['uba_name']
                                  + '_'
                                  + plantlist_uba['uba_fuel'])
In [ ]:
# Keep the BNetzA id both as a column and as the index.
plantlist['bnetza_id'] = plantlist['id']
plantlist = plantlist.set_index('id')
# Remove line breaks inside the text columns.
text_columns = ['network_node', 'company', 'name', 'fuel', 'block',
                'network_operator', 'street', 'commissioned']
for column in text_columns:
    plantlist[column] = plantlist[column].str.replace('\n', ' ')
plantlist.head()
Manual adjustments:
In [ ]:
# Hand-researched fuel corrections for individual BNetzA entries.
manual_fuel_fixes = {
    'BNA0834': 'Natural gas',
    'BNA0662a': 'Hard coal',
    'BNA0662b': 'Hard coal',
}
for bnetza_id, corrected_fuel in manual_fuel_fixes.items():
    plantlist.loc[plantlist['bnetza_id'] == bnetza_id, 'fuel'] = corrected_fuel
In this section a hand-researched list is used to match the power plants from the UBA list to the BNetzA list.
In [ ]:
# read matching list (hand-researched BNetzA id <-> UBA id assignments)
matchinglist = getmatchinglist()
matchinglist.head()
Matching: 1-1 One BNetzA ID to one UBA-ID
In [ ]:
# Keep only matches where both the UBA id and the BNetzA id occur exactly
# once in the matching list (a strict 1:1 relation).
uba_is_unique = ~matchinglist.duplicated(subset=['uba_id_string'], keep=False)
bnetza_is_unique = ~matchinglist.duplicated(subset=['ID BNetzA'], keep=False)
match1t1 = matchinglist[uba_is_unique & bnetza_is_unique]
# Attach the UBA data for each matched plant.
match1t1 = pd.merge(match1t1, plantlist_uba, on='uba_id_string', how='left')
match1t1 = match1t1.set_index('ID BNetzA')
# Document how the match was established.
match1t1['merge_comment'] = ('List matching type: Single UBA power plant '
                             'assigned to single BNetzA power plant')
match1t1.head()
Match multiple BNetza IDs to one UBA ID
In [ ]:
# Matching structure (example):
# bnetza_id uba_id
# 1 1
# 2 1
# 3 1
# 4 2
# 5 2
# Several BNetzA blocks share one UBA plant: the UBA capacity (and CHP
# capacity) is distributed over the blocks proportionally to each block's
# BNetzA capacity.
# Get relevant entries from the matchinglist and merge the corresponding
# UBA Data to the list.
matchnt1= matchinglist[
(matchinglist.duplicated(subset=['uba_id_string'], keep=False) == True)
& (matchinglist.duplicated(subset=['ID BNetzA'], keep=False)== False)]
matchnt1 = pd.merge(matchnt1, plantlist_uba,
left_on='uba_id_string', right_on='uba_id_string', how='left')
matchnt1 = matchnt1.set_index('ID BNetzA')
# Import BNetzA Capacities and CHP criterion into matchnt1 dataframe
plantlist_capacities = pd.DataFrame(plantlist[['capacity', 'chp']]).rename(
columns={'capacity': 'capacity_bnetza', 'chp': 'chp_bnetza'})
matchnt1 = pd.merge(matchnt1, plantlist_capacities,
left_index=True, right_index=True, how='left')
# Get sum of BNetzA Capacitites for each UBA Index and merge into matchnt1 dataframe
plantlist_uba_capacitysum = pd.DataFrame(
matchnt1.groupby('uba_id_string').sum()['capacity_bnetza']).rename(
columns={'capacity_bnetza': 'capacity_bnetza_aggregate'})
matchnt1 = pd.merge(matchnt1, plantlist_uba_capacitysum,
left_on='uba_id_string', right_index=True, how='left')
# Scale UBA Capacities based BNetzA Data (proportional share per block)
matchnt1['uba_capacity_scaled'] = (matchnt1['uba_capacity']
* matchnt1['capacity_bnetza']
/ matchnt1['capacity_bnetza_aggregate'])
# determine sum of capacities with chp capability and add to matchnt1;
# CHP capacity is split only over blocks flagged chp == 'yes', so blocks
# without CHP keep NaN in the scaled column.
plantlist_uba_chp_capacities = matchnt1[(matchnt1['chp_bnetza'] == 'yes')]
plantlist_uba_chp_capacitysum = pd.DataFrame(
plantlist_uba_chp_capacities.groupby('uba_id_string')
.sum()['capacity_bnetza'])
plantlist_uba_chp_capacitysum = plantlist_uba_chp_capacitysum.rename(
columns={'capacity_bnetza': 'capacity_bnetza_with_chp'})
matchnt1 = pd.merge(matchnt1, plantlist_uba_chp_capacitysum,
left_on='uba_id_string', right_index=True, how='left',)
matchnt1['uba_chp_capacity'] = pd.to_numeric(matchnt1['uba_chp_capacity'], errors='coerce')
matchnt1['uba_chp_capacity_scaled'] = (matchnt1['uba_chp_capacity']
* matchnt1['capacity_bnetza']
/ matchnt1['capacity_bnetza_with_chp'])
# Change column names for merge later on: the scaled values become the
# canonical uba_* columns, the raw values are kept as *_original.
matchnt1['uba_chp_capacity_original'] = matchnt1['uba_chp_capacity']
matchnt1['uba_chp_capacity'] = matchnt1['uba_chp_capacity_scaled']
matchnt1['uba_capacity_original'] = matchnt1['uba_capacity']
matchnt1['uba_capacity'] = matchnt1['uba_capacity_scaled']
# Add comment documenting how the match was established
matchnt1['merge_comment'] = ('List matching type: UBA capacity distributed '
'proportionally to multiple BNetzA power plants')
matchnt1.head()
Matching: one BNetzA ID to multiple UBA IDs (1-n case)
In [ ]:
# The resulting DataFrame should be called "match1tn"
# Matching structure:
# bnetza_id uba_id
# 1 1
# 1 2
# 1 3
# 2 4
# 2 5
# One BNetzA plant corresponds to several UBA entries: the UBA capacities
# are aggregated (summed) onto the single BNetzA index.
# Get relevant entries from the matchinglist and merge the corresponding UBA Data to the list.
match1tn= matchinglist[
(matchinglist.duplicated(subset=['ID BNetzA'], keep=False) == True) &
(matchinglist.duplicated(subset=['uba_id_string'], keep=False)== False)]
match1tn = pd.merge(match1tn, plantlist_uba,
left_on='uba_id_string', right_on='uba_id_string', how='left')
match1tn = match1tn.set_index('ID BNetzA')
match1tn.head()
# Import BNetzA Capacities and CHP criterion into match1tn dataframe
plantlist_capacities = pd.DataFrame(plantlist[['capacity','chp']]).rename(
columns = {'capacity': 'capacity_bnetza', 'chp': 'chp_bnetza'})
match1tn = pd.merge(match1tn, plantlist_capacities,
left_index=True, right_index=True, how='left')
match1tn.index.names=['ID BNetzA']
match1tn.head()
# Get sum of UBA Capacitites per BNetzA Index and merge to match1tn dataframe
plantlist_bnetza_capacitysum = pd.DataFrame(
match1tn.groupby(match1tn.index).sum()['uba_capacity'])
plantlist_bnetza_capacitysum = plantlist_bnetza_capacitysum.rename(
columns={'uba_capacity':'uba_capacity_aggregate'})
match1tn = pd.merge(match1tn, plantlist_bnetza_capacitysum,
left_index=True, right_index=True, how='left')
match1tn['uba_chp_capacity'] = pd.to_numeric(match1tn['uba_chp_capacity'], errors='coerce')
match1tn
# Get sum of UBA CHP Capacities per BNetzA Index and merge to match1tn dataframe
plantlist_bnetza_chp_capacitysum = pd.DataFrame(
match1tn.groupby(match1tn.index).sum()['uba_chp_capacity'])
plantlist_bnetza_chp_capacitysum = plantlist_bnetza_chp_capacitysum.rename(
columns={'uba_chp_capacity': 'uba_chp_capacity_aggregate'})
match1tn = pd.merge(match1tn, plantlist_bnetza_chp_capacitysum,
left_index=True, right_index=True, how='left')
# Get UBA Technology for each BNetzA Index and merge into match1tn dataframe
## Option 1: Take all technologies and merge them
#match1tn['uba_technology_aggregate'] = pd.DataFrame(
# match1tn.groupby(match1tn.index)
# .transform(lambda x: ', '.join(x))['uba_technology'])
## Option 2 (currently preferred): Take technology with highest occurence
match1tn['uba_technology_aggregate'] = pd.DataFrame(
match1tn.groupby(match1tn.index)['uba_technology']
.agg(lambda x: x.value_counts().index[0]))
# Get UBA Plant name (all names per BNetzA index joined with ', ')
match1tn['uba_name_aggregate'] = pd.DataFrame(
match1tn.groupby(match1tn.index).transform(lambda x: ', '.join(x))['uba_name'])
# Get UBA company name (most frequent company per BNetzA index)
match1tn['uba_company_aggregate'] = pd.DataFrame(
match1tn.groupby(match1tn.index)['uba_company']
.agg(lambda x:x.value_counts().index[0]))
# Change column names for merge later on: aggregates become the canonical
# uba_* columns, the raw values are kept as *_original.
match1tn = match1tn.rename(
columns={'uba_chp_capacity': 'uba_chp_capacity_original',
'uba_capacity': 'uba_capacity_original',
'uba_chp_capacity_aggregate': 'uba_chp_capacity',
'uba_capacity_aggregate': 'uba_capacity'})
# Add comment documenting how the match was established
match1tn['merge_comment'] = ('List matching type: Multiple UBA capacities '
'aggregated to single BNetzA power plant')
# Drop duplicate rows and keep first entry (one row per BNetzA id)
match1tn = match1tn.reset_index().drop_duplicates(subset='ID BNetzA',keep='first').set_index('ID BNetzA')
match1tn.head()
In [ ]:
# Stack the three matching results (1:1, n:1, 1:n) into one frame and
# merge it into the plantlist on the BNetzA id index.
plantlist_uba_for_merge = match1t1.append(matchnt1).append(match1tn)
plantlist = pd.merge(plantlist, plantlist_uba_for_merge,
                     left_index=True, right_index=True, how='left')
plantlist.head()
Here, solar, wind onshore, and wind offshore technologies are deleted from the list, as they are handled by another datapackage. Furthermore, aggregate values are excluded as well.
In [ ]:
# Delete solar, wind onshore, and wind offshore (handled in a separate
# data package). Rows with NaN fuel are kept, as before.
plantlist = plantlist[~plantlist['fuel'].isin(['Solar', 'Onshore', 'Offshore'])]
# Delete aggregate values (virtual 'company' rows summarising small plants)
plantlist = plantlist[~plantlist['company'].isin(
    ['EEG-Anlagen < 10 MW', 'Nicht-EEG-Anlagen < 10 MW'])]
Extract the year when plants were shut down or retrofitted, using regular expressions
In [ ]:
# Extract the shutdown year from the status text (e.g. 'shutdown_2015')
# and normalise the status of those plants to plain 'shutdown'.
# (A dead assignment of the string 'NaN' that was immediately overwritten
# was removed; regex patterns are now raw strings.)
plantlist['shutdown'] = pd.to_numeric(
    plantlist['status'].str.extract(r'[\w].+(\d\d\d\d)', expand=False),
    errors='coerce')
plantlist.loc[plantlist['shutdown'] > 0, 'status'] = 'shutdown'
# Fill retrofit data column
# Identify retrofit dates in UBA list (year in parentheses = 'Ertüchtigung')
plantlist['retrofit'] = pd.to_numeric(
    plantlist['uba_commissioned'].str.extract(r'[(.+](\d\d\d\d)', expand=False),
    errors='coerce')
# Split multiple commissioning dates as listed in UBA: first, middle, and
# last 4-digit year found in the text.
plantlist['uba_commissioned_1'] = pd.to_numeric(
    plantlist['uba_commissioned'].str.extract(r'(\d\d\d\d)', expand=False),
    errors='coerce')
plantlist.loc[plantlist['uba_commissioned_1'].isnull(), 'uba_commissioned_1'] = pd.to_numeric(
    plantlist['uba_commissioned'].str.extract(r'(\d\d\d\d).+[\w]', expand=False),
    errors='coerce')
plantlist['uba_commissioned_2'] = pd.to_numeric(
    plantlist['uba_commissioned'].str.extract(r'[\w].+(\d\d\d\d).+[\w]', expand=False),
    errors='coerce')
plantlist['uba_commissioned_3'] = pd.to_numeric(
    plantlist['uba_commissioned'].str.extract(r'[\w].+(\d\d\d\d)', expand=False),
    errors='coerce')
# Blank out commissioning years that actually are the retrofit year.
plantlist.loc[plantlist['retrofit'] == plantlist['uba_commissioned_1'], 'uba_commissioned_1'] = ''
plantlist.loc[plantlist['retrofit'] == plantlist['uba_commissioned_2'], 'uba_commissioned_2'] = ''
plantlist.loc[plantlist['retrofit'] == plantlist['uba_commissioned_3'], 'uba_commissioned_3'] = ''
# Split multiple commissioning dates as listed in BNetzA
plantlist['commissioned_1'] = pd.to_numeric(
    plantlist['commissioned'].str.extract(r'(\d\d\d\d)', expand=False),
    errors='coerce')
plantlist.loc[plantlist['commissioned_1'].isnull(), 'commissioned_1'] = pd.to_numeric(
    plantlist['commissioned'].str.extract(r'(\d\d\d\d).+[\w]', expand=False),
    errors='coerce')
plantlist['commissioned_2'] = pd.to_numeric(
    plantlist['commissioned'].str.extract(r'[\w].+(\d\d\d\d).+[\w]', expand=False),
    errors='coerce')
plantlist['commissioned_3'] = pd.to_numeric(
    plantlist['commissioned'].str.extract(r'[\w].+(\d\d\d\d)', expand=False),
    errors='coerce')
# Show the shut-down plants for visual inspection
plantlist[plantlist['status'] == 'shutdown']
In [ ]:
# Numeric versions of capacity, commissioning year, and retrofit year.
plantlist['capacity_float'] = pd.to_numeric(
plantlist['capacity'],
errors='coerce')
# NOTE(review): 'commissioned' is still a text column here while the
# *_1/_2/_3 columns are numeric; the row-wise max over mixed types relies
# on pandas' comparison behaviour - confirm this picks the intended year.
plantlist['commissioned_float'] = pd.to_numeric(
plantlist[['commissioned','commissioned_1','commissioned_2','commissioned_3']].max(axis=1),
errors='coerce')
plantlist['retrofit_float'] = pd.to_numeric(
plantlist['retrofit'],
errors='coerce')
plantlist.head()
"Technology" describes the turbine specification etc., and "type" determines how the plant is used.
In [ ]:
# Split uba_technology information into technology (GT, CC, ...) and
# type (HKW, IKW, ...): both new columns start from the raw UBA value and
# are then translated with separate dictionaries.
plantlist['technology'] = plantlist['uba_technology']
plantlist['type'] = plantlist['uba_technology']
# German technology abbreviations -> English technology names
dict_technology = {
    'GT': 'Gas turbine',
    'DT': 'Steam turbine',
    'GT / DT': 'Combined cycle',
    'GuD': 'Combined cycle',
    'DKW': 'Steam turbine',
    'LWK': 'Run-of-river',
    'PSW': 'Pumped storage',
    'DWR': 'Steam turbine', #Pressurized water reactor
    'G/AK': 'Gas turbine', #GT with heat recovery
    'SWR': 'Steam turbine', #boiling water reactor
    'SWK': 'Reservoir', #storage power plant
    'SSA': '', #bus bar
    'HKW (DT)': 'Steam turbine',
    'HKW / GuD': 'Combined cycle',
    'GuD / HKW': 'Combined cycle',
    'IKW / GuD': 'Combined cycle',
    'IKW /GuD': 'Combined cycle',
    'HKW / SSA': '',
    'IKW / SSA': '',
    'HKW': '',
    'IKW': '',
    'IKW / HKW': '',
    'WEA': 'WT',
    'IKW / HKW / GuD' : 'Combined cycle',
}
plantlist['technology'].replace(dict_technology, inplace=True)
plantlist['technology'].unique()
# Check if all technologies have been translated (NaN is allowed)
for technology in plantlist['technology'].unique():
    if (technology not in dict_technology.values()) & (str(technology) != 'nan'):
        logger.error('Untranslated technology: ' + str(technology))
# Translate types: CHP (Heizkraftwerk) vs IPP (Industriekraftwerk)
dict_type = {
    'HKW': 'CHP', #thermal power plant,
    'HKW (DT)': 'CHP',
    'IKW': 'IPP', #industrial power plant
    'HKW / GuD': 'CHP',
    'GuD / HKW': 'CHP',
    'IKW / GuD': 'IPP',
    'IKW /GuD': 'IPP',
    'IKW / SSA': 'IPP',
    'HKW / SSA': 'CHP',
    'IKW / HKW': 'CHP',
    'GT': '',
    'DT': '',
    'GT / DT': '',
    'GuD': '',
    'DKW': '',
    'LWK': '',
    'PSW': '',
    'DWR': '', #Pressurized water reactor
    'G/AK': 'CHP', #GT with heat recovery
    'SWR': '', #boiling water reactor
    'SWK': '', #storage power plant
    'SSA': '',
    'WEA': '',
    'IKW / HKW / GuD' : 'IPP',
}
plantlist['type'].replace(dict_type, inplace=True)
plantlist['type'].unique()
# Check if all types have been translated (NaN is allowed).
# Loop variable renamed: the original shadowed the builtin `type`.
for type_name in plantlist['type'].unique():
    if (type_name not in dict_type.values()) & (str(type_name) != 'nan'):
        logger.error('Untranslated type: ' + str(type_name))
In [ ]:
# Fill in missing technology information: first from the fuel, then from
# hints in the plant name / block name, with 'Steam turbine' as fallback.

def _technology_missing():
    """Boolean mask: technology not yet set (empty string or NaN)."""
    return (plantlist['technology'] == '') | (plantlist['technology'].isnull())

def _name_or_block_contains(pattern):
    """Boolean mask: `pattern` occurs in the plant name or the block name."""
    return (plantlist['name'].str.contains(pattern)
            | plantlist['block'].str.contains(pattern))

# Set technology based on fuels
plantlist.loc[(plantlist['fuel'] == 'Nuclear') & _technology_missing(), 'technology'] = 'Steam turbine'
plantlist.loc[(plantlist['fuel'] == 'Lignite') & _technology_missing(), 'technology'] = 'Steam turbine'
plantlist.loc[(plantlist['fuel'] == 'Hard Coal') & _technology_missing(), 'technology'] = 'Steam turbine'
plantlist.loc[(plantlist['fuel'] == 'Hard coal') & _technology_missing(), 'technology'] = 'Steam turbine'
plantlist.loc[(plantlist['fuel'] == 'Hydro') & _technology_missing(), 'technology'] = 'Run-of-river'
plantlist.loc[(plantlist['fuel'] == 'Hydro PSP') & _technology_missing(), 'technology'] = 'Pumped storage'
plantlist.loc[(plantlist['fuel'] == 'Hydro PSP'), 'fuel'] = 'Hydro'
plantlist.loc[(plantlist['fuel'] == 'Hydro reservoir') & _technology_missing(), 'technology'] = 'RES'
plantlist.loc[(plantlist['fuel'] == 'Hydro reservoir'), 'fuel'] = 'Hydro'
plantlist.loc[(plantlist['fuel'] == 'reservoir') & _technology_missing(), 'technology'] = 'RES'
# Combined cycle: both a gas turbine (GT) and a steam turbine (DT) part
# are mentioned in the name or block.
plantlist.loc[_name_or_block_contains("GT") & _name_or_block_contains("DT")
              & _technology_missing(), 'technology'] = 'Combined cycle'
# Combined cycle: explicitly specified as GuD/GUD.
# BUGFIX: the original checked plantlist['name'] for "GUD" twice; the
# second check now covers plantlist['block'] like all other pairs.
plantlist.loc[(_name_or_block_contains("GuD") | _name_or_block_contains("GUD"))
              & _technology_missing(), 'technology'] = 'Combined cycle'
# Gas turbine
plantlist.loc[(_name_or_block_contains("GT")
               | _name_or_block_contains("Gasturbine"))
              & _technology_missing(), 'technology'] = 'Gas turbine'
# Steam turbine
plantlist.loc[(_name_or_block_contains("DT")
               | _name_or_block_contains("Dampfturbine")
               | _name_or_block_contains("Dampfkraftwerk")
               | _name_or_block_contains("DKW"))
              & _technology_missing(), 'technology'] = 'Steam turbine'
# Combustion engine
plantlist.loc[(_name_or_block_contains("motor")
               | _name_or_block_contains("Motor"))
              & _technology_missing(), 'technology'] = 'Combustion Engine'
# Identify storage technologies
plantlist.loc[(plantlist['fuel'] == 'Other fuels')
              & (plantlist['fuel_basis'] == 'Sonstige Speichertechnologien')
              & (plantlist['technology'].isnull()), 'technology'] = 'Storage technologies'
# Set technology ST for all technologies which could not be identified
plantlist.loc[_technology_missing(), 'technology'] = 'Steam turbine'
Some power plants are in Austria, Switzerland, or Luxembourg. As they are sometimes part of the German electricity system, they are included here.
In [ ]:
# Add country code derived from the state: some plants in Austria,
# Switzerland, and Luxembourg belong to the German electricity system.
plantlist['country_code'] = plantlist['state']
dict_state_country = {
    'Brandenburg': 'DE',
    'Baden-Württemberg': 'DE',
    'Niedersachsen': 'DE',
    'Bayern': 'DE',
    'Mecklenburg-Vorpommern': 'DE',
    'Sachsen-Anhalt': 'DE',
    'Hessen': 'DE',
    'Nordrhein-Westfalen': 'DE',
    'Berlin': 'DE',
    'Saarland': 'DE',
    'Thüringen': 'DE',
    'Sachsen': 'DE',
    'Bremen': 'DE',
    'Schleswig-Holstein': 'DE',
    'Hamburg': 'DE',
    'Rheinland-Pfalz': 'DE',
    'Österreich': 'AT',
    'Luxemburg': 'LU',
    'Schweiz': 'CH',
}
plantlist['country_code'].replace(dict_state_country, inplace=True)
# Check if all states have been translated to country codes (NaN allowed).
# BUGFIX: the error message said 'Untranslated type' (copy-paste from the
# type-translation cell); it now names the actual problem.
for country in plantlist['country_code'].unique():
    if (country not in dict_state_country.values()) & (str(country) != 'nan'):
        logger.error('Untranslated country/state: ' + str(country))
This sections adds efficiency data. These values have been researched by hand.
The source of each value is given in the column "efficiency_source".
Additionally, a rating of the source has been done starting from A (e.g. website of the power plants operator) to C (e.g. Article in local newspaper).
In [ ]:
# Hand-researched net efficiencies (with per-value source and rating).
data_efficiencies_bnetza = pd.read_csv(
    os.path.join('input', 'input_efficiency_de.csv'),
    sep=',',          # CSV field separator
    decimal='.',      # decimal separator
    index_col='id',
    encoding='utf8')
# Keep only rows with a usable numeric efficiency value.
data_efficiencies_bnetza['efficiency_net'] = pd.to_numeric(
    data_efficiencies_bnetza['efficiency_net'], errors='coerce')
data_efficiencies_bnetza = data_efficiencies_bnetza.dropna(subset=['efficiency_net'])
# Merge the efficiencies into the plantlist on the BNetzA id index.
plantlist = pd.merge(plantlist, data_efficiencies_bnetza,
                     left_index=True, right_index=True, how='left')
plantlist.head()
In [ ]:
# Quick visual check of the merged columns (skipping the first few).
plantlist.iloc[:,6:-1].head()
In [ ]:
# Restrict the analysis frame to rows that actually have an efficiency.
plantlist_for_efficiency_analysis = plantlist.dropna(subset=['efficiency_net'])
In [ ]:
# Scatter-plot efficiency over commissioning year for the four main fossil
# fuels; add a linear trend line when more than 10 data points exist.
fuel_for_plot = ['Lignite', 'Hard coal', 'Oil', 'Natural gas']
col_dict = {'Lignite': 'brown', 'Hard coal': 'grey', 'Oil': 'k', 'Natural gas': 'orange'}
fig, ax = plt.subplots(figsize=(16, 8))
for fuel_name in fuel_for_plot:
    sub_df = plantlist_for_efficiency_analysis[
        plantlist_for_efficiency_analysis.fuel == fuel_name]
    if len(sub_df['efficiency_net']) > 10:
        years = np.array(sub_df['commissioned_float'].astype(int))
        slope, intercept = np.polyfit(years, sub_df['efficiency_net'], deg=1)
        ax.plot(years, slope * years + intercept, color=col_dict[fuel_name])
    sub_df.plot(ax=ax,
                kind='scatter',
                x='commissioned_float',
                y='efficiency_net',
                c=col_dict[fuel_name],
                label=fuel_name)
Jonas Egerer, Clemens Gerbaulet, Richard Ihlenburg, Friedrich Kunz, Benjamin Reinhard, Christian von Hirschhausen, Alexander Weber, Jens Weibezahn (2014): Electricity Sector Data for Policy-Relevant Modeling: Data Documentation and Applications to the German and European Electricity Markets. DIW Data Documentation 72, Berlin, Germany.
For each energy source - technology combination two values are read, to be applied as a linear approximation based on the year of commissioning. Therefore, the efficiency is made up of the efficiency_intercept (the efficiency at "year zero") plus the efficiency_slope multiplied by the year of commissioning.
In [ ]:
# Literature-based efficiency coefficients per (fuel, technology):
# efficiency = intercept + slope * commissioning year (linear model).
data_efficiencies_literature = pd.read_csv(
    os.path.join('input', 'input_efficiency_literature_by_fuel_technology.csv'),
    sep=',',          # CSV field separator
    decimal='.',      # decimal separator
    encoding='utf8')
data_efficiencies_literature = data_efficiencies_literature.set_index(
    ['fuel', 'technology'])
data_efficiencies_literature
In [ ]:
# Look up the literature coefficients by (fuel, technology) and evaluate
# the linear model at the later of commissioning and retrofit year.
plantlist = plantlist.join(data_efficiencies_literature, on=['fuel', 'technology'])
reference_year = plantlist[['commissioned_float', 'retrofit_float']].max(axis=1)
plantlist['efficiency_literature'] = (
    plantlist['efficiency_intercept']
    + plantlist['efficiency_slope'] * reference_year)
plantlist.head()
The locations of power plants have been researched manually; these are now added to the output. Checking was done visually using satellite imagery and other mapping material.
In [ ]:
# Manually researched plant coordinates, keyed by plant id.
data_plant_locations = pd.read_csv(
    os.path.join('input', 'input_plant_locations_de.csv'),
    sep=',',        # CSV field separator
    decimal='.',    # decimal separator
    encoding='utf8')
data_plant_locations = data_plant_locations.set_index('id')
# Coerce coordinates to numbers; unparseable entries become NaN.
for coordinate in ('lat', 'lon'):
    data_plant_locations[coordinate] = pd.to_numeric(
        data_plant_locations[coordinate], errors='coerce')
plantlist = plantlist.merge(
    data_plant_locations,
    left_index=True,
    right_index=True,
    how='left')
plantlist.head()
In [ ]:
plantlist[plantlist.lat.isnull()]
To enable a more readable output, the column 'fuel' is augmented with additional information called 'energy source level'.
In [ ]:
# Read the allocator table that maps each fuel to its energy source levels.
energy_source_level_allocator = pd.read_csv(
    os.path.join('input', 'energy_source_level_allocator.csv'),
    sep=',',        # CSV field separator
    decimal='.',    # decimal separator
    index_col='fuel',
    encoding='utf8')
# Outer merge keeps plants whose fuel has no allocator entry and vice versa.
plantlist = pd.merge(
    energy_source_level_allocator,
    plantlist,
    left_index=True,
    right_on='fuel',
    how='outer')
plantlist
In [ ]:
# Where no aggregate UBA name exists, fall back to the plain UBA name.
plantlist['uba_name_aggregate'] = plantlist['uba_name_aggregate'].fillna(
    plantlist['uba_name'])
# Drop intermediate working columns that are not part of the published output.
cols_to_drop = [
    'bnetza_id', 'capacity', 'uba_name', 'uba_capacity_original',
    'uba_chp_capacity_original', 'uba_city', 'uba_commissioned',
    'uba_company', 'uba_company_aggregate', 'uba_fuel', 'uba_postcode',
    'uba_state', 'uba_technology', 'uba_technology_aggregate', 'retrofit',
    'uba_commissioned_1', 'uba_commissioned_2', 'uba_commissioned_3',
    'commissioned_1', 'commissioned_2', 'commissioned_3', 'fuel_basis',
    'fuel_multiple1', 'fuel_multiple2', 'efficiency_gross',
    'efficiency_intercept', 'efficiency_slope', 'source_type', 'date',
]
plantlist = plantlist.drop(cols_to_drop, axis=1)
# Rename columns: the parsed *_float helpers become the canonical fields,
# originals are kept under *_original / source-suffixed names.
plantlist = plantlist.rename(columns={
    'commissioned': 'commissioned_original',
    'commissioned_float': 'commissioned',
    'retrofit_float': 'retrofit',
    'capacity_float': 'capacity_net_bnetza',
    'uba_capacity': 'capacity_gross_uba',
    'uba_chp_capacity': 'chp_capacity_uba',
    'efficiency_net': 'efficiency_data',
    'efficiency_literature': 'efficiency_estimate',
    'uba_name_aggregate': 'name_uba',
    'name': 'name_bnetza',
    'block': 'block_bnetza',
})
# Fixed column order for the published dataset.
output_column_order = [
    # identification
    'name_bnetza', 'block_bnetza', 'name_uba',
    # ownership and location
    'company', 'street', 'postcode', 'city', 'state', 'country_code',
    # capacities, fuel, and technology
    'capacity_net_bnetza', 'capacity_gross_uba', 'fuel', 'technology',
    'chp', 'chp_capacity_uba',
    # lifecycle
    'commissioned', 'commissioned_original', 'retrofit', 'shutdown',
    'status', 'type',
    # coordinates and identifiers
    'lat', 'lon', 'eic_code_plant', 'eic_code_block',
    # efficiencies
    'efficiency_data', 'efficiency_source', 'efficiency_estimate',
    # energy source levels
    'energy_source_level_1', 'energy_source_level_2', 'energy_source_level_3',
    # grid information and comments
    'eeg', 'network_node', 'voltage', 'network_operator',
    'merge_comment', 'comment',
]
plantlist = plantlist.reindex(columns=output_column_order)
plantlist.head()
In [ ]:
# Round capacity values and the efficiency estimate to five decimal places.
for rounded_col in ('capacity_net_bnetza', 'capacity_gross_uba', 'efficiency_estimate'):
    plantlist[rounded_col] = plantlist[rounded_col].round(decimals=5)
In [ ]:
# Aggregate net capacity by fuel (rows) and plant status (columns).
# Use the string 'sum' rather than the callable np.sum — the callable form
# is deprecated in recent pandas versions.
pivot_status_capacity = pd.pivot_table(
    plantlist,
    values='capacity_net_bnetza',
    columns='status',
    index='fuel',
    aggfunc='sum'
)
# Sort fuels by operating capacity, largest first. `ascending` expects a
# bool; the original passed the int 0.
pivot_status_capacity.sort_values(by='operating', inplace=True, ascending=False)
pivot_status_capacity_plot = pivot_status_capacity.plot(
    kind='barh',
    stacked=True,
    legend=True,
    figsize=(12, 6))
pivot_status_capacity_plot.set_xlabel("MW")
pivot_status_capacity_plot
In [ ]:
plantlist_filtered = plantlist
# Aggregate net capacity by commissioning year (rows) and fuel (columns).
# String 'sum' instead of the deprecated callable np.sum as aggfunc.
pivot_age_capacity = pd.pivot_table(
    plantlist_filtered,
    values='capacity_net_bnetza',
    columns='fuel',
    index='commissioned',
    aggfunc='sum',
    dropna=True
)
pivot_age_capacity_plot = pivot_age_capacity.plot(
    kind='bar',
    stacked=True,
    legend=True,
    figsize=(17, 10))
pivot_age_capacity_plot.set_ylabel("MW")
# Commissioning years are floats in the index; label ticks as integers.
xaxis_labels = pivot_age_capacity.index.astype(int)
pivot_age_capacity_plot.set_xticklabels(xaxis_labels)
pivot_age_capacity_plot
This chart is suitable to check outliers of commissioning years and block sizes. In theory, there should be no unexpected values, e.g. all commissioning years should be greater than 1900. Block sizes above 2000 MW are also unlikely.
In [ ]:
# Scatter block size vs. commissioning year to spot outliers (years before
# 1900 or blocks above ~2000 MW would be suspicious).
# The commented-out bokeh Scatter variant has been removed as dead code.
plantlist_for_plot = plantlist.copy(deep=True)
plantlist_for_plot['capacity_float'] = pd.to_numeric(
    plantlist_for_plot['capacity_net_bnetza'], errors='coerce')
plantlist_for_plot['commissioned_float'] = pd.to_numeric(
    plantlist_for_plot['commissioned'], errors='coerce')
plantlist_for_plot.plot(kind='scatter', x='commissioned_float', y='capacity_float')
List all entries with zero capacity.
In [ ]:
plantlist[plantlist.capacity_net_bnetza == 0]
In [ ]:
# Show all plants with commissioning dates in or before 1900 (suspicious).
plantlist.loc[plantlist['commissioned'] <= 1900]
In [ ]:
# Show all plants with missing/unparseable commissioning dates.
plantlist.loc[plantlist['commissioned'].isna()]
In [ ]:
# TODO: When implemented write a marker in the comment column
# List all entries with diverging postcodes (if a postcode is given)
# plantlist[(plantlist['uba_postcode'].notnull() == True) & (pd.to_numeric(plantlist.postcode, errors='coerce') != pd.to_numeric(plantlist.uba_postcode, errors='coerce'))]
In [ ]:
# TODO: improve this comparison, it creates many false positives
# Ratio of BNetzA net capacity to UBA gross capacity per plant; values far
# from a plausible net/gross ratio hint at mismatched records.
net_to_gross = plantlist.capacity_net_bnetza / plantlist.capacity_gross_uba
capacitycomparison = pd.DataFrame(net_to_gross).assign(
    Name=plantlist.name_bnetza,
    Block=plantlist.block_bnetza,
    BnetzaCapacity=plantlist.capacity_net_bnetza,
    UBACapacity=plantlist.capacity_gross_uba,
).dropna()
capacitycomparison.sort_values(by=0)
We document the data packages meta data in the specific format JSON as proposed by the Open Knowledge Foundation. See the Frictionless Data project by OKFN (http://data.okfn.org/) and the Data Package specifications (http://dataprotocols.org/data-packages/) for more details.
In order to keep the notebook more readable, we first formulate the metadata in the human-readable YAML format using a multi-line string. We then parse the string into a Python dictionary and save that to disk as a JSON file.
In [ ]:
# Load the human-readable YAML metadata and serialise it as the
# datapackage JSON. safe_load avoids arbitrary object construction;
# yaml.load without an explicit Loader is deprecated since PyYAML 5.1.
with open(os.path.join('input', 'metadata.yml'), 'r') as f:
    metadata = yaml.safe_load(f)
datapackage_json = json.dumps(metadata, indent=4, separators=(',', ': '))
In [ ]:
# Write the final plant list as CSV, Excel, SQLite, and the datapackage
# metadata, all keyed by the plant id.
output_path = 'output'
plantlist.to_csv(
    os.path.join(output_path, 'conventional_power_plants_DE.csv'),
    encoding='utf-8', index_label='id'
)
plantlist.to_excel(
    os.path.join(output_path, 'conventional_power_plants_DE.xlsx'),
    sheet_name='plants', index_label='id'
)
# Hold the connection in a variable and close it explicitly; the original
# created it inline and leaked it.
conn = sqlite3.connect(os.path.join(output_path, 'conventional_power_plants.sqlite'))
try:
    plantlist.to_sql(
        'conventional_power_plants_DE',
        conn,
        if_exists="replace", index_label='id'
    )
finally:
    conn.close()
with open(os.path.join(output_path, 'datapackage.json'), 'w') as f:
    f.write(datapackage_json)
In [ ]:
# Record a hash of every published file so downloads can be verified.
files = [
    'conventional_power_plants_DE.csv',
    'conventional_power_plants_DE.xlsx',
    'conventional_power_plants_EU.csv',
    'conventional_power_plants_EU.xlsx',
    'conventional_power_plants.sqlite',
]
with open('checksums.txt', 'w') as checksum_file:
    for file_name in files:
        digest = get_sha_hash(os.path.join(output_path, file_name))
        checksum_file.write('{},{}\n'.format(file_name, digest))