In [1]:
import pandas as pd
In [2]:
# Load the probe specification table; the later cells merge lookup tables
# into this frame.
probe_spec_csv = 'NiPOD-ProbeSpec.csv'
probe_spec = pd.read_csv(probe_spec_csv)
Let's list the column names that end in 'ID'; those are probably the primary and foreign keys.
In [3]:
# Print every probe_spec column whose name ends in 'ID' (candidate keys).
for col in probe_spec.columns:
    if col.endswith('ID'):
        # print() with a single argument behaves the same on Python 2 and 3.
        print(col)
First, let's set the index to what I think is the primary key.
In [4]:
# Move DesignID out of the columns and into the row index.
# NOTE(review): assumes DesignID is unique per row -- TODO confirm.
probe_spec = probe_spec.set_index('DesignID')
probe_spec.head()
Out[4]:
In [5]:
# Load the design-type table and print its columns ending in 'ID'
# (candidate join keys), then preview the first rows.
design_type = pd.read_csv('NiPOD-DesignType.csv')
for col in design_type.columns:
    if col.endswith('ID'):
        # print() with a single argument behaves the same on Python 2 and 3.
        print(col)
design_type.head()
Out[5]:
In [6]:
# Inner join on DesignTypeID: probe_spec rows with no matching design_type
# row are dropped.
# NOTE(review): a column-on-column merge ignores the existing index, so the
# DesignID index set earlier is replaced by a fresh integer index here.
probe_spec = pd.merge(probe_spec, design_type, how='inner', on='DesignTypeID')
In [7]:
# Load the manufacture table and print its columns ending in 'ID'
# (candidate join keys), then preview the first rows.
manufacture = pd.read_csv('NiPOD-Manufacture.csv')
for col in manufacture.columns:
    if col.endswith('ID'):
        # print() with a single argument behaves the same on Python 2 and 3.
        print(col)
manufacture.head()
Out[7]:
In [8]:
# Inner join on ManufactureID: probe_spec rows with no matching manufacture
# row are dropped.
probe_spec = pd.merge(probe_spec, manufacture, how='inner', on='ManufactureID')
In [9]:
# Load the probe-package table and print its columns ending in 'ID'
# (candidate join keys), then preview the first rows.
package = pd.read_csv('NiPOD-ProbePackage.csv')
for col in package.columns:
    if col.endswith('ID'):
        # print() with a single argument behaves the same on Python 2 and 3.
        print(col)
package.head()
Out[9]:
In [10]:
# Inner join on PackageID: probe_spec rows with no matching package row
# are dropped.
probe_spec = pd.merge(probe_spec, package, how='inner', on='PackageID')
In [11]:
# Load the probe-type table and print its columns ending in 'ID'
# (candidate join keys), then preview the first rows.
probe_type = pd.read_csv('NiPOD-ProbeType.csv')
for col in probe_type.columns:
    if col.endswith('ID'):
        # print() with a single argument behaves the same on Python 2 and 3.
        print(col)
probe_type.head()
Out[11]:
In [12]:
# Inner join on ProbeTypeID: probe_spec rows with no matching probe_type
# row are dropped.
probe_spec = pd.merge(probe_spec, probe_type, how='inner', on='ProbeTypeID')
In [13]:
# Preview the first five rows of the fully merged frame.
probe_spec.head(n=5)
Out[13]:
In [14]:
# Columns retained in the denormalized output, in output order.
spec_columns = [
    'DesignName', 'FirstChannelYSpacing', 'NumChannel', 'NumShank',
    'NumSitePerShank', 'OtherParameters', 'PackageID', 'ShankHeight',
    'ShankSpace', 'ShankStartingXLocation', 'ShankStartingYLocation',
    'ShankWidth', 'SiteArea', 'TetrodeOffsetLeft', 'TetrodeOffsetRight',
    'TetrodeOffsetUp', 'TrueShankLength', 'TrueSiteSpacing',
]
# NOTE(review): presumably the human-readable labels contributed by the
# merged lookup tables -- TODO confirm against the CSV headers.
label_columns = ['DesignType', 'PackageName', 'ProbeType']
keep = spec_columns + label_columns
In [15]:
# Restrict the merged frame to the columns listed in `keep`, in that order.
# Left as plain list indexing on purpose: other selectors (.loc, .reindex,
# .filter) differ in how they treat labels that are missing from the frame.
probe_spec = probe_spec[keep]
In [16]:
# Preview the first five rows after the column selection.
probe_spec.head(n=5)
Out[16]:
In [17]:
# Write the denormalized table as UTF-8 CSV; index=False omits the row
# index from the file.
denormalized_csv = 'NiPOD-ProbeSpec-denormalized.csv'
probe_spec.to_csv(denormalized_csv, index=False, encoding='utf-8')