In [15]:
import pandas as pd 
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt 
np.random.seed(seed=42)

In [43]:
a = np.random.random_integers(1, 100, 100)
s = np.random.dirichlet(a, 1000)
print s.shape
z = np.linspace(0,2.0,100)
pdf_names = ['pdf_' + str(i) for i in range(100)]
df = pd.DataFrame(s, columns= pdf_names)
df['z_spec'] = np.random.uniform(0,2,1000)
df['weight'] = np.random.dirichlet(a, 1000)[0,:]


(1000, 100)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-43-821e7d57e4d7> in <module>()
      6 df = pd.DataFrame(s, columns= pdf_names)
      7 df['z_spec'] = np.random.uniform(0,2,1000)
----> 8 df['weight'] = np.random.dirichlet(a, 1000)[0,:]

/Users/Christopher_old/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in __setitem__(self, key, value)
   2125         else:
   2126             # set column
-> 2127             self._set_item(key, value)
   2128 
   2129     def _setitem_slice(self, key, value):

/Users/Christopher_old/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in _set_item(self, key, value)
   2202 
   2203         self._ensure_valid_index(value)
-> 2204         value = self._sanitize_column(key, value)
   2205         NDFrame._set_item(self, key, value)
   2206 

/Users/Christopher_old/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in _sanitize_column(self, key, value)
   2360 
   2361             # turn me into an ndarray
-> 2362             value = _sanitize_index(value, self.index, copy=False)
   2363             if not isinstance(value, (np.ndarray, Index)):
   2364                 if isinstance(value, list) and len(value) > 0:

/Users/Christopher_old/anaconda/lib/python2.7/site-packages/pandas/core/series.pyc in _sanitize_index(data, index, copy)
   2577 
   2578     if len(data) != len(index):
-> 2579         raise ValueError('Length of values does not match length of '
   2580                          'index')
   2581 

ValueError: Length of values does not match length of index

In [17]:
df.head()


Out[17]:
pdf_0 pdf_1 pdf_2 pdf_3 pdf_4 pdf_5 pdf_6 pdf_7 pdf_8 pdf_9 ... pdf_92 pdf_93 pdf_94 pdf_95 pdf_96 pdf_97 pdf_98 pdf_99 z_spec weight
0 0.008092 0.016048 0.003233 0.015099 0.011225 0.003413 0.013246 0.018494 0.016217 0.012765 ... 0.019147 0.011730 0.007820 0.016400 0.015422 0.014096 0.007739 0.004836 1.797772 0.009466
1 0.008682 0.017071 0.003219 0.011721 0.015499 0.005767 0.015797 0.017332 0.013524 0.011667 ... 0.017028 0.012187 0.007438 0.016055 0.017929 0.016125 0.009620 0.003188 0.136269 0.016747
2 0.008310 0.019172 0.002295 0.013534 0.012054 0.005590 0.018083 0.017274 0.017286 0.016156 ... 0.014249 0.012099 0.008920 0.016750 0.015889 0.017018 0.011053 0.005926 0.604158 0.003455
3 0.009450 0.018925 0.004161 0.014716 0.013958 0.003665 0.017972 0.018815 0.012034 0.015107 ... 0.014079 0.013663 0.007994 0.015989 0.013037 0.015299 0.008846 0.005579 0.640460 0.015489
4 0.008485 0.017963 0.003577 0.015289 0.011353 0.004700 0.017929 0.017450 0.015599 0.014457 ... 0.019358 0.014398 0.007598 0.014795 0.013736 0.019372 0.011800 0.004501 0.868892 0.011147

5 rows × 102 columns


In [8]:
df.to_csv('test_df.csv',index=False)

In [29]:
a =  type(df)

In [19]:
type(z)


Out[19]:
numpy.ndarray

In [27]:
print  type(df) == 'pandas.core.frame.DataFrame'>"


False

In [34]:
isinstance(z, np.ndarray)


Out[34]:
True

In [ ]: