In [2]:
from data_cleaner import DataCleaner


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-2-27261c293ddb> in <module>()
----> 1 from data_cleaner import DataCleaner

/usr/local/lib/python2.7/site-packages/data_cleaner/__init__.py in <module>()
      1 # -*- coding: utf-8 -*-
      2 
----> 3 from data_cleaner import DataCleaner
      4 
      5 __author__ = 'Gobierno Abierto Argentina'

/usr/local/lib/python2.7/site-packages/data_cleaner/data_cleaner.py in <module>()
     10 from __future__ import print_function
     11 from __future__ import with_statement
---> 12 import pandas as pd
     13 from dateutil import tz
     14 import arrow

/usr/local/lib/python2.7/site-packages/pandas/__init__.py in <module>()
     11                       "pandas from the source directory, you may need to run "
     12                       "'python setup.py build_ext --inplace' to build the C "
---> 13                       "extensions first.".format(module))
     14 
     15 from datetime import datetime

ImportError: C extension: hashtable not built. If you want to import pandas from the source directory, you may need to run 'python setup.py build_ext --inplace' to build the C extensions first.

In [ ]:


In [1]:
# Relative paths of the CSV handed to DataCleaner and of the CSV passed to
# clean_file as its output argument.
input_path = "establecimientos-de-salud-raw.csv"
output_path = "establecimientos-de-salud-clean.csv"

# `rules` is a list of one-key dicts; each value is a list of
# {"field": <name>} dicts naming the fields that entry refers to.
# NOTE(review): the keys ("nombre_propio", "string") presumably select
# data_cleaner cleaning rules -- confirm against the library docs.
rules = [
    {"nombre_propio": [{"field": "nombre"}]},
    {"string": [
        {"field": "financiamiento"},
        {"field": "provincia"},
        {"field": "localidad"}
    ]}
]

# DataCleaner is not imported in this cell; it must already be in the
# kernel namespace (e.g. from an earlier import cell) before this runs.
dc = DataCleaner(input_path)

# Presumably applies `rules` and writes the result to `output_path` --
# verify against DataCleaner.clean_file.
dc.clean_file(rules, output_path)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-1-0fa061cd8e74> in <module>()
----> 1 from data_cleaner import DataCleaner
      2 
      3 input_path = "establecimientos-de-salud-raw.csv"
      4 output_path = "establecimientos-de-salud-clean.csv"
      5 

/usr/local/lib/python2.7/site-packages/data_cleaner/__init__.py in <module>()
      1 # -*- coding: utf-8 -*-
      2 
----> 3 from data_cleaner import DataCleaner
      4 
      5 __author__ = 'Gobierno Abierto Argentina'

/usr/local/lib/python2.7/site-packages/data_cleaner/data_cleaner.py in <module>()
     10 from __future__ import print_function
     11 from __future__ import with_statement
---> 12 import pandas as pd
     13 from dateutil import tz
     14 import arrow

/usr/local/lib/python2.7/site-packages/pandas/__init__.pyc in <module>()
     42 import pandas.core.config_init
     43 
---> 44 from pandas.core.api import *
     45 from pandas.sparse.api import *
     46 from pandas.stats.api import *

/usr/local/lib/python2.7/site-packages/pandas/core/api.py in <module>()
      7 from pandas.core.common import isnull, notnull
      8 from pandas.core.categorical import Categorical
----> 9 from pandas.core.groupby import Grouper
     10 from pandas.core.format import set_eng_float_format
     11 from pandas.core.index import Index, CategoricalIndex, Int64Index, Float64Index, MultiIndex

/usr/local/lib/python2.7/site-packages/pandas/core/groupby.py in <module>()
     15 from pandas.core.base import PandasObject
     16 from pandas.core.categorical import Categorical
---> 17 from pandas.core.frame import DataFrame
     18 from pandas.core.generic import NDFrame
     19 from pandas.core.index import Index, MultiIndex, CategoricalIndex, _ensure_index

/usr/local/lib/python2.7/site-packages/pandas/core/frame.py in <module>()
     39                                    create_block_manager_from_arrays,
     40                                    create_block_manager_from_blocks)
---> 41 from pandas.core.series import Series
     42 from pandas.core.categorical import Categorical
     43 import pandas.computation.expressions as expressions

/usr/local/lib/python2.7/site-packages/pandas/core/series.py in <module>()
   2907 # Add plotting methods to Series
   2908 
-> 2909 import pandas.tools.plotting as _gfx
   2910 
   2911 Series.plot = base.AccessorProperty(_gfx.SeriesPlotMethods, _gfx.SeriesPlotMethods)

/usr/local/lib/python2.7/site-packages/pandas/tools/plotting.py in <module>()
     26 from pandas.util.decorators import Appender
     27 try:  # mpl optional
---> 28     import pandas.tseries.converter as conv
     29     conv.register()  # needs to override so set_xlim works with str/number
     30 except ImportError:

/usr/local/lib/python2.7/site-packages/pandas/tseries/converter.py in <module>()
      5 from dateutil.relativedelta import relativedelta
      6 
----> 7 import matplotlib.units as units
      8 import matplotlib.dates as dates
      9 

/usr/local/lib/python2.7/site-packages/matplotlib/__init__.py in <module>()
   1129 
   1130 # this is the instance used by the matplotlib classes
-> 1131 rcParams = rc_params()
   1132 
   1133 if rcParams['examples.directory']:

/usr/local/lib/python2.7/site-packages/matplotlib/__init__.py in rc_params(fail_on_error)
    973         return ret
    974 
--> 975     return rc_params_from_file(fname, fail_on_error)
    976 
    977 

/usr/local/lib/python2.7/site-packages/matplotlib/__init__.py in rc_params_from_file(fname, fail_on_error, use_default_template)
   1098         parameters specified in the file. (Useful for updating dicts.)
   1099     """
-> 1100     config_from_file = _rc_params_in_file(fname, fail_on_error)
   1101 
   1102     if not use_default_template:

/usr/local/lib/python2.7/site-packages/matplotlib/__init__.py in _rc_params_in_file(fname, fail_on_error)
   1016     cnt = 0
   1017     rc_temp = {}
-> 1018     with _open_file_or_url(fname) as fd:
   1019         try:
   1020             for line in fd:

/usr/local/Cellar/python/2.7.10/Frameworks/Python.framework/Versions/2.7/lib/python2.7/contextlib.pyc in __enter__(self)
     15     def __enter__(self):
     16         try:
---> 17             return self.gen.next()
     18         except StopIteration:
     19             raise RuntimeError("generator didn't yield")

/usr/local/lib/python2.7/site-packages/matplotlib/__init__.py in _open_file_or_url(fname)
    998     else:
    999         fname = os.path.expanduser(fname)
-> 1000         encoding = locale.getdefaultlocale()[1]
   1001         if encoding is None:
   1002             encoding = "utf-8"

/usr/local/Cellar/python/2.7.10/Frameworks/Python.framework/Versions/2.7/lib/python2.7/locale.pyc in getdefaultlocale(envvars)
    541     else:
    542         localename = 'C'
--> 543     return _parse_localename(localename)
    544 
    545 

/usr/local/Cellar/python/2.7.10/Frameworks/Python.framework/Versions/2.7/lib/python2.7/locale.pyc in _parse_localename(localename)
    473     elif code == 'C':
    474         return None, None
--> 475     raise ValueError, 'unknown locale: %s' % localename
    476 
    477 def _build_localename(localetuple):

ValueError: unknown locale: UTF-8

In [ ]: