In [4]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
import os
import pandas as pd
import scipy
from scipy import stats
import sys

In [5]:
dataDir = "/Users/Gabriel/Desktop"
data = pd.read_csv("/Users/Gabriel/Desktop/TXN_ALERTS.csv")
print data.shape
dropL = ['SD_RETL_ID', 'SD_PROD_IND', 'SD_TERM_NAME_LOC', 'SD_TERM_CITY_OLD', 'SD_TERM_ST', 'SD_TERM_CNTRY', 'SD_CR_DB_IND', 'SD_CASH_IND', 'SD_CRD_PLASTIC_TYP', 'SD_TERM_CITY', 'DD_DDAY', 'DD_TRAN_DAT_TIM', 'DD_CRD_LAST_ISS', 'DD_CRD_EMBOSS', 'SD_TRAN_RSN_CDE', 'DD_DDAY', 'DD_APDATE', 'DD_TRAN_DAT_TIM', 'SD_TERM_ID', 'Alert']
print data['SD_TERM_ID'].unique()


(6991, 34)
['ATM383841' 'ATM454305' 'ATM591693' ..., 'POS65407053' 'POS66243900'
 'POS56731010']

In [6]:
data = data[['MD_TRAN_AMT1', 'SD_NAU']]
data = data.fillna(0)
print len(data['SD_NAU'].unique())
data


543
Out[6]:
MD_TRAN_AMT1 SD_NAU
0 577.96 67416041.0
1 436.70 67416041.0
2 634.88 67416041.0
3 1261.73 4527506.0
4 708.76 35914094.0
5 695.71 35914094.0
6 1101.27 35914094.0
7 1085.43 81647386.0
8 356.29 11956076.0
9 1092.86 11956076.0
10 797.80 11956076.0
11 757.98 11956076.0
12 57.87 53712737.0
13 1010.14 62889548.0
14 1031.74 27454439.0
15 512.38 12297534.0
16 31.62 12297534.0
17 197.08 12297534.0
18 419.00 12297534.0
19 881.09 12297534.0
20 867.72 12297534.0
21 3.81 40649432.0
22 17.50 53712737.0
23 830.70 53712737.0
24 561.69 53712737.0
25 42.70 53712737.0
26 1084.04 53712737.0
27 1061.36 12623215.0
28 1147.92 12623215.0
29 303.89 26305464.0
... ... ...
6961 67.54 0.0
6962 77.25 0.0
6963 222.95 0.0
6964 824.70 0.0
6965 83.59 0.0
6966 11.73 0.0
6967 72.19 0.0
6968 159.70 0.0
6969 310.14 0.0
6970 191.55 0.0
6971 5.93 0.0
6972 302.08 0.0
6973 319.64 0.0
6974 14.85 0.0
6975 84.92 0.0
6976 260.02 0.0
6977 385.11 0.0
6978 111.71 0.0
6979 553.46 0.0
6980 322.09 0.0
6981 90.38 0.0
6982 323.82 0.0
6983 356.37 0.0
6984 299.36 0.0
6985 11.61 0.0
6986 84.52 0.0
6987 63.66 0.0
6988 506.84 0.0
6989 518.20 0.0
6990 203.19 0.0

6991 rows × 2 columns


In [8]:
dataDir = "/Users/Gabriel/Dropbox/ACI"
dataf = pd.read_csv("/Users/Gabriel/Dropbox/ACI/TXN_OHE.csv")

data = pd.read_csv(dataf)
print data.dtypes


---------------------------------------------------------------------------
IOError                                   Traceback (most recent call last)
<ipython-input-8-3e367e868db4> in <module>()
      2 dataf = pd.read_csv("/Users/Gabriel/Dropbox/ACI/TXN_OHE.csv")
      3 
----> 4 data = pd.read_csv(dataf)
      5 print data.dtypes

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
    643                     skip_blank_lines=skip_blank_lines)
    644 
--> 645         return _read(filepath_or_buffer, kwds)
    646 
    647     parser_f.__name__ = name

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
    386 
    387     # Create the parser.
--> 388     parser = TextFileReader(filepath_or_buffer, **kwds)
    389 
    390     if (nrows is not None) and (chunksize is not None):

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/io/parsers.pyc in __init__(self, f, engine, **kwds)
    727             self.options['has_index_names'] = kwds['has_index_names']
    728 
--> 729         self._make_engine(self.engine)
    730 
    731     def close(self):

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/io/parsers.pyc in _make_engine(self, engine)
    920     def _make_engine(self, engine='c'):
    921         if engine == 'c':
--> 922             self._engine = CParserWrapper(self.f, **self.options)
    923         else:
    924             if engine == 'python':

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/io/parsers.pyc in __init__(self, src, **kwds)
   1387         kwds['allow_leading_cols'] = self.index_col is not False
   1388 
-> 1389         self._reader = _parser.TextReader(src, **kwds)
   1390 
   1391         # XXX

pandas/parser.pyx in pandas.parser.TextReader.__cinit__ (pandas/parser.c:4019)()

pandas/parser.pyx in pandas.parser.TextReader._setup_parser_source (pandas/parser.c:8144)()

IOError: Expected file path name or file-like object, got <class 'pandas.core.frame.DataFrame'> type

In [ ]: