notebook.community

Edit and run



In [4]:

    
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
import os
import pandas as pd
import scipy
from scipy import stats
import sys



In [5]:

    
dataDir = "/Users/Gabriel/Desktop"
data = pd.read_csv("/Users/Gabriel/Desktop/TXN_ALERTS.csv")
print data.shape
dropL = ['SD_RETL_ID', 'SD_PROD_IND', 'SD_TERM_NAME_LOC', 'SD_TERM_CITY_OLD', 'SD_TERM_ST', 'SD_TERM_CNTRY', 'SD_CR_DB_IND', 'SD_CASH_IND', 'SD_CRD_PLASTIC_TYP', 'SD_TERM_CITY', 'DD_DDAY', 'DD_TRAN_DAT_TIM', 'DD_CRD_LAST_ISS', 'DD_CRD_EMBOSS', 'SD_TRAN_RSN_CDE', 'DD_DDAY', 'DD_APDATE', 'DD_TRAN_DAT_TIM', 'SD_TERM_ID', 'Alert']
print data['SD_TERM_ID'].unique()









    



(6991, 34)
['ATM383841' 'ATM454305' 'ATM591693' ..., 'POS65407053' 'POS66243900'
 'POS56731010']



In [6]:

    
data = data[['MD_TRAN_AMT1', 'SD_NAU']]
data = data.fillna(0)
print len(data['SD_NAU'].unique())
data









    



543






    Out[6]:






  
    
      
      MD_TRAN_AMT1
      SD_NAU
    
  
  
    
      0
      577.96
      67416041.0
    
    
      1
      436.70
      67416041.0
    
    
      2
      634.88
      67416041.0
    
    
      3
      1261.73
      4527506.0
    
    
      4
      708.76
      35914094.0
    
    
      5
      695.71
      35914094.0
    
    
      6
      1101.27
      35914094.0
    
    
      7
      1085.43
      81647386.0
    
    
      8
      356.29
      11956076.0
    
    
      9
      1092.86
      11956076.0
    
    
      10
      797.80
      11956076.0
    
    
      11
      757.98
      11956076.0
    
    
      12
      57.87
      53712737.0
    
    
      13
      1010.14
      62889548.0
    
    
      14
      1031.74
      27454439.0
    
    
      15
      512.38
      12297534.0
    
    
      16
      31.62
      12297534.0
    
    
      17
      197.08
      12297534.0
    
    
      18
      419.00
      12297534.0
    
    
      19
      881.09
      12297534.0
    
    
      20
      867.72
      12297534.0
    
    
      21
      3.81
      40649432.0
    
    
      22
      17.50
      53712737.0
    
    
      23
      830.70
      53712737.0
    
    
      24
      561.69
      53712737.0
    
    
      25
      42.70
      53712737.0
    
    
      26
      1084.04
      53712737.0
    
    
      27
      1061.36
      12623215.0
    
    
      28
      1147.92
      12623215.0
    
    
      29
      303.89
      26305464.0
    
    
      ...
      ...
      ...
    
    
      6961
      67.54
      0.0
    
    
      6962
      77.25
      0.0
    
    
      6963
      222.95
      0.0
    
    
      6964
      824.70
      0.0
    
    
      6965
      83.59
      0.0
    
    
      6966
      11.73
      0.0
    
    
      6967
      72.19
      0.0
    
    
      6968
      159.70
      0.0
    
    
      6969
      310.14
      0.0
    
    
      6970
      191.55
      0.0
    
    
      6971
      5.93
      0.0
    
    
      6972
      302.08
      0.0
    
    
      6973
      319.64
      0.0
    
    
      6974
      14.85
      0.0
    
    
      6975
      84.92
      0.0
    
    
      6976
      260.02
      0.0
    
    
      6977
      385.11
      0.0
    
    
      6978
      111.71
      0.0
    
    
      6979
      553.46
      0.0
    
    
      6980
      322.09
      0.0
    
    
      6981
      90.38
      0.0
    
    
      6982
      323.82
      0.0
    
    
      6983
      356.37
      0.0
    
    
      6984
      299.36
      0.0
    
    
      6985
      11.61
      0.0
    
    
      6986
      84.52
      0.0
    
    
      6987
      63.66
      0.0
    
    
      6988
      506.84
      0.0
    
    
      6989
      518.20
      0.0
    
    
      6990
      203.19
      0.0
    
  

6991 rows × 2 columns



In [8]:

    
dataDir = "/Users/Gabriel/Dropbox/ACI"
dataf = pd.read_csv("/Users/Gabriel/Dropbox/ACI/TXN_OHE.csv")

data = pd.read_csv(dataf)
print data.dtypes









    



---------------------------------------------------------------------------
IOError                                   Traceback (most recent call last)
<ipython-input-8-3e367e868db4> in <module>()
      2 dataf = pd.read_csv("/Users/Gabriel/Dropbox/ACI/TXN_OHE.csv")
      3 
----> 4 data = pd.read_csv(dataf)
      5 print data.dtypes

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
    643                     skip_blank_lines=skip_blank_lines)
    644 
--> 645         return _read(filepath_or_buffer, kwds)
    646 
    647     parser_f.__name__ = name

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
    386 
    387     # Create the parser.
--> 388     parser = TextFileReader(filepath_or_buffer, **kwds)
    389 
    390     if (nrows is not None) and (chunksize is not None):

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/io/parsers.pyc in __init__(self, f, engine, **kwds)
    727             self.options['has_index_names'] = kwds['has_index_names']
    728 
--> 729         self._make_engine(self.engine)
    730 
    731     def close(self):

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/io/parsers.pyc in _make_engine(self, engine)
    920     def _make_engine(self, engine='c'):
    921         if engine == 'c':
--> 922             self._engine = CParserWrapper(self.f, **self.options)
    923         else:
    924             if engine == 'python':

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/io/parsers.pyc in __init__(self, src, **kwds)
   1387         kwds['allow_leading_cols'] = self.index_col is not False
   1388 
-> 1389         self._reader = _parser.TextReader(src, **kwds)
   1390 
   1391         # XXX

pandas/parser.pyx in pandas.parser.TextReader.__cinit__ (pandas/parser.c:4019)()

pandas/parser.pyx in pandas.parser.TextReader._setup_parser_source (pandas/parser.c:8144)()

IOError: Expected file path name or file-like object, got <class 'pandas.core.frame.DataFrame'> type



In [ ]:

	MD_TRAN_AMT1	SD_NAU
0	577.96	67416041.0
1	436.70	67416041.0
2	634.88	67416041.0
3	1261.73	4527506.0
4	708.76	35914094.0
5	695.71	35914094.0
6	1101.27	35914094.0
7	1085.43	81647386.0
8	356.29	11956076.0
9	1092.86	11956076.0
10	797.80	11956076.0
11	757.98	11956076.0
12	57.87	53712737.0
13	1010.14	62889548.0
14	1031.74	27454439.0
15	512.38	12297534.0
16	31.62	12297534.0
17	197.08	12297534.0
18	419.00	12297534.0
19	881.09	12297534.0
20	867.72	12297534.0
21	3.81	40649432.0
22	17.50	53712737.0
23	830.70	53712737.0
24	561.69	53712737.0
25	42.70	53712737.0
26	1084.04	53712737.0
27	1061.36	12623215.0
28	1147.92	12623215.0
29	303.89	26305464.0
...	...	...
6961	67.54	0.0
6962	77.25	0.0
6963	222.95	0.0
6964	824.70	0.0
6965	83.59	0.0
6966	11.73	0.0
6967	72.19	0.0
6968	159.70	0.0
6969	310.14	0.0
6970	191.55	0.0
6971	5.93	0.0
6972	302.08	0.0
6973	319.64	0.0
6974	14.85	0.0
6975	84.92	0.0
6976	260.02	0.0
6977	385.11	0.0
6978	111.71	0.0
6979	553.46	0.0
6980	322.09	0.0
6981	90.38	0.0
6982	323.82	0.0
6983	356.37	0.0
6984	299.36	0.0
6985	11.61	0.0
6986	84.52	0.0
6987	63.66	0.0
6988	506.84	0.0
6989	518.20	0.0
6990	203.19	0.0