Connecting to Database

In [6]:
import pandas as pd
import numpy as np
terror = pd.read_csv('file.csv', encoding='ISO-8859-1')
cleanedforuse = terror.filter(['imonth', 'iday', 'region','property','propextent','attacktype1','weaptype1','nperps','success','multiple','specificity'])
final = cleanedforuse[~np.isnan(cleanedforuse).any(axis=1)]

/Users/chloe/anaconda/lib/python3.6/site-packages/IPython/core/ DtypeWarning: Columns (4,61,62,66,116,117,123) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

In [7]:

imonth iday region property propextent attacktype1 weaptype1 nperps success multiple specificity
5 1 1 1 1 3.0 2 5 -99.0 1 0 1
7 1 2 1 1 3.0 3 6 -99.0 1 0 1
8 1 2 1 1 3.0 7 8 1.0 1 0 1
9 1 3 1 1 3.0 7 8 1.0 1 0 1
11 1 6 1 1 3.0 7 8 -99.0 1 0 1

In [8]:
import sqlite3
conn = sqlite3.connect('Terrorisks.db')

In [9]:
final.to_sql('final',con=conn, flavor='sqlite', if_exists='replace')

/Users/chloe/anaconda/lib/python3.6/site-packages/pandas/io/ FutureWarning: the 'flavor' parameter is deprecated and will be removed in a future version, as 'sqlite' is the only supported option when SQLAlchemy is not installed.

In [10]:
df = pd.read_sql_query('SELECT * FROM final', conn)

In [11]:

index imonth iday region property propextent attacktype1 weaptype1 nperps success multiple specificity
0 5 1 1 1 1 3.0 2 5 -99.0 1 0 1
1 7 1 2 1 1 3.0 3 6 -99.0 1 0 1
2 8 1 2 1 1 3.0 7 8 1.0 1 0 1
3 9 1 3 1 1 3.0 7 8 1.0 1 0 1
4 11 1 6 1 1 3.0 7 8 -99.0 1 0 1
5 13 1 9 1 1 3.0 7 8 -99.0 1 0 1
6 14 1 9 1 1 2.0 7 8 -99.0 1 0 1
7 17 1 12 1 1 3.0 3 6 -99.0 1 0 1
8 18 1 12 1 -9 4.0 3 6 -99.0 1 0 1
9 19 1 13 1 1 3.0 7 8 -99.0 1 0 1


In [12]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from patsy import dmatrices
from sklearn.linear_model import LogisticRegression
from sklearn.cross_validation import train_test_split
from sklearn import metrics
from sklearn.cross_validation import cross_val_score
from sklearn.metrics import roc_curve, auc

/Users/chloe/anaconda/lib/python3.6/site-packages/sklearn/ DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)

In [14]:
y, X = dmatrices('success ~ C(imonth) + C(iday) + region + C(property) + C(propextent) + C(attacktype1) + C(weaptype1)+ C(nperps) + specificity', df, return_type="dataframe")

       Intercept  C(imonth)[T.2]  C(imonth)[T.3]  C(imonth)[T.4]  \
0            1.0             0.0             0.0             0.0   
1            1.0             0.0             0.0             0.0   
2            1.0             0.0             0.0             0.0   
3            1.0             0.0             0.0             0.0   
4            1.0             0.0             0.0             0.0   
5            1.0             0.0             0.0             0.0   
6            1.0             0.0             0.0             0.0   
7            1.0             0.0             0.0             0.0   
8            1.0             0.0             0.0             0.0   
9            1.0             0.0             0.0             0.0   
10           1.0             0.0             0.0             0.0   
11           1.0             0.0             0.0             0.0   
12           1.0             0.0             0.0             0.0   
13           1.0             0.0             0.0             0.0   
14           1.0             0.0             0.0             0.0   
15           1.0             0.0             0.0             0.0   
16           1.0             0.0             0.0             0.0   
17           1.0             0.0             0.0             0.0   
18           1.0             0.0             0.0             0.0   
19           1.0             0.0             0.0             0.0   
20           1.0             0.0             0.0             0.0   
21           1.0             0.0             0.0             0.0   
22           1.0             0.0             0.0             0.0   
23           1.0             0.0             0.0             0.0   
24           1.0             1.0             0.0             0.0   
25           1.0             1.0             0.0             0.0   
26           1.0             1.0             0.0             0.0   
27           1.0             1.0             0.0             0.0   
28           1.0             1.0             0.0             0.0   
29           1.0             1.0             0.0             0.0   
...          ...             ...             ...             ...   
35460        1.0             0.0             0.0             0.0   
35461        1.0             0.0             0.0             0.0   
35462        1.0             0.0             0.0             0.0   
35463        1.0             0.0             0.0             0.0   
35464        1.0             0.0             0.0             0.0   
35465        1.0             0.0             0.0             0.0   
35466        1.0             0.0             0.0             0.0   
35467        1.0             0.0             0.0             0.0   
35468        1.0             0.0             0.0             0.0   
35469        1.0             0.0             0.0             0.0   
35470        1.0             0.0             0.0             0.0   
35471        1.0             0.0             0.0             0.0   
35472        1.0             0.0             0.0             0.0   
35473        1.0             0.0             0.0             0.0   
35474        1.0             0.0             0.0             0.0   
35475        1.0             0.0             0.0             0.0   
35476        1.0             0.0             0.0             0.0   
35477        1.0             0.0             0.0             0.0   
35478        1.0             0.0             0.0             0.0   
35479        1.0             0.0             0.0             0.0   
35480        1.0             0.0             0.0             0.0   
35481        1.0             0.0             0.0             0.0   
35482        1.0             0.0             0.0             0.0   
35483        1.0             0.0             0.0             0.0   
35484        1.0             0.0             0.0             0.0   
35485        1.0             0.0             0.0             0.0   
35486        1.0             0.0             0.0             0.0   
35487        1.0             0.0             0.0             0.0   
35488        1.0             0.0             0.0             0.0   
35489        1.0             0.0             0.0             0.0   

       C(imonth)[T.5]  C(imonth)[T.6]  C(imonth)[T.7]  C(imonth)[T.8]  \
0                 0.0             0.0             0.0             0.0   
1                 0.0             0.0             0.0             0.0   
2                 0.0             0.0             0.0             0.0   
3                 0.0             0.0             0.0             0.0   
4                 0.0             0.0             0.0             0.0   
5                 0.0             0.0             0.0             0.0   
6                 0.0             0.0             0.0             0.0   
7                 0.0             0.0             0.0             0.0   
8                 0.0             0.0             0.0             0.0   
9                 0.0             0.0             0.0             0.0   
10                0.0             0.0             0.0             0.0   
11                0.0             0.0             0.0             0.0   
12                0.0             0.0             0.0             0.0   
13                0.0             0.0             0.0             0.0   
14                0.0             0.0             0.0             0.0   
15                0.0             0.0             0.0             0.0   
16                0.0             0.0             0.0             0.0   
17                0.0             0.0             0.0             0.0   
18                0.0             0.0             0.0             0.0   
19                0.0             0.0             0.0             0.0   
20                0.0             0.0             0.0             0.0   
21                0.0             0.0             0.0             0.0   
22                0.0             0.0             0.0             0.0   
23                0.0             0.0             0.0             0.0   
24                0.0             0.0             0.0             0.0   
25                0.0             0.0             0.0             0.0   
26                0.0             0.0             0.0             0.0   
27                0.0             0.0             0.0             0.0   
28                0.0             0.0             0.0             0.0   
29                0.0             0.0             0.0             0.0   
...               ...             ...             ...             ...   
35460             0.0             0.0             0.0             0.0   
35461             0.0             0.0             0.0             0.0   
35462             0.0             0.0             0.0             0.0   
35463             0.0             0.0             0.0             0.0   
35464             0.0             0.0             0.0             0.0   
35465             0.0             0.0             0.0             0.0   
35466             0.0             0.0             0.0             0.0   
35467             0.0             0.0             0.0             0.0   
35468             0.0             0.0             0.0             0.0   
35469             0.0             0.0             0.0             0.0   
35470             0.0             0.0             0.0             0.0   
35471             0.0             0.0             0.0             0.0   
35472             0.0             0.0             0.0             0.0   
35473             0.0             0.0             0.0             0.0   
35474             0.0             0.0             0.0             0.0   
35475             0.0             0.0             0.0             0.0   
35476             0.0             0.0             0.0             0.0   
35477             0.0             0.0             0.0             0.0   
35478             0.0             0.0             0.0             0.0   
35479             0.0             0.0             0.0             0.0   
35480             0.0             0.0             0.0             0.0   
35481             0.0             0.0             0.0             0.0   
35482             0.0             0.0             0.0             0.0   
35483             0.0             0.0             0.0             0.0   
35484             0.0             0.0             0.0             0.0   
35485             0.0             0.0             0.0             0.0   
35486             0.0             0.0             0.0             0.0   
35487             0.0             0.0             0.0             0.0   
35488             0.0             0.0             0.0             0.0   
35489             0.0             0.0             0.0             0.0   

       C(imonth)[T.9]  C(imonth)[T.10]     ...       C(nperps)[T.800.0]  \
0                 0.0              0.0     ...                      0.0   
1                 0.0              0.0     ...                      0.0   
2                 0.0              0.0     ...                      0.0   
3                 0.0              0.0     ...                      0.0   
4                 0.0              0.0     ...                      0.0   
5                 0.0              0.0     ...                      0.0   
6                 0.0              0.0     ...                      0.0   
7                 0.0              0.0     ...                      0.0   
8                 0.0              0.0     ...                      0.0   
9                 0.0              0.0     ...                      0.0   
10                0.0              0.0     ...                      0.0   
11                0.0              0.0     ...                      0.0   
12                0.0              0.0     ...                      0.0   
13                0.0              0.0     ...                      0.0   
14                0.0              0.0     ...                      0.0   
15                0.0              0.0     ...                      0.0   
16                0.0              0.0     ...                      0.0   
17                0.0              0.0     ...                      0.0   
18                0.0              0.0     ...                      0.0   
19                0.0              0.0     ...                      0.0   
20                0.0              0.0     ...                      0.0   
21                0.0              0.0     ...                      0.0   
22                0.0              0.0     ...                      0.0   
23                0.0              0.0     ...                      0.0   
24                0.0              0.0     ...                      0.0   
25                0.0              0.0     ...                      0.0   
26                0.0              0.0     ...                      0.0   
27                0.0              0.0     ...                      0.0   
28                0.0              0.0     ...                      0.0   
29                0.0              0.0     ...                      0.0   
...               ...              ...     ...                      ...   
35460             0.0              0.0     ...                      0.0   
35461             0.0              0.0     ...                      0.0   
35462             0.0              0.0     ...                      0.0   
35463             0.0              0.0     ...                      0.0   
35464             0.0              0.0     ...                      0.0   
35465             0.0              0.0     ...                      0.0   
35466             0.0              0.0     ...                      0.0   
35467             0.0              0.0     ...                      0.0   
35468             0.0              0.0     ...                      0.0   
35469             0.0              0.0     ...                      0.0   
35470             0.0              0.0     ...                      0.0   
35471             0.0              0.0     ...                      0.0   
35472             0.0              0.0     ...                      0.0   
35473             0.0              0.0     ...                      0.0   
35474             0.0              0.0     ...                      0.0   
35475             0.0              0.0     ...                      0.0   
35476             0.0              0.0     ...                      0.0   
35477             0.0              0.0     ...                      0.0   
35478             0.0              0.0     ...                      0.0   
35479             0.0              0.0     ...                      0.0   
35480             0.0              0.0     ...                      0.0   
35481             0.0              0.0     ...                      0.0   
35482             0.0              0.0     ...                      0.0   
35483             0.0              0.0     ...                      0.0   
35484             0.0              0.0     ...                      0.0   
35485             0.0              0.0     ...                      0.0   
35486             0.0              0.0     ...                      0.0   
35487             0.0              0.0     ...                      0.0   
35488             0.0              0.0     ...                      0.0   
35489             0.0              0.0     ...                      0.0   

       C(nperps)[T.900.0]  C(nperps)[T.1000.0]  C(nperps)[T.1200.0]  \
0                     0.0                  0.0                  0.0   
1                     0.0                  0.0                  0.0   
2                     0.0                  0.0                  0.0   
3                     0.0                  0.0                  0.0   
4                     0.0                  0.0                  0.0   
5                     0.0                  0.0                  0.0   
6                     0.0                  0.0                  0.0   
7                     0.0                  0.0                  0.0   
8                     0.0                  0.0                  0.0   
9                     0.0                  0.0                  0.0   
10                    0.0                  0.0                  0.0   
11                    0.0                  0.0                  0.0   
12                    0.0                  0.0                  0.0   
13                    0.0                  0.0                  0.0   
14                    0.0                  0.0                  0.0   
15                    0.0                  0.0                  0.0   
16                    0.0                  0.0                  0.0   
17                    0.0                  0.0                  0.0   
18                    0.0                  0.0                  0.0   
19                    0.0                  0.0                  0.0   
20                    0.0                  0.0                  0.0   
21                    0.0                  0.0                  0.0   
22                    0.0                  0.0                  0.0   
23                    0.0                  0.0                  0.0   
24                    0.0                  0.0                  0.0   
25                    0.0                  0.0                  0.0   
26                    0.0                  0.0                  0.0   
27                    0.0                  0.0                  0.0   
28                    0.0                  0.0                  0.0   
29                    0.0                  0.0                  0.0   
...                   ...                  ...                  ...   
35460                 0.0                  0.0                  0.0   
35461                 0.0                  0.0                  0.0   
35462                 0.0                  0.0                  0.0   
35463                 0.0                  0.0                  0.0   
35464                 0.0                  0.0                  0.0   
35465                 0.0                  0.0                  0.0   
35466                 0.0                  0.0                  0.0   
35467                 0.0                  0.0                  0.0   
35468                 0.0                  0.0                  0.0   
35469                 0.0                  0.0                  0.0   
35470                 0.0                  0.0                  0.0   
35471                 0.0                  0.0                  0.0   
35472                 0.0                  0.0                  0.0   
35473                 0.0                  0.0                  0.0   
35474                 0.0                  0.0                  0.0   
35475                 0.0                  0.0                  0.0   
35476                 0.0                  0.0                  0.0   
35477                 0.0                  0.0                  0.0   
35478                 0.0                  0.0                  0.0   
35479                 0.0                  0.0                  0.0   
35480                 0.0                  0.0                  0.0   
35481                 0.0                  0.0                  0.0   
35482                 0.0                  0.0                  0.0   
35483                 0.0                  0.0                  0.0   
35484                 0.0                  0.0                  0.0   
35485                 0.0                  0.0                  0.0   
35486                 0.0                  0.0                  0.0   
35487                 0.0                  0.0                  0.0   
35488                 0.0                  0.0                  0.0   
35489                 0.0                  0.0                  0.0   

       C(nperps)[T.1500.0]  C(nperps)[T.2000.0]  C(nperps)[T.3000.0]  \
0                      0.0                  0.0                  0.0   
1                      0.0                  0.0                  0.0   
2                      0.0                  0.0                  0.0   
3                      0.0                  0.0                  0.0   
4                      0.0                  0.0                  0.0   
5                      0.0                  0.0                  0.0   
6                      0.0                  0.0                  0.0   
7                      0.0                  0.0                  0.0   
8                      0.0                  0.0                  0.0   
9                      0.0                  0.0                  0.0   
10                     0.0                  0.0                  0.0   
11                     0.0                  0.0                  0.0   
12                     0.0                  0.0                  0.0   
13                     0.0                  0.0                  0.0   
14                     0.0                  0.0                  0.0   
15                     0.0                  0.0                  0.0   
16                     0.0                  0.0                  0.0   
17                     0.0                  0.0                  0.0   
18                     0.0                  0.0                  0.0   
19                     0.0                  0.0                  0.0   
20                     0.0                  0.0                  0.0   
21                     0.0                  0.0                  0.0   
22                     0.0                  0.0                  0.0   
23                     0.0                  0.0                  0.0   
24                     0.0                  0.0                  0.0   
25                     0.0                  0.0                  0.0   
26                     0.0                  0.0                  0.0   
27                     0.0                  0.0                  0.0   
28                     0.0                  0.0                  0.0   
29                     0.0                  0.0                  0.0   
...                    ...                  ...                  ...   
35460                  0.0                  0.0                  0.0   
35461                  0.0                  0.0                  0.0   
35462                  0.0                  0.0                  0.0   
35463                  0.0                  0.0                  0.0   
35464                  0.0                  0.0                  0.0   
35465                  0.0                  0.0                  0.0   
35466                  0.0                  0.0                  0.0   
35467                  0.0                  0.0                  0.0   
35468                  0.0                  0.0                  0.0   
35469                  0.0                  0.0                  0.0   
35470                  0.0                  0.0                  0.0   
35471                  0.0                  0.0                  0.0   
35472                  0.0                  0.0                  0.0   
35473                  0.0                  0.0                  0.0   
35474                  0.0                  0.0                  0.0   
35475                  0.0                  0.0                  0.0   
35476                  0.0                  0.0                  0.0   
35477                  0.0                  0.0                  0.0   
35478                  0.0                  0.0                  0.0   
35479                  0.0                  0.0                  0.0   
35480                  0.0                  0.0                  0.0   
35481                  0.0                  0.0                  0.0   
35482                  0.0                  0.0                  0.0   
35483                  0.0                  0.0                  0.0   
35484                  0.0                  0.0                  0.0   
35485                  0.0                  0.0                  0.0   
35486                  0.0                  0.0                  0.0   
35487                  0.0                  0.0                  0.0   
35488                  0.0                  0.0                  0.0   
35489                  0.0                  0.0                  0.0   

       C(nperps)[T.5000.0]  region  specificity  
0                      0.0     1.0          1.0  
1                      0.0     1.0          1.0  
2                      0.0     1.0          1.0  
3                      0.0     1.0          1.0  
4                      0.0     1.0          1.0  
5                      0.0     1.0          1.0  
6                      0.0     1.0          1.0  
7                      0.0     1.0          1.0  
8                      0.0     1.0          1.0  
9                      0.0     1.0          1.0  
10                     0.0     1.0          1.0  
11                     0.0     3.0          1.0  
12                     0.0     1.0          1.0  
13                     0.0     1.0          1.0  
14                     0.0     1.0          1.0  
15                     0.0     1.0          1.0  
16                     0.0     1.0          1.0  
17                     0.0     1.0          1.0  
18                     0.0     1.0          1.0  
19                     0.0     1.0          1.0  
20                     0.0     1.0          1.0  
21                     0.0     1.0          1.0  
22                     0.0     1.0          1.0  
23                     0.0     1.0          1.0  
24                     0.0     1.0          1.0  
25                     0.0     1.0          1.0  
26                     0.0     1.0          1.0  
27                     0.0     1.0          1.0  
28                     0.0     1.0          1.0  
29                     0.0     1.0          1.0  
...                    ...     ...          ...  
35460                  0.0     5.0          3.0  
35461                  0.0     5.0          4.0  
35462                  0.0     6.0          1.0  
35463                  0.0    10.0          1.0  
35464                  0.0    10.0          1.0  
35465                  0.0     6.0          1.0  
35466                  0.0     6.0          1.0  
35467                  0.0     6.0          3.0  
35468                  0.0     5.0          1.0  
35469                  0.0     6.0          1.0  
35470                  0.0     6.0          3.0  
35471                  0.0     6.0          3.0  
35472                  0.0     6.0          1.0  
35473                  0.0     6.0          3.0  
35474                  0.0    10.0          1.0  
35475                  0.0    10.0          1.0  
35476                  0.0    10.0          1.0  
35477                  0.0    11.0          4.0  
35478                  0.0     6.0          3.0  
35479                  0.0     6.0          3.0  
35480                  0.0     6.0          3.0  
35481                  0.0    10.0          1.0  
35482                  0.0    10.0          1.0  
35483                  0.0    10.0          4.0  
35484                  0.0    10.0          2.0  
35485                  0.0    10.0          1.0  
35486                  0.0    10.0          4.0  
35487                  0.0     9.0          1.0  
35488                  0.0     8.0          1.0  
35489                  0.0    11.0          1.0  

[35490 rows x 150 columns]

In [24]:
y = np.ravel(y)
# instantiate a logistic regression model, and fit with X and y
model = LogisticRegression()
model =, y)
# what percentage had multiple?
b = y.mean()
# check the accuracy on the training set
a = model.score(X, y)

# evaluate the model by splitting into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
model2 = LogisticRegression(), y_train)
# predict class labels for the test set
predicted = model2.predict(X_test)
print (predicted)
# generate class probabilities
probs = model2.predict_proba(X_test)
print (probs)
# generate evaluation metrics
print (metrics.accuracy_score(y_test, predicted))
print (metrics.roc_auc_score(y_test, probs[:, 1]))
print (metrics.confusion_matrix(y_test, predicted))
print (metrics.classification_report(y_test, predicted))
scores = cross_val_score(LogisticRegression(), X, y, scoring='accuracy', cv=10)
print (scores)
print (scores.mean())
false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, predicted)
roc_auc = auc(false_positive_rate, true_positive_rate)
print('AUC = %0.4f'% roc_auc)
plt.title('Receiver Operating Characteristic')
plt.plot(false_positive_rate, true_positive_rate, 'b',label='AUC = %0.2f'% roc_auc)
plt.legend(loc='lower right')
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')

[ 1.  1.  1. ...,  1.  1.  1.]
[[ 0.01257728  0.98742272]
 [ 0.01640847  0.98359153]
 [ 0.01318311  0.98681689]
 [ 0.0046974   0.9953026 ]
 [ 0.00399588  0.99600412]
 [ 0.00653373  0.99346627]]
[[  222   187]
 [  107 10131]]
             precision    recall  f1-score   support

        0.0       0.67      0.54      0.60       409
        1.0       0.98      0.99      0.99     10238

avg / total       0.97      0.97      0.97     10647

[ 0.96365173  0.95886165  0.97351367  0.97210482  0.97295013  0.97633136
  0.97097774  0.96844181  0.97266836  0.96816005]
AUC = 0.7662

In [ ]:

Logistic Regression - Success

Logistic Regression - MULTIPLE

In [25]:
y, X = dmatrices('multiple ~ C(imonth) + C(iday) + region + C(property) + C(propextent) + C(attacktype1) + C(weaptype1)+ C(nperps) + specificity', df, return_type="dataframe")

In [26]:
y = np.ravel(y)
# instantiate a logistic regression model, and fit with X and y
model = LogisticRegression()
model =, y)
# what percentage had multiple?
b = y.mean()
# check the accuracy on the training set
a = model.score(X, y)

# evaluate the model by splitting into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
model2 = LogisticRegression(), y_train)
# predict class labels for the test set
predicted = model2.predict(X_test)
print (predicted)
# generate class probabilities
probs = model2.predict_proba(X_test)
print (probs)
# generate evaluation metrics
print (metrics.accuracy_score(y_test, predicted))
print (metrics.roc_auc_score(y_test, probs[:, 1]))
print (metrics.confusion_matrix(y_test, predicted))
print (metrics.classification_report(y_test, predicted))
scores = cross_val_score(LogisticRegression(), X, y, scoring='accuracy', cv=10)
print (scores)
print (scores.mean())
false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, predicted)
roc_auc = auc(false_positive_rate, true_positive_rate)
print('AUC = %0.4f'% roc_auc)
plt.title('Receiver Operating Characteristic')
plt.plot(false_positive_rate, true_positive_rate, 'b',label='AUC = %0.2f'% roc_auc)
plt.legend(loc='lower right')
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')

[ 0.  0.  0. ...,  0.  0.  0.]
[[ 0.85340288  0.14659712]
 [ 0.81828894  0.18171106]
 [ 0.78779969  0.21220031]
 [ 0.91212401  0.08787599]
 [ 0.81110209  0.18889791]
 [ 0.84036816  0.15963184]]
[[8859   10]
 [1766   12]]
             precision    recall  f1-score   support

        0.0       0.83      1.00      0.91      8869
        1.0       0.55      0.01      0.01      1778

avg / total       0.79      0.83      0.76     10647

[ 0.82619718  0.82901408  0.83042254  0.82952944  0.82981121  0.83009298
  0.83009298  0.8277903   0.82807215  0.82919955]
AUC = 0.5028


Random Forest- MULTIPLE

In [31]:
import numpy as np
from sklearn import preprocessing
from sklearn.metrics import roc_curve, auc
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import pandas as pd

In [56]:
y = df['multiple']

In [57]:
X = df.filter(['imonth', 'iday', 'region','property',
Xone= pd.get_dummies(X, prefix='month', columns=['imonth'])
Xtwo= pd.get_dummies(Xone, prefix='day', columns=['iday'])
Xthree= pd.get_dummies(Xtwo, prefix='region', columns=['region'])
Xfour= pd.get_dummies(Xthree, prefix='attacktype', columns=['attacktype1'])
Xfive= pd.get_dummies(Xfour, prefix='weapontype', columns=['weaptype1'])
Xsix= pd.get_dummies(Xfive, prefix='specificity', columns=['specificity'])

In [ ]:

In [ ]:

In [58]:
features_train, features_test,target_train, target_test = train_test_split(Xsix,y, test_size = 0.2,random_state=0)

In [59]:
print("Benchmark: " )


In [60]:
#Random Forest
forest = features_train, target_train)
output = forest.predict(features_test).astype(int)
forest.score(features_train, target_train )


In [49]:
false_positive_rate, true_positive_rate, thresholds = roc_curve(target_test, output)
roc_auc = auc(false_positive_rate, true_positive_rate)
print('AUC = %0.4f'% roc_auc)
plt.title('Receiver Operating Characteristic')
plt.plot(false_positive_rate, true_positive_rate, 'b',label='AUC = %0.2f'% roc_auc)
plt.legend(loc='lower right')
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')

AUC = 0.7165

In [50]:
scores = cross_val_score(forest, X, y, scoring='accuracy', cv=10)
print (scores)
print (scores.mean())

[ 0.82056338  0.79070423  0.80957746  0.79120879  0.78472809  0.77571147
  0.79064525  0.75704622  0.77874859  0.78889515]

In [ ]:

In [ ]:

Random Forest- SUCCESS

In [43]:
y = df['success']
X = df.filter(['imonth', 'iday', 'region','property',
features_train, features_test,target_train, target_test = train_test_split(X,y, test_size = 0.2,random_state=0)
#Random Forest
forest = features_train, target_train)
output = forest.predict(features_test).astype(int)
score = forest.score(features_train, target_train)
print("Benchmark: " )
print('Our Accuracy:')
false_positive_rate, true_positive_rate, thresholds = roc_curve(target_test, output)
roc_auc = auc(false_positive_rate, true_positive_rate)
print('AUC = %0.4f'% roc_auc)
plt.title('Receiver Operating Characteristic')
plt.plot(false_positive_rate, true_positive_rate, 'b',label='AUC = %0.2f'% roc_auc)
plt.legend(loc='lower right')
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')

Our Accuracy:
AUC = 0.7688

Preventing Overfitting of the tree for multiple model

The results are different now due to the different sample used from here as compared to when we built the model shown during presentation; as such, results may vary slightly

In [53]:
from sklearn.tree import _tree

def leaf_depths(tree, node_id = 0):
     tree.children_left and tree.children_right store ids
     of left and right chidren of a given node
     left_child = tree.children_left[node_id]
     right_child = tree.children_right[node_id]

     If a given node is terminal, 
     both left and right children are set to _tree.TREE_LEAF
     if left_child == _tree.TREE_LEAF:
         Set depth of terminal nodes to 0
         depths = np.array([0])

         Get depths of left and right children and
         increment them by 1
         left_depths = leaf_depths(tree, left_child) + 1
         right_depths = leaf_depths(tree, right_child) + 1
         depths = np.append(left_depths, right_depths)
     return depths

def leaf_samples(tree, node_id = 0):
     left_child = tree.children_left[node_id]
     right_child = tree.children_right[node_id]

     if left_child == _tree.TREE_LEAF:
         samples = np.array([tree.n_node_samples[node_id]])

         left_samples = leaf_samples(tree, left_child)
         right_samples = leaf_samples(tree, right_child)

         samples = np.append(left_samples, right_samples)

     return samples

def draw_tree(ensemble, tree_id=0):


     tree = ensemble.estimators_[tree_id].tree_

     depths = leaf_depths(tree)
     plt.hist(depths, histtype='step', color='#9933ff', 
              bins=range(min(depths), max(depths)+1))

     plt.xlabel("Depth of leaf nodes (tree %s)" % tree_id)
     samples = leaf_samples(tree)
     plt.hist(samples, histtype='step', color='#3399ff', 
              bins=range(min(samples), max(samples)+1))
     plt.xlabel("Number of samples in leaf nodes (tree %s)" % tree_id)
def draw_ensemble(ensemble):


     depths_all = np.array([], dtype=int)

     for x in ensemble.estimators_:
         tree = x.tree_
         depths = leaf_depths(tree)
         depths_all = np.append(depths_all, depths)
         plt.hist(depths, histtype='step', color='#ddaaff', 
                  bins=range(min(depths), max(depths)+1))

     plt.hist(depths_all, histtype='step', color='#9933ff', 
              bins=range(min(depths_all), max(depths_all)+1), 
     plt.xlabel("Depth of leaf nodes")
     samples_all = np.array([], dtype=int)
     for x in ensemble.estimators_:
         tree = x.tree_
         samples = leaf_samples(tree)
         samples_all = np.append(samples_all, samples)
         plt.hist(samples, histtype='step', color='#aaddff', 
                  bins=range(min(samples), max(samples)+1))
     plt.hist(samples_all, histtype='step', color='#3399ff', 
              bins=range(min(samples_all), max(samples_all)+1), 
     plt.xlabel("Number of samples in leaf nodes")

In [61]:

In [62]:

In [64]:
y = df['multiple']
X = df.filter(['imonth', 'iday', 'region','property',
features_train, features_test,target_train, target_test = train_test_split(X,y, test_size = 0.2,random_state=0)
#Random Forest
forest=RandomForestClassifier(n_estimators=10, max_depth = 16)
forest = features_train, target_train)
output = forest.predict(features_test).astype(int)
score = forest.score(features_train, target_train)
print("Benchmark: " )
print('Our Accuracy:')
false_positive_rate, true_positive_rate, thresholds = roc_curve(target_test, output)
roc_auc = auc(false_positive_rate, true_positive_rate)
print('AUC = %0.4f'% roc_auc)
plt.title('Receiver Operating Characteristic')
plt.plot(false_positive_rate, true_positive_rate, 'b',label='AUC = %0.2f'% roc_auc)
plt.legend(loc='lower right')
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')

Our Accuracy:
AUC = 0.6205


In [ ]:
import pandas as pd

In [ ]:
df = pd.read_csv('/Users/Laishumin/Datasets/globalterrorism.csv', encoding='ISO-8859-1',low_memory=False)


In [ ]:
df_dummies1= pd.get_dummies(clean, prefix='month', columns=['imonth'])

In [ ]:
df_dummies2= pd.get_dummies(df_dummies1, prefix='region', columns=['region'])

In [ ]:
df_dummies3= pd.get_dummies(df_dummies2, prefix='specificity', columns=['specificity'])

In [ ]:
df_dummies4= pd.get_dummies(df_dummies3, prefix='attack_type', columns=['attacktype1'])

In [ ]:
df_dummies5= pd.get_dummies(df_dummies4, prefix='main_weapon_type', columns=['weaptype1'])

In [ ]:
data = df_dummies5
del data['iyear']
del data['iday']
del data['guncertain1']
del data['ingroup']
del data['doubtterr']

In [ ]:
names = list(data.columns.values)

In [ ]:
lift_multiple = []
for i in names:
    num_Feature = 0
    Count = 0
    for sample in data1[i]:
        thing = data1[i].astype(str).str.contains('1')
        if (thing.iloc[Count] == True):  
            num_Feature += 1
            Count +=1
            Count +=1
    print("{0} ".format(num_Feature) + " from " + i)

    rule_valid = 0
    rule_invalid = 0
    for j in range(len(data1)):
        if data1.iloc[j][i] == 1:
            if data1.iloc[j].multiple == 1:
                rule_valid += 1
                rule_invalid += 1
    print("{0} cases of the rule being valid were discovered".format(rule_valid))
    print("{0} cases of the rule being invalid were discovered".format(rule_invalid))

    # Now we have all the information needed to compute Support and Confidence
    support = rule_valid  # The Support is the number of times the rule is discovered.
    if (num_Feature == 0):

        confidence = (rule_valid) / (num_Feature) 
        lift = confidence / 0.13
    print(i + '-->Multiple')
    print("The support is {0}, the confidence is {1:.3f}, and the lift is {2:.3f}.".format(support, confidence, lift))
    print("As a percentage, the confidence is {0:.1f}%.".format(100 * confidence))

In [ ]:
lift_multiple_pd = pd.DataFrame(


In [ ]:
graph = lift_multiple_pd.sort(['Lift'], ascending=[0])


In [ ]:
%matplotlib inline

Violin Plot Visualisastions

In [8]:
import numpy as np
import seaborn as sns
import pandas as pd
sns.violinplot(x="weaptype1", y="success", data=df, palette="Set3")

<matplotlib.axes._subplots.AxesSubplot at 0x1170ef048>

In [9]:
sns.violinplot(x="propextent", y="multiple", data=df, palette="Set3")

<matplotlib.axes._subplots.AxesSubplot at 0x1170ef048>

In [10]:
sns.violinplot(x="imonth", y="multiple", data=df, palette="Set3")

<matplotlib.axes._subplots.AxesSubplot at 0x1170ef048>

In [11]:
sns.violinplot(x="property", y="multiple", data=df, palette="Set3")

<matplotlib.axes._subplots.AxesSubplot at 0x1170ef048>

In [ ]:

In [ ]: