Preliminary Analysis


In [1]:
import csv
with open("file.csv", encoding="ISO-8859-1") as csv_data:
    parsed_data = list(csv.DictReader(csv_data, delimiter=',', quotechar='"'))
    all_attacks = len(parsed_data)

Num of Attack from 2010 to 2015


In [2]:
print(len([x for x in parsed_data if 2010<=int(x['iyear'])<=2015]))


62022

Num of Attack from 2000 to 2010


In [3]:
print(len([x for x in parsed_data if 2000<=int(x['iyear'])<=2015]))


87009

10 countries with the highest number of terrorist attacks:


In [4]:
from collections import Counter
countries_data = Counter([x['country_txt'] for x in parsed_data])
for country, attacks in countries_data.most_common(10):
    print("{a} attacks in {c} - {p:.2f}% of all attacks".format(a=attacks, c=country, p=attacks/all_attacks*100))


18770 attacks in Iraq - 11.97% of all attacks
12768 attacks in Pakistan - 8.14% of all attacks
9940 attacks in India - 6.34% of all attacks
9690 attacks in Afghanistan - 6.18% of all attacks
8077 attacks in Colombia - 5.15% of all attacks
6085 attacks in Peru - 3.88% of all attacks
5576 attacks in Philippines - 3.56% of all attacks
5320 attacks in El Salvador - 3.39% of all attacks
4992 attacks in United Kingdom - 3.18% of all attacks
3557 attacks in Turkey - 2.27% of all attacks

Distribution of terrorist attacks in areas of the world:


In [5]:
from collections import Counter
from matplotlib import pyplot
regions_data = Counter([x['region_txt'] for x in parsed_data])
pyplot.pie(list(regions_data.values()), labels=list(regions_data.keys()), radius=2)
for region, attacks in Counter([x['region_txt'] for x in parsed_data]).most_common():
    print("{a} attacks in {r} - {p:.2f}% of all attacks".format(a=attacks, r=region, p=attacks/all_attacks*100))
pyplot.show()


40422 attacks in Middle East & North Africa - 25.78% of all attacks
37841 attacks in South Asia - 24.14% of all attacks
18628 attacks in South America - 11.88% of all attacks
16020 attacks in Western Europe - 10.22% of all attacks
13434 attacks in Sub-Saharan Africa - 8.57% of all attacks
10360 attacks in Southeast Asia - 6.61% of all attacks
10337 attacks in Central America & Caribbean - 6.59% of all attacks
4892 attacks in Eastern Europe - 3.12% of all attacks
3268 attacks in North America - 2.08% of all attacks
786 attacks in East Asia - 0.50% of all attacks
538 attacks in Central Asia - 0.34% of all attacks
246 attacks in Australasia & Oceania - 0.16% of all attacks

Attack in Singapore / Attack not in Singapore


In [6]:
print(len([x for x in parsed_data if x['country_txt']=="Singapore"])/len([x for x in parsed_data if x['country_txt']!="Singapore"]))


4.465282429113641e-05

10 cities with the highest number of terrorist attacks:


In [7]:
for city, attacks in Counter([x['city'] for x in parsed_data if x['city']!='Unknown']).most_common(10):
    print("{a} attacks in {c} - {p:.2f}% of all attacks".format(a=attacks, c=city, p=attacks/all_attacks*100))


6237 attacks in Baghdad - 3.98% of all attacks
2530 attacks in Karachi - 1.61% of all attacks
2358 attacks in Lima - 1.50% of all attacks
2102 attacks in Belfast - 1.34% of all attacks
1614 attacks in Santiago - 1.03% of all attacks
1553 attacks in Mosul - 0.99% of all attacks
1547 attacks in San Salvador - 0.99% of all attacks
1169 attacks in Mogadishu - 0.75% of all attacks
999 attacks in Istanbul - 0.64% of all attacks
974 attacks in Bogota - 0.62% of all attacks

In [8]:
#Most attack in USA
attacks_in_usa = [x for x in parsed_data if x['country_txt']=="United States"]
for state, attacks in Counter([x['provstate'] for x in attacks_in_usa]).most_common(20):
    print("{a} attacks in {s}".format(a=attacks, s=state))


584 attacks in California
496 attacks in New York
247 attacks in Puerto Rico
145 attacks in Florida
110 attacks in Illinois
95 attacks in Washington
82 attacks in District of Columbia
63 attacks in Oregon
55 attacks in Texas
53 attacks in Massachusetts
49 attacks in Ohio
44 attacks in Michigan
43 attacks in Colorado
43 attacks in New Jersey
38 attacks in Virginia
37 attacks in Wisconsin
34 attacks in Maryland
34 attacks in Arizona
33 attacks in Missouri
30 attacks in North Carolina

In [9]:
#Percentage of terrorist attack in USA
for state, attacks in Counter([x['provstate'] for x in attacks_in_usa]).most_common(20):
    print("{p:.2f}% of all attacks in USA happened in {s}".format(a=attacks, s=state, p=attacks/len(attacks_in_usa)*100))


21.69% of all attacks in USA happened in California
18.42% of all attacks in USA happened in New York
9.17% of all attacks in USA happened in Puerto Rico
5.38% of all attacks in USA happened in Florida
4.08% of all attacks in USA happened in Illinois
3.53% of all attacks in USA happened in Washington
3.04% of all attacks in USA happened in District of Columbia
2.34% of all attacks in USA happened in Oregon
2.04% of all attacks in USA happened in Texas
1.97% of all attacks in USA happened in Massachusetts
1.82% of all attacks in USA happened in Ohio
1.63% of all attacks in USA happened in Michigan
1.60% of all attacks in USA happened in Colorado
1.60% of all attacks in USA happened in New Jersey
1.41% of all attacks in USA happened in Virginia
1.37% of all attacks in USA happened in Wisconsin
1.26% of all attacks in USA happened in Maryland
1.26% of all attacks in USA happened in Arizona
1.23% of all attacks in USA happened in Missouri
1.11% of all attacks in USA happened in North Carolina

Most Dangerous Region


In [10]:
for area, code in (("region","region_txt"),("country","country_txt"),("city","city")):
    name, i = Counter([x[code] for x in parsed_data]).most_common(1)[0]
    print("The most dangerous {a} is {n} with {i} attacks".format(a=area,n=name,i=i))


The most dangerous region is Middle East & North Africa with 40422 attacks
The most dangerous country is Iraq with 18770 attacks
The most dangerous city is Unknown with 8728 attacks

Success Rate


In [11]:
successful_attacks = len([x for x in parsed_data if int(x['success'])])
print("{s} attacks were successful, it's {p:.2f}% of all attacks".format(s=successful_attacks, p=successful_attacks/all_attacks*100))


141661 attacks were successful, it's 90.36% of all attacks

Type of Attack


In [12]:
types_data = Counter([x['attacktype1_txt'] for x in parsed_data])
for attack_type, attacks in types_data.most_common(10):
    print("{a} {t} attacks - {p:.2f}% of all attacks".format(a=attacks, t=attack_type, p=attacks/all_attacks*100))


75963 Bombing/Explosion attacks - 48.45% of all attacks
37554 Armed Assault attacks - 23.95% of all attacks
17582 Assassination attacks - 11.22% of all attacks
9115 Hostage Taking (Kidnapping) attacks - 5.81% of all attacks
8849 Facility/Infrastructure Attack attacks - 5.64% of all attacks
5490 Unknown attacks - 3.50% of all attacks
835 Hostage Taking (Barricade Incident) attacks - 0.53% of all attacks
828 Unarmed Assault attacks - 0.53% of all attacks
556 Hijacking attacks - 0.35% of all attacks

Target


In [13]:
targets_data = Counter([x['targtype1_txt'] for x in parsed_data])
for attack_target, attacks in targets_data.most_common(10):
    print("{a} attacks on {t} - {p:.2f}% of all attacks".format(a=attacks, t=attack_target, p=attacks/all_attacks*100))


35877 attacks on Private Citizens & Property - 22.88% of all attacks
22924 attacks on Military - 14.62% of all attacks
21241 attacks on Police - 13.55% of all attacks
19251 attacks on Government (General) - 12.28% of all attacks
18882 attacks on Business - 12.04% of all attacks
6419 attacks on Transportation - 4.09% of all attacks
5504 attacks on Utilities - 3.51% of all attacks
3947 attacks on Educational Institution - 2.52% of all attacks
3891 attacks on Religious Figures/Institutions - 2.48% of all attacks
3805 attacks on Unknown - 2.43% of all attacks

Attacker Nationality


In [14]:
nationalites_data = Counter([x['natlty1_txt'] for x in parsed_data])
for nationality, attacks in nationalites_data.most_common(10):
    print("{a} attacks by citizens of {n} - {p:.2f}% of all attacks".format(a=attacks, n=nationality, p=attacks/all_attacks*100))


18284 attacks by citizens of Iraq - 11.66% of all attacks
12314 attacks by citizens of Pakistan - 7.85% of all attacks
10063 attacks by citizens of India - 6.42% of all attacks
8234 attacks by citizens of Afghanistan - 5.25% of all attacks
7698 attacks by citizens of Colombia - 4.91% of all attacks
5829 attacks by citizens of Peru - 3.72% of all attacks
5371 attacks by citizens of Philippines - 3.43% of all attacks
5212 attacks by citizens of El Salvador - 3.32% of all attacks
4886 attacks by citizens of United States - 3.12% of all attacks
3869 attacks by citizens of Turkey - 2.47% of all attacks

Num of Attacks per Year


In [15]:
import numpy
attacks_by_year = Counter([x['iyear'] for x in parsed_data if int(x["iyear"])>=1985])
labels, values = zip(*sorted(attacks_by_year.items(), key=lambda x: x[0]))

indexes = numpy.arange(len(labels))
width = 1

pyplot.bar(indexes, values, width)
pyplot.xticks(indexes + width * 0.5, labels, rotation='vertical')
pyplot.show()


Weapons used


In [16]:
from collections import OrderedDict
countries = sorted(set(x['country_txt'] for x in parsed_data))
favoriteweaponsby_country = OrderedDict()
for country in countries:
    attacks_in_country = [x for x in parsed_data if x['country_txt']==country]
    favoriteweaponsby_country[country] = Counter(x['weaptype1_txt'] for x in attacks_in_country).most_common(1)[0]
for country, weapon in favoriteweaponsby_country.items():
    weapon, number = weapon
    print("The most common weapon used to perform attack in {c} is {w}, it was used {n} times".format(c=country, w=weapon, n=number))


The most common weapon used to perform attack in Afghanistan is Explosives/Bombs/Dynamite, it was used 5457 times
The most common weapon used to perform attack in Albania is Explosives/Bombs/Dynamite, it was used 49 times
The most common weapon used to perform attack in Algeria is Explosives/Bombs/Dynamite, it was used 1083 times
The most common weapon used to perform attack in Andorra is Firearms, it was used 1 times
The most common weapon used to perform attack in Angola is Explosives/Bombs/Dynamite, it was used 255 times
The most common weapon used to perform attack in Antigua and Barbuda is Incendiary, it was used 1 times
The most common weapon used to perform attack in Argentina is Explosives/Bombs/Dynamite, it was used 487 times
The most common weapon used to perform attack in Armenia is Explosives/Bombs/Dynamite, it was used 10 times
The most common weapon used to perform attack in Australia is Explosives/Bombs/Dynamite, it was used 36 times
The most common weapon used to perform attack in Austria is Explosives/Bombs/Dynamite, it was used 54 times
The most common weapon used to perform attack in Azerbaijan is Firearms, it was used 21 times
The most common weapon used to perform attack in Bahamas is Firearms, it was used 2 times
The most common weapon used to perform attack in Bahrain is Explosives/Bombs/Dynamite, it was used 107 times
The most common weapon used to perform attack in Bangladesh is Explosives/Bombs/Dynamite, it was used 790 times
The most common weapon used to perform attack in Barbados is Explosives/Bombs/Dynamite, it was used 2 times
The most common weapon used to perform attack in Belarus is Explosives/Bombs/Dynamite, it was used 8 times
The most common weapon used to perform attack in Belgium is Explosives/Bombs/Dynamite, it was used 64 times
The most common weapon used to perform attack in Belize is Firearms, it was used 4 times
The most common weapon used to perform attack in Benin is Explosives/Bombs/Dynamite, it was used 4 times
The most common weapon used to perform attack in Bhutan is Explosives/Bombs/Dynamite, it was used 5 times
The most common weapon used to perform attack in Bolivia is Explosives/Bombs/Dynamite, it was used 244 times
The most common weapon used to perform attack in Bosnia-Herzegovina is Explosives/Bombs/Dynamite, it was used 106 times
The most common weapon used to perform attack in Botswana is Explosives/Bombs/Dynamite, it was used 9 times
The most common weapon used to perform attack in Brazil is Firearms, it was used 112 times
The most common weapon used to perform attack in Brunei is Explosives/Bombs/Dynamite, it was used 1 times
The most common weapon used to perform attack in Bulgaria is Explosives/Bombs/Dynamite, it was used 35 times
The most common weapon used to perform attack in Burkina Faso is Firearms, it was used 6 times
The most common weapon used to perform attack in Burundi is Firearms, it was used 219 times
The most common weapon used to perform attack in Cambodia is Explosives/Bombs/Dynamite, it was used 125 times
The most common weapon used to perform attack in Cameroon is Firearms, it was used 60 times
The most common weapon used to perform attack in Canada is Explosives/Bombs/Dynamite, it was used 40 times
The most common weapon used to perform attack in Central African Republic is Firearms, it was used 116 times
The most common weapon used to perform attack in Chad is Firearms, it was used 38 times
The most common weapon used to perform attack in Chile is Explosives/Bombs/Dynamite, it was used 1744 times
The most common weapon used to perform attack in China is Explosives/Bombs/Dynamite, it was used 162 times
The most common weapon used to perform attack in Colombia is Firearms, it was used 3285 times
The most common weapon used to perform attack in Comoros is Incendiary, it was used 3 times
The most common weapon used to perform attack in Costa Rica is Explosives/Bombs/Dynamite, it was used 35 times
The most common weapon used to perform attack in Croatia is Explosives/Bombs/Dynamite, it was used 40 times
The most common weapon used to perform attack in Cuba is Explosives/Bombs/Dynamite, it was used 12 times
The most common weapon used to perform attack in Cyprus is Explosives/Bombs/Dynamite, it was used 105 times
The most common weapon used to perform attack in Czech Republic is Explosives/Bombs/Dynamite, it was used 13 times
The most common weapon used to perform attack in Czechoslovakia is Explosives/Bombs/Dynamite, it was used 4 times
The most common weapon used to perform attack in Democratic Republic of the Congo is Firearms, it was used 204 times
The most common weapon used to perform attack in Denmark is Explosives/Bombs/Dynamite, it was used 15 times
The most common weapon used to perform attack in Djibouti is Explosives/Bombs/Dynamite, it was used 10 times
The most common weapon used to perform attack in Dominica is Firearms, it was used 2 times
The most common weapon used to perform attack in Dominican Republic is Explosives/Bombs/Dynamite, it was used 54 times
The most common weapon used to perform attack in East Germany (GDR) is Explosives/Bombs/Dynamite, it was used 15 times
The most common weapon used to perform attack in East Timor is Firearms, it was used 7 times
The most common weapon used to perform attack in Ecuador is Explosives/Bombs/Dynamite, it was used 119 times
The most common weapon used to perform attack in Egypt is Explosives/Bombs/Dynamite, it was used 871 times
The most common weapon used to perform attack in El Salvador is Firearms, it was used 2725 times
The most common weapon used to perform attack in Equatorial Guinea is Unknown, it was used 1 times
The most common weapon used to perform attack in Eritrea is Explosives/Bombs/Dynamite, it was used 4 times
The most common weapon used to perform attack in Estonia is Explosives/Bombs/Dynamite, it was used 8 times
The most common weapon used to perform attack in Ethiopia is Explosives/Bombs/Dynamite, it was used 64 times
The most common weapon used to perform attack in Falkland Islands is Explosives/Bombs/Dynamite, it was used 1 times
The most common weapon used to perform attack in Fiji is Incendiary, it was used 7 times
The most common weapon used to perform attack in Finland is Incendiary, it was used 9 times
The most common weapon used to perform attack in France is Explosives/Bombs/Dynamite, it was used 2016 times
The most common weapon used to perform attack in French Guiana is Explosives/Bombs/Dynamite, it was used 3 times
The most common weapon used to perform attack in French Polynesia is Incendiary, it was used 2 times
The most common weapon used to perform attack in Gabon is Explosives/Bombs/Dynamite, it was used 2 times
The most common weapon used to perform attack in Gambia is Unknown, it was used 1 times
The most common weapon used to perform attack in Georgia is Explosives/Bombs/Dynamite, it was used 122 times
The most common weapon used to perform attack in Germany is Incendiary, it was used 376 times
The most common weapon used to perform attack in Ghana is Explosives/Bombs/Dynamite, it was used 8 times
The most common weapon used to perform attack in Gibraltar is Explosives/Bombs/Dynamite, it was used 1 times
The most common weapon used to perform attack in Greece is Explosives/Bombs/Dynamite, it was used 691 times
The most common weapon used to perform attack in Grenada is Firearms, it was used 3 times
The most common weapon used to perform attack in Guadeloupe is Explosives/Bombs/Dynamite, it was used 45 times
The most common weapon used to perform attack in Guatemala is Firearms, it was used 1130 times
The most common weapon used to perform attack in Guinea is Firearms, it was used 15 times
The most common weapon used to perform attack in Guinea-Bissau is Firearms, it was used 8 times
The most common weapon used to perform attack in Guyana is Firearms, it was used 11 times
The most common weapon used to perform attack in Haiti is Firearms, it was used 117 times
The most common weapon used to perform attack in Honduras is Explosives/Bombs/Dynamite, it was used 160 times
The most common weapon used to perform attack in Hong Kong is Explosives/Bombs/Dynamite, it was used 9 times
The most common weapon used to perform attack in Hungary is Explosives/Bombs/Dynamite, it was used 31 times
The most common weapon used to perform attack in Iceland is Explosives/Bombs/Dynamite, it was used 2 times
The most common weapon used to perform attack in India is Explosives/Bombs/Dynamite, it was used 4168 times
The most common weapon used to perform attack in Indonesia is Explosives/Bombs/Dynamite, it was used 281 times
The most common weapon used to perform attack in International is Explosives/Bombs/Dynamite, it was used 1 times
The most common weapon used to perform attack in Iran is Explosives/Bombs/Dynamite, it was used 339 times
The most common weapon used to perform attack in Iraq is Explosives/Bombs/Dynamite, it was used 14251 times
The most common weapon used to perform attack in Ireland is Explosives/Bombs/Dynamite, it was used 142 times
The most common weapon used to perform attack in Israel is Explosives/Bombs/Dynamite, it was used 1481 times
The most common weapon used to perform attack in Italy is Explosives/Bombs/Dynamite, it was used 652 times
The most common weapon used to perform attack in Ivory Coast is Firearms, it was used 34 times
The most common weapon used to perform attack in Jamaica is Firearms, it was used 22 times
The most common weapon used to perform attack in Japan is Incendiary, it was used 184 times
The most common weapon used to perform attack in Jordan is Explosives/Bombs/Dynamite, it was used 43 times
The most common weapon used to perform attack in Kazakhstan is Explosives/Bombs/Dynamite, it was used 7 times
The most common weapon used to perform attack in Kenya is Explosives/Bombs/Dynamite, it was used 237 times
The most common weapon used to perform attack in Kosovo is Explosives/Bombs/Dynamite, it was used 109 times
The most common weapon used to perform attack in Kuwait is Explosives/Bombs/Dynamite, it was used 38 times
The most common weapon used to perform attack in Kyrgyzstan is Explosives/Bombs/Dynamite, it was used 14 times
The most common weapon used to perform attack in Laos is Explosives/Bombs/Dynamite, it was used 13 times
The most common weapon used to perform attack in Latvia is Explosives/Bombs/Dynamite, it was used 14 times
The most common weapon used to perform attack in Lebanon is Explosives/Bombs/Dynamite, it was used 1576 times
The most common weapon used to perform attack in Lesotho is Explosives/Bombs/Dynamite, it was used 15 times
The most common weapon used to perform attack in Liberia is Firearms, it was used 18 times
The most common weapon used to perform attack in Libya is Explosives/Bombs/Dynamite, it was used 787 times
The most common weapon used to perform attack in Lithuania is Explosives/Bombs/Dynamite, it was used 6 times
The most common weapon used to perform attack in Luxembourg is Explosives/Bombs/Dynamite, it was used 15 times
The most common weapon used to perform attack in Macau is Explosives/Bombs/Dynamite, it was used 15 times
The most common weapon used to perform attack in Macedonia is Explosives/Bombs/Dynamite, it was used 54 times
The most common weapon used to perform attack in Madagascar is Explosives/Bombs/Dynamite, it was used 11 times
The most common weapon used to perform attack in Malawi is Incendiary, it was used 2 times
The most common weapon used to perform attack in Malaysia is Explosives/Bombs/Dynamite, it was used 36 times
The most common weapon used to perform attack in Maldives is Incendiary, it was used 9 times
The most common weapon used to perform attack in Mali is Explosives/Bombs/Dynamite, it was used 135 times
The most common weapon used to perform attack in Malta is Explosives/Bombs/Dynamite, it was used 12 times
The most common weapon used to perform attack in Martinique is Explosives/Bombs/Dynamite, it was used 9 times
The most common weapon used to perform attack in Mauritania is Firearms, it was used 10 times
The most common weapon used to perform attack in Mauritius is Firearms, it was used 1 times
The most common weapon used to perform attack in Mexico is Firearms, it was used 249 times
The most common weapon used to perform attack in Moldova is Explosives/Bombs/Dynamite, it was used 11 times
The most common weapon used to perform attack in Montenegro is Explosives/Bombs/Dynamite, it was used 4 times
The most common weapon used to perform attack in Morocco is Explosives/Bombs/Dynamite, it was used 17 times
The most common weapon used to perform attack in Mozambique is Firearms, it was used 167 times
The most common weapon used to perform attack in Myanmar is Explosives/Bombs/Dynamite, it was used 191 times
The most common weapon used to perform attack in Namibia is Explosives/Bombs/Dynamite, it was used 84 times
The most common weapon used to perform attack in Nepal is Explosives/Bombs/Dynamite, it was used 456 times
The most common weapon used to perform attack in Netherlands is Explosives/Bombs/Dynamite, it was used 52 times
The most common weapon used to perform attack in New Caledonia is Explosives/Bombs/Dynamite, it was used 13 times
The most common weapon used to perform attack in New Hebrides is Explosives/Bombs/Dynamite, it was used 1 times
The most common weapon used to perform attack in New Zealand is Chemical, it was used 8 times
The most common weapon used to perform attack in Nicaragua is Firearms, it was used 1469 times
The most common weapon used to perform attack in Niger is Firearms, it was used 56 times
The most common weapon used to perform attack in Nigeria is Firearms, it was used 1507 times
The most common weapon used to perform attack in North Korea is Firearms, it was used 1 times
The most common weapon used to perform attack in North Yemen is Explosives/Bombs/Dynamite, it was used 3 times
The most common weapon used to perform attack in Norway is Incendiary, it was used 4 times
The most common weapon used to perform attack in Pakistan is Explosives/Bombs/Dynamite, it was used 7213 times
The most common weapon used to perform attack in Panama is Explosives/Bombs/Dynamite, it was used 73 times
The most common weapon used to perform attack in Papua New Guinea is Firearms, it was used 46 times
The most common weapon used to perform attack in Paraguay is Firearms, it was used 43 times
The most common weapon used to perform attack in People's Republic of the Congo is Unknown, it was used 2 times
The most common weapon used to perform attack in Peru is Explosives/Bombs/Dynamite, it was used 3056 times
The most common weapon used to perform attack in Philippines is Firearms, it was used 2828 times
The most common weapon used to perform attack in Poland is Explosives/Bombs/Dynamite, it was used 23 times
The most common weapon used to perform attack in Portugal is Explosives/Bombs/Dynamite, it was used 108 times
The most common weapon used to perform attack in Qatar is Firearms, it was used 2 times
The most common weapon used to perform attack in Republic of the Congo is Firearms, it was used 14 times
The most common weapon used to perform attack in Rhodesia is Explosives/Bombs/Dynamite, it was used 40 times
The most common weapon used to perform attack in Romania is Explosives/Bombs/Dynamite, it was used 3 times
The most common weapon used to perform attack in Russia is Explosives/Bombs/Dynamite, it was used 1209 times
The most common weapon used to perform attack in Rwanda is Firearms, it was used 72 times
The most common weapon used to perform attack in Saudi Arabia is Explosives/Bombs/Dynamite, it was used 116 times
The most common weapon used to perform attack in Senegal is Firearms, it was used 76 times
The most common weapon used to perform attack in Serbia is Explosives/Bombs/Dynamite, it was used 6 times
The most common weapon used to perform attack in Serbia-Montenegro is Explosives/Bombs/Dynamite, it was used 6 times
The most common weapon used to perform attack in Seychelles is Firearms, it was used 1 times
The most common weapon used to perform attack in Sierra Leone is Firearms, it was used 61 times
The most common weapon used to perform attack in Singapore is Explosives/Bombs/Dynamite, it was used 4 times
The most common weapon used to perform attack in Slovak Republic is Explosives/Bombs/Dynamite, it was used 13 times
The most common weapon used to perform attack in Slovenia is Explosives/Bombs/Dynamite, it was used 4 times
The most common weapon used to perform attack in Solomon Islands is Firearms, it was used 3 times
The most common weapon used to perform attack in Somalia is Explosives/Bombs/Dynamite, it was used 1278 times
The most common weapon used to perform attack in South Africa is Explosives/Bombs/Dynamite, it was used 947 times
The most common weapon used to perform attack in South Korea is Incendiary, it was used 15 times
The most common weapon used to perform attack in South Sudan is Firearms, it was used 53 times
The most common weapon used to perform attack in South Vietnam is Explosives/Bombs/Dynamite, it was used 1 times
The most common weapon used to perform attack in South Yemen is Explosives/Bombs/Dynamite, it was used 1 times
The most common weapon used to perform attack in Soviet Union is Firearms, it was used 41 times
The most common weapon used to perform attack in Spain is Explosives/Bombs/Dynamite, it was used 1935 times
The most common weapon used to perform attack in Sri Lanka is Firearms, it was used 1343 times
The most common weapon used to perform attack in St. Kitts and Nevis is Melee, it was used 1 times
The most common weapon used to perform attack in St. Lucia is Melee, it was used 1 times
The most common weapon used to perform attack in Sudan is Firearms, it was used 389 times
The most common weapon used to perform attack in Suriname is Firearms, it was used 37 times
The most common weapon used to perform attack in Swaziland is Explosives/Bombs/Dynamite, it was used 7 times
The most common weapon used to perform attack in Sweden is Incendiary, it was used 43 times
The most common weapon used to perform attack in Switzerland is Explosives/Bombs/Dynamite, it was used 63 times
The most common weapon used to perform attack in Syria is Explosives/Bombs/Dynamite, it was used 1041 times
The most common weapon used to perform attack in Taiwan is Incendiary, it was used 18 times
The most common weapon used to perform attack in Tajikistan is Firearms, it was used 103 times
The most common weapon used to perform attack in Tanzania is Explosives/Bombs/Dynamite, it was used 23 times
The most common weapon used to perform attack in Thailand is Explosives/Bombs/Dynamite, it was used 1575 times
The most common weapon used to perform attack in Togo is Explosives/Bombs/Dynamite, it was used 24 times
The most common weapon used to perform attack in Trinidad and Tobago is Incendiary, it was used 7 times
The most common weapon used to perform attack in Tunisia is Explosives/Bombs/Dynamite, it was used 42 times
The most common weapon used to perform attack in Turkey is Explosives/Bombs/Dynamite, it was used 1561 times
The most common weapon used to perform attack in Turkmenistan is Firearms, it was used 2 times
The most common weapon used to perform attack in Uganda is Firearms, it was used 169 times
The most common weapon used to perform attack in Ukraine is Explosives/Bombs/Dynamite, it was used 1088 times
The most common weapon used to perform attack in United Arab Emirates is Explosives/Bombs/Dynamite, it was used 13 times
The most common weapon used to perform attack in United Kingdom is Explosives/Bombs/Dynamite, it was used 2137 times
The most common weapon used to perform attack in United States is Explosives/Bombs/Dynamite, it was used 1377 times
The most common weapon used to perform attack in Uruguay is Explosives/Bombs/Dynamite, it was used 24 times
The most common weapon used to perform attack in Uzbekistan is Explosives/Bombs/Dynamite, it was used 14 times
The most common weapon used to perform attack in Vanuatu is Unknown, it was used 2 times
The most common weapon used to perform attack in Vatican City is Firearms, it was used 1 times
The most common weapon used to perform attack in Venezuela is Explosives/Bombs/Dynamite, it was used 91 times
The most common weapon used to perform attack in Vietnam is Chemical, it was used 4 times
The most common weapon used to perform attack in Wallis and Futuna is Incendiary, it was used 1 times
The most common weapon used to perform attack in West Bank and Gaza Strip is Firearms, it was used 684 times
The most common weapon used to perform attack in West Germany (FRG) is Explosives/Bombs/Dynamite, it was used 290 times
The most common weapon used to perform attack in Western Sahara is Unknown, it was used 2 times
The most common weapon used to perform attack in Yemen is Explosives/Bombs/Dynamite, it was used 1162 times
The most common weapon used to perform attack in Yugoslavia is Firearms, it was used 114 times
The most common weapon used to perform attack in Zaire is Unknown, it was used 16 times
The most common weapon used to perform attack in Zambia is Explosives/Bombs/Dynamite, it was used 33 times
The most common weapon used to perform attack in Zimbabwe is Explosives/Bombs/Dynamite, it was used 37 times

Distribution of Weapons


In [17]:
weapons_data = Counter([x['weaptype1_txt'] for x in parsed_data])
for weapon, attacks in weapons_data.most_common():
    print("{a} attacks by {w} - {p:.2f}% of all attacks".format(a=attacks, w=weapon, p=attacks/all_attacks*100))


79126 attacks by Explosives/Bombs/Dynamite - 50.47% of all attacks
51802 attacks by Firearms - 33.04% of all attacks
12388 attacks by Unknown - 7.90% of all attacks
9812 attacks by Incendiary - 6.26% of all attacks
3013 attacks by Melee - 1.92% of all attacks
231 attacks by Chemical - 0.15% of all attacks
123 attacks by Sabotage Equipment - 0.08% of all attacks
104 attacks by Vehicle (not to include vehicle-borne explosives, i.e., car or truck bombs) - 0.07% of all attacks
92 attacks by Other - 0.06% of all attacks
35 attacks by Biological - 0.02% of all attacks
33 attacks by Fake Weapons - 0.02% of all attacks
13 attacks by Radiological - 0.01% of all attacks

In [18]:
labels, values= [list(x) for x in zip(*weapons_data.most_common(5))]
others = sum(list(zip(*weapons_data.most_common()))[1])-sum(values)
pyplot.pie(values+[others], labels=labels+["others"])
pyplot.show()


Countries with most attact from 2013 to 2015


In [19]:
three_years_data = [x for x in parsed_data if 2013<=int(x['iyear'])<=2015]
countries_data = Counter([x['country_txt'] for x in three_years_data])
for country, attacks in countries_data.most_common(5):
    print("{a} attacks in {c}".format(a=attacks, c=country))


9517 attacks in Iraq
5595 attacks in Pakistan
5187 attacks in Afghanistan
2436 attacks in India
1965 attacks in Philippines

In [20]:
## Cities with most attact from 2013 to 2015

In [21]:
for country, _ in countries_data.most_common(5):
    city, number= Counter([x['city'] for x in three_years_data if x['country_txt']==country and x['city']!="Unknown"]).most_common()[0]
    print("{n} attacks in {city},{country}".format(n=number,city=city,country=country))


2552 attacks in Baghdad,Iraq
857 attacks in Karachi,Pakistan
226 attacks in Kabul,Afghanistan
102 attacks in Imphal,India
38 attacks in Cotabato City,Philippines

Failed Attack


In [22]:
failed_attacks_by_year = Counter([x['iyear'] for x in parsed_data if int(x["iyear"])>=1985 and not int(x['success'])])
labels, values = zip(*sorted(failed_attacks_by_year.items(), key=lambda x: x[0]))

indexes = numpy.arange(len(labels))
width = 1

pyplot.bar(indexes, values, width)
pyplot.xticks(indexes + width * 0.5, labels, rotation='vertical')
pyplot.show()


Num of Countries Affected by Terrorist Attact


In [23]:
print(len(set([x["country_txt"] for x in parsed_data if x["country_txt"]!="Unknown"])))


206

In [ ]:


In [24]:
import pandas as pd
import plotly.offline as py
py.offline.init_notebook_mode()

terror_data = pd.read_csv('file.csv', encoding='ISO-8859-1',usecols=[0, 1, 2, 3, 8, 11, 13, 14, 35, 84, 100, 103])
terror_data = terror_data.rename(
    columns={'eventid':'id', 'iyear':'year', 'imonth':'month', 'iday':'day',
             'country_txt':'country', 'provstate':'state', 'targtype1_txt':'target',
             'weaptype1_txt':'weapon', 'nkill':'fatalities', 'nwound':'injuries'})

terror_data = terror_data.sort_values(['country'], ascending = False)


countries = list(set(terror_data.country))

country_mean_kills = []
for country in countries:
    country_mean_kills.append(terror_data.fatalities[terror_data.country == country].sum())

#print('Number of people killed per attack by Country\n')
#for i, country in enumerate(countries):
#    print('{}:{}'.format(country, round(country_mean_kills[i],2)))
data = [ dict(
        type = 'choropleth',
        locations = countries,
        z = country_mean_kills,
        locationmode = 'country names',
        text = countries,
        marker = dict(
            line = dict(color = 'rgb(0,0,0)', width = 1)),
            colorbar = dict(autotick = True, tickprefix = '', 
            title = '# Number of\nKills')
            )
       ]

layout = dict(
    title = 'Number of people killed per attack by Country (1970 - 2015)',
    geo = dict(
        showframe = False,
        showocean = True,
        oceancolor = 'rgb(0,255,255)',
        projection = dict(
        type = 'orthographic',
            rotation = dict(
                    lon = 60,
                    lat = 10),
        ),
        lonaxis =  dict(
                showgrid = True,
                gridcolor = 'rgb(102, 102, 102)'
            ),
        lataxis = dict(
                showgrid = True,
                gridcolor = 'rgb(102, 102, 102)'
                )
            ),
        )

fig = dict(data=data, layout=layout)
maps = py.iplot(fig, validate=False, filename='worldmap')



In [25]:
terror_data.head()


Out[25]:
id year month day country state latitude longitude target weapon fatalities injuries
71903 200103040006 2001 3 0 Zimbabwe Matabeleland North -19.860083 28.268742 Private Citizens & Property Firearms 1.0 0.0
30956 198705180003 1987 5 18 Zimbabwe Harare -17.825166 31.033510 Business Firearms 1.0 0.0
12231 198011160002 1980 11 16 Zimbabwe Salisbury -17.860618 30.981896 Business Explosives/Bombs/Dynamite 3.0 22.0
13029 198103160004 1981 3 16 Zimbabwe Masvingo -20.063730 30.827660 Private Citizens & Property Firearms 2.0 0.0
30889 198705090003 1987 5 9 Zimbabwe Midlands -19.698071 29.667207 Private Citizens & Property Firearms 4.0 0.0

In [ ]:


In [26]:
import numpy as np
from sklearn import preprocessing
from sklearn.metrics import roc_curve, auc
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
#import the data and rename the columns
d = pd.read_csv('file.csv', encoding='ISO-8859-1',low_memory=False)#,  usecols=[0, 1, 2, 3, 8, 11, 13, 14, 26, 29, 35, 37, 84, 100, 103])
 
d = d.rename(columns={'eventid':'id', 'iyear':'year', 'imonth':'month', 'iday':'day', 'country_txt':'country', 'provstate':'state', 'success':'success','targtype1_txt':'target', 'targsubtype1_txt' : 'targetsub','weaptype1_txt':'weapon', 'attacktype1_txt':'attack','nkill':'fatalities', 'nwound':'injuries'})


d = d.drop(['id'],axis=1)


df_num = d.select_dtypes(include=[np.number])
df_inf = df_num.replace([np.inf, -np.inf], np.nan)
df_inf.replace([np.inf, -np.inf], np.nan)
df_filled = df_inf.fillna(0)


//anaconda/lib/python3.6/site-packages/sklearn/cross_validation.py:44: DeprecationWarning:

This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.


In [27]:
df_filled.corr().abs()


Out[27]:
year month day extended region latitude longitude specificity vicinity crit1 ... ransomamt ransomamtus ransompaid ransompaidus hostkidoutcome nreleased INT_LOG INT_IDEO INT_MISC INT_ANY
year 1.000000 0.008121 0.015863 0.088923 0.391142 0.190852 0.551620 0.029520 0.077450 0.000140 ... 0.004866 0.002809 0.009421 0.001193 0.115534 0.085772 0.234008 0.231594 0.077005 0.266873
month 0.008121 1.000000 0.005991 0.001373 0.000373 0.014821 0.002063 0.002434 0.007455 0.001662 ... 0.000859 0.003008 0.000373 0.001449 0.004633 0.006736 0.003909 0.003617 0.003490 0.007883
day 0.015863 0.005991 1.000000 0.005025 0.008863 0.003753 0.012213 0.011166 0.005246 0.011904 ... 0.000873 0.000199 0.000053 0.000071 0.001528 0.002424 0.007560 0.007982 0.002237 0.007191
extended 0.088923 0.001373 0.005025 1.000000 0.032504 0.022614 0.034788 0.054347 0.017177 0.016506 ... 0.026327 0.020092 0.034101 0.019993 0.760736 0.416984 0.050541 0.053485 0.030440 0.064947
region 0.391142 0.000373 0.008863 0.032504 1.000000 0.341680 0.446143 0.089509 0.075372 0.018375 ... 0.000007 0.004293 0.004727 0.003496 0.052814 0.049483 0.153248 0.148840 0.048161 0.113385
latitude 0.190852 0.014821 0.003753 0.022614 0.341680 1.000000 0.290470 0.137347 0.011943 0.017608 ... 0.002458 0.001278 0.005859 0.000295 0.012015 0.006910 0.149012 0.147975 0.100880 0.089649
longitude 0.551620 0.002063 0.012213 0.034788 0.446143 0.290470 1.000000 0.045675 0.061507 0.005672 ... 0.002346 0.000305 0.008360 0.004932 0.053845 0.030419 0.176827 0.174365 0.031278 0.181758
specificity 0.029520 0.002434 0.011166 0.054347 0.089509 0.137347 0.045675 1.000000 0.038286 0.013099 ... 0.000027 0.001428 0.002297 0.002012 0.050565 0.029407 0.079861 0.079318 0.022455 0.066226
vicinity 0.077450 0.007455 0.005246 0.017177 0.075372 0.011943 0.061507 0.038286 1.000000 0.003777 ... 0.000208 0.000975 0.001191 0.001106 0.025640 0.013580 0.003202 0.002500 0.011080 0.002772
crit1 0.000140 0.001662 0.011904 0.016506 0.018375 0.017608 0.005672 0.013099 0.003777 1.000000 ... 0.000452 0.000402 0.004163 0.011869 0.018673 0.009182 0.087052 0.086884 0.018515 0.078969
crit2 0.024544 0.001135 0.003392 0.000896 0.015700 0.005216 0.008746 0.000715 0.000708 0.009326 ... 0.000776 0.000359 0.000891 0.000388 0.000224 0.003106 0.027124 0.027153 0.013349 0.031262
crit3 0.008009 0.000228 0.003962 0.046728 0.012154 0.007551 0.033621 0.077115 0.049932 0.039575 ... 0.003261 0.001522 0.003780 0.001646 0.051276 0.018599 0.106573 0.108488 0.039076 0.102216
doubtterr 0.278671 0.012036 0.000683 0.008816 0.071667 0.067572 0.107327 0.042614 0.034684 0.059548 ... 0.001741 0.001040 0.010182 0.003543 0.026763 0.035064 0.226359 0.227271 0.024566 0.160753
alternative 0.010062 0.008963 0.003736 0.011827 0.008284 0.010654 0.003293 0.043292 0.033685 0.358482 ... 0.002658 0.001319 0.001699 0.003177 0.007875 0.004679 0.018122 0.019438 0.024553 0.028520
multiple 0.091481 0.003781 0.003045 0.020751 0.000797 0.024524 0.009326 0.040043 0.010752 0.028852 ... 0.002674 0.000154 0.003435 0.001761 0.019006 0.009045 0.082953 0.081887 0.028053 0.048626
success 0.049852 0.005430 0.005642 0.065440 0.020784 0.071122 0.034576 0.028813 0.002230 0.009134 ... 0.002744 0.001365 0.003390 0.001477 0.066515 0.031700 0.059611 0.060016 0.017657 0.035563
suicide 0.134543 0.007571 0.002440 0.033063 0.100482 0.071304 0.075871 0.028427 0.008907 0.017525 ... 0.001603 0.000741 0.001839 0.000801 0.028873 0.013808 0.000401 0.001115 0.000310 0.004842
attacktype1 0.048197 0.009466 0.009588 0.268167 0.004438 0.000973 0.004777 0.049253 0.003429 0.026369 ... 0.012002 0.004978 0.014897 0.006819 0.280931 0.132985 0.038970 0.039385 0.013573 0.055426
attacktype2 0.105884 0.013860 0.000482 0.175655 0.057830 0.016067 0.050177 0.021636 0.026606 0.002358 ... 0.000885 0.000433 0.000842 0.000868 0.257306 0.152876 0.051149 0.051300 0.002641 0.042722
attacktype3 0.029250 0.006128 0.002127 0.088750 0.017054 0.009202 0.012075 0.005345 0.004330 0.002661 ... 0.000363 0.000169 0.000414 0.000183 0.131763 0.086354 0.024509 0.024309 0.000928 0.021171
targtype1 0.068837 0.001061 0.000071 0.007149 0.029113 0.040470 0.032706 0.048390 0.028187 0.049232 ... 0.003311 0.000747 0.006499 0.000653 0.021289 0.013729 0.049931 0.053241 0.050825 0.052555
targsubtype1 0.039052 0.003077 0.002173 0.013024 0.023109 0.059643 0.012828 0.037166 0.034601 0.042380 ... 0.003946 0.003417 0.008214 0.000156 0.027691 0.016618 0.016301 0.018546 0.019079 0.028396
natlty1 0.026828 0.010093 0.006083 0.012766 0.141940 0.027975 0.082014 0.037290 0.008944 0.004841 ... 0.000090 0.002099 0.002258 0.004695 0.008192 0.001172 0.039469 0.051951 0.250859 0.149717
targtype2 0.131608 0.001332 0.001121 0.046176 0.068359 0.024531 0.077401 0.003814 0.017589 0.010523 ... 0.000460 0.002664 0.000864 0.000925 0.050056 0.022499 0.000899 0.002112 0.032427 0.013805
targsubtype2 0.141823 0.000700 0.000166 0.045583 0.071015 0.025260 0.083296 0.002475 0.018612 0.010597 ... 0.000364 0.002962 0.000764 0.000917 0.050966 0.025795 0.000487 0.003543 0.028684 0.010981
natlty2 0.094075 0.003511 0.006359 0.050173 0.067515 0.005419 0.059406 0.005758 0.015063 0.008976 ... 0.000881 0.006725 0.000615 0.000721 0.055578 0.003906 0.011143 0.016218 0.062028 0.041291
targtype3 0.031974 0.001095 0.005552 0.043915 0.021120 0.004935 0.020405 0.003499 0.002070 0.001868 ... 0.000557 0.000278 0.000623 0.000379 0.043133 0.026721 0.008548 0.010624 0.026705 0.020515
targsubtype3 0.033286 0.000431 0.005460 0.042423 0.019625 0.003834 0.022922 0.004463 0.001971 0.001656 ... 0.000562 0.000279 0.000565 0.000383 0.042621 0.026732 0.009012 0.011243 0.026940 0.020517
natlty3 0.022003 0.000764 0.005114 0.032254 0.020326 0.008618 0.015119 0.007339 0.001680 0.001790 ... 0.000409 0.000215 0.000168 0.000315 0.030252 0.008431 0.010948 0.013531 0.032485 0.025934
ingroup 0.320863 0.005308 0.006729 0.096260 0.232674 0.049644 0.113153 0.023931 0.036175 0.029881 ... 0.004035 0.009919 0.002502 0.001913 0.114285 0.059786 0.222771 0.224276 0.004328 0.184116
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
weapsubtype2 0.041235 0.008456 0.001129 0.040934 0.028952 0.030617 0.020412 0.012882 0.014835 0.008521 ... 0.000911 0.000146 0.001967 0.000974 0.060245 0.020344 0.050152 0.050817 0.005055 0.045688
weaptype3 0.021383 0.012011 0.000461 0.016506 0.016145 0.012881 0.003727 0.004726 0.003453 0.001467 ... 0.005415 0.000383 0.000952 0.000415 0.034153 0.005529 0.039589 0.040118 0.000486 0.033353
weapsubtype3 0.023137 0.018144 0.004148 0.017842 0.020271 0.013984 0.005722 0.009854 0.003341 0.000455 ... 0.001241 0.000311 0.000772 0.000336 0.033596 0.006027 0.030917 0.031067 0.001077 0.027053
weaptype4 0.026314 0.001161 0.003663 0.005033 0.012800 0.010610 0.015412 0.001929 0.000973 0.002297 ... 0.019441 0.000088 0.000219 0.000096 0.000969 0.013351 0.014272 0.014471 0.004680 0.014677
weapsubtype4 0.020089 0.002580 0.004290 0.007407 0.011057 0.010005 0.009639 0.002039 0.000950 0.001851 ... 0.044358 0.000071 0.000177 0.000077 0.000945 0.017567 0.008273 0.008203 0.002254 0.008584
fatalities 0.020582 0.004887 0.003702 0.008628 0.040020 0.022074 0.007422 0.023813 0.017005 0.009723 ... 0.000176 0.000487 0.001637 0.000914 0.019807 0.016887 0.051475 0.052194 0.016059 0.036704
nkillus 0.000737 0.003311 0.001591 0.000841 0.008123 0.004987 0.007153 0.001596 0.000492 0.000141 ... 0.000003 0.000499 0.000070 0.000031 0.012184 0.000614 0.004279 0.005174 0.002308 0.006294
nkillter 0.063656 0.004715 0.002365 0.010310 0.036688 0.008426 0.030284 0.018425 0.006554 0.006021 ... 0.000583 0.000083 0.000807 0.000365 0.004651 0.004397 0.035726 0.036340 0.003131 0.029032
injuries 0.034701 0.002650 0.001365 0.016052 0.043732 0.021858 0.044755 0.022523 0.000537 0.009835 ... 0.001050 0.000516 0.001305 0.000593 0.017584 0.020048 0.004293 0.004531 0.007593 0.003505
nwoundus 0.005169 0.001144 0.001514 0.000881 0.010652 0.006609 0.011496 0.003327 0.000293 0.003063 ... 0.000092 0.000042 0.000105 0.000046 0.000954 0.000286 0.001433 0.002151 0.004116 0.003955
nwoundte 0.040244 0.003617 0.001443 0.001808 0.005013 0.016538 0.025963 0.024623 0.003807 0.003389 ... 0.000282 0.000189 0.000468 0.000204 0.001665 0.001535 0.020744 0.020630 0.003129 0.016766
property 0.295310 0.000212 0.005560 0.012908 0.181102 0.093652 0.179372 0.023172 0.028184 0.004276 ... 0.000499 0.001710 0.000475 0.000697 0.022767 0.012312 0.088721 0.087461 0.022138 0.102700
propextent 0.057919 0.028321 0.015618 0.072114 0.045623 0.031011 0.020877 0.004849 0.012509 0.026607 ... 0.005701 0.000456 0.006676 0.003365 0.066760 0.015911 0.024713 0.024673 0.006932 0.003488
propvalue 0.004487 0.001974 0.000536 0.001157 0.002043 0.004022 0.004652 0.002196 0.001019 0.000677 ... 0.000058 0.000027 0.000067 0.000030 0.001272 0.000583 0.004540 0.003839 0.005488 0.003782
ishostkid 0.009906 0.009100 0.005963 0.408266 0.023761 0.028507 0.015895 0.020603 0.004264 0.017480 ... 0.020081 0.009388 0.022618 0.010152 0.490323 0.221251 0.034073 0.035520 0.018361 0.044961
nhostkid 0.001653 0.002580 0.004819 0.023050 0.000699 0.003131 0.002106 0.000786 0.000117 0.002517 ... 0.005204 0.000253 0.000794 0.000067 0.018409 0.064014 0.003435 0.003482 0.000855 0.002832
nhostkidus 0.006123 0.002373 0.000399 0.039290 0.008104 0.003929 0.000772 0.004777 0.012532 0.007237 ... 0.001067 0.000220 0.000526 0.003824 0.068895 0.001255 0.002733 0.002945 0.009035 0.008122
nhours 0.060257 0.005547 0.005755 0.036495 0.026465 0.004841 0.039544 0.012272 0.010268 0.006187 ... 0.000253 0.000174 0.002723 0.000660 0.219819 0.062667 0.009818 0.009772 0.002547 0.007897
ndays 0.053624 0.001037 0.002500 0.210590 0.032119 0.003699 0.024572 0.013953 0.004577 0.000467 ... 0.024108 0.008804 0.020714 0.008776 0.394718 0.420592 0.003250 0.004818 0.022492 0.009097
ransom 0.065525 0.051048 0.001426 0.185989 0.017971 0.005235 0.047758 0.002178 0.029583 0.018195 ... 0.011663 0.005388 0.013380 0.004106 0.262342 0.181924 0.013358 0.012924 0.003421 0.009800
ransomamt 0.004866 0.000859 0.000873 0.026327 0.000007 0.002458 0.002346 0.000027 0.000208 0.000452 ... 1.000000 0.122256 0.103955 0.000269 0.023775 0.008683 0.004348 0.004726 0.004753 0.006247
ransomamtus 0.002809 0.003008 0.000199 0.020092 0.004293 0.001278 0.000305 0.001428 0.000975 0.000402 ... 0.122256 1.000000 0.002643 0.002352 0.019073 0.000727 0.002739 0.003382 0.006182 0.004533
ransompaid 0.009421 0.000373 0.000053 0.034101 0.004727 0.005859 0.008360 0.002297 0.001191 0.004163 ... 0.103955 0.002643 1.000000 0.000642 0.017097 0.002590 0.001040 0.000773 0.002454 0.000281
ransompaidus 0.001193 0.001449 0.000071 0.019993 0.003496 0.000295 0.004932 0.002012 0.001106 0.011869 ... 0.000269 0.002352 0.000642 1.000000 0.004933 0.000982 0.000706 0.000358 0.006855 0.005072
hostkidoutcome 0.115534 0.004633 0.001528 0.760736 0.052814 0.012015 0.053845 0.050565 0.025640 0.018673 ... 0.023775 0.019073 0.017097 0.004933 1.000000 0.598125 0.042720 0.044610 0.015317 0.049671
nreleased 0.085772 0.006736 0.002424 0.416984 0.049483 0.006910 0.030419 0.029407 0.013580 0.009182 ... 0.008683 0.000727 0.002590 0.000982 0.598125 1.000000 0.010178 0.010220 0.007232 0.007415
INT_LOG 0.234008 0.003909 0.007560 0.050541 0.153248 0.149012 0.176827 0.079861 0.003202 0.087052 ... 0.004348 0.002739 0.001040 0.000706 0.042720 0.010178 1.000000 0.996571 0.060332 0.880709
INT_IDEO 0.231594 0.003617 0.007982 0.053485 0.148840 0.147975 0.174365 0.079318 0.002500 0.086884 ... 0.004726 0.003382 0.000773 0.000358 0.044610 0.010220 0.996571 1.000000 0.091938 0.883116
INT_MISC 0.077005 0.003490 0.002237 0.030440 0.048161 0.100880 0.031278 0.022455 0.011080 0.018515 ... 0.004753 0.006182 0.002454 0.006855 0.015317 0.007232 0.060332 0.091938 1.000000 0.266785
INT_ANY 0.266873 0.007883 0.007191 0.064947 0.113385 0.089649 0.181758 0.066226 0.002772 0.078969 ... 0.006247 0.004533 0.000281 0.005072 0.049671 0.007415 0.880709 0.883116 0.266785 1.000000

77 rows × 77 columns


In [28]:
def correlation_matrix(df):
    from matplotlib import pyplot as plt
    from matplotlib import cm as cm

    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    cmap = cm.get_cmap('jet', 30)
    cax = ax1.imshow(df.corr(), interpolation="nearest", cmap=cmap)
    ax1.grid(True)
    plt.title('Abalone Feature Correlation')
    labels=['Sex','Length','Diam','Height','Whole','Shucked','Viscera','Shell','Rings',]
    ax1.set_xticklabels(labels,fontsize=6)
    ax1.set_yticklabels(labels,fontsize=6)
    # Add colorbar, make sure to specify tick locations to match desired ticklabels
    fig.colorbar(cax, ticks=[.75,.8,.85,.90,.95,1])
    plt.show()
    
correlation_matrix(df_filled)



In [29]:
from numpy import float32
df_filled.head()
df_transformed = df_filled.astype(float32)
df_transformed.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156772 entries, 0 to 156771
Data columns (total 77 columns):
year              156772 non-null float32
month             156772 non-null float32
day               156772 non-null float32
extended          156772 non-null float32
region            156772 non-null float32
latitude          156772 non-null float32
longitude         156772 non-null float32
specificity       156772 non-null float32
vicinity          156772 non-null float32
crit1             156772 non-null float32
crit2             156772 non-null float32
crit3             156772 non-null float32
doubtterr         156772 non-null float32
alternative       156772 non-null float32
multiple          156772 non-null float32
success           156772 non-null float32
suicide           156772 non-null float32
attacktype1       156772 non-null float32
attacktype2       156772 non-null float32
attacktype3       156772 non-null float32
targtype1         156772 non-null float32
targsubtype1      156772 non-null float32
natlty1           156772 non-null float32
targtype2         156772 non-null float32
targsubtype2      156772 non-null float32
natlty2           156772 non-null float32
targtype3         156772 non-null float32
targsubtype3      156772 non-null float32
natlty3           156772 non-null float32
ingroup           156772 non-null float32
ingroup2          156772 non-null float32
ingroup3          156772 non-null float32
guncertain1       156772 non-null float32
guncertain2       156772 non-null float32
guncertain3       156772 non-null float32
nperps            156772 non-null float32
nperpcap          156772 non-null float32
claimed           156772 non-null float32
claimmode         156772 non-null float32
claim2            156772 non-null float32
claimmode2        156772 non-null float32
claim3            156772 non-null float32
claimmode3        156772 non-null float32
compclaim         156772 non-null float32
weaptype1         156772 non-null float32
weapsubtype1      156772 non-null float32
weaptype2         156772 non-null float32
weapsubtype2      156772 non-null float32
weaptype3         156772 non-null float32
weapsubtype3      156772 non-null float32
weaptype4         156772 non-null float32
weapsubtype4      156772 non-null float32
fatalities        156772 non-null float32
nkillus           156772 non-null float32
nkillter          156772 non-null float32
injuries          156772 non-null float32
nwoundus          156772 non-null float32
nwoundte          156772 non-null float32
property          156772 non-null float32
propextent        156772 non-null float32
propvalue         156772 non-null float32
ishostkid         156772 non-null float32
nhostkid          156772 non-null float32
nhostkidus        156772 non-null float32
nhours            156772 non-null float32
ndays             156772 non-null float32
ransom            156772 non-null float32
ransomamt         156772 non-null float32
ransomamtus       156772 non-null float32
ransompaid        156772 non-null float32
ransompaidus      156772 non-null float32
hostkidoutcome    156772 non-null float32
nreleased         156772 non-null float32
INT_LOG           156772 non-null float32
INT_IDEO          156772 non-null float32
INT_MISC          156772 non-null float32
INT_ANY           156772 non-null float32
dtypes: float32(77)
memory usage: 46.0 MB

Linear Regression


In [30]:
lm = LinearRegression()

y = d['success']
X = df_filled[['month', 'day','region','property','propextent','attacktype1','weaptype1','nperps','specificity' ]]
X_train, X_test,y_train, y_test = train_test_split(X,y,random_state=2)

lm.fit(X_train, y_train)
r = lm.score(X_train, y_train)
print (r)
pred_train = lm.predict(X_train)
pred_test = lm.predict(X_test)
print(lm.coef_)


0.0372317320982
[ -1.73923957e-04  -1.68483111e-05  -1.83871397e-03  -7.29810486e-03
   3.15071149e-02   1.20843515e-02  -4.14460997e-03   5.30998137e-05
   7.99300431e-03]

In [31]:
mean_squared_error(pred_test,y_test)


Out[31]:
0.084661659437367143

In [32]:
plt.scatter(lm.predict(X_train), lm.predict(X_train) - y_train, c='b', s=40, alpha=0.5)
plt.scatter(lm.predict(X_test), lm.predict(X_test) - y_test, c='g', s=40)
#plt.hlines(y = 0, xmin = -400000000, xmax = 1400000000)


plt.title('Residual Plot using training (blue) and test (green) data')
plt.ylabel('Residuals')
plt.show()



In [33]:
import statsmodels.api as sm
fig = sm.qqplot(lm.predict(X_test) - y_test)
plt.show()



In [ ]:


In [ ]:


In [ ]: