In [1]:
import pandas as pd

In [4]:
df = pd.read_csv('suicide_history.csv')

In [5]:
df.head()


Out[5]:
Date Age Gender Profession Methodology Reason Time District Source
0 2010-01-05 19 Female Housewife Hanging Family feud 13:00:00 Dhaka http://archive.thedailystar.net/newDesign/news...
1 2010-01-05 22 Male Police Shooting Family feud 00:10:00 Dinajpur http://archive.thedailystar.net/newDesign/news...
2 2010-01-07 45 Male Police Shooting Family feud 21:15:00 Bogra http://archive.thedailystar.net/newDesign/news...
3 2010-01-11 9 Male Student Hanging - 02:00:00 Sirajganj http://archive.thedailystar.net/newDesign/news...
4 2010-01-12 17 Female Student Hanging - 13:30:00 Dhaka http://archive.thedailystar.net/newDesign/news...

In [6]:
df.columns


Out[6]:
Index([u'Date', u'Age', u'Gender', u'Profession', u'Methodology', u'Reason',
       u'Time', u'District', u'Source'],
      dtype='object')

In [ ]:


In [48]:
df.head()


Out[48]:
Date Age Gender Profession Methodology Reason Time District Source
0 2010-01-05 19 female housewife hanging family feud 13:00:00 dhaka http://archive.thedailystar.net/newDesign/news...
1 2010-01-05 22 male police shooting family feud 00:10:00 dinajpur http://archive.thedailystar.net/newDesign/news...
2 2010-01-07 45 male police shooting family feud 21:15:00 bogra http://archive.thedailystar.net/newDesign/news...
3 2010-01-11 9 male student hanging - 02:00:00 sirajganj http://archive.thedailystar.net/newDesign/news...
4 2010-01-12 17 female student hanging - 13:30:00 dhaka http://archive.thedailystar.net/newDesign/news...

In [49]:
df['District'].value_counts()['dhaka']


Out[49]:
336

In [61]:
# Overall death methodologies
unique_methods = list(set(df['Methodology'].str.lower()))
print df['Methodology'].value_counts()


hanging                       634
poison                        233
jumped in front of train       16
jumped                         13
not given                      12
setting fire                   12
shooting                       10
-                               8
jumping off river               6
slitting throat                 5
jumping off                     5
fire                            5
jumping off building            5
jumped under running train      4
train                           2
shot                            2
set fire                        1
bus                             1
strangulating                   1
posion                          1
not mentioned                   1
burning                         1
stabbing himself                1
Name: Methodology, dtype: int64

In [101]:
# Methodologies by gender
male_suicide_desc = df[df['Gender'] == 'male']['Methodology'].value_counts()
print sum(male_suicide_desc)


331

In [96]:
female_suicide_desc = df[df['Gender'] == 'female']['Methodology'].value_counts()

In [92]:
with open('male_suicide.txt', 'w') as f:
    f.write('[')
    for method in unique_methods:
        try:
            data_to_write = "{\"gender\" : \"male\", \"method\": \"%s\" , \"count\": %s}" % (method, male_suicide_desc[method])
            f.write(data_to_write + ',\n')
        except:
            print "ERROR"
    f.write(']')
f.close()


ERROR
ERROR
ERROR
ERROR
ERROR

In [54]:
df_onlineshop = pd.read_csv('Online_shoping_survey_data.csv')
df_onlineshop['Respondents Gender'].str.lower()
df_onlineshop['Respondents Professional Status'].str.lower()


Out[54]:
0                   4
1                   2
2                   3
3                   2
4                   6
5                   2
6                   1
7                   6
8                   2
9                   2
10                  3
11                  2
12                  2
13                  3
14                  2
15                  6
16                  7
17                  6
18                  6
19                  6
20                  6
21                  2
22                  6
23                  2
24                  2
25                  5
26                  2
27                  6
28                  7
29                  2
            ...      
496                 6
497                 2
498                 1
499                 2
500                 6
501                 1
502                 1
503                 2
504                 6
505                 6
506                 6
507                 7
508                 6
509                 6
510                 2
511                 6
512                 6
513                 7
514               NaN
515               map
516      bussinessman
517    service holder
518        home maker
519     self employed
520           teacher
521           student
522            others
523               map
524           married
525         unmarried
Name: Respondents Professional Status, dtype: object

In [95]:
for method in unique_methods:
    try:
        print "Method: {} Count: {}".format(method, male_suicide_desc[method])
    except:
        print "Method: {} Count: {} ".format(method, 0)


Method: jumping off building Count: 4
Method: stabbing himself Count: 1
Method: shot Count: 1
Method: burning Count: 0 
Method: jumping off river Count: 3
Method: slitting throat Count: 5
Method: strangulating Count: 1
Method: hanging Count: 203
Method: shooting Count: 10
Method: jumped in front of train Count: 7
Method: jumping off Count: 4
Method: jumped Count: 4
Method: - Count: 5
Method: poison Count: 76
Method: bus Count: 0 
Method: fire Count: 1
Method: jumped under running train Count: 1
Method: train Count: 2
Method: set fire Count: 0 
Method: posion Count: 0 
Method: not mentioned Count: 0 
Method: setting fire Count: 2
Method: not given Count: 1

In [97]:
for method in unique_methods:
    try:
        print "Method: {} Count: {}".format(method, female_suicide_desc[method])
    except:
        print "Method: {} Count: {} ".format(method, 0)


Method: jumping off building Count: 1
Method: stabbing himself Count: 0 
Method: shot Count: 1
Method: burning Count: 1
Method: jumping off river Count: 3
Method: slitting throat Count: 0 
Method: strangulating Count: 0 
Method: hanging Count: 431
Method: shooting Count: 0 
Method: jumped in front of train Count: 9
Method: jumping off Count: 1
Method: jumped Count: 9
Method: - Count: 3
Method: poison Count: 157
Method: bus Count: 1
Method: fire Count: 4
Method: jumped under running train Count: 3
Method: train Count: 0 
Method: set fire Count: 1
Method: posion Count: 1
Method: not mentioned Count: 1
Method: setting fire Count: 10
Method: not given Count: 11

In [103]:
print sum(male_suicide_desc)


331

In [110]:
with open('male_suicide_data.txt', 'w') as f:
    for method in unique_methods:
        try:
            data_to_write = "{group: \"Male\", category: \"%s\", measure: %s}" % (method, male_suicide_desc[method])
        except:
            print "EXCEPTION"
            data_to_write = "{group: \"Male\", category: \"%s\", measure: %s}" % (method, "0")
        f.write(data_to_write + ',\n')
f.close()


EXCEPTION
EXCEPTION
EXCEPTION
EXCEPTION
EXCEPTION

In [121]:
df_onlineshop.keys()
for key in df_onlineshop.keys():
    try:
        df_onlineshop[key] = df_onlineshop[key].str.lower()
    except:
        print "ERROR"


ERROR
ERROR
ERROR
ERROR
ERROR
ERROR
ERROR
ERROR
ERROR
ERROR
ERROR
ERROR
ERROR

In [123]:
respondents_professional_status = {
    "1": "businessman",
    "2": "service Holder",
    "3": "home Maker",
    "4": "Self Employed",
    "5": "Teacher",
    "6": "Student",
    "7": "Others"
}

In [124]:
respondents_marital_status = {
    "1": "Married",
    "2": "Unmarried",
    "3": None
}

In [125]:
purpose_of_using_internet = {
    1: "Shopping",
    2: "Work",
    3: "Education",
    4: "Social Media",
    5: "Phone calls or distant messaging",
    6: "Others",
    7: None
}

In [126]:
df_onlineshop['Respondents Professional Status'].replace(respondents_professional_status, inplace=True)
df_onlineshop['Respondent\'s Marital Status'].replace(respondents_marital_status, inplace=True)
df_onlineshop['Main Purpose of using internet'].replace(purpose_of_using_internet, inplace=True)

In [127]:
df_onlineshop.keys()
for key in df_onlineshop.keys():
    try:
        df_onlineshop[key] = df_onlineshop[key].str.lower()
    except:
        print "ERROR"


ERROR
ERROR
ERROR
ERROR
ERROR
ERROR
ERROR
ERROR
ERROR
ERROR
ERROR
ERROR

In [128]:
df_onlineshop.head()


Out[128]:
Respondents ID Respondents Gender Respondents living area Age of the respondent Respondent's Marital Status Respondents Professional Status Educational qualification Family members Respondents Income Respondents Expenditure ... OS reduces monetary cost traditional stores are better enjoy Online shopping Do not purchase online if there is no moneyback gurantee Delivery service is not satisfactory long time requires for getting a product satisfeid with the return policy Overall Satisfaction Opinion about the number of online stores Main Purpose of using internet
0 1.0 male dhaka south city corporation 29.0 married self employed graduate 3.0 12000.0 6000.0 ... indifferent indifferent agree indifferent disagree disagree indifferent somewhat satisfied adequate social media
1 2.0 male dhaka north city corporation 32.0 married service holder graduate 5.0 65000.0 50000.0 ... disagree indifferent agree indifferent disagree disagree indifferent somewhat satisfied too much social media
2 3.0 female dhaka south city corporation 35.0 married home maker secondary 4.0 18000.0 15000.0 ... disagree indifferent indifferent strongly agree disagree disagree indifferent indifferent too much shopping
3 4.0 female dhaka north city corporation 24.0 married service holder graduate 5.0 15000.0 5000.0 ... disagree indifferent indifferent indifferent disagree disagree indifferent somewhat dissatisfied adequate social media
4 5.0 male dhaka south city corporation 24.0 unmarried student undergraduate 6.0 6000.0 6000.0 ... disagree indifferent agree indifferent disagree strongly disagree indifferent somewhat satisfied adequate social media

5 rows × 68 columns


In [129]:
df_onlineshop.to_csv('output_shopping.csv')

In [1]:
shopping_data = 'malevsfemale.tsv'

In [2]:
import pandas as pd

In [3]:
df = pd.read_csv(shopping_data, sep='\t')

In [6]:
df_male = df[df['Respondents Gender'] == 'male']

In [7]:
df_female = df[df['Respondents Gender'] == 'female']

In [13]:
df_male.head(15)


Out[13]:
Respondents Gender Mostly visited online shop count_sum
0 male kaymu.com.bd 30.0
2 male bikroy.com 49.0
3 male facebook pages 44.0
7 male amazon/ebay 21.0
8 male olx.com.bd 6.0
10 male ajkerdeal.com 15.0
11 male null 84.0
12 male ekhanei.com 18.0
13 male shohoz.com 22.0
14 male others 24.0
16 male alibaba.com 5.0
18 male chaldal.com 4.0
20 male daraz.com.bd 16.0
24 male clickbd.com 1.0

In [15]:
df_female.head(df_female.shape[0])


Out[15]:
Respondents Gender Mostly visited online shop count_sum
1 female bikroy.com 14.0
4 female kaymu.com.bd 8.0
5 female null 52.0
6 female facebook pages 68.0
9 female ekhanei.com 8.0
15 female amazon/ebay 4.0
17 female chaldal.com 1.0
19 female others 5.0
21 female ajkerdeal.com 5.0
22 female daraz.com.bd 7.0
23 female clickbd.com 1.0
25 female olx.com.bd 1.0
26 female alibaba.com 1.0

In [ ]: