notebook.community

Edit and run



In [1]:

    
# Load the labelled bot and non-bot account tables and stack them into one dataset.
import pandas as pd

path_to_projectData = 'Data/'  # directory where the project's CSV files reside

# Both CSVs are decoded as latin-1, as in the original notebook.
bot_data = pd.read_csv(path_to_projectData + 'bots_data.csv', encoding='latin-1')
nonbots_data = pd.read_csv(path_to_projectData + 'nonbots_data.csv', encoding='latin-1')

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is the
# supported replacement. Like append, it keeps each frame's own row index, so index
# labels repeat across the two sources.
# NOTE(review): assumes both CSVs share the same columns -- confirm.
complete_data = pd.concat([bot_data, nonbots_data])

# Preview the top 10 rows of the combined dataset. Only the last expression of a cell is
# rendered, so previews of bot_data / nonbots_data belong in their own cells if needed.
complete_data.head(10)









    Out[1]:






  
    
      
      id
      id_str
      screen_name
      location
      description
      url
      followers_count
      friends_count
      listedcount
      created_at
      favourites_count
      verified
      statuses_count
      lang
      status
      default_profile
      default_profile_image
      has_extended_profile
      name
      bot
    
  
  
    
      0
      3.982732e+09
      3982731976
      mcgucket_bot
      NaN
      A bot that tweets every line said by Fiddlefor...
      NaN
      1129
      7
      2
      Sat Oct 17 22:35:31 +0000 2015
      0
      False
      23557
      en
      {'retweeted': False, 'is_quote_status': False,...
      False
      False
      False
      McGucket bot
      1
    
    
      1
      8.410000e+17
      8.41E+17
      BowieK66
      NaN
      NaN
      NaN
      0
      22
      0
      13/03/2017 22:21
      0
      False
      1
      en
      Status(_api=<tweepy.api.API object at 0x101927...
      True
      True
      False
      MJ Kuhn
      1
    
    
      2
      2.768375e+09
      2768374981
      ducknoteprice
      NaN
      NaN
      NaN
      3
      0
      3
      Mon Aug 25 23:17:59 +0000 2014
      0
      False
      1050
      en
      {'created_at': 'Fri Feb 26 17:01:15 +0000 2016...
      True
      False
      False
      duckNote
      1
    
    
      3
      3.304189e+09
      3304189373
      robotrecipes
      robot kitchen
      tasty recipes for robot // not for human // a ...
      http://t.co/PdagJGqVMR
      505
      13
      49
      Sat May 30 23:42:16 +0000 2015
      0
      False
      5109
      en
      {u'contributors': None, u'truncated': False, u...
      False
      False
      False
      robot recipes
      1
    
    
      4
      3.224289e+09
      3224289024
      everyumlaut
      NaN
      bot by @dbaker_h
      NaN
      15
      0
      11
      Sat May 23 14:39:21 +0000 2015
      0
      False
      31365
      en-gb
      {u'contributors': None, u'truncated': False, u...
      True
      False
      False
      ÌÇvÌÇrÌÀ Ì_mlÌ_Ì_¼Ñ
      1
    
    
      5
      7.300000e+17
      7.30E+17
      glossatory
      Australah
      SOCIAL ON GLOSSATORY: the branch of the consid...
      https://t.co/hcUNcUdlcq
      16
      1
      4
      5/11/2016 5:45
      0
      False
      1832
      en
      Status(in_reply_to_user_id=None, favorited=Fal...
      False
      False
      False
      GLOSSATORY
      1
    
    
      6
      8.200000e+17
      8.20E+17
      Fancypants6047
      vancouver
      NaN
      NaN
      41
      394
      0
      13/01/2017 19:37
      3
      False
      4
      en
      Status(_api=<tweepy.api.API object at 0x101927...
      True
      False
      True
      Fancypants
      1
    
    
      7
      2.602217e+09
      2602217174
      Hedgehogize
      Moebius
      @Hedgehogize me or @Hedgehogize NAME to unlock...
      http://t.co/a1UvEiAl3u
      549
      370
      12
      Thu Jul 03 20:37:48 +0000 2014
      12
      False
      4658
      en
      {u'contributors': None, u'truncated': False, u...
      False
      False
      False
      YOU the Hedgehog
      1
    
    
      8
      8.330000e+17
      8.33E+17
      jamieph93986621
      NaN
      I wasn't bor ysterday,  was born tomorrow
      NaN
      0
      60
      0
      Sun Feb 19 03:47:42 +0000 2017
      0
      False
      10
      en
      {"created_at": "Mon Mar 13 02:55:10 +0000 2017...
      True
      False
      False
      Jamie Phillips
      1
    
    
      9
      8.250000e+17
      8.25E+17
      NothemDonella
      NaN
      Your diac No
      NaN
      0
      43
      4
      Fri Jan 27 15:51:34 +0000 2017
      48
      False
      86
      en
      {"created_at": "Mon Mar 13 02:52:13 +0000 2017...
      True
      False
      False
      Donella Nothem
      1



In [2]:

    
# Build the feature matrix and labels, hold out a test split, then fit and evaluate a
# decision tree that predicts whether a profile is a bot.
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# sklearn.preprocessing.Imputer was removed in scikit-learn 0.22; SimpleImputer is its
# replacement. The fallback keeps the cell runnable on old installs, and the name
# `Imputer` stays bound either way.
try:
    from sklearn.impute import SimpleImputer as Imputer
except ImportError:
    from sklearn.preprocessing import Imputer

# Select the numeric and boolean profile attributes used as features.
X = complete_data.loc[:, ['followers_count', 'friends_count', 'listedcount', 'favourites_count',
                          'verified', 'statuses_count', 'default_profile',
                          'default_profile_image', 'has_extended_profile']]

# Replace missing (NaN) entries with the column mean (the imputer's default strategy).
# Infinite values are NOT handled by the imputer.
X = Imputer().fit_transform(X)

Y = complete_data['bot']  # target label column

# Split the dataset into train and test data (80/20) with a fixed seed.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Fit the decision tree and predict on the held-out rows. random_state is fixed so the
# reported metrics are reproducible across runs.
dtree = DecisionTreeClassifier(random_state=42)
dtree.fit(X_train, Y_train)
dtree_predictions = dtree.predict(X_test)

# Print the confusion matrix and classification report for DecisionTreeClassifier.
print(confusion_matrix(Y_test, dtree_predictions))
print('\n')
print(classification_report(Y_test, dtree_predictions))









    



[[197  25]
 [ 46 179]]


             precision    recall  f1-score   support

          0       0.81      0.89      0.85       222
          1       0.88      0.80      0.83       225

avg / total       0.84      0.84      0.84       447



In [3]:

    
# Fit a random forest on the same train/test split and predict whether each held-out
# profile is a bot.
from sklearn.ensemble import RandomForestClassifier

# random_state is fixed so the bootstrap samples and feature sub-sampling -- and hence the
# reported metrics -- are reproducible across runs; 42 matches the seed used for the split.
rfc = RandomForestClassifier(n_estimators=200, random_state=42)
rfc.fit(X_train, Y_train)
rfc_predictions = rfc.predict(X_test)

# Print the confusion matrix and classification report for RandomForestClassifier.
# confusion_matrix / classification_report are imported in the decision-tree cell above.
print(confusion_matrix(Y_test, rfc_predictions))
print('\n')
print(classification_report(Y_test, rfc_predictions))









    



[[209  13]
 [ 42 183]]


             precision    recall  f1-score   support

          0       0.83      0.94      0.88       222
          1       0.93      0.81      0.87       225

avg / total       0.88      0.88      0.88       447

	id	id_str	screen_name	location	description	url	followers_count	friends_count	listedcount	created_at	favourites_count	verified	statuses_count	lang	status	default_profile	default_profile_image	has_extended_profile	name	bot
0	3.982732e+09	3982731976	mcgucket_bot	NaN	A bot that tweets every line said by Fiddlefor...	NaN	1129	7	2	Sat Oct 17 22:35:31 +0000 2015	0	False	23557	en	{'retweeted': False, 'is_quote_status': False,...	False	False	False	McGucket bot	1
1	8.410000e+17	8.41E+17	BowieK66	NaN	NaN	NaN	0	22	0	13/03/2017 22:21	0	False	1	en	Status(_api=<tweepy.api.API object at 0x101927...	True	True	False	MJ Kuhn	1
2	2.768375e+09	2768374981	ducknoteprice	NaN	NaN	NaN	3	0	3	Mon Aug 25 23:17:59 +0000 2014	0	False	1050	en	{'created_at': 'Fri Feb 26 17:01:15 +0000 2016...	True	False	False	duckNote	1
3	3.304189e+09	3304189373	robotrecipes	robot kitchen	tasty recipes for robot // not for human // a ...	http://t.co/PdagJGqVMR	505	13	49	Sat May 30 23:42:16 +0000 2015	0	False	5109	en	{u'contributors': None, u'truncated': False, u...	False	False	False	robot recipes	1
4	3.224289e+09	3224289024	everyumlaut	NaN	bot by @dbaker_h	NaN	15	0	11	Sat May 23 14:39:21 +0000 2015	0	False	31365	en-gb	{u'contributors': None, u'truncated': False, u...	True	False	False	ÌÇvÌÇrÌÀ Ì_mlÌ_Ì_¼Ñ	1
5	7.300000e+17	7.30E+17	glossatory	Australah	SOCIAL ON GLOSSATORY: the branch of the consid...	https://t.co/hcUNcUdlcq	16	1	4	5/11/2016 5:45	0	False	1832	en	Status(in_reply_to_user_id=None, favorited=Fal...	False	False	False	GLOSSATORY	1
6	8.200000e+17	8.20E+17	Fancypants6047	vancouver	NaN	NaN	41	394	0	13/01/2017 19:37	3	False	4	en	Status(_api=<tweepy.api.API object at 0x101927...	True	False	True	Fancypants	1
7	2.602217e+09	2602217174	Hedgehogize	Moebius	@Hedgehogize me or @Hedgehogize NAME to unlock...	http://t.co/a1UvEiAl3u	549	370	12	Thu Jul 03 20:37:48 +0000 2014	12	False	4658	en	{u'contributors': None, u'truncated': False, u...	False	False	False	YOU the Hedgehog	1
8	8.330000e+17	8.33E+17	jamieph93986621	NaN	I wasn't bor ysterday, was born tomorrow	NaN	0	60	0	Sun Feb 19 03:47:42 +0000 2017	0	False	10	en	{"created_at": "Mon Mar 13 02:55:10 +0000 2017...	True	False	False	Jamie Phillips	1
9	8.250000e+17	8.25E+17	NothemDonella	NaN	Your diac No	NaN	0	43	4	Fri Jan 27 15:51:34 +0000 2017	48	False	86	en	{"created_at": "Mon Mar 13 02:52:13 +0000 2017...	True	False	False	Donella Nothem	1