In [1]:
import pandas as pd
from sklearn import svm
from sklearn import metrics

raw_data=pd.read_csv("C:\\Users\\haris\\Desktop\\CombinedRandomDataCleaned3.csv")
print (raw_data.head(3))
print(raw_data.tail(3))


   altmetric_id                        DOI                  ISSN  mendeley  \
0       3960260  10.1192/apt.bp.114.013490            2056-4686;         3   
1       2295090          10.1111/ans.12536  1445-2197;1445-1433;        13   
2       6616300                        NaN  0008-5472;1538-7445;         3   

   citeulike  connotea  twitter  reddit  facebook  googleplus  blogs  news  \
0          0         0        1       0         1           0      0     0   
1          0         0        1       0         0           0      0     0   
2          0         0        1       0         0           0      0     0   

   video  wikipedia  q&a  pinterest  weibo  peer_reviews  policy  
0      0          0    0          0      0             0       0  
1      0          0    0          0      0             0       0  
2      0          0    0          0      0             0       0  
        altmetric_id                           DOI                 ISSN  \
178674       2563446  10.1016/j.vetpar.2009.01.013             03044017   
178675       2563220        10.1098/rsbl.2005.0390                  NaN   
178676       2565950      10.1136/jech.2010.127761  0143-005X;1470-2738   

        mendeley  citeulike  connotea  twitter  reddit  facebook  googleplus  \
178674        11          0         0        0       0         0           0   
178675        26          0         0        0       0         0           0   
178676         3          0         0        0       0         0           0   

        blogs  news  video  wikipedia  q&a  pinterest  weibo  peer_reviews  \
178674      1     0      0          0    0          0      1             2   
178675      1     0      0          0    0          0      1             2   
178676      1     0      0          0    0          0      1             2   

        policy  
178674       2  
178675       1  
178676       1  

In [2]:
raw_data = raw_data.sample(frac=1).reset_index(drop=True)
train = raw_data.sample(frac=0.8, random_state=1)
test = raw_data.loc[~raw_data.index.isin(train.index)]

data_columns=["mendeley", "citeulike", "connotea", "twitter", "reddit", "facebook", "googleplus", "blogs", "news", "video", "wikipedia", "q&a", "pinterest", "weibo", "peer_reviews"]
train_data_array=train.as_matrix(columns=data_columns)
train_class_array= train['policy'].values
test_data_array=test.as_matrix(columns=data_columns)
test_class_array= test['policy'].values

In [3]:
clf = svm.SVC()

In [4]:
clf.fit(train_data_array, train_class_array)


Out[4]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [5]:
pred = clf.predict(test_data_array)
score = metrics.accuracy_score(test_class_array, pred)
print("accuracy:   %0.3f" % score)

score = metrics.precision_score(test_class_array, pred)
print("Precision:   %0.3f" % score)

score = metrics.recall_score(test_class_array, pred)
print("Recall:   %0.3f" % score)

score = metrics.f1_score(test_class_array, pred)
print("F-measure:   %0.3f" % score)


accuracy:   0.868
Precision:   0.820
Recall:   0.868
F-measure:   0.824
C:\Program Files\Anaconda3\lib\site-packages\sklearn\metrics\classification.py:1203: DeprecationWarning: The default `weighted` averaging is deprecated, and from version 0.18, use of precision, recall or F-score with multiclass or multilabel data or pos_label=None will result in an exception. Please set an explicit value for `average`, one of (None, 'micro', 'macro', 'weighted', 'samples'). In cross validation use, for instance, scoring="f1_weighted" instead of scoring="f1".
  sample_weight=sample_weight)
C:\Program Files\Anaconda3\lib\site-packages\sklearn\metrics\classification.py:1074: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)
C:\Program Files\Anaconda3\lib\site-packages\sklearn\metrics\classification.py:1304: DeprecationWarning: The default `weighted` averaging is deprecated, and from version 0.18, use of precision, recall or F-score with multiclass or multilabel data or pos_label=None will result in an exception. Please set an explicit value for `average`, one of (None, 'micro', 'macro', 'weighted', 'samples'). In cross validation use, for instance, scoring="f1_weighted" instead of scoring="f1".
  sample_weight=sample_weight)
C:\Program Files\Anaconda3\lib\site-packages\sklearn\metrics\classification.py:756: DeprecationWarning: The default `weighted` averaging is deprecated, and from version 0.18, use of precision, recall or F-score with multiclass or multilabel data or pos_label=None will result in an exception. Please set an explicit value for `average`, one of (None, 'micro', 'macro', 'weighted', 'samples'). In cross validation use, for instance, scoring="f1_weighted" instead of scoring="f1".
  sample_weight=sample_weight)
C:\Program Files\Anaconda3\lib\site-packages\sklearn\metrics\classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [ ]: