notebook.community

Edit and run



In [1]:

    
import pandas as pd



In [2]:

    
# Load data/200log.csv (path relative to the working directory) into `df`.
# Later cells read its `url_status_code` and `name` columns.
df = pd.read_csv("data/200log.csv")



In [3]:

    
# Add a constant helper column (every row gets 1); later cells count rows
# through it with `.unit.count()`. Note this mutates `df` in place.
df['unit'] = 1



In [7]:

    
# Split the log into rows whose status code is 200 and rows whose status code
# is anything else, then count rows overall and per `name` through `unit`.
status = df.url_status_code
ok_rows = df[status == 200]
failed_rows = df[status != 200]

success = ok_rows.unit.count()
success_grouped = ok_rows.groupby('name').unit.count()

fail = failed_rows.unit.count()
fail_grouped = failed_rows.groupby('name').unit.count()



In [8]:

    
# Display the per-name counts of rows whose status code is not 200.
fail_grouped









    Out[8]:





Series([], Name: unit, dtype: int64)



In [9]:

    
# Total number of logged requests, i.e. the number of rows in the log.
# Summing `url_status_code` would add up the HTTP codes themselves
# (200 + 200 + ...), which is not a request count.
total_requests = len(df)



In [10]:

    
# Print a plain-text report: the overall success ratio, the raw success and
# failure totals, and the number of successful requests per `name`.
# Each print(...) call takes a single pre-formatted string, so the cell emits
# the same text on Python 2 and Python 3.
total = success + fail
# Guard the ratio so an empty log reports 0.0 instead of dividing by zero.
success_ratio = round(float(success) / float(total), 2) if total else 0.0

print("Summary of URL Requests")
print("-" * 50)
print("Percentage successful: %r" % success_ratio)
print("Total Succesful: %s" % success)
print("Total Unsuccesful: %s" % fail)
print("\n")


print('Quantity of Articles Available')
print("-" * 50)

# Walk (label, value) pairs directly. Looking values up with an integer key on
# a string-labelled Series depends on pandas' positional fallback, which newer
# pandas releases deprecate.
for name, count in success_grouped.items():
    print("%s :  %s" % (name.capitalize(), count))









    



Summary of URL Requests
--------------------------------------------------
Percentage successful: 1.0
Total Succesful: 25105
Total Unsuccesful: 0


Quantity of Articles Available
--------------------------------------------------
Dailyobserver :  4524
Frontpageafricaonline :  4359
Gnnliberia :  1749
Golministryofinformation :  2884
Post1847 :  6023
Theanalyst :  304
Thenewdawn :  5262



In [7]:

    
# Show the number of rows whose status code is 200. The parenthesised form
# prints the same text on Python 2 and is also valid on Python 3.
print(success)



In [14]:

    
# Number of rows with a non-null `url_status_code`.
# NOTE(review): the recorded output for this cell (52995) does not match the
# 25105 reported by the later `df.count()` cell -- presumably it was run
# against a different load of the data; confirm by re-running from a fresh kernel.
df.url_status_code.count()









    Out[14]:





52995



In [17]:

    
# For each `name`, count the non-null values in every other column.
df.groupby('name').count()









    Out[17]:






  
    
      
      url_request
      url_status_code
      header_len
      response_len
      time
      message
    
    
      name
      
      
      
      
      
      
    
  
  
    
      dailyobserver
      4524
      4524
      4524
      4524
      4524
      4524
    
    
      frontpageafricaonline
      4359
      4359
      4359
      4359
      4359
      4359
    
    
      gnnliberia
      1749
      1749
      1749
      1749
      1749
      1749
    
    
      golministryofinformation
      2884
      2884
      2884
      2884
      2884
      2884
    
    
      post1847
      6023
      6023
      6023
      6023
      6023
      6023
    
    
      theanalyst
      304
      304
      304
      304
      304
      304
    
    
      thenewdawn
      5262
      5262
      5262
      5262
      5262
      5262



In [18]:

    
# Count the non-null values in each column of `df`.
df.count()









    Out[18]:





url_request        25105
url_status_code    25105
header_len         25105
response_len       25105
name               25105
time               25105
message            25105
dtype: int64



In [ ]:

	url_request	url_status_code	header_len	response_len	time	message
name
dailyobserver	4524	4524	4524	4524	4524	4524
frontpageafricaonline	4359	4359	4359	4359	4359	4359
gnnliberia	1749	1749	1749	1749	1749	1749
golministryofinformation	2884	2884	2884	2884	2884	2884
post1847	6023	6023	6023	6023	6023	6023
theanalyst	304	304	304	304	304	304
thenewdawn	5262	5262	5262	5262	5262	5262