In [1]:
import pandas as pd

In [2]:
# Load the URL request log into a DataFrame.
df = pd.read_csv(filepath_or_buffer="data/200log.csv")

In [3]:
# Constant helper column: later cells call `.unit.count()` on filtered and
# grouped frames to tally rows. Note that this adds the column to `df` in place.
df['unit'] = 1

In [7]:
# Split the log on HTTP status: 200 responses vs. everything else.
status = df.url_status_code
ok_rows = df[status == 200]
bad_rows = df[status != 200]

# Overall tallies first, then the same tallies broken down per `name`.
success = ok_rows.unit.count()
fail = bad_rows.unit.count()
success_grouped = ok_rows.groupby('name').unit.count()
fail_grouped = bad_rows.groupby('name').unit.count()

In [8]:
# Display the per-`name` counts of non-200 responses.
fail_grouped


Out[8]:
Series([], Name: unit, dtype: int64)

In [9]:
# Total number of logged requests, treating each row of `df` as one request.
# (Summing `url_status_code` would add up the HTTP codes themselves instead of
# counting anything.)
total_requests = len(df)

In [10]:
# Report the overall success ratio and totals, then the count of 200 responses
# for each `name` group.
total = success + fail
# Guard against an empty log (no division by zero) and convert to plain floats
# so the ratio is a built-in float whose %r form does not vary with numpy.
success_ratio = round(float(success) / float(total), 2) if total else 0.0

# Single-argument print(...) calls emit the same text under Python 2 and 3.
print("Summary of URL Requests")
print("-" * 50)
print("Percentage successful: %r" % success_ratio)
print("Total Succesful: %s" % success)
print("Total Unsuccesful: %s" % fail)
print("\n")


print('Quantity of Articles Available')
print("-" * 50)

# Pair each group label with its count directly. Integer `[]` lookups on a
# label-indexed Series depend on a positional fallback that pandas deprecated.
for site, count in zip(success_grouped.index, success_grouped):
    print("%s :  %s" % (site.capitalize(), count))


Summary of URL Requests
--------------------------------------------------
Percentage successful: 1.0
Total Succesful: 25105
Total Unsuccesful: 0


Quantity of Articles Available
--------------------------------------------------
Dailyobserver :  4524
Frontpageafricaonline :  4359
Gnnliberia :  1749
Golministryofinformation :  2884
Post1847 :  6023
Theanalyst :  304
Thenewdawn :  5262

In [7]:
# Show the overall count of 200 responses.
print(success)


0

In [14]:
# Number of non-null status codes in the log.
df['url_status_code'].count()


Out[14]:
52995

In [17]:
# Non-null value counts for every column, one row per `name`.
df.groupby(by='name').count()


Out[17]:
url_request url_status_code header_len response_len time message
name
dailyobserver 4524 4524 4524 4524 4524 4524
frontpageafricaonline 4359 4359 4359 4359 4359 4359
gnnliberia 1749 1749 1749 1749 1749 1749
golministryofinformation 2884 2884 2884 2884 2884 2884
post1847 6023 6023 6023 6023 6023 6023
theanalyst 304 304 304 304 304 304
thenewdawn 5262 5262 5262 5262 5262 5262

In [18]:
# Non-null value counts for each column of the whole frame.
df.count(axis=0)


Out[18]:
url_request        25105
url_status_code    25105
header_len         25105
response_len       25105
name               25105
time               25105
message            25105
dtype: int64

In [ ]: