In [ ]:
import csv
import os
import sys
from collections import Counter
# Count how often each value appears in column 1 of the raw TSV dump and
# print a progress snapshot every 100000 rows.

# Lift the csv module's per-field size cap so oversized fields do not abort
# the parse.
csv.field_size_limit(sys.maxsize)
data_path = os.path.join(os.environ["DataPath"], "BayesImpact", "HackathonEscort")
main_path = os.path.join(data_path, "Raw", "escort_all.tsv")
cities = Counter()
i = 0  # 1-based count of rows read so far; stays 0 if the file has no rows
error_rows = [7044, 13093]  # NOTE(review): not referenced in this cell; kept for other cells
# NOTE(review): the "rU" mode is rejected by Python 3.11+; this cell appears
# to target Python 2 -- confirm before changing the mode.
with open(main_path, "rU") as f:
    # QUOTE_NONE: quote characters in the data are treated as ordinary text.
    r = csv.reader(f, delimiter="\t", lineterminator="\n", quoting=csv.QUOTE_NONE)
    for i, row in enumerate(r, 1):
        # Rows with a single field (or none) have no column 1 to count.
        if len(row) > 1:
            cities[row[1]] += 1
        if i % 100000 == 0:
            print(i, " ", row)
            print(cities.most_common(10))
(100000, ' ', ['Backpage.com', 'Allentown', 'Allentown, I-78', '\xe2\x96\x86 \xe2\x96\x87 \xe2\x96\x88 E_X_O_T_I_C \xe2\x96\x88 \xe2\x96\x87 \xe2\x96\x86 \xe2\x96\x85 \xe2\x96\x84 \xe2\x96\x83 \xe2\x96\x82 \xe2\x97\x86\xe2\x99\xa5 S_E_X_X_X_X_Y \xe2\x99\xa5\xe2\x97\x86 \xe2\x96\x82 \xe2\x96\x83 \xe2\x96\x84 \xe2\x96\x85 \xe2\x96\x86 \xe2\x96\x87 \xe2\x96\x88 M_I_S_T_R_E_S_S \xe2\x96\x88 \xe2\x96\x87 \xe2\x96\x86 - 26', '26.0', "\xe2\x98\x85 I'm _ Q_U_A_L_I_F_I_E_D ((To Leave Your )) TANK E M P T Y \xe2\x98\x85 SExY **** *dIscReet +++++juicy bOOtY***ChOco Treat JuSt For YOu yes i am independent neat and sweet enough to eat waitin to see you; I kno u want to enjoy this pleasure With sexy baby-soft skin; full figured....no rushing just pure juicy eXtasy call me anytime I'm Sasha 484 524 2444 $pecial$ ,,, available Now & in the mood for you. I AM Av\xce\xb1l\xce\xb1\xd0\xb2l\xd1\x94 N\xcf\x83\xcf\x89\xe2\x9d\xa3 AND HOSTING PRIVATE AND DISCREET LOCATIONMicrosoft.SqlServer.Dts.Pipeline.BlobColumn", '2012-12-12 01:55:00+00:00', 'http://allentown.backpage.com/FemaleEscorts/e_x_o_t_i_c-s_e_x_x_x_x_y-m_i_s_t_r_e_s_s-26/13096388', '', '', '4845242444', '', '13096388 allentown'])
[('Alabama', 49704), ('Albany', 15063), ('Akron/Canton', 14230), ('Albuquerque', 8788), ('Abilene', 4764), ('Alaska', 2951), ('Allentown', 2107), ('Alexandria', 1988), ('Aberdeen', 307), ('Arkansas', 2)]
(200000, ' ', ['Backpage.com', 'Arizona', 'Tucson, az tucson oracle n orange grove', 'Sexy special today serious callers only 5208612371', '27', "hi guys i love to know when im giving you pleasure it turns me on just thinking about it red hair green eyes big booty avaliable now real pictures down to earth open minded i just want to have fun and help you relax and we'll go from there unforgetable LET ME SHOW YOU HOW ITS DONE orange grove and oracle jacuzzi suit 5*2*0*8*6*1*2*3*7*1Microsoft.SqlServer.Dts.Pipeline.BlobColumn", '2012-10-01 13:11:00+00:00', 'http://arizona.backpage.com/FemaleEscorts/sexy-special-today-serious-callers-only-5208612371-27/19509585', '', '', '5208612371', '', '19509585 arizona'])
[('Arizona', 64143), ('Alabama', 49705), ('Albany', 15063), ('Akron/Canton', 14230), ('Albuquerque', 8788), ('Alaska', 8787), ('Allentown', 8251), ('Amarillo', 7053), ('Ann Arbor', 5457), ('Abilene', 4764)]
(300000, ' ', ['Backpage.com', 'Atlanta', 'Atlanta, Blondes Have more fun!) (2Girl Specials!', '_______________ GOT a SWeeT TooTH ________________ ToTALLY BLoNDE and DELiCiOUS ________________ - 25', '25.0', "Come HORNY and leave HAPPY ! I've mastered the art of PLEASURE and know how to pamper a gentleman. I have amazing skills, 5 star service... I'm simply the BEST ! So dream no further I am here to turn your FANTASIES into REALITY ! Ask about my 2 girl sessions with Karma or Lilly! No Black Men! Sugar Kane 4O4-399-6ll2Microsoft.SqlServer.Dts.Pipeline.BlobColumn", '2014-04-13 11:55:00+00:00', 'http://atlanta.backpage.com/FemaleEscorts/got-a-sweet-tooth-_-totally-blonde-and-delicious-_-25/22463626', '', '', '4043996112', '', '22463626 atlanta'])
[('Arizona', 82664), ('Alabama', 49707), ('Atlanta', 42279), ('Arkansas', 24131), ('Albany', 15063), ('Akron/Canton', 14231), ('Albuquerque', 8788), ('Alaska', 8787), ('Allentown', 8251), ('Amarillo', 7053)]
(400000, ' ', ['Backpage.com', 'Austin', 'Austin, AUSTIN AND SURROUNDING TOWN', 'NEW ~*~ $40 Special SEXY BBW ~*~ MS.HEAVEN - 81T-986-5538- - 22', '22.0', 'HELLO Guys, Im Heaven that Sexy BBW Goddess that you dream about. Im the girl that willing to go all out to Please a man. Im the total package big booty sexy big tits a BBW body to die for..... Call Now Ms.HEAVAN 817-986-5538 In and Out Call $60 for 30minutes, ,,,, $120 for Hour Out Call $150 Very FETISH FRIENDLYMicrosoft.SqlServer.Dts.Pipeline.BlobColumn', '2013-12-01 18:16:00+00:00', 'http://austin.backpage.com/FemaleEscorts/new-40-special-sexy-bbw-msheaven-81t-986-5538-22/9188609', '', '', '8179865538', '', '9188609 austin'])
[('Atlanta', 112006), ('Arizona', 82664), ('Alabama', 49707), ('Arkansas', 24132), ('Austin', 18230), ('Albany', 15063), ('Akron/Canton', 14231), ('Augusta', 10008), ('Albuquerque', 8788), ('Alaska', 8787)]
(500000, ' ', ['Backpage.com', 'Biloxi', 'Biloxi, Ocean springs', 'Ocean springs Incall ** Nikki&Friends** (601)533-8596 - 36', '36.0', 'Hey guys! Im NIKKI Im in town today with a sexxy friend doing incalls.we can ease your stresses take away your frustrations and leave your mind relaxed. call us for a wonderful and exotic time. Generous gentlemen over 35. 601-533-8596Microsoft.SqlServer.Dts.Pipeline.BlobColumn', '2014-03-19 09:36:00+00:00', 'http://biloxi.backpage.com/FemaleEscorts/ocean-springs-incall-nikkiandfriends-601533-8596-36/5489154', '', '', '3560153385', '', '5489154 biloxi'])
[('Atlanta', 112007), ('Arizona', 82664), ('Alabama', 49707), ('Baltimore', 38338), ('Austin', 31465), ('Arkansas', 24135), ('Baton Rouge', 15905), ('Albany', 15064), ('Akron/Canton', 14231), ('Beaumont', 11151)]
(600000, ' ', ['Backpage.com', 'Boston', 'North Shore, Melrose medford', '**~\xe2\x99\xa5Young lady looking to spice up the night\xe2\x99\xa5~** - 24', '24.0', '"Hi I\'m Ashley I\'m 5\'3"",125 pounds, I have extremely seductive eyes,luscious lips and a petite body. I\'m also open minded and interested in learning new things. So if your interested in having an unforgettable experience and getting your roses worth than call me at 339-221-6665!!!! In & out calls No fuss No rush Roses for my time 100% SATISFACTION GUARANTEE SERIOUS APPOINTMENTS ONLYMicrosoft.SqlServer.Dts.Pipeline.BlobColumn"', '2014-04-08 04:32:00+00:00', 'http://boston.backpage.com/FemaleEscorts/young-lady-looking-to-spice-up-the-night-24/25976682', '', '', '3392216665', '', '25976682 boston'])
[('Atlanta', 112007), ('Arizona', 82664), ('Boston', 60117), ('Alabama', 49707), ('Baltimore', 38338), ('Austin', 31465), ('Arkansas', 24136), ('Birmingham', 17826), ('Baton Rouge', 15905), ('Albany', 15064)]
(700000, ' ', ['Backpage.com', 'Carbondale', 'Bloomington/Normal, Chambana, Chicago, Decatur, La Salle County, Mattoon-Charleston, Peoria, Rockford, Southern Illinois, Springfield, Western Illinois, anywhere you want me!', 'Busty Babe with Wicked Curves! - 19', '19.0', 'http://www.adultsearch.com/classifieds/bp?id=2Microsoft.SqlServer.Dts.Pipeline.BlobColumn', '2014-05-06 00:02:00+00:00', 'http://carbondale.backpage.com/FemaleEscorts/busty-babe-with-wicked-curves-19/6278401', '', '', '', '', '6278401 carbondale'])
[('Atlanta', 112009), ('Arizona', 82664), ('Boston', 61234), ('Alabama', 49707), ('Baltimore', 38338), ('Brooklyn', 35852), ('Austin', 31465), ('Bronx', 29004), ('Arkansas', 24137), ('Birmingham', 17826)]
(800000, ' ', ['Backpage.com', 'Chattanooga', 'Chattanooga, chattanooga/', '*** BrunEtte BomBshell *** mS BrIttAnie * available now *** - 26', '26.0', 'Hello gentlemen.. my name is Brittanie i stans 5 5 135lbs 36 c cup nice round booty looong beautiful dark hair that u.. can run your fingers thru deep dark seductive bedroom eyes and a beautiful smile ...giv me a call i am waiting to hear from u 9013046642 # 420 #friendly No rush very clean n well groomed very discreetMicrosoft.SqlServer.Dts.Pipeline.BlobColumn', '2014-02-03 17:53:00+00:00', 'http://chattanooga.backpage.com/FemaleEscorts/brunette-bombshell-ms-brittanie-available-now-26/9681879', '', '', '9013046642', '', '9681879 chattanooga'])
[('Atlanta', 112010), ('Arizona', 82664), ('Boston', 61234), ('Alabama', 49707), ('Charlotte', 42676), ('Baltimore', 38338), ('Brooklyn', 35853), ('Austin', 31465), ('Bronx', 29004), ('Central Jersey', 28408)]
(900000, ' ', ['Backpage.com', 'Chicago', 'North Chicagoland, Gurnee in/out calls', 'Sexy blonde new to Gurnee/ Chicago would love your company!! Call now', '25.0', "I Am Gorgeous and Friendly! Always Attentive and Absolutely Charming! And most important.... I'M REAL AND I'M READY TO MAKE YOU SMILE! I HAVE A GORGEOUS FACE TO MATCH MY SEXY BODY! TREAT YOURSELF TO A LITTLE PIECE OF HEAVEN...YOU DESERVE IT! CALL ME ILL BE WAITING FOR YOU.... Ms.sarah 424 222 0904Microsoft.SqlServer.Dts.Pipeline.BlobColumn", '2012-06-29 13:23:00+00:00', 'http://chicago.backpage.com/FemaleEscorts/sexy-blonde-new-to-gurnee-chicago-would-love-your-company-call-now-25/12179746', '', '', '4242220904', '', '12179746 chicago'])
[('Atlanta', 112010), ('Chicago', 87410), ('Arizona', 82664), ('Boston', 61234), ('Alabama', 49707), ('Charlotte', 42677), ('Baltimore', 38338), ('Brooklyn', 35854), ('Austin', 31465), ('Bronx', 29004)]
(1000000, ' ', ['Backpage.com', 'Colorado', 'Denver', '\xe2\x98\x85\xe2\x99\x9a\xe2\x9d\xa4 \xe2\x99\x9a \xe2\x9d\xa4 \xe2\x99\x9b\xe2\x98\x85 D.E.N.V.E.R.S \xe2\x99\x9b F.I.N.E.S.T\xe2\x99\x9b C.L.A.S.S.Y \xe2\x98\x85\xe2\x99\x9a\xe2\x9d\xa4 \xe2\x99\x9a\xe2\x9d\xa4 \xe2\x99\x9a\xe2\x98\x85 N.A.S.T.Y \xe2\x99\x9b G.I.R.L \xe2\x98\x85\xe2\x99\x9a\xe2\x9d\xa4 \xe2\x99\x9a \xe2\x9d\xa4 \xe2\x99\x9b\xe2\x98\x85 - 22', '22.0', "HEY You, I'm nikki the Perfect Blend of Sexy and FUN !!I'm here To Please Your Deepest and Most Intimate Fantasies!You'll be Truly,Satisfied with the Time We Share...You'll wonder why you didn't call sooner...Well reviewed ask for info..100% ME, 100% HOT AND READY*****PLEASE NO BLOCKED CALLS ;) 720 252 3383.!Microsoft.SqlServer.Dts.Pipeline.BlobColumn", '2013-08-03 02:40:00+00:00', 'http://colorado.backpage.com/FemaleEscorts/denvers-finest-classy-nasty-girl-22/13026364', '', '', '7202523383', '', '13026364 colorado'])
[('Chicago', 114898), ('Atlanta', 112010), ('Arizona', 82664), ('Boston', 61234), ('Alabama', 49707), ('Charlotte', 42677), ('Baltimore', 38338), ('Brooklyn', 35854), ('Austin', 31465), ('Bronx', 29004)]
(1100000, ' ', ['Backpage.com', 'Columbia/Jeff City', 'Columbia/Jeff City, My Place or Yours :)', '\xc2\xb0o\xe2\x99\xa5 YOUR \xe2\x80\xa2\xc2\xb0o\xe2\x99\xa5o\xc2\xb0\xe2\x80\xa2 SATISFACTION \xe2\x80\xa2\xc2\xb0o\xe2\x99\xa5o\xc2\xb0\xe2\x80\xa2 IS MY \xe2\x80\xa2\xc2\xb0o\xe2\x99\xa5o\xc2\xb0\xe2\x80\xa2 #1 PRIORITY!!! Morning Special - 21', '21.0', "Hi Guys, My name is Jordynn Don't be shy guys. I am sweet intelligent & not to mention a good listener which makes me a perfect companion for any occasion. I am also described as sexy, genuine, charming, seductive & discreet. I enjoy making new friends, stimulating conversation, & a fun time. TO MAKE YOUR FANTASY COME TRUE, I'M THE GIRL FOR YOU! 100% Independent 100% Real Pics No AA 30+ Gentlemen No Blocked Calls/Texting (Serious Inquires Only) Jordynn (913)544-8704Microsoft.SqlServer.Dts.Pipeline.BlobColumn", '2013-05-04 07:20:00+00:00', 'http://columbiamo.backpage.com/FemaleEscorts/o-your-oo-satisfaction-oo-is-my-oo-1-priority-morning-special-21/10849005', '', '', '9135448704', '', '10849005 columbiamo'])
In [ ]:
import csv
import os
import gzip
import re
# Skip the first 7044 parsed rows of the gzipped TSV, reporting any row that
# does not have 13 fields, then dump the next 10000 characters of raw input
# with newlines and tabs made visible, and save that raw chunk to a temp file.
# NOTE(review): the loop/if nesting was reconstructed from a flattened
# export -- confirm it matches the original notebook.
data_path = os.path.join(os.environ["DataPath"], "BayesImpact", "HackathonEscort")
main_path = os.path.join(data_path, "Working", "escort_all.tsv.gz")
with gzip.open(main_path, "r") as f:
    r = csv.reader(f, delimiter="\t", lineterminator="\n")
    for i in range(7044):
        # next(r) instead of r.next(): works on both Python 2.6+ and Python 3.
        row = next(r)
        if len(row) != 13:
            print(i, len(row))
    # Read directly from the underlying file to see the unparsed text that
    # follows the rows consumed above.
    d = f.read(10000)
print(d.replace("\n", "****NEWLINE_HERE\n****").replace("\t", "----TAB----"))
# Write through a context manager so the temp file is flushed and closed.
with open(os.path.join(data_path, "Working", "temp"), "w") as temp_file:
    temp_file.write(d)
In [ ]:
# Show the csv module's built-in documentation (the original call had a
# dangling "." and did not parse).
help(csv)
In [ ]:
Content source: benhamner/BayesImpactHackathonThorn
Similar notebooks: