Word! Automating a Hip-Hop Word-of-the-Day Blog

Requirements


In [2]:
import pandas as pd
import numpy as np
import glob
import re
from collections import defaultdict

Loading Lyrics into Memory


In [3]:
from html.parser import HTMLParser

class MLStripper(HTMLParser):
    """HTML parser that throws away markup and keeps only the text content."""

    def __init__(self):
        # Run the base-class constructor instead of calling reset() by hand,
        # so every attribute HTMLParser relies on is initialised.
        super().__init__(convert_charrefs=True)
        self.fed = []  # text chunks in the order they were encountered

    def handle_data(self, d):
        """Record one chunk of text found outside of tags."""
        self.fed.append(d)

    def get_data(self):
        """Return all text collected so far as a single string."""
        return ''.join(self.fed)


def strip_tags(html):
    """Return ``html`` with all markup removed and character references decoded.

    Parameters
    ----------
    html : str
        Raw HTML (or plain text) to clean.

    Returns
    -------
    str
        The concatenated text content of ``html``.
    """
    s = MLStripper()
    s.feed(html)
    # close() flushes text the parser is still buffering; without it, input
    # that ends in an unterminated '&' (e.g. "R&B") loses its trailing text.
    s.close()
    return s.get_data()

In [4]:
# Collect one entry per lyrics file, column by column, ready for pd.DataFrame.
df_data = defaultdict(list)
for filename in glob.iglob('Lyrics/ohhla.com/*/*/*/*.txt', recursive=True):
    with open(filename, 'r', encoding="ISO-8859-1") as f:
        stripped_lyrics = strip_tags(f.read())

    # Header lines of the form "Artist: ...", "Song: ..." and "Typed by: ...".
    # Raw strings keep the regex escapes (\s, \S) from being treated as
    # (invalid) Python string escapes.
    artist = re.search(r'Artist:\s*(.*)\s*\n', stripped_lyrics)
    song = re.search(r'Song:\s*(.*)\s*\n', stripped_lyrics)
    # group(1) is the transcriber; group(2) is everything after that line.
    lyrics = re.search(r'Typed by:\s*(.*)\s*\n([\s\S]*)', stripped_lyrics)

    # Files missing any of the three headers are skipped entirely.
    if artist is not None and song is not None and lyrics is not None:
        df_data["filename"].append(filename)
        df_data["artist"].append(artist.group(1))
        df_data["song"].append(song.group(1))
        df_data["lyrics"].append(lyrics.group(2).lower())

In [5]:
# Turn the per-column lists into a DataFrame (one row per parsed song)
# and preview rows 105-119.
rap_data = pd.DataFrame(df_data)
rap_data.iloc[105:120]


Out[5]:
artist filename lyrics song
105 2 Chainz f/ The Weeknd Lyrics/ohhla.com/anonymous/2_chainz/TRUstory/l... [chorus: the weeknd]\ngirl, i'm just another b... Like Me
106 2 Chainz Lyrics/ohhla.com/anonymous/2_chainz/TRUstory/m... [intro: 2 chainz]\n(yeah~!) i told them get on... Money Machine
107 2 Chainz f/ Drake Lyrics/ohhla.com/anonymous/2_chainz/TRUstory/n... [intro]\n{*mike will made it*}\nyo.. t.r.u.! (... No Lie
108 2 Chainz f/ Dolla Boy Lyrics/ohhla.com/anonymous/2_chainz/TRUstory/s... [intro: r&b samples]\nnothing in the whole wid... Stop Me Now
109 2 Chainz f/ Lil Wayne Lyrics/ohhla.com/anonymous/2_chainz/TRUstory/y... [intro]\nyuck daddy! yuck!\nyuck daddy! yuck!\... Yuck!
110 2 Hungry Bros. present 8thW1 f/ Janelle Renee Lyrics/ohhla.com/anonymous/2_hungry/no_room/mo... [intro]\ngoo-oh-oh-ohhh {*2x*}\n(c'mon people!... More Go
111 2 Hungry Bros. present 8thW1 Lyrics/ohhla.com/anonymous/2_hungry/no_room/sh... [intro/chorus - interpolation of krs-one]\nthe... Short and Sweet
112 2 Hungry Bros. present 8thW1 Lyrics/ohhla.com/anonymous/2_hungry/no_room/sy... [8thw1]\nthe teacher, class in session\nthe be... Say My Name Right
113 2 Hungry Bros. present 8thW1 Lyrics/ohhla.com/anonymous/2_hungry/no_room/ta... [8thw1]\nwhy stress mic skills? i upgrade to l... Talkin
114 2 Live Crew Lyrics/ohhla.com/anonymous/2_live/2liveis/chec... [mr. mixx]\ncheck it out y'all\nch-ch-check it... Check it Out Y'all
115 2 Live Crew Lyrics/ohhla.com/anonymous/2_live/2liveis/get_... ( *mr. mixx cuts in dolemite* )\n(i wouldn't l... Get it Girl
116 2 Live Crew Lyrics/ohhla.com/anonymous/2_live/2liveis/thro... [ fresh kid ice ]\nlisten up y'all 'cause this... Throw the 'D'
117 2 Live Crew Lyrics/ohhla.com/anonymous/2_live/2liveis/want... chorus(4x): luke\nsomebody say hey-y-y-y-y we... We Want Some Pussy
118 2 Live Crew Lyrics/ohhla.com/anonymous/2_live/2liveis/what... 1-2 1-2\ni am the dominating fresh kid ice\nan... 2 Live is What We Are... (Word)
119 2 Live Crew Lyrics/ohhla.com/anonymous/2_live/as_nasty/7_b... [note: brother marquis does all the verses.]\... My Seven Bizzos

In [6]:
# (number of parsed songs, number of columns)
rap_data.shape


Out[6]:
(33371, 4)

Checking Words


In [7]:
# Print every song whose whitespace-separated lyric tokens include "python".
songs = zip(df_data["artist"], df_data["song"], df_data["lyrics"])
for artist, song, lyrics in songs:
    tokens = lyrics.split()
    if "python" not in tokens:
        continue
    print(artist, " - ", song)


50 Cent  -  Leave the Lights On
Ab-Soul f/ A-Mack, Punch, SZA  -  Dub Sac
Action Bronson f/ Big Body Bes, Mac Miller  -  Twin Peugots
The Ambassador f/ LaKia Wise  -  Get You Open
Azealia Banks f/ MJ Cole, Peter Rosenberg  -  Desperado
Beast 1333  -  113 Bars
Beast 1333  -  Anonymous
Big Daddy Kane  -  Uncut, Pure (Original and Remix)
Childish Gambino (Donald Glover)  -  Lights Turned On
D-12  -  American Psycho
Deltron 3030 (Del the Funky Homosapien)  -  Battle Song
Fat Joe f/ Busta Rhymes, DJ Khaled, Jadakiss, Miguel, Mos Def, Roscoe Dash, Kanye West  -  Pride N Joy
G. Dep f/ Kool G. Rap, Rakim  -  I Am
Ghostface Killah, Masta Killa, U-God, Raekwon, Cappadonna  -  Winter Warz
Hell Razah f/ Fokis, Killah Priest  -  Gladiators
Ice-T f/ EJ Evil E the Great, Trigger Tha Gambler (SMG)  -  Cramp Your Style (Live)
Ice-T  -  Cramp Your Style
Joe Budden  -  Roll Call
Killah Priest f/ Antonio Chance  -  Atoms to Adams
DJ KaySlay f/ Fat Joe, Raekwon, Scarface  -  I Never Liked Ya Ass
DJ KaySlay f/ LL Cool J  -  The Truth
Kreators f/ Akrobatik, Big Shug, Ed O.G., Guru, Krumb Snatcha  -  Home
Kurupt f/ Deadly Venoms  -  It's Time	
Lil Twist f/ Busta Rhymes  -  Turnt Up
Lil Wayne  -  Sh!t
Lil Wayne f/ B.G., Manny Fresh  -  Drop it Like it's Hot
LL Cool J  -  Murdergram 
Ludacris  -  Dancin Dirty
MellowHype f/ Mike G  -  666
R. Kelly f/ Big Tigger, Cam'Ron  -  Snake (Remix)
Missy Elliott f/ Timbaland  -  9th Inning
Nicki Minaj f/ Skylar Grey  -  Bed of Lies
OutKast f/ Lil Wayne, Snoop Dogg  -  Hollywood Divorce
Raekwon f/ American Cream Team, Killa Sin  -  Giant Size
Raekwon f/ Lord Superb  -  Russian Cut
Lord Have Mercy and D.V. alias Khrist  -  Holy Water
The Union f/ Buckshot and Kasino  -  Throw it Up
Rick Ross  -  Crocodile Python
The Roots  -  Quills
Erick Sermon f/ Ryze, Twon Gabz  -  Fix Your Face
Swollen Members f/ Chali 2na, Evidence  -  Full Contact
The Game  -  400 Bars
The LOX f/ Foxy Brown  -  My Niggaz
Twista f/ Do or Die, Johnny P  -  Yo Body
Wyclef Jean f/ Supreme C, Marie Antoinette, Hope  -  Da Cypha
Xzibit f/ Nate Dogg  -  Been a Long Time

In [8]:
# Print every song whose whitespace-separated lyric tokens include "anaconda".
for artist, song, lyrics in zip(df_data["artist"], df_data["song"], df_data["lyrics"]):
    has_word = any(token == "anaconda" for token in lyrics.split())
    if has_word:
        print(artist, " - ", song)


2Pac f/ Outlawz  -  Runnin On E
2nd II None f/ DJ Quik, AMG, Hi-C, Playa Hamm  -  Got A Nu Woman
All City  -  Ded Right
Big Ed  -  Head Busta
Big K.R.I.T. f/ Ludacris  -  What U Mean
Big L f/ Stan Spit  -  Who You Slidin' Wit'
Big Punisher  -  The Dream Shatterer
Big Punisher  -  The Dream Shatterer (Original)
Canibus  -  Who Stopped Ya?
Chamillionaire  -  Set it Off Freestyle
Chamillionaire f/ Rasaq  -  Panky Rang
Clika One f/ Kurupt, Chiko Dateh, Don Cisco  -  Gangsta Pimpin'
Danny!  -  D.A.N.N.Y.
Daz Dillinger & JT the Bigga Figga f/ Kurupt, Rappin' 4-Tay  -  Sweet Love
E-40  -  If If Was a 5th
Flo Rida f/ Robin Thicke, Verdine White  -  I Don't Like It, I Love It
Jim Crow f/ Jazze Pha, Too $hort  -  That Drama (Baby's Momma)
Juvenile f/ Jay Da Menace, Kango Slim  -  Make U Feel Alright
Kanye West f/ 2 Chainz, Marsha Ambrosius, Big Sean, James Fauntleroy II  -  The One
Kool G Rap  -  A Thug's Love Story (Chapter I, II, III)
Kurupt f/ Lil 1/2 Dead  -  On, OnSite
LL Cool J f/ Mashonda, Swizz Beatz  -  Crazy Girl
Hollister, Dave f/ Redman, Erick Sermon  -  The Weekend
Nicki Minaj  -  Anaconda
OutKast  -  War
P. Diddy (Puff Daddy) f/ Eightball & MJG, Faith Evans  -  Roll With Me
Raekwon  -  Respect Power
Royce Da 5'9" f/ Mr. Porter  -  Mine In Thiz
Silkk the Shocker f/ No Limit All-Stars  -  I'm a Soldier
Slaughterhouse  -  SayDatThen
Soulja Boy  -  Go to War
Tech N9ne f/ Shabba Ranks  -  Boy Toy
Tekitha f/ Cappadonna  -  I Can See
The Game f/ Future, Young Jeezy  -  I Remember *
Tyga f/ Future, The Game  -  I Remember
Xzibit  -  A Minute to Pray

Finding Rare Words


In [9]:
# The 1/3 million most frequent words, all lowercase, with counts.
# http://norvig.com/ngrams/
# keep_default_na=False: by default pandas treats strings such as "null",
# "nan" and "na" as missing values, which would turn real words into NaN.
ngrams = pd.read_csv("assets/count_1w.txt", sep="\t", names=["word", "count"],
                     keep_default_na=False)
ngrams.tail(n=20)


Out[9]:
word count
333313 goooglo 12711
333314 gooogla 12711
333315 gooogd 12711
333316 gooofa 12711
333317 goooao 12711
333318 goollo 12711
333319 goolld 12711
333320 goolh 12711
333321 goolgee 12711
333322 googook 12711
333323 googllr 12711
333324 googlal 12711
333325 googgoo 12711
333326 googgol 12711
333327 goofel 12711
333328 gooek 12711
333329 gooddg 12711
333330 gooblle 12711
333331 gollgo 12711
333332 golgw 12711

In [10]:
# The Tournament Word List (178,690 words) -- used by North American Scrabble players.
# http://norvig.com/ngrams/
# keep_default_na=False: entries such as "NA" and "NULL" are on pandas' default
# missing-value list and would otherwise be read as NaN instead of words.
twl_wordlist = pd.read_csv("assets/TWL06.txt", names=["word"], keep_default_na=False)
# Lowercase the list so it lines up with the all-lowercase n-gram words.
twl_wordlist = pd.DataFrame(twl_wordlist["word"].str.lower())

# Inner join: keep only words present in both lists, attaching their counts.
twl_w_ngrams = pd.merge(twl_wordlist, ngrams, on="word")

In [11]:
# The 20 merged words with the lowest n-gram counts.
twl_w_ngrams.sort_values(by="count").head(n=20)


Out[11]:
word count
73785 triose 12711
59922 retinae 12712
35948 inhibitive 12714
57100 rasbora 12714
40668 loadstar 12714
2765 antemortem 12714
45565 munting 12714
72551 toits 12716
11210 cestus 12716
36139 insatiably 12717
18170 defoliate 12718
41780 malaprops 12719
70256 synchronises 12719
64308 sibilance 12720
70772 tappets 12723
40563 lither 12724
27791 forfend 12724
73589 treponemal 12724
48890 outcross 12725
16895 cuprum 12728

In [12]:
# How many merged words have an n-gram count below 400000.
np.sum(twl_w_ngrams["count"] < 400000)


Out[12]:
51195

In [13]:
# Words with an n-gram count below 400000 ...
is_rare = twl_w_ngrams["count"] < 400000
all_bigwords = twl_w_ngrams.loc[is_rare, "word"].values
# ... restricted to those longer than four letters.
all_big_bigwords = list(filter(lambda w: len(w) > 4, all_bigwords))
# Set form for fast membership tests against lyric tokens.
bigset = set(all_big_bigwords)

print(all_big_bigwords[:100])


['aardvarks', 'aardwolf', 'aargh', 'abaca', 'aback', 'abaft', 'abandonments', 'abandons', 'abase', 'abased', 'abasement', 'abashed', 'abated', 'abatements', 'abates', 'abating', 'abattoir', 'abattoirs', 'abaxial', 'abaya', 'abbes', 'abbess', 'abbesses', 'abbeys', 'abbots', 'abbreviate', 'abbreviates', 'abbreviating', 'abdicate', 'abdicated', 'abdicates', 'abdicating', 'abdication', 'abdomens', 'abdominals', 'abduct', 'abductee', 'abductees', 'abducting', 'abductions', 'abductor', 'abductors', 'abducts', 'abeam', 'abecedarian', 'abele', 'abeles', 'abelia', 'aberrant', 'abets', 'abetted', 'abetting', 'abettor', 'abeyance', 'abhor', 'abhorred', 'abhorrence', 'abhorrent', 'abhors', 'abided', 'abides', 'abiogenesis', 'abiotic', 'abject', 'abjection', 'abjectly', 'abjuration', 'abjure', 'abjured', 'ablate', 'ablated', 'ablations', 'ablative', 'ablaze', 'abled', 'abler', 'ables', 'ablest', 'ablution', 'ablutions', 'abnegation', 'abodes', 'abolishes', 'abolishing', 'abolishment', 'abolitionism', 'abolitionist', 'abolitionists', 'abominable', 'abominably', 'abomination', 'abominations', 'aboriginals', 'aborigine', 'aborigines', 'abortifacient', 'aborting', 'abortionist', 'abortionists', 'abortive']

In [14]:
# For each song, the rare words (from bigset) that appear among its
# whitespace-separated lyric tokens.
df_data["matches"] = [
    set(lyrics.split()) & bigset
    for lyrics in df_data["lyrics"]
]

In [15]:
# Rebuild the frame (now including "matches") and keep only the songs
# whose match set is non-empty.
rap_data = pd.DataFrame(df_data)
has_match = rap_data["matches"].map(len) > 0
rap_data = rap_data[has_match]

In [16]:
# First 20 songs that contain at least one rare word.
rap_data.head(n=20)


Out[16]:
artist filename lyrics matches song
0 10 K.A.N.'s Lyrics/ohhla.com/anonymous/10_kans/rm_bside/u_... [* applause *]\n\n[dj]\nright about now we got... {fellas, skeet, fiends} U Need Dick In Your Life
1 10sion Lyrics/ohhla.com/anonymous/10sion/tension/lets... -=talking=-\nlets get it on every time\nholler... {playas, holler, hypnotized} Let's Get it On
2 113 Lyrics/ohhla.com/anonymous/113/dans_lur/ausumm... * send corrections to the typist\n\n[refrain]\... {favelas, typist, prise, pavillon, spliff, dom... Au Summum
3 1200 Techniques Lyrics/ohhla.com/anonymous/1200tech/c_theory/e... * send corrections to the typist\n\n*scratchin... {typist, showdowns, shipwrecked, emcees, sicke... Eye of the Storm
4 1200 Techniques Lyrics/ohhla.com/anonymous/1200tech/c_theory/w... [dj peril]\nnfamous - step, step, step, step, ... {melanin, renegades, booed, befriending, albin... Where Ur At?
5 1200 Techniques Lyrics/ohhla.com/anonymous/1200tech/infinite/k... * send corrections to the typist\n\n[prolouge]... {typist, showdowns, hopscotch, quitters, fickl... Karma
6 12 O'Clock w/ Raekwon the Chef Lyrics/ohhla.com/anonymous/12oclock/rm_bside/n... intro: raekwon the chef \n\nyeah yeah, that's ... {jubilant, boneyard, livest, beefs, gleam, swo... Nasty Immigrants *
8 1982 (Statik Selektah & Termanology) f/ Cassid... Lyrics/ohhla.com/anonymous/1982/1982/goinback.... (i'm goin back, back, back)\n\n[verse 1 - cass... {backer, iliad, porky, caskets} Goin Back
9 1982 (Statik Selektah & Termanology) f/ Lil' F... Lyrics/ohhla.com/anonymous/1982/rm_bside/thuga... [intro]\nbrrrrrrrrrrrrrrrrr\nshow off, show of... {hugger, mobsters} Thugathon
10 1.4.0. Productions f/ Chapel Lyrics/ohhla.com/anonymous/1pt_four/po_poets/f... [intro: chapel]\nlet me bless this shit, i'mma... {fiends, deadliest, cocking} Freestyle
11 1.4.0. Productions f/ Franky Botts, Molly-Q Lyrics/ohhla.com/anonymous/1pt_four/po_poets/g... [molly-q]\ndart double click, through your car... {detonate, godfathers, plutons} Godfathers
12 1.4.0. Productions f/ Cheesey Rat, Crunch Lo, ... Lyrics/ohhla.com/anonymous/1pt_four/po_poets/g... "only the gods could watch the earth twist" ->... {willies, caramels, nobly, wheezy, realest} God Twist
13 1.4.0. Productions f/ Franky Botts, Othorized ... Lyrics/ohhla.com/anonymous/1pt_four/po_poets/s... [intro: lounge lo]\nya'll know the work, aiyo\... {lounging, escapade, stutter, stubbed, perfect... Salute
14 1.4.0. Productions f/ Crunch Lo Lyrics/ohhla.com/anonymous/1pt_four/po_poets/s... [hook: crunch lo]\nis it the thug? deep in my ... {embezzle, bugged, reefer, corns} So What Now?
15 1.4.0. Productions f/ Othorized F.A.M. Lyrics/ohhla.com/anonymous/1pt_four/po_poets/w... [shawn wigs]\ndon't believe the hype, we hold ... {ratchets} We Always Gonna Do It
16 1.4.0. Productions f/ Crunch Lo, Samantha Lyrics/ohhla.com/anonymous/1pt_four/skoolyrd/e... [chorus: samantha (crunch lo)]\ni will give yo... {timberlands, prowl, beaters, extort, jakes, s... Everything U Need
17 1.4.0. Productions f/ Molly-Q Lyrics/ohhla.com/anonymous/1pt_four/skoolyrd/f... [molly-q]\nballistic, linguistic, slang-tistic... {twats, tippy} Freestyle
18 1.4.0. Productions f/ Shawn Wigs Lyrics/ohhla.com/anonymous/1pt_four/skoolyrd/k... [intro: shawn wigs]\nson, it was the illest fu... {stashed, illest, spasm} Knee Deep in Some Ass
19 1.4.0. Productions f/ Franky Botts, Lighter Sh... Lyrics/ohhla.com/anonymous/1pt_four/skoolyrd/l... [chorus 2x: lighter shade]\nlet it bang, bang ... {pillage, wiseguys, televise, manholes, shogun... Let it Bang
20 1.4.0. Productions f/ Shawn Wigs, Toilet Lyrics/ohhla.com/anonymous/1pt_four/skoolyrd/l... [chorus: toilet (shawn wigs)]\ni got a mansion... {smalls, shoeshine, maxed} Life's Been Good to Me

Finding Rare Rap Words


In [17]:
from collections import Counter
# Frequency of every whitespace-separated token across all lyrics.
rap_onegrams = Counter(
    token
    for lyrics in df_data["lyrics"]
    for token in lyrics.split()
)

In [18]:
# Convert the token counter into a two-column frame: the tokens become the
# index, reset_index() moves them into a regular column, and the columns are
# then renamed to match the layout of the n-gram table ("word", "count").
rap_onegrams_df = pd.DataFrame.from_dict(rap_onegrams, orient='index').reset_index()
rap_onegrams_df.columns = ["word","count"]

In [19]:
# The 20 most frequent lyric tokens (sorted ascending, so the most common is last).
rap_onegrams_df.sort_values(by="count").tail(n=20)


Out[19]:
word count
192727 is 112697
36746 get 112745
39709 we 129374
360668 your 131125
155504 with 137393
110645 of 141163
6432 like 151442
146271 that 163367
138161 on 174972
200839 me 175888
274697 i'm 189356
122193 it 208014
118228 in 226361
173041 my 254126
87779 to 341578
223350 and 366264
20481 a 374886
171794 you 423922
9205 i 492419
22813 the 742326

In [20]:
# Word-list entries that occur in the lyrics, with their lyric-corpus counts.
raptwl_df = pd.merge(twl_wordlist, rap_onegrams_df, on="word")

# Words used fewer than five times across all lyrics.
rare_in_rap = raptwl_df["count"] < 5
all_bigrapwords = set(raptwl_df.loc[rare_in_rap, "word"])
len(all_bigrapwords)


Out[20]:
19515

In [21]:
# For each song, the words from all_bigrapwords found among its
# whitespace-separated lyric tokens.
df_data["rap_matches"] = [
    set(lyrics.split()).intersection(all_bigrapwords)
    for lyrics in df_data["lyrics"]
]

In [22]:
# Rebuild the frame (now including "rap_matches") and keep only the songs
# with at least one word that is rare within the lyrics corpus.
rap_data = pd.DataFrame(df_data)
has_rap_match = rap_data["rap_matches"].map(len) > 0
rap_data = rap_data[has_rap_match]
rap_data


Out[22]:
artist filename lyrics matches rap_matches song
2 113 Lyrics/ohhla.com/anonymous/113/dans_lur/ausumm... * send corrections to the typist\n\n[refrain]\... {favelas, typist, prise, pavillon, spliff, dom... {cartes, favelas, prise, pavillon, haute, domi... Au Summum
3 1200 Techniques Lyrics/ohhla.com/anonymous/1200tech/c_theory/e... * send corrections to the typist\n\n*scratchin... {typist, showdowns, shipwrecked, emcees, sicke... {shipwrecked} Eye of the Storm
4 1200 Techniques Lyrics/ohhla.com/anonymous/1200tech/c_theory/w... [dj peril]\nnfamous - step, step, step, step, ... {melanin, renegades, booed, befriending, albin... {albinos, befriending, constructing, swooned} Where Ur At?
5 1200 Techniques Lyrics/ohhla.com/anonymous/1200tech/infinite/k... * send corrections to the typist\n\n[prolouge]... {typist, showdowns, hopscotch, quitters, fickl... {dodgy, umbilicals} Karma
6 12 O'Clock w/ Raekwon the Chef Lyrics/ohhla.com/anonymous/12oclock/rm_bside/n... intro: raekwon the chef \n\nyeah yeah, that's ... {jubilant, boneyard, livest, beefs, gleam, swo... {jubilant, sweetened, boneyard, connives} Nasty Immigrants *
8 1982 (Statik Selektah & Termanology) f/ Cassid... Lyrics/ohhla.com/anonymous/1982/1982/goinback.... (i'm goin back, back, back)\n\n[verse 1 - cass... {backer, iliad, porky, caskets} {iliad} Goin Back
9 1982 (Statik Selektah & Termanology) f/ Lil' F... Lyrics/ohhla.com/anonymous/1982/rm_bside/thuga... [intro]\nbrrrrrrrrrrrrrrrrr\nshow off, show of... {hugger, mobsters} {hugger} Thugathon
10 1.4.0. Productions f/ Chapel Lyrics/ohhla.com/anonymous/1pt_four/po_poets/f... [intro: chapel]\nlet me bless this shit, i'mma... {fiends, deadliest, cocking} {cloves} Freestyle
11 1.4.0. Productions f/ Franky Botts, Molly-Q Lyrics/ohhla.com/anonymous/1pt_four/po_poets/g... [molly-q]\ndart double click, through your car... {detonate, godfathers, plutons} {plutons, sirloins} Godfathers
12 1.4.0. Productions f/ Cheesey Rat, Crunch Lo, ... Lyrics/ohhla.com/anonymous/1pt_four/po_poets/g... "only the gods could watch the earth twist" ->... {willies, caramels, nobly, wheezy, realest} {caramels, nobly} God Twist
13 1.4.0. Productions f/ Franky Botts, Othorized ... Lyrics/ohhla.com/anonymous/1pt_four/po_poets/s... [intro: lounge lo]\nya'll know the work, aiyo\... {lounging, escapade, stutter, stubbed, perfect... {goombah, raviolis, bulked} Salute
14 1.4.0. Productions f/ Crunch Lo Lyrics/ohhla.com/anonymous/1pt_four/po_poets/s... [hook: crunch lo]\nis it the thug? deep in my ... {embezzle, bugged, reefer, corns} {ameba, dismissing, toughly} So What Now?
15 1.4.0. Productions f/ Othorized F.A.M. Lyrics/ohhla.com/anonymous/1pt_four/po_poets/w... [shawn wigs]\ndon't believe the hype, we hold ... {ratchets} {breasted} We Always Gonna Do It
19 1.4.0. Productions f/ Franky Botts, Lighter Sh... Lyrics/ohhla.com/anonymous/1pt_four/skoolyrd/l... [chorus 2x: lighter shade]\nlet it bang, bang ... {pillage, wiseguys, televise, manholes, shogun... {croquets, manholes, gondola} Let it Bang
21 1.4.0. Productions f/ Chapel, Wise Lyrics/ohhla.com/anonymous/1pt_four/skoolyrd/l... [chorus 2x: wise]\nyo, we unfold, leaving you ... {plucking, proclamations, sware, snitch} {proclamations} Lock N Load
22 1.4.0. Productions f/ Clocka, Lighter Shade, O... Lyrics/ohhla.com/anonymous/1pt_four/skoolyrd/n... [intro: molly-q]\nput your hands against the w... {bandanas, menudo, stashed, heavyweights} {barnyards} New York
23 1.4.0. Productions f/ Crunch Lo, Shawn Wigs Lyrics/ohhla.com/anonymous/1pt_four/skoolyrd/o... [chorus: shawn wigs (crunch lo)]\nwho am i? (w... {corns, bargained, pillage, antonym, offspring... {offsprings} One on One
24 1.4.0. Productions f/ Cheesey Rat, Crunch Lo, ... Lyrics/ohhla.com/anonymous/1pt_four/skoolyrd/r... [cheesey rat]\nwhat time is it, i checked the ... {blockhead, bruise, sneeze, bugged, clogging, ... {administrations, reinventing, wised, particip... Revelations
25 1.4.0. Productions f/ Othorized F.A.M. Lyrics/ohhla.com/anonymous/1pt_four/skoolyrd/s... [lounge lo]\ni'm in the jungle walking barefoo... {lounger, scuffed, automatics, splurge, lucked... {lounger, lulu, epilogues} Staten Islanders
26 1.4.0. Productions f/ Crunch Lo, Lighter Shade Lyrics/ohhla.com/anonymous/1pt_four/skoolyrd/w... [lighter shade]\nya'll want to battle, so i'mm... {scottie, jezebel, pillage, brainchild, chins,... {blitzing, spindle, equivalence} We in the Realm
29 1.4.0. Productions f/ Chapel, Livewire, Wise Lyrics/ohhla.com/anonymous/1pt_four/stand_up/d... [chorus: wise]\nthe block is hot, hot, son, be... {copped, gassed, polluters, fittest} {polluters} Da Block is Hot
30 1.4.0. Productions f/ Shawn Wigs Lyrics/ohhla.com/anonymous/1pt_four/stand_up/d... "drop!"\n\n[shawn wigs]\nyou should be thankfu... {coked, bounties, slumped, snotty} {regs, bounties, socialist} Drop!
33 1.4.0. Productions f/ Molly-Q, Shawn Wigs Lyrics/ohhla.com/anonymous/1pt_four/stand_up/p... [shawn wigs]\nwe've been on the map, since gho... {expels, snorting, hollows, prowl} {expels} Poisonous Poets
34 1.4.0. Productions f/ Othorized F.A.M. Lyrics/ohhla.com/anonymous/1pt_four/stand_up/s... [intro: lounge lo]\nthat's right.. i see you\n... {dipper, muscled, clapped, quitter, reefer, th... {breachers, muscled, thoroughness} Shot Me Down
35 1.4.0. Productions f/ King Just Lyrics/ohhla.com/anonymous/1pt_four/stand_up/t... [hook: king just singing]\nhey yo, you got mad... {interferer, poltergeists, vindicate, windpipe} {interferer, vindicate} This is Hip-Hop
36 1.4.0. Productions f/ Crunch Lo Lyrics/ohhla.com/anonymous/1pt_four/stand_up/t... [intro: crunch lo]\nit ain't nothing, son, wor... {corns, foreheads, goons, yapping, waltzed} {waltzed} Trouble to MC's
40 1st Infantry f/ Prodigy Lyrics/ohhla.com/anonymous/1st_infa/rm_bside/s... [prodigy]\ngangsta, yeah nigga you know (you k... {roaches, reefer, sovereignly} {sovereignly} Serious (The New Message)
46 213 Lyrics/ohhla.com/anonymous/213/hard_way/joysti... [nate dogg]\nshe said she wants to ride my joy... {} {joysticks} Joystick
50 213 Lyrics/ohhla.com/anonymous/213/hard_way/maryja... [chorus - nate dogg]\none for the treble, two ... {sipped, hooka, swisher} {respectful} Mary Jane
57 2 Chainz (Tity Boi) Lyrics/ohhla.com/anonymous/2_chainz/2residue/b... [chorus: x2]\ni just call her boo\ni don't kno... {layin, daycares, reefer, chirp} {mien, daycares} Boo
... ... ... ... ... ... ...
33294 Young Scooter f/ Bun B Lyrics/ohhla.com/anonymous/yscooter/lottery/st... [chorus]\ngrowing up as a young nigga\nyoung n... {noter, snitch, franklins} {noter} Street Lottery
33299 Young Scooter f/ Alley Boy, Gucci Mane Lyrics/ohhla.com/anonymous/yscooter/voice_of/f... [chorus]\nfaster\nhe did five, he got chill mo... {rapping} {bulled} Faster
33301 Yukmouth f/ Tech N9ne Lyrics/ohhla.com/anonymous/yukmouth/thugged/bu... [tech n9ne] (yukmouth)\nregime killas, ah\nwha... {geeked, strangulated, loony, woofers, fiends,... {clunk, strangulated} Bumbell
33302 Yukmouth f/ Rap-A-Lot Family Lyrics/ohhla.com/anonymous/yukmouth/thugged/RA... [yukmouth]\ni live the life of a hoodlum \ntak... {wiseguys, mobbed, riffles, caskets, layin, hu... {blackbirds, welly, spore, riffles, whooshes} RAL Mafia
33303 Yukmouth f/ N.W.A, Tech N9ne Lyrics/ohhla.com/anonymous/yukmouth/thugged/st... [dr. dre] \nthe motherfucking saga continues..... {guzzled, bauble, motherfucking, straddle, mot... {bauble, manipulative, caver} Stallion
33304 Yukmouth f/ Madd Maxx, Phats Bossalini, Poppa ... Lyrics/ohhla.com/anonymous/yukmouth/thugged/th... yo! let's do this!\nuh!\nuh-oh, uh-oh!\nha!\nu... {tatted, caulked, blindfold, timberlands, spit... {callas, clocker, caulked} Thugged Out
33306 Yukmouth f/ Phats Bossilini, Mad Max, Tech N9n... Lyrics/ohhla.com/anonymous/yukmouth/thuglord/r... [ chorus ]\ni got some killers on the payroll,... {blowed, tatted, crackhead, sowed, splatter, p... {patrolled} Regime Killers 2001
33307 Yukmouth f/ Kokane, Kurupt, Nate Dogg Lyrics/ohhla.com/anonymous/yukmouth/thuglord/s... [nate dogg]\neverytime i smoke\ni smoke indo s... {snitch, motherfuckers, subtractions, homies, ... {subtractions} So Ignorant
33308 Yukmouth f/ Gonzoe, Kris Kaliko, Tech N9ne Lyrics/ohhla.com/anonymous/yukmouth/unitedg2/k... [intro: gonzoe]\nyes yes, whuttup mayn\nit's t... {sawed, scrappers, gunshots, ambushed, cocked,... {scrappers} Kill 'Em Off
33310 Yung Joc f/ Rick Ross, Snoop Dogg Lyrics/ohhla.com/anonymous/yung_joc/hustlen/br... [snoop dogg] + (yung joc)\nyung joc (sup?)\nwh... {sideshows, bezels, sneaks} {sideshows} Brand New
33312 Yung Joc f/ The Game, Jim Jones Lyrics/ohhla.com/anonymous/yung_joc/hustlen/c_... [the game] (yung joc)\nhey joc (what up nigga?... {pumper, chirp, asthmatics, fiends, moguls, in... {asthmatics} Cut Throat
33313 Yung Joc f/ Jazze Pha, Trick Daddy Lyrics/ohhla.com/anonymous/yung_joc/hustlen/ch... {*screw voice: "hustlenomic$!"*}\n\n[intro: tr... {fishbowl, antifreeze, bougie, eights, minks, ... {fishbowl} Chevy Smile
33315 Yung Joc f/ Mike Carlito, Gorilla Zoe Lyrics/ohhla.com/anonymous/yung_joc/hustlen/ge... [intro/chorus - samples]\ni be getting to da m... {medulla, snitches, takin, earlobe} {derringers} Getting to Da Money
33319 Yung Joc f/ Southerngirl Lyrics/ohhla.com/anonymous/yung_joc/hustlen/lv... [chorus: southerngirl] + (yung joc)\ni'm just,... {broads, baller, takin} {bistro} Living the Life
33328 Yung Joc Lyrics/ohhla.com/anonymous/yung_joc/newjcity/g... * first single; send corrections to the typist... {typist, duces, playmaker, nitty, coups, mothe... {playmaker} It's Goin' Down
33329 Yung Joc Lyrics/ohhla.com/anonymous/yung_joc/newjcity/h... [chorus: repeat 2x]\nall the hoes lose they mi... {fiends, flinch, swang, headland, homies, reefer} {headland} Hear Me Coming
33340 Yung La f/ T.I., Young Dro Lyrics/ohhla.com/anonymous/yung_la/rm_bside/ai... [verse 1 - yung la]\nyou got to know your boy ... {commandeer, nauseous} {commandeer} Ain't I (Remix)
33341 Yung Ro f/ Chamillionaire, D-Black, Chamillion... Lyrics/ohhla.com/anonymous/yung_ro/nob_land/ya... * send corrections to the typist\n\n[mixing by... {typist, scuff, sicker, hotshot, blacked, inte... {hotshot} Y'all Don't Want Us to Come!
33346 Yung Berg f/ Jim Jones, Junior, Rich Boy Lyrics/ohhla.com/anonymous/yungberg/a_famous/s... [yung berg]\nyeah - so you know we had to do a... {adios, woolies, yellowed} {woolies, yellowed} Sexy Lady (Remix)
33349 Z-Ro f/ K-Rino Lyrics/ohhla.com/anonymous/z_ro/angeldst/today... (*talking*)\nuh oh, z-ro the crooked\nand my m... {chastises, forgave, hearse} {chastises, amoxicillin} Today
33351 Z-Ro f/ Bun B, P.O.P. Lyrics/ohhla.com/anonymous/z_ro/im_still/remem... (*talking*)\nhell yeah nigga, ay yo\n\n[z-ro]\... {sported, egghead, ridiculed, rapping} {egghead} Remember Me
33354 Z-Ro f/ Willie D Lyrics/ohhla.com/anonymous/z_ro/meth/onemotme.... (*talking*)\nso you getting married tomorrow h... {bowlegs, butterball, homeboy, dicking} {bowlegs} One Mo Time
33357 Z-Ro f/ Scarface Lyrics/ohhla.com/anonymous/z_ro/the_life/t_nig... [hook]\nthese niggaz, must of forgot\nthat i w... {denting, motherfucking, vaseline, snitch} {denting} These Niggaz
33362 Z-Ro f/ Daz, Thug Dirt Lyrics/ohhla.com/anonymous/z_ro/toleranc/time_... (*talking*)\nyeah, let me count that up right ... {barging, lounging, swerving, slanging, saggin... {trudging, uncock} Time and Time Again
33363 Zion I & The Grouch feat. Chali 2na Lyrics/ohhla.com/anonymous/zion_i/heroesin/too... [verse 1-chali 2na]\nyeah, some of us are caug... {devilish, noose, thumpers, scantily, recluse,... {quakers, scantily} Too Much
33364 Zion I f/ Talib Kweli Lyrics/ohhla.com/anonymous/zion_i/true_and/tem... [zion i]\nyeah, yeah, hey\nthe cityscape where... {slurring, pimped, emulating, confessing, popu... {cityscape, populating} Temperature
33366 Zo! f/ Eric Roberson, Phonte Lyrics/ohhla.com/anonymous/zo/justvis3/marzipa... [verse one: eric roberson]\nto know you is to ... {marzipan} {marzipan} Marzipan
33368 Zo! f/ Rapper Big Pooh, Darien Brockington, Er... Lyrics/ohhla.com/anonymous/zo/sunstorm/thscoul... {say brother, do you see all these foxys out h... {innocently, cocked} {innocently} This Could Be the Night
33369 Zoe Pound f/ Raekwon Lyrics/ohhla.com/anonymous/zoepound/bullethd/b... [intro: raekwon]\ndon't play, what, what, you ... {menses} {insiders, menses} Back to the Cages
33370 Zu Ninjaz f/ Drunken Dragon, Ol' Dirty Bastard Lyrics/ohhla.com/anonymous/zuninjaz/now_just/s... [sample - repeated in background]\nstanding.. ... {chumps, mathematic, shits, lyrically} {bloodstreams} Specially Trained Ninja

16461 rows × 6 columns


In [23]:
# Other slices to try:
#rap_data[rap_data["artist"].str.startswith("Kanye")]
#rap_data[rap_data["artist"].str.startswith("Wu-Tang") & rap_data["filename"].str.contains("enter")]

# Songs whose artist starts with "Nas" and whose file path contains "illmatic".
is_nas = rap_data["artist"].str.startswith("Nas")
on_illmatic = rap_data["filename"].str.contains("illmatic")
rap_data[is_nas & on_illmatic]


Out[23]:
artist filename lyrics matches rap_matches song
23373 Nas w/ AZ, The Firm Lyrics/ohhla.com/anonymous/nas/illmatic/genesi... *sound of a subway train going overhead*\n*in ... {coupes, takin, infantryman, trifle} {infantryman} The Genesis
23375 Nas Lyrics/ohhla.com/anonymous/nas/illmatic/memory... \t(check that shit)\n\taight fuck that shit, w... {dingbats, reminisce, gassed, takin, trifle, s... {dingbats, physiology} Memory Lane (Sittin' in Da Park)
23376 Nas Lyrics/ohhla.com/anonymous/nas/illmatic/nysomi... [intro: nas]\nyeah yeah, aiyyo black it's time... {beepers, backtrack, snitch, gunfights, takin,... {peepholes} N.Y. State of Mind
23379 Nas Lyrics/ohhla.com/anonymous/nas/illmatic/rep.na... represent, represent!! (repeat 4x)\n\nstraight... {guzzle, dissed, dweller, hillbillies, blunts} {accelerator} Represent
23380 Nas w/ Pete Rock (uncredited chorus vocals) Lyrics/ohhla.com/anonymous/nas/illmatic/world_... "it's yours!" --> t la rock\n\nchorus: nas, pe... {amped, caved, phlegm, toothed} {toothed} The World is Yours

In [24]:
# When running against the lyrics subset on GitHub, replace 23375 with 1173.
# Show both kinds of rare-word matches for the song at index 23375.
selected_song = rap_data.loc[23375]
for column in ("matches", "rap_matches"):
    print(selected_song[column])


{'dingbats', 'reminisce', 'gassed', 'takin', 'trifle', 'shoelaces', 'fiends', 'ganja', 'overdoses', 'vexed'}
{'dingbats', 'physiology'}

In [25]:
# Example word taken from the "matches" set printed above; used further down
# to pick out the lyric line that contains it.
word = "trifle"

Finding 'Good' Songs


In [26]:
import spotipy
# Spotify client created without passing any credentials.
# NOTE(review): confirm the Spotify Web API still serves search requests
# without an access token.
sp = spotipy.Spotify()
    
def test_track_search(sp, search_str):
    results = sp.search(q=search_str, type='track', limit=1)
    if len(results['tracks']['items']) > 0:
        print(results['tracks']['items'][0]['artists'][0]['name']," - ",
              results['tracks']['items'][0]['name'],
              results['tracks']['items'][0]['popularity'],)
    else:
        print("")

# Try the search helper on a known track.
test_track_search(sp, 'Nas Memory Lane')


Nas  -  Memory Lane (Sittin' in da Park) 52

Finding 'Good' Songs with YouTube


In [27]:
# Activate Google Data API
# Key passed as developerKey when building the YouTube v3 client below.
# NOTE(review): intentionally blank in the source; avoid committing a real key
# here -- consider reading it from the environment instead.
DEVELOPER_KEY = ""

from apiclient.discovery import build
from datetime import datetime

def _parse_youtube_timestamp(stamp):
    """Parse a ``publishedAt`` string, with or without fractional seconds, as UTC."""
    from datetime import timezone

    for fmt in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ"):
        try:
            parsed = datetime.strptime(stamp, fmt)
        except ValueError:
            continue
        # The trailing "Z" marks the timestamp as UTC.
        return parsed.replace(tzinfo=timezone.utc)
    raise ValueError("Unrecognised YouTube timestamp: %r" % (stamp,))


def youtube_search(q):
    """Look up the top YouTube video for a query.

    Parameters
    ----------
    q : str
        Free-text search query.

    Returns
    -------
    tuple or None
        ``(video_id, view_count, age_in_days)`` for the first search hit, with
        the two numbers as floats, or ``None`` when the search (or the
        follow-up statistics lookup) returns no items.
    """
    from datetime import timezone

    youtube = build("youtube", "v3", developerKey=DEVELOPER_KEY)

    search_response = youtube.search().list(
        q=q, type="video",
        part="id,snippet", maxResults=1
    ).execute()

    hits = search_response.get("items", [])
    if not hits:
        # Nothing found: report that explicitly instead of failing on
        # variables that were never assigned.
        return None

    top_hit = hits[0]
    video_id = top_hit["id"]["videoId"]
    published = _parse_youtube_timestamp(top_hit["snippet"]["publishedAt"])

    # The search response carries no view counts; fetch them separately.
    stats_response = youtube.videos().list(
        part="statistics", id=video_id
    ).execute()
    stats_items = stats_response.get("items", [])
    if not stats_items:
        return None

    # Compare against the current time in UTC, the timestamp's own zone.
    age_days = (datetime.now(timezone.utc) - published).days
    return (video_id,
            float(stats_items[0]["statistics"]["viewCount"]),
            float(age_days))

In [28]:
# Try the YouTube lookup on a known track and show the returned tuple.
youtube_data = youtube_search("Nas Memory Lane")
print(youtube_data)


('JXBFG2vsyCM', 5865358.0, 3402.0)

In [1]:
from IPython.display import HTML
# Embed a YouTube player in the notebook; the video id in the URL is
# hard-coded rather than taken from youtube_data.
HTML('<iframe width="560" height="315" src="https://www.youtube.com/embed/JXBFG2vsyCM?rel=0&amp;controls=0&amp;showinfo=0" frameborder="0" allowfullscreen></iframe>')


Out[1]:

Getting Definitions


In [30]:
from wiktionaryparser import WiktionaryParser

def get_definition(word):
    """Fetch every Wiktionary definition group for ``word``.

    Args:
        word: The word to look up.

    Returns:
        A list of ``(part_of_speech, definitions)`` tuples, one per definition
        block of every etymology returned by the parser, in parser order.
        ``definitions`` is the block's text split into lines, with the leading
        headword removed and fully parenthesised lines dropped.
    """
    parser = WiktionaryParser()
    worddef = parser.fetch(word)

    # The word is data, not a pattern: escape it so entries such as "c++" or
    # "a.m." cannot break or distort the regex. The headword is followed by
    # optional whitespace and a LEFT-TO-RIGHT MARK (U+200E) in the parsed text.
    headword = re.compile(re.escape(word) + r"\s*" + "\u200e")
    # Lines that start with "(" and end with ")" are not kept as definitions.
    parenthetical = re.compile(r"^\(.*\)$")

    possible_defs = []
    for etymology in worddef:
        for dd in etymology["definitions"]:
            all_defs = headword.sub("", dd['text']).strip().split("\n")
            all_gdefs = [d for d in all_defs if parenthetical.match(d) is None]
            possible_defs.append((dd['partOfSpeech'], all_gdefs))
    return possible_defs

In [31]:
# A word with several etymologies: each one contributes its own definition groups.
get_definition("racket")


Out[31]:
[('noun',
  ['(countable) A racquet: an implement with a handle connected to a round frame strung with wire, sinew, or plastic cords, and used to hit a ball, such as in tennis or a birdie in badminton.',
   '(Canada) A snowshoe formed of cords stretched across a long and narrow frame of light wood.',
   'A broad wooden shoe or patten for a man or horse, to allow walking on marshy or soft ground.']),
 ('verb', ['To strike with, or as if with, a racket.']),
 ('noun',
  ['A loud noise.',
   'A fraud or swindle; an illegal scheme for profit.',
   '(dated, slang) A carouse; any reckless dissipation.',
   '(dated, slang) Something taking place considered as exciting, trying, unusual, etc. or as an ordeal.'])]

In [32]:
# Definitions of the example word, grouped by part of speech.
get_definition("trifle")


Out[32]:
[('noun',
  ['An English dessert made from a mixture of thick custard, fruit, sponge cake, jelly and whipped cream.',
   'An insignificant amount.',
   'Anything that is of little importance or worth.',
   'A particular kind of pewter.',
   '(uncountable) Utensils made from this particular kind of pewter.']),
 ('verb',
  ['(intransitive) To deal with something as if it were of little importance or worth.',
   '(intransitive) To act, speak, or otherwise behave with jest.',
   '(intransitive) To inconsequentially toy with something.',
   '(transitive) To squander or waste.'])]

Get Definitions from Context


In [33]:
# Take the example song (row 23375) and keep the first lyric line that
# contains `word` as a substring.
lyrics = rap_data.loc[23375]["lyrics"]
rap_sentence = [line for line in lyrics.split("\n") if word in line][0]
print(rap_sentence)


word to christ, a disciple of streets, trifle on beats

In [34]:
import nltk
# Tokenize the line, then tag every token using NLTK's "universal" tagset.
split_sentence = nltk.word_tokenize(rap_sentence)
tagged_sentence = nltk.pos_tag(split_sentence,tagset="universal")
print(tagged_sentence)


[('word', 'NOUN'), ('to', 'PRT'), ('christ', 'VERB'), (',', '.'), ('a', 'DET'), ('disciple', 'NOUN'), ('of', 'ADP'), ('streets', 'NOUN'), (',', '.'), ('trifle', 'NOUN'), ('on', 'ADP'), ('beats', 'NOUN')]

Universal Part of Speech Tags

  • VERB - verbs (all tenses and modes)
  • NOUN - nouns (common and proper)
  • PRON - pronouns
  • ADJ - adjectives
  • ADV - adverbs
  • ADP - adpositions (prepositions and postpositions)
  • CONJ - conjunctions
  • DET - determiners
  • NUM - cardinal numbers
  • PRT - particles or other function words
  • X - other: foreign words, typos, abbreviations

In [35]:
# Same line with an explicit subject ("I") inserted before "trifle", tagged
# again to compare the tag assigned to "trifle".
rap_sentence = "word to christ, a disciple of streets, I trifle on beats"
split_sentence = nltk.word_tokenize(rap_sentence)
tagged_sentence = nltk.pos_tag(split_sentence,tagset="universal")
print(tagged_sentence)


[('word', 'NOUN'), ('to', 'PRT'), ('christ', 'VERB'), (',', '.'), ('a', 'DET'), ('disciple', 'NOUN'), ('of', 'ADP'), ('streets', 'NOUN'), (',', '.'), ('I', 'PRON'), ('trifle', 'VERB'), ('on', 'ADP'), ('beats', 'NOUN')]

In [36]:
def get_definition_with_sentence(word, rap_sentence):
    """Pick the Wiktionary definitions of ``word`` that fit its role in a sentence.

    The sentence is tokenized and tagged with NLTK's "universal" tagset; the
    tag of ``word`` is then matched against the part of speech of each
    Wiktionary definition block.

    Args:
        word: The word to define; it must appear as its own token in the sentence.
        rap_sentence: The line of text giving the word its context.

    Returns:
        ``(part_of_speech, definitions)`` for the first definition block whose
        part of speech matches the tag of ``word``. ``("N/A", "N/A")`` when the
        word is not a token of the sentence or no block matches.
    """
    split_sentence = nltk.word_tokenize(rap_sentence)
    if word not in split_sentence:
        # The sentence may contain the word only inside a longer token
        # (e.g. "trifles"); report "no definition" instead of raising
        # ValueError, and skip the tagging and the Wiktionary request.
        return ("N/A", "N/A")

    tagged_sentence = nltk.pos_tag(split_sentence,
                                   tagset="universal")
    pos_of_word = tagged_sentence[split_sentence.index(word)][1].lower()

    # Universal tags are abbreviations ("adj", "adv", ...); accept the long
    # form as well as the raw tag so "noun"/"verb" keep matching as before.
    # NOTE(review): long-form names assumed to follow Wiktionary's section
    # headings - confirm against the WiktionaryParser version in use.
    tag_aliases = {"adj": "adjective", "adv": "adverb", "pron": "pronoun",
                   "adp": "preposition", "conj": "conjunction",
                   "det": "determiner", "num": "numeral", "prt": "particle"}
    accepted_pos = {pos_of_word, tag_aliases.get(pos_of_word, pos_of_word)}

    parser = WiktionaryParser()
    worddef = parser.fetch(word)

    # Escape the word: it is data, not a regex pattern.
    headword = re.compile(re.escape(word) + r"\s*" + "\u200e")
    parenthetical = re.compile(r"^\(.*\)$")

    for etymology in worddef:
        for dd in etymology["definitions"]:
            # Take the first definition block that matches the part of speech.
            if dd['partOfSpeech'] not in accepted_pos:
                continue
            all_defs = headword.sub("", dd['text']).strip().split("\n")
            all_gdefs = [d for d in all_defs if parenthetical.match(d) is None]
            return (dd['partOfSpeech'], all_gdefs)

    return ("N/A","N/A")

In [37]:
# `rap_sentence` is the edited line ("... I trifle on beats") assigned above.
get_definition_with_sentence("trifle", rap_sentence)


Out[37]:
('verb',
 ['(intransitive) To deal with something as if it were of little importance or worth.',
  '(intransitive) To act, speak, or otherwise behave with jest.',
  '(intransitive) To inconsequentially toy with something.',
  '(transitive) To squander or waste.'])

In [38]:
def get_definition_with_lyrics(word, lyrics):
    """Define ``word`` using the lyric line it appears in as context.

    Args:
        word: The word to define.
        lyrics: Full lyrics as one string with lines separated by "\\n".

    Returns:
        Whatever ``get_definition_with_sentence`` returns for the chosen line,
        or ``("N/A", "N/A")`` when no line contains the word at all.
    """
    candidate_lines = [line for line in lyrics.split("\n") if word in line]
    if not candidate_lines:
        # Previously an IndexError; mirror the "not found" result instead.
        return ("N/A", "N/A")

    # Prefer a line where the word stands alone rather than inside a longer
    # word, since the sentence-level lookup needs it as its own token; fall
    # back to the first substring match otherwise.
    whole_word = re.compile(r"(?<!\w)" + re.escape(word) + r"(?!\w)")
    rap_sentence = next(
        (line for line in candidate_lines if whole_word.search(line)),
        candidate_lines[0])
    return get_definition_with_sentence(word, rap_sentence)

Extract Surrounding Lines


In [39]:
# Toy four-line verse used to try out the line-extraction helpers.
lyrics_split = [
    "Twinkle, twinkle, little star",
    "How I wonder what you are",
    "Up above the world so high",
    "Like a diamond in the sky",
]

In [40]:
def get_rhymegroup(target_word, lyrics_split):
    """Return the line containing ``target_word`` together with its neighbours.

    Args:
        target_word: Substring to look for in each line.
        lyrics_split: List of lyric lines.

    Returns:
        A list with the previous line, the matching line and the next line.
        When the match is the first or last line, the missing neighbour is
        simply left out. If several lines match, the last one is used.
        Returns the string ``"N/A"`` when no line contains the word.
    """
    group_index_of_target = -1

    for groupid, line in enumerate(lyrics_split):
        if target_word in line:
            group_index_of_target = groupid  # keep scanning: last match wins

    if group_index_of_target == -1:
        return "N/A"

    # Clamp the start so a match on the first line does not wrap around; the
    # slice end is clamped by Python itself.
    start = max(group_index_of_target - 1, 0)
    return lyrics_split[start:group_index_of_target + 2]

In [41]:
# "high" is on the third line of the toy verse, so both neighbours exist.
get_rhymegroup("high", lyrics_split)


Out[41]:
['How I wonder what you are',
 'Up above the world so high',
 'Like a diamond in the sky']

Extract Rhyming Couplet


In [42]:
import pronouncing

In [43]:
# First 20 entries of the rhyme list that `pronouncing` returns for "star".
pronouncing.rhymes("star")[0:20]


Out[43]:
['adar',
 'afar',
 'ahr',
 'ajar',
 'algar',
 'all-star',
 'allar',
 'almanzar',
 'almodovar',
 'alvare',
 'amar',
 'andujar',
 'aquilar',
 'ar',
 'are',
 'avelar',
 'azar',
 'azhar',
 'baar',
 'babar']

In [44]:
# Phoneme strings for the last words of the final two lines of the toy verse.
print(pronouncing.phones_for_word("high"))
print(pronouncing.phones_for_word("sky"))


['HH AY1']
['S K AY1']

In [45]:
# phones_for_word returns a list: a word may have more than one pronunciation.
print(pronouncing.phones_for_word("orange"))
print(pronouncing.phones_for_word("hinge"))


['AO1 R AH0 N JH', 'AO1 R IH0 N JH']
['HH IH1 N JH']

In [46]:
def rhymes_per_line(lyrics_split):
    """Pair each line with the rhyming part of its last word.

    Args:
        lyrics_split: Iterable of lyric lines.

    Returns:
        A list of ``(rhyming_part, line)`` tuples in input order. Lines that
        are blank, or whose last word has no pronunciation available from
        ``pronouncing``, are left out. Only the first listed pronunciation of
        the last word is used.
    """
    rhymes = []
    for line in lyrics_split:
        words = line.strip().split()
        if not words:
            # Blank / whitespace-only line: there is no last word to look up
            # (indexing words[-1] would raise IndexError).
            continue
        last_word = words[-1].strip('.,?!;:')
        last_word_p = pronouncing.phones_for_word(last_word)
        if last_word_p:
            rhymes.append((pronouncing.rhyming_part(last_word_p[0]), line))
    return rhymes

In [47]:
# Try it on the toy verse.
rhymes_per_line(lyrics_split)


Out[47]:
[('AA1 R', 'Twinkle, twinkle, little star'),
 ('AA1 R', 'How I wonder what you are'),
 ('AY1', 'Up above the world so high'),
 ('AY1', 'Like a diamond in the sky')]


In [48]:
# Non-empty lines of the example song (entry 23375); show a slice from the middle.
lyrics_split = [
    line
    for line in df_data["lyrics"][23375].split("\n")
    if line
]
print(lyrics_split[10:-30])


['i rap for listeners, blunt heads, fly ladies and prisoners', 'henessey holders and old school niggaz, then i be dissin a', 'unofficial that smoke woolie thai', 'i dropped out of kooley high, gassed up by a cokehead cutie pie', "jungle survivor, fuck who's the liver", 'my man put the battery in my back, a differencem from energizer', 'sentence begins indented.. with formality', "my duration's infinite, moneywise or physiology", "poetry, that's a part of me, retardedly bop", 'i drop the ancient manifested hip-hop, straight off the block', 'i reminisce on park jams, my man was shot for his sheep coat', 'childhood lesson make me see him drop in my weed smoke', "it's real, grew up in trife life, did times or white lines", 'the hype vice, murderous nighttimes, and knife fights invite crimes', 'chill on the block with cog-nac, hold strap', "with my peeps that's into drug money, market into rap", 'no sign of the beast in the blue chrysler, i guess that means peace', 'for niggaz no sheisty vice to just snipe ya', 'start off the dice-rollin mats for craps to cee-lo ', 'with sidebets, i roll a deuce, nothin below (peace god!)', 'peace god -- now the shit is explained', "i'm takin niggaz on a trip straight through memory lane", "it's like that y'all .. it's like that y'all .. it's like that y'all", 'chorus: repeat scratches 4x', '"now let me take a trip down memory lane" -> bizmarkie', '\t"comin outta queensbridge"', '[nas]', 'one for the money', 'two for pussy and foreign cars', 'three for alize niggaz deceased or behind bars', 'i rap divine gods check the prognosis, is it real or showbiz?', 'my window faces shootouts, drug overdoses', 'live amongst no roses, only the drama, for real', 'a nickel-plate is my fate, my medicine is the ganja']

In [49]:
# Word whose rhyme group is extracted in the cells that follow.
target_word = "trifle"

In [50]:
def get_rhymes(lyrics_split):
    """Tag every line with a short rhyme key derived from its last word.

    Args:
        lyrics_split: Iterable of lyric lines.

    Returns:
        A list of ``[key, line]`` pairs, one per input line and in input
        order. ``key`` is the first two characters of the rhyming part of the
        last word's first listed pronunciation. It is ``"N/A"`` when the line
        has no words or the last word has no pronunciation available.
    """
    lines_with_rhyming_parts = []
    for line in lyrics_split:
        key = "N/A"
        words = line.split()
        if words:  # whitespace-only lines have no last word (was an IndexError)
            last_word = words[-1].strip('.,?!;:')  # drop trailing punctuation
            pronunciations = pronouncing.phones_for_word(last_word)
            if pronunciations:
                # Only the first pronunciation is considered.
                key = pronouncing.rhyming_part(pronunciations[0])[:2]
        lines_with_rhyming_parts.append([key, line])

    return lines_with_rhyming_parts

In [51]:
# Rhyme key for every line of the example song.
rhyme_lines = get_rhymes(lyrics_split)

In [52]:
# Inspect a slice of the [key, line] pairs.
rhyme_lines[40:60]


Out[52]:
[['OW', 'i rap divine gods check the prognosis, is it real or showbiz?'],
 ['OW', 'my window faces shootouts, drug overdoses'],
 ['IY', 'live amongst no roses, only the drama, for real'],
 ['N/A', 'a nickel-plate is my fate, my medicine is the ganja'],
 ['EY', "here's my basis, my razor embraces, many faces"],
 ['EY', 'your telephone blowin, black stitches or fat shoelaces'],
 ['OW', "peoples are petrol, dramatic automatic fo'-fo' i let blow"],
 ['OW', "and back down po-po when i'm vexed so"],
 ['AE', "my pen taps the paper then my brain's blank"],
 ['AE', 'i see dark streets, hustlin brothers who keep the same rank'],
 ['EY', 'pumpin for somethin, some uprise, plus some fail'],
 ['EY', 'judges hangin niggaz, uncorrect bails, for direct sales'],
 ['EY', 'my intellect prevails from a hangin cross with nails'],
 ['IY', "i reinforce the frail, with lyrics that's real"],
 ['IY', 'word to christ, a disciple of streets, trifle on beats'],
 ['IY', 'i decifer prophecies through a mic and say peace.'],
 ['N/A', 'i hung around the older crews while they sling smack to dingbats'],
 ['AE', "they spoke of fat cat, that nigga's name made bell rings, black"],
 ['IH', 'some fiends scream, about supreme team, a jamaica queens thing'],
 ['OW', 'uptown was alpo, son, heard he was kingpin, yo']]

In [53]:
import itertools
import operator

# Merge runs of consecutive lines that share a rhyme key into one group each.
grouped_rhymes = [
    [entry[1] for entry in run]
    for _, run in itertools.groupby(rhyme_lines, operator.itemgetter(0))
]

In [54]:
# Inspect a slice of the groups of consecutive lines sharing a rhyme key.
grouped_rhymes[15:35]


Out[54]:
[['chill on the block with cog-nac, hold strap',
  "with my peeps that's into drug money, market into rap"],
 ['no sign of the beast in the blue chrysler, i guess that means peace'],
 ['for niggaz no sheisty vice to just snipe ya'],
 ['start off the dice-rollin mats for craps to cee-lo ',
  'with sidebets, i roll a deuce, nothin below (peace god!)'],
 ['peace god -- now the shit is explained',
  "i'm takin niggaz on a trip straight through memory lane"],
 ["it's like that y'all .. it's like that y'all .. it's like that y'all"],
 ['chorus: repeat scratches 4x',
  '"now let me take a trip down memory lane" -> bizmarkie',
  '\t"comin outta queensbridge"',
  '[nas]'],
 ['one for the money'],
 ['two for pussy and foreign cars',
  'three for alize niggaz deceased or behind bars'],
 ['i rap divine gods check the prognosis, is it real or showbiz?',
  'my window faces shootouts, drug overdoses'],
 ['live amongst no roses, only the drama, for real'],
 ['a nickel-plate is my fate, my medicine is the ganja'],
 ["here's my basis, my razor embraces, many faces",
  'your telephone blowin, black stitches or fat shoelaces'],
 ["peoples are petrol, dramatic automatic fo'-fo' i let blow",
  "and back down po-po when i'm vexed so"],
 ["my pen taps the paper then my brain's blank",
  'i see dark streets, hustlin brothers who keep the same rank'],
 ['pumpin for somethin, some uprise, plus some fail',
  'judges hangin niggaz, uncorrect bails, for direct sales',
  'my intellect prevails from a hangin cross with nails'],
 ["i reinforce the frail, with lyrics that's real",
  'word to christ, a disciple of streets, trifle on beats',
  'i decifer prophecies through a mic and say peace.'],
 ['i hung around the older crews while they sling smack to dingbats'],
 ["they spoke of fat cat, that nigga's name made bell rings, black"],
 ['some fiends scream, about supreme team, a jamaica queens thing']]

In [55]:
def rhymegroup(target_word, grouped_rhymes):
    """Return the rhyme group that contains ``target_word``.

    A group matches when any of its lines contains ``target_word`` as a
    substring. If several groups match, the last one is returned. The string
    ``"N/A"`` is returned when no group matches.
    """
    matching_ids = [
        groupid
        for groupid, rhymes in enumerate(grouped_rhymes)
        if any(target_word in line for line in rhymes)
    ]
    if not matching_ids:
        return "N/A"
    return grouped_rhymes[matching_ids[-1]]

In [56]:
# Rhyme group of the example song that contains the target word.
rhymegroup("trifle", grouped_rhymes)


Out[56]:
["i reinforce the frail, with lyrics that's real",
 'word to christ, a disciple of streets, trifle on beats',
 'i decifer prophecies through a mic and say peace.']

In [57]:
def rhymegroup_from_word(word, lyrics):
    """Return the group of consecutive rhyming lines of ``lyrics`` containing ``word``.

    Args:
        word: Substring to look for.
        lyrics: Full lyrics as one string with lines separated by "\\n".

    Returns:
        The result of ``rhymegroup`` on the rhyme groups of these lyrics: the
        list of lines of the matching group, or ``"N/A"`` when none matches.
    """
    lyrics_split = [line for line in lyrics.split("\n") if len(line) > 0]
    # Build the rhyme keys from THIS song's lines. The previous version grouped
    # the notebook-global `rhyme_lines`, silently ignoring `lyrics`.
    rhyme_lines = get_rhymes(lyrics_split)
    grouped_rhymes = [
        [entry[1] for entry in group]
        for _, group in itertools.groupby(rhyme_lines, operator.itemgetter(0))
    ]
    return rhymegroup(word, grouped_rhymes)

Posting on Tumblr


In [58]:
# Register an application (https://www.tumblr.com/docs/en/api/v2), complete the
# OAuth 1.0a flow, and export the four values below as environment variables so
# no secret is saved inside the notebook. Unset variables fall back to ''.
import os

tumblr_client = os.environ.get('TUMBLR_CONSUMER_KEY', '')
tumblr_secret = os.environ.get('TUMBLR_CONSUMER_SECRET', '')
access_key    = os.environ.get('TUMBLR_OAUTH_TOKEN', '')
access_secret = os.environ.get('TUMBLR_OAUTH_SECRET', '')

import pytumblr

user = pytumblr.TumblrRestClient(
    tumblr_client, tumblr_secret,
    access_key, access_secret)

In [59]:
def post_template(word, part_of_speech, worddef, lyrics, artist, song, youtube=None):
    """Render the HTML body of a word-of-the-day post.

    Args:
        word: The featured word; linked to its Wiktionary page and shown in
            bold wherever it occurs in the lyric lines.
        part_of_speech: Part-of-speech label shown after the word.
        worddef: Definition text shown after the label.
        lyrics: Iterable of lyric lines. A plain string is treated as a single
            line rather than being iterated character by character.
        artist: Artist name for the credit line.
        song: Song title for the credit line.
        youtube: Optional YouTube video id; when given, the song title links
            to the video.

    Returns:
        The post body as an HTML string. All interpolated text is HTML-escaped
        and values placed in URLs are percent-encoded.
    """
    from html import escape
    from urllib.parse import quote

    def text(value):
        # Text nodes only: escape &, < and > but keep quotes/apostrophes as-is.
        return escape(str(value), quote=False)

    if isinstance(lyrics, str):
        lyrics = [lyrics]

    safe_word = text(word)

    post = '''<p><a href="http://en.wiktionary.org/wiki/{}">{}</a> '''.format(
        quote(str(word)), safe_word)
    post += '''- {} -\xa0 {}</p>'''.format(text(part_of_speech), text(worddef))

    post += "<p>"
    for line in lyrics:
        # Escape first, then highlight, so the <b> tags themselves survive.
        post += text(line).replace(safe_word, "<b>"+safe_word+"</b>")+"<br />"
    post += "</p>"

    if youtube is not None:
        post += '''<p>-{} on\xa0“'''.format(text(artist))
        post += '''<a href="https://www.youtube.com/watch?v={}">{}</a>”</p>'''.format(
            quote(str(youtube), safe=''), text(song))
    else:
        # The opening quotation mark was missing in this branch.
        post += '''<p>-{} on\xa0“{}”</p>'''.format(text(artist), text(song))

    return post

Puttin' it Together


In [60]:
# Show the row of the example song used for the final post.
print(rap_data.loc[23375])


artist                                                       Nas
filename       Lyrics/ohhla.com/anonymous/nas/illmatic/memory...
lyrics         \t(check that shit)\n\taight fuck that shit, w...
matches        {dingbats, reminisce, gassed, takin, trifle, s...
rap_matches                               {dingbats, physiology}
song                            Memory Lane (Sittin' in Da Park)
Name: 23375, dtype: object

In [61]:
# Gather every ingredient of the post for the example song (row 23375).
word = "trifle"
artist, song = rap_data.loc[23375].artist, rap_data.loc[23375].song

# Definition chosen from the context of the lyric line containing the word.
part_of_speech, worddef = get_definition_with_lyrics(
    word, rap_data.loc[23375].lyrics)
# Consecutive rhyming lines around the word.
lyrics = rhymegroup_from_word(word, rap_data.loc[23375].lyrics)
# Only the video id (first tuple element) is needed for the link.
youtube = youtube_search(artist + " " + song)[0]

slug = "-".join([word, part_of_speech, artist])

In [62]:
# worddef[0]: only the first listed definition goes into the post.
post_body = post_template(word, part_of_speech, worddef[0], lyrics, artist, song, youtube)

In [63]:
# Publish the rendered HTML as a text post on the "rapwords" blog, tagged with
# the part of speech. Running this cell creates a live post (state="published").
user.create_text("rapwords",
                 format="html",
                 state="published",
                 slug=slug,
                 body=post_body,
                 tags=[part_of_speech],)


Out[63]:
{'id': 153129975615}

Peace