Sacar la lista de 250 Pelis



In [7]:

    
from imdb import IMDb
from datetime import datetime
from elasticsearch import Elasticsearch
es = Elasticsearch()

ia = IMDb()
listaPelis = ia.get_top250_movies()
listaPelis









    Out[7]:





[<Movie id:0111161[http] title:_The Shawshank Redemption (1994)_>,
 <Movie id:0068646[http] title:_The Godfather (1972)_>,
 <Movie id:0071562[http] title:_The Godfather: Part II (1974)_>,
 <Movie id:0468569[http] title:_The Dark Knight (2008)_>,
 <Movie id:0050083[http] title:_12 Angry Men (1957)_>,
 <Movie id:0108052[http] title:_Schindler's List (1993)_>,
 <Movie id:0110912[http] title:_Pulp Fiction (1994)_>,
 <Movie id:0167260[http] title:_The Lord of the Rings: The Return of the King (2003)_>,
 <Movie id:0060196[http] title:_The Good, the Bad and the Ugly (1966)_>,
 <Movie id:0137523[http] title:_Fight Club (1999)_>,
 <Movie id:0120737[http] title:_The Lord of the Rings: The Fellowship of the Ring (2001)_>,
 <Movie id:0080684[http] title:_Star Wars: Episode V - The Empire Strikes Back (1980)_>,
 <Movie id:0109830[http] title:_Forrest Gump (1994)_>,
 <Movie id:1375666[http] title:_Inception (2010)_>,
 <Movie id:0167261[http] title:_The Lord of the Rings: The Two Towers (2002)_>,
 <Movie id:0073486[http] title:_One Flew Over the Cuckoo's Nest (1975)_>,
 <Movie id:0099685[http] title:_Goodfellas (1990)_>,
 <Movie id:0133093[http] title:_The Matrix (1999)_>,
 <Movie id:0047478[http] title:_Seven Samurai (1954)_>,
 <Movie id:0076759[http] title:_Star Wars: Episode IV - A New Hope (1977)_>,
 <Movie id:0317248[http] title:_City of God (2002)_>,
 <Movie id:0114369[http] title:_Se7en (1995)_>,
 <Movie id:0102926[http] title:_The Silence of the Lambs (1991)_>,
 <Movie id:0038650[http] title:_It's a Wonderful Life (1946)_>,
 <Movie id:0118799[http] title:_Life Is Beautiful (1997)_>,
 <Movie id:0114814[http] title:_The Usual Suspects (1995)_>,
 <Movie id:0110413[http] title:_Léon: The Professional (1994)_>,
 <Movie id:0245429[http] title:_Spirited Away (2001)_>,
 <Movie id:0120815[http] title:_Saving Private Ryan (1998)_>,
 <Movie id:0064116[http] title:_Once Upon a Time in the West (1968)_>,
 <Movie id:0120586[http] title:_American History X (1998)_>,
 <Movie id:0816692[http] title:_Interstellar (2014)_>,
 <Movie id:0034583[http] title:_Casablanca (1942)_>,
 <Movie id:0054215[http] title:_Psycho (1960)_>,
 <Movie id:0021749[http] title:_City Lights (1931)_>,
 <Movie id:0120689[http] title:_The Green Mile (1999)_>,
 <Movie id:1675434[http] title:_The Intouchables (2011)_>,
 <Movie id:0027977[http] title:_Modern Times (1936)_>,
 <Movie id:0082971[http] title:_Raiders of the Lost Ark (1981)_>,
 <Movie id:0047396[http] title:_Rear Window (1954)_>,
 <Movie id:0253474[http] title:_The Pianist (2002)_>,
 <Movie id:0407887[http] title:_The Departed (2006)_>,
 <Movie id:0103064[http] title:_Terminator 2: Judgment Day (1991)_>,
 <Movie id:0088763[http] title:_Back to the Future (1985)_>,
 <Movie id:2582802[http] title:_Whiplash (2014)_>,
 <Movie id:0172495[http] title:_Gladiator (2000)_>,
 <Movie id:0209144[http] title:_Memento (2000)_>,
 <Movie id:0482571[http] title:_The Prestige (2006)_>,
 <Movie id:0110357[http] title:_The Lion King (1994)_>,
 <Movie id:0078788[http] title:_Apocalypse Now (1979)_>,
 <Movie id:0078748[http] title:_Alien (1979)_>,
 <Movie id:0043014[http] title:_Sunset Boulevard (1950)_>,
 <Movie id:0057012[http] title:_Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)_>,
 <Movie id:0032553[http] title:_The Great Dictator (1940)_>,
 <Movie id:0095765[http] title:_Cinema Paradiso (1988)_>,
 <Movie id:0405094[http] title:_The Lives of Others (2006)_>,
 <Movie id:0095327[http] title:_Grave of the Fireflies (1988)_>,
 <Movie id:0050825[http] title:_Paths of Glory (1957)_>,
 <Movie id:1853728[http] title:_Django Unchained (2012)_>,
 <Movie id:0081505[http] title:_The Shining (1980)_>,
 <Movie id:0910970[http] title:_WALL·E (2008)_>,
 <Movie id:0169547[http] title:_American Beauty (1999)_>,
 <Movie id:1345836[http] title:_The Dark Knight Rises (2012)_>,
 <Movie id:0119698[http] title:_Princess Mononoke (1997)_>,
 <Movie id:0090605[http] title:_Aliens (1986)_>,
 <Movie id:0364569[http] title:_Old Boy (2003)_>,
 <Movie id:0087843[http] title:_Once Upon a Time in America (1984)_>,
 <Movie id:0051201[http] title:_Witness for the Prosecution (1957)_>,
 <Movie id:0082096[http] title:_Das Boot (1981)_>,
 <Movie id:0033467[http] title:_Citizen Kane (1941)_>,
 <Movie id:0053125[http] title:_North by Northwest (1959)_>,
 <Movie id:0052357[http] title:_Vertigo (1958)_>,
 <Movie id:0086190[http] title:_Star Wars: Episode VI - Return of the Jedi (1983)_>,
 <Movie id:0105236[http] title:_Reservoir Dogs (1992)_>,
 <Movie id:0112573[http] title:_Braveheart (1995)_>,
 <Movie id:0022100[http] title:_M (1931)_>,
 <Movie id:0180093[http] title:_Requiem for a Dream (2000)_>,
 <Movie id:5074352[http] title:_Dangal (2016)_>,
 <Movie id:0211915[http] title:_Amélie (2001)_>,
 <Movie id:0066921[http] title:_A Clockwork Orange (1971)_>,
 <Movie id:0986264[http] title:_Like Stars on Earth (2007)_>,
 <Movie id:0056172[http] title:_Lawrence of Arabia (1962)_>,
 <Movie id:0075314[http] title:_Taxi Driver (1976)_>,
 <Movie id:0036775[http] title:_Double Indemnity (1944)_>,
 <Movie id:0338013[http] title:_Eternal Sunshine of the Spotless Mind (2004)_>,
 <Movie id:0086879[http] title:_Amadeus (1984)_>,
 <Movie id:0056592[http] title:_To Kill a Mockingbird (1962)_>,
 <Movie id:0435761[http] title:_Toy Story 3 (2010)_>,
 <Movie id:0093058[http] title:_Full Metal Jacket (1987)_>,
 <Movie id:0062622[http] title:_2001: A Space Odyssey (1968)_>,
 <Movie id:0045152[http] title:_Singin' in the Rain (1952)_>,
 <Movie id:0070735[http] title:_The Sting (1973)_>,
 <Movie id:0114709[http] title:_Toy Story (1995)_>,
 <Movie id:0040522[http] title:_Bicycle Thieves (1948)_>,
 <Movie id:0012349[http] title:_The Kid (1921)_>,
 <Movie id:0361748[http] title:_Inglourious Basterds (2009)_>,
 <Movie id:0208092[http] title:_Snatch (2000)_>,
 <Movie id:1187043[http] title:_3 Idiots (2009)_>,
 <Movie id:5311514[http] title:_Your name (2016)_>,
 <Movie id:0071853[http] title:_Monty Python and the Holy Grail (1975)_>,
 <Movie id:0119488[http] title:_L.A. Confidential (1997)_>,
 <Movie id:0476735[http] title:_Mi padre y mi hijo (2005)_>,
 <Movie id:0059578[http] title:_For a Few Dollars More (1965)_>,
 <Movie id:0086250[http] title:_Scarface (1983)_>,
 <Movie id:2106476[http] title:_The Hunt (2012)_>,
 <Movie id:0119217[http] title:_Good Will Hunting (1997)_>,
 <Movie id:0053604[http] title:_The Apartment (1960)_>,
 <Movie id:0042876[http] title:_Rashomon (1950)_>,
 <Movie id:1832382[http] title:_A Separation (2011)_>,
 <Movie id:0017136[http] title:_Metrópolis (1927)_>,
 <Movie id:0097576[http] title:_Indiana Jones and the Last Crusade (1989)_>,
 <Movie id:0042192[http] title:_All About Eve (1950)_>,
 <Movie id:0055630[http] title:_Yojimbo (1961)_>,
 <Movie id:0372784[http] title:_Batman Begins (2005)_>,
 <Movie id:1049413[http] title:_Up (2009)_>,
 <Movie id:0053291[http] title:_Some Like It Hot (1959)_>,
 <Movie id:3315342[http] title:_Logan (2017)_>,
 <Movie id:0040897[http] title:_The Treasure of the Sierra Madre (1948)_>,
 <Movie id:0105695[http] title:_Unforgiven (1992)_>,
 <Movie id:0363163[http] title:_Downfall (2004)_>,
 <Movie id:0081398[http] title:_Raging Bull (1980)_>,
 <Movie id:0095016[http] title:_Die Hard (1988)_>,
 <Movie id:3783958[http] title:_La La Land (2016)_>,
 <Movie id:0041959[http] title:_The Third Man (1949)_>,
 <Movie id:0118849[http] title:_Children of Heaven (1997)_>,
 <Movie id:0113277[http] title:_Heat (1995)_>,
 <Movie id:0057115[http] title:_The Great Escape (1963)_>,
 <Movie id:0071315[http] title:_Chinatown (1974)_>,
 <Movie id:0044741[http] title:_Ikiru (1952)_>,
 <Movie id:0457430[http] title:_Pan's Labyrinth (2006)_>,
 <Movie id:0096283[http] title:_My Neighbor Totoro (1988)_>,
 <Movie id:2096673[http] title:_Inside Out (2015)_>,
 <Movie id:0089881[http] title:_Ran (1985)_>,
 <Movie id:0015864[http] title:_The Gold Rush (1925)_>,
 <Movie id:0047296[http] title:_On the Waterfront (1954)_>,
 <Movie id:1305806[http] title:_The Secret in Their Eyes (2009)_>,
 <Movie id:3170832[http] title:_La habitación (2015)_>,
 <Movie id:0050212[http] title:_The Bridge on the River Kwai (1957)_>,
 <Movie id:0083658[http] title:_Blade Runner (1982)_>,
 <Movie id:0347149[http] title:_Howl's Moving Castle (2004)_>,
 <Movie id:1255953[http] title:_Incendies (2010)_>,
 <Movie id:0055031[http] title:_Judgment at Nuremberg (1961)_>,
 <Movie id:0050976[http] title:_The Seventh Seal (1957)_>,
 <Movie id:0120735[http] title:_Lock, Stock and Two Smoking Barrels (1998)_>,
 <Movie id:0031679[http] title:_Mr. Smith Goes to Washington (1939)_>,
 <Movie id:0112641[http] title:_Casino (1995)_>,
 <Movie id:0060107[http] title:_Andrei Rublev (1966)_>,
 <Movie id:0268978[http] title:_A Beautiful Mind (2001)_>,
 <Movie id:0080678[http] title:_The Elephant Man (1980)_>,
 <Movie id:0050986[http] title:_Wild Strawberries (1957)_>,
 <Movie id:0434409[http] title:_V for Vendetta (2005)_>,
 <Movie id:0993846[http] title:_The Wolf of Wall Street (2013)_>,
 <Movie id:0017925[http] title:_The General (1926)_>,
 <Movie id:1291584[http] title:_Warrior (2011)_>,
 <Movie id:0116231[http] title:_The Bandit (1996)_>,
 <Movie id:0117951[http] title:_Trainspotting (1996)_>,
 <Movie id:0018455[http] title:_Sunrise (1927)_>,
 <Movie id:1205489[http] title:_Gran Torino (2008)_>,
 <Movie id:0046912[http] title:_Dial M for Murder (1954)_>,
 <Movie id:2119532[http] title:_Hasta el último hombre (2016)_>,
 <Movie id:0077416[http] title:_The Deer Hunter (1978)_>,
 <Movie id:0118715[http] title:_The Big Lebowski (1998)_>,
 <Movie id:0116282[http] title:_Fargo (1996)_>,
 <Movie id:0031381[http] title:_Gone with the Wind (1939)_>,
 <Movie id:0167404[http] title:_The Sixth Sense (1999)_>,
 <Movie id:0084787[http] title:_The Thing (1982)_>,
 <Movie id:0046438[http] title:_Tokyo Story (1953)_>,
 <Movie id:0266543[http] title:_Finding Nemo (2003)_>,
 <Movie id:0405508[http] title:_Rang De Basanti (2006)_>,
 <Movie id:0477348[http] title:_No Country for Old Men (2007)_>,
 <Movie id:0019254[http] title:_The Passion of Joan of Arc (1928)_>,
 <Movie id:1280558[http] title:_A Wednesday (2008)_>,
 <Movie id:0061512[http] title:_Cool Hand Luke (1967)_>,
 <Movie id:0032976[http] title:_Rebecca (1940)_>,
 <Movie id:0892769[http] title:_How to Train Your Dragon (2010)_>,
 <Movie id:0469494[http] title:_There Will Be Blood (2007)_>,
 <Movie id:0266697[http] title:_Kill Bill: Vol. 1 (2003)_>,
 <Movie id:0091251[http] title:_Come and See (1985)_>,
 <Movie id:0978762[http] title:_Mary and Max (2009)_>,
 <Movie id:0758758[http] title:_Into the Wild (2007)_>,
 <Movie id:2267998[http] title:_Gone Girl (2014)_>,
 <Movie id:0079470[http] title:_Life of Brian (1979)_>,
 <Movie id:0025316[http] title:_It Happened One Night (1934)_>,
 <Movie id:1130884[http] title:_Shutter Island (2010)_>,
 <Movie id:0091763[http] title:_Platoon (1986)_>,
 <Movie id:0395169[http] title:_Hotel Rwanda (2004)_>,
 <Movie id:1979320[http] title:_Rush (2013)_>,
 <Movie id:3011894[http] title:_Relatos salvajes (2014)_>,
 <Movie id:0074958[http] title:_Network (1976)_>,
 <Movie id:0046268[http] title:_The Wages of Fear (1953)_>,
 <Movie id:0107207[http] title:_En el nombre del padre (1993)_>,
 <Movie id:0092005[http] title:_Stand by Me (1986)_>,
 <Movie id:0053198[http] title:_The 400 Blows (1959)_>,
 <Movie id:1895587[http] title:_Spotlight (2015)_>,
 <Movie id:2278388[http] title:_The Grand Budapest Hotel (2014)_>,
 <Movie id:1392190[http] title:_Mad Max: Fury Road (2015)_>,
 <Movie id:2024544[http] title:_12 Years a Slave (2013)_>,
 <Movie id:0374887[http] title:_Munna Bhai M.B.B.S. (2003)_>,
 <Movie id:0052618[http] title:_Ben-Hur (1959)_>,
 <Movie id:0060827[http] title:_Persona (1966)_>,
 <Movie id:0064115[http] title:_Butch Cassidy and the Sundance Kid (1969)_>,
 <Movie id:0405159[http] title:_Million Dollar Baby (2004)_>,
 <Movie id:0245712[http] title:_Amores Perros (2000)_>,
 <Movie id:0107290[http] title:_Jurassic Park (1993)_>,
 <Movie id:0353969[http] title:_Memories of Murder (2003)_>,
 <Movie id:0033870[http] title:_The Maltese Falcon (1941)_>,
 <Movie id:0050783[http] title:_The Nights of Cabiria (1957)_>,
 <Movie id:0079944[http] title:_Stalker (1979)_>,
 <Movie id:0093779[http] title:_The Princess Bride (1987)_>,
 <Movie id:0120382[http] title:_The Truman Show (1998)_>,
 <Movie id:1028532[http] title:_Hachi: A Dog's Tale (2009)_>,
 <Movie id:0087544[http] title:_Nausicaä of the Valley of the Wind (1984)_>,
 <Movie id:0073707[http] title:_Sholay (1975)_>,
 <Movie id:2488496[http] title:_Star Wars: The Force Awakens (2015)_>,
 <Movie id:0112471[http] title:_Before Sunrise (1995)_>,
 <Movie id:0242519[http] title:_Hera Pheri (2000)_>,
 <Movie id:0032551[http] title:_The Grapes of Wrath (1940)_>,
 <Movie id:1201607[http] title:_Harry Potter and the Deathly Hallows: Part 2 (2011)_>,
 <Movie id:0075148[http] title:_Rocky (1976)_>,
 <Movie id:0052311[http] title:_Touch of Evil (1958)_>,
 <Movie id:1392214[http] title:_Prisoners (2013)_>,
 <Movie id:0083987[http] title:_Gandhi (1982)_>,
 <Movie id:0075686[http] title:_Annie Hall (1977)_>,
 <Movie id:0046911[http] title:_Diabolique (1955)_>,
 <Movie id:0246578[http] title:_Donnie Darko (2001)_>,
 <Movie id:0198781[http] title:_Monsters, Inc. (2001)_>,
 <Movie id:0264464[http] title:_Catch Me If You Can (2002)_>,
 <Movie id:0440963[http] title:_The Bourne Ultimatum (2007)_>,
 <Movie id:0088247[http] title:_The Terminator (1984)_>,
 <Movie id:0032138[http] title:_The Wizard of Oz (1939)_>,
 <Movie id:0056801[http] title:_8½ (1963)_>,
 <Movie id:0107048[http] title:_Groundhog Day (1993)_>,
 <Movie id:3896198[http] title:_Guardians of the Galaxy Vol. 2 (2017)_>,
 <Movie id:0072684[http] title:_Barry Lyndon (1975)_>,
 <Movie id:0113247[http] title:_La Haine (1995)_>,
 <Movie id:0114746[http] title:_Twelve Monkeys (1995)_>,
 <Movie id:0073195[http] title:_Jaws (1975)_>,
 <Movie id:0338564[http] title:_Infernal Affairs (2002)_>,
 <Movie id:0036868[http] title:_The Best Years of Our Lives (1946)_>,
 <Movie id:0109117[http] title:_Andaz Apna Apna (1994)_>,
 <Movie id:0072890[http] title:_Dog Day Afternoon (1975)_>,
 <Movie id:0058946[http] title:_The Battle of Algiers (1966)_>,
 <Movie id:1454029[http] title:_The Help (2011)_>,
 <Movie id:1954470[http] title:_Gangs of Wasseypur (2012)_>,
 <Movie id:0101414[http] title:_Beauty and the Beast (1991)_>,
 <Movie id:0056687[http] title:_What Ever Happened to Baby Jane? (1962)_>,
 <Movie id:0118694[http] title:_In the Mood for Love (2000)_>,
 <Movie id:0325980[http] title:_Pirates of the Caribbean: The Curse of the Black Pearl (2003)_>,
 <Movie id:2948356[http] title:_Zootopia (2016)_>,
 <Movie id:0169102[http] title:_Lagaan: Érase una vez en la India (2001)_>]

Sacar toda la info de una peli para poder meterla en un diccionario y usarla en ElasticSearch, indexandola (metodo todo en 1)

Tarda bastante en ejecutarse (5 a 15 min), mete 250 peliculas en elastic

quitado parametro de es.index (, id=i)

Coge el sumario de cada peli de la lista, y guarda la info en elasticSearch



In [8]:

    
for i in range(10,250):
    peli = listaPelis[i]
    peli2 = ia.get_movie(peli.movieID)
    string = peli2.summary()
    separado = string.split('\n')
    solucion = {}
    for i in range(2,len(separado)):
        sep2 = separado[i].split(':')
        #Forma de evitar que haya fallo al pasar el split a diccionario
        #Caso del fallo en los 2 cuadros de abajo
        sep2[1:len(sep2)] = [''.join(sep2[1:len(sep2)])]
        solucion.update(dict([sep2]))
    es.index(index='prueba-index', doc_type='text', body=solucion)



In [12]:

    
separado









    Out[12]:





[u'Movie',
 u'=====',
 u'Title: Lagaan: Once Upon a Time in India (2001)',
 u'Genres: Adventure, Drama, Musical, Romance, Sport.',
 u'Director: Ashutosh Gowariker.',
 u'Writer: Ashutosh Gowariker, Ashutosh Gowariker, Kumar Dave, Sanjay Dayma, K.P. Saxena.',
 u'Cast: Aamir Khan (Bhuvan), Gracy Singh (Gauri), Rachel Shelley (Elizabeth Russell), Paul Blackthorne (Captain Andrew Russell), Suhasini Mulay (Yashodamai).',
 u'Runtime: 224.',
 u'Country: India.',
 u'Language: Hindi, English, Awadhi, Urdu.',
 u'Rating: 8.2 (80051 votes).',
 u"Plot: This is the story about the resilience shown by the Indians when they were under the British Rule. They are already taxed to the bone by the British and their cronies, but when Jack Russell announces that he will double the Lagaan (tax) from all villagers, they decide to oppose it. Leading the villagers is a handsome young man named Bhuvan, who challenges them to a game of cricket, a game that is to be played by veteran British cricket players, versus villagers, including Bhuvan himself, who have never played this game before, and do not even know a bat from a piece of wood. As the challenge is accepted, the interest grows and attracts Indians from all over the region, as well as the British from all over the country - as everyone gathers to see the 'fair play' that the British will display against their counter-parts, who are aided by none other than the sister, Elizabeth, of Captain Rusell."]



In [9]:

    
sep2[1]









    Out[9]:





u" This is the story about the resilience shown by the Indians when they were under the British Rule. They are already taxed to the bone by the British and their cronies, but when Jack Russell announces that he will double the Lagaan (tax) from all villagers, they decide to oppose it. Leading the villagers is a handsome young man named Bhuvan, who challenges them to a game of cricket, a game that is to be played by veteran British cricket players, versus villagers, including Bhuvan himself, who have never played this game before, and do not even know a bat from a piece of wood. As the challenge is accepted, the interest grows and attracts Indians from all over the region, as well as the British from all over the country - as everyone gathers to see the 'fair play' that the British will display against their counter-parts, who are aided by none other than the sister, Elizabeth, of Captain Rusell."

Pruebas



In [21]:

    
import pandas as pd
lista=[]

for i in range(0400000,0400010,1):
    peli = ia.get_movie(i)
    lista.append(peli.summary())

    
datos = pd.DataFrame(lista)
print datos.values









    



[[ u'Movie\n=====\nTitle: Tonto Kid, The (1934)\nGenres: Action, Adventure, Crime, Drama, Romance, Western.\nDirector: Harry L. Fraser.\nWriter: Christopher Booth, Harry L. Fraser.\nCast: Rex Bell (Skeets Slawson aka The Tonto Kid), Ruth Mix (Nancy Cahill), Buzz Barton (Wesley Fritch), Theodore Lorch (Lawyer Sam Creech), Joseph W. Girard (Rance Cartwright).\nRuntime: 61.\nCountry: USA.\nLanguage: English.\nRating: 5.8 (24 votes).\nPlot: Lawyer Creech is after the ranch of the dying Cartwright. First he brings in Cahill to pose as the only living relative. Then when the Tonto Kid finds platinum on the ranch, Creech frames him for murder.']
 [ u'Movie\n=====\nTitle: Torres Vedras (1933)\nGenres: Documentary.\nCountry: Portugal.\n']
 [ u'Movie\n=====\nTitle: Torres Vedras e o Carnaval de 1933 (1933)\nGenres: Documentary.\nCountry: Portugal.\n']
 [ u'Movie\n=====\nTitle: Tractores Citroen no Ex\xe9rcito Portugu\xeas (1933)\nGenres: Documentary.\nDirector: Mota da Costa.\nCountry: Portugal.\nLanguage: Portuguese.\n']
 [ u'Movie\n=====\nTitle: Hapax Legomena IV: Travelling Matte (1971)\nDirector: Hollis Frampton.\nCountry: USA.\nRating: 6.7 (18 votes).\n']
 [ u'Movie\n=====\nTitle: tribunal de las Aguas, El (1960)\nGenres: Documentary, Short.\nDirector: Alberto Carles Blat.\nWriter: Alberto Carles Blat.\nCast: Juan Mart\xedn Navas.\nRuntime: 10.\nCountry: Spain.\n']
 [ u'Movie\n=====\nTitle: True Story of Eskimo Nell, The (1975)\nGenres: Comedy, Western.\nDirector: Richard Franklin.\nWriter: Richard Franklin, Alan Hopgood.\nCast: Max Gillies (Deadeye Dick), Serge Lazareff (Mexico Pete), Butcher Vachon (The Alaskan Kid), Jerry Thomas (The Sprunker), Kurt Beimel (Waldo the Great).\nRuntime: 103.\nCountry: Australia.\nLanguage: English.\nRating: 4.9 (58 votes).\n']
 [ u'Movie\n=====\nTitle: Two Idiots in Hollywood (1988)\nGenres: Musical, Comedy.\nDirector: Stephen Tobolowsky.\nWriter: Stephen Tobolowsky, Stephen Tobolowsky.\nCast: Jim McGrath (Murphy Wegg), Jeff Doucette (Taylor Dup), Cheryl Anderson (Marianne Plambo), Kat Sawyer, Lisa Robins (NBA Casting Secretary).\nRuntime: 85.\nCountry: USA.\nLanguage: English.\nRating: 6.6 (92 votes).\nPlot: Idiots Taylor Dupp and Murphy Wegg flee their humdrum existence in Dayton, Ohio for the glamour of Hollywood. Murphy turns his complete lack of talent into a career as a television producer ("The Pac-Man Show"), while Taylor is unjustly accused of murder.']]



In [22]:

    
import pandas as pd
lista=[]
datos = pd.DataFrame([])
for i in range(0005000,0005003):
        lista.append(ia.get_movie(i))
        lista.append(ia.get_movie_plot(i))

        datos = datos.append(lista)

print datos.values









    



[[<Movie id:0002560[http] title:_The Vicissitudes of a Top Hat (1912)_>]
 [ {'titlesRefs': {}, 'data': {'plot': [u'A man throws away an old top hat and a tramp uses it to sole his boots.']}, 'charactersRefs': {}, 'namesRefs': {}}]
 [<Movie id:0002560[http] title:_The Vicissitudes of a Top Hat (1912)_>]
 [ {'titlesRefs': {}, 'data': {'plot': [u'A man throws away an old top hat and a tramp uses it to sole his boots.']}, 'charactersRefs': {}, 'namesRefs': {}}]
 [<Movie id:0002561[http] title:_Victim of Circumstances (1912)_>]
 [ {'titlesRefs': {}, 'data': {'plot': [u'Richard Worthington on the way to work sees a thug knock a man down and apparently rob him. Rushing to the aid of the victim he is arrested for the crime. He is tried before a jury, found guilty, and sentenced to the convict camp. While working in the turpentine woods, under charge of a keeper, he is seen by Meg of the Everglades, who shows sympathy for the poor convict. The following Sunday morning word is passed around between the convicts that an attempt will be made to escape. At the opportune time one of the keepers at the gate is assaulted and fifteen of the convicts, Worthington among the lot, escape to the Everglades. Worthington is successful in eluding the bloodhounds and reaches a lonely hut in the Everglades, which proves to be the home of Meg and her father, where he successfully hides for two weeks, resulting in a strong friendship springing up between the convict and Meg. Becoming careless, Worthington goes outside of the hut to enjoy a smoke. He is seen by some of the guards from the convict camp and is arrested. A few weeks later Red Lopers, for whose crime Worthington is arrested, confesses. Worthington is immediately discharged and exonerated from all blame. Although his good name is restored and he is back with his mother, Worthington finds something is missing. His happiness is not complete, and he soon realizes that unconsciously he has fallen in love with the girl of the Everglades, who showed such practical sympathy for the poor unfortunate. Leaving his home, he goes to the little cabin in the woods, where he tells Meg of his love, and they agree to start a new life together.']}, 'charactersRefs': {}, 'namesRefs': {}}]
 [<Movie id:0002560[http] title:_The Vicissitudes of a Top Hat (1912)_>]
 [ {'titlesRefs': {}, 'data': {'plot': [u'A man throws away an old top hat and a tramp uses it to sole his boots.']}, 'charactersRefs': {}, 'namesRefs': {}}]
 [<Movie id:0002561[http] title:_Victim of Circumstances (1912)_>]
 [ {'titlesRefs': {}, 'data': {'plot': [u'Richard Worthington on the way to work sees a thug knock a man down and apparently rob him. Rushing to the aid of the victim he is arrested for the crime. He is tried before a jury, found guilty, and sentenced to the convict camp. While working in the turpentine woods, under charge of a keeper, he is seen by Meg of the Everglades, who shows sympathy for the poor convict. The following Sunday morning word is passed around between the convicts that an attempt will be made to escape. At the opportune time one of the keepers at the gate is assaulted and fifteen of the convicts, Worthington among the lot, escape to the Everglades. Worthington is successful in eluding the bloodhounds and reaches a lonely hut in the Everglades, which proves to be the home of Meg and her father, where he successfully hides for two weeks, resulting in a strong friendship springing up between the convict and Meg. Becoming careless, Worthington goes outside of the hut to enjoy a smoke. He is seen by some of the guards from the convict camp and is arrested. A few weeks later Red Lopers, for whose crime Worthington is arrested, confesses. Worthington is immediately discharged and exonerated from all blame. Although his good name is restored and he is back with his mother, Worthington finds something is missing. His happiness is not complete, and he soon realizes that unconsciously he has fallen in love with the girl of the Everglades, who showed such practical sympathy for the poor unfortunate. Leaving his home, he goes to the little cabin in the woods, where he tells Meg of his love, and they agree to start a new life together.']}, 'charactersRefs': {}, 'namesRefs': {}}]
 [<Movie id:0002562[http] title:_The Victoria Cross (1912)_>]
 [ {'titlesRefs': {}, 'data': {'plot': [u'Just previous to the Charge of the Light Brigade, made famous by Tennyson, in the fall of 1854, young Lieutenant Cholmondeley, of the English Army, asks Colonel Carson for the hand of his daughter Ellen. The Colonel replies: "When you have won your spurs, I will give my consent." Russia declares war against England and France and the Light Brigade is ordered to the front. After the departure of the Lieutenant and her father, Ellen decides to become a nurse under Florence Nightingale. She offers her services and though somewhat young, is accepted by Miss Nightingale. At her father\'s encampment he recognizes Ellen as the nurses pass before him in review. At first he is displeased, but upon second thought is justly proud of her. She is first upon the battlefield to aid and comfort the wounded and it is there that her sweetheart, the Lieutenant, learns of her presence at the seat of action. During the charge of the Six Hundred, Lieutenant Cholmondeley saves the life of his Colonel, defending him against the combined attacks of three Cossacks, lifting one bodily above his head and casting him maimed and helpless to the ground. Ellen watches through her father\'s field-glasses with palpitating heart the deeds of her sweetheart and the progress of the battle. After the war what is left of the few survivors of the Light Brigade are mustered before Queen Victoria and the young Lieutenant receives the Victoria Cross as a special mark of distinction for services rendered. The Colonel gives him his daughter Ellen, saying, "He has fairly won her and his spurs."', u'Ellen Carson volunteers to serve with Florence Nightingale in the Crimean war and witnesses the charge of the Light Brigade.']}, 'charactersRefs': {}, 'namesRefs': {}}]]

Elastic Seach (cabezera de ejemplo)



In [23]:

    
from datetime import datetime
from elasticsearch import Elasticsearch
es = Elasticsearch()
'''
doc = {
    'prueba': 'Holi',
    'text': 'A man throws away an old top hat and a tramp uses it to sole his boots.',
}
res = es.index(index="movies-index", doc_type='text', id=1, body=doc)
print(res['created'])
'''

res = es.get(index="movies-index", doc_type='text', id=6)
print(res['_source'])

es.indices.refresh(index="movies-index")

res = es.search(index="movies-index", body={"query": {"match_all": {}}})
print("Got %d Hits:" % res['hits']['total'])
for hit in res['hits']['hits']:
    print("%(text)s" % hit["_source"])









    



---------------------------------------------------------------------------
NotFoundError                             Traceback (most recent call last)
<ipython-input-23-b426fb989a9c> in <module>()
     11 '''
     12 
---> 13 res = es.get(index="movies-index", doc_type='text', id=6)
     14 print(res['_source'])
     15 

C:\Users\cr\Anaconda2\lib\site-packages\elasticsearch\client\utils.pyc in _wrapped(*args, **kwargs)
     71                 if p in kwargs:
     72                     params[p] = kwargs.pop(p)
---> 73             return func(*args, params=params, **kwargs)
     74         return _wrapped
     75     return _wrapper

C:\Users\cr\Anaconda2\lib\site-packages\elasticsearch\client\__init__.pyc in get(self, index, id, doc_type, params)
    407                 raise ValueError("Empty value passed for a required argument.")
    408         return self.transport.perform_request('GET', _make_path(index,
--> 409             doc_type, id), params=params)
    410 
    411     @query_params('_source', '_source_exclude', '_source_include', 'parent',

C:\Users\cr\Anaconda2\lib\site-packages\elasticsearch\transport.pyc in perform_request(self, method, url, params, body)
    310 
    311             try:
--> 312                 status, headers, data = connection.perform_request(method, url, params, body, ignore=ignore, timeout=timeout)
    313 
    314             except TransportError as e:

C:\Users\cr\Anaconda2\lib\site-packages\elasticsearch\connection\http_urllib3.pyc in perform_request(self, method, url, params, body, timeout, ignore)
    126         if not (200 <= response.status < 300) and response.status not in ignore:
    127             self.log_request_fail(method, full_url, url, body, duration, response.status, raw_data)
--> 128             self._raise_error(response.status, raw_data)
    129 
    130         self.log_request_success(method, full_url, url, body, response.status,

C:\Users\cr\Anaconda2\lib\site-packages\elasticsearch\connection\base.pyc in _raise_error(self, status_code, raw_data)
    123             logger.warning('Undecodable raw error response from server: %s', err)
    124 
--> 125         raise HTTP_EXCEPTIONS.get(status_code, TransportError)(status_code, error_message, additional_info)
    126 
    127 

NotFoundError: TransportError(404, u'index_not_found_exception', u'no such index')

Inicializacion real de Elastic Search (ejecutar)



In [7]:

    
# make sure ES is up and running
import requests
res = requests.get('http://localhost:9200')
print(res.content)

from elasticsearch import Elasticsearch
es = Elasticsearch([{'host': 'localhost', 'port': 9200}])









    



{
  "name" : "olxdfU7",
  "cluster_name" : "elasticsearch",
  "cluster_uuid" : "DQylXrS0QwCriq9AQDxkSQ",
  "version" : {
    "number" : "5.4.0",
    "build_hash" : "780f8c4",
    "build_date" : "2017-04-28T17:43:27.229Z",
    "build_snapshot" : false,
    "lucene_version" : "6.5.0"
  },
  "tagline" : "You Know, for Search"
}

Guardamos el top 250 dentro de elastic search (antiguo)



In [281]:

    
#Lista con el top 250 de peliculas
top = ia.get_top250_movies()
#Recorro la lista y saco los datos para indexarlos en elastic search, el id es el orden en la lista
for i in range(0,250):
    es.index(index='films-index', doc_type='text', id=i, body=top[i].data)

Buscamos los datos guardados (antiguo)



In [24]:

    
res = es.search(index="films-index", body={"query": {"match_all": {}}})
print("Got %d Hits:" % res['hits']['total'])
#Modificar para que funcione
for hit in res['hits']['hits']:
    print("%(kind)s %(title)s %(year)s %(rating)s" % hit["_source"])









    



---------------------------------------------------------------------------
NotFoundError                             Traceback (most recent call last)
<ipython-input-24-716e325fcaba> in <module>()
----> 1 res = es.search(index="films-index", body={"query": {"match_all": {}}})
      2 print("Got %d Hits:" % res['hits']['total'])
      3 #Modificar para que funcione
      4 for hit in res['hits']['hits']:
      5     print("%(kind)s %(title)s %(year)s %(rating)s" % hit["_source"])

C:\Users\cr\Anaconda2\lib\site-packages\elasticsearch\client\utils.pyc in _wrapped(*args, **kwargs)
     71                 if p in kwargs:
     72                     params[p] = kwargs.pop(p)
---> 73             return func(*args, params=params, **kwargs)
     74         return _wrapped
     75     return _wrapper

C:\Users\cr\Anaconda2\lib\site-packages\elasticsearch\client\__init__.pyc in search(self, index, doc_type, body, params)
    621             index = '_all'
    622         return self.transport.perform_request('GET', _make_path(index,
--> 623             doc_type, '_search'), params=params, body=body)
    624 
    625     @query_params('_source', '_source_exclude', '_source_include',

C:\Users\cr\Anaconda2\lib\site-packages\elasticsearch\transport.pyc in perform_request(self, method, url, params, body)
    310 
    311             try:
--> 312                 status, headers, data = connection.perform_request(method, url, params, body, ignore=ignore, timeout=timeout)
    313 
    314             except TransportError as e:

C:\Users\cr\Anaconda2\lib\site-packages\elasticsearch\connection\http_urllib3.pyc in perform_request(self, method, url, params, body, timeout, ignore)
    126         if not (200 <= response.status < 300) and response.status not in ignore:
    127             self.log_request_fail(method, full_url, url, body, duration, response.status, raw_data)
--> 128             self._raise_error(response.status, raw_data)
    129 
    130         self.log_request_success(method, full_url, url, body, response.status,

C:\Users\cr\Anaconda2\lib\site-packages\elasticsearch\connection\base.pyc in _raise_error(self, status_code, raw_data)
    123             logger.warning('Undecodable raw error response from server: %s', err)
    124 
--> 125         raise HTTP_EXCEPTIONS.get(status_code, TransportError)(status_code, error_message, additional_info)
    126 
    127 

NotFoundError: TransportError(404, u'index_not_found_exception', u'no such index')

Sacar los hits e info de unos cuantos de ellos



In [28]:

    
res = es.search(index="prueba-index", body={"query": {"match_all": {}}})
print("Got %d Hits:" % res['hits']['total'])

for hit in res['hits']['hits']:
    print("%(Title)s %(Genres)s %(Director)s %(Cast)s %(Writer)s %(Country)s %(Language)s %(Rating)s %(Plot)s" % hit["_source"])









    



Got 250 Hits:
 Leben der Anderen, Das (2006)  Drama, Thriller.  Florian Henckel von Donnersmarck.  Martina Gedeck (Christa-Maria Sieland), Ulrich Mühe (Hauptmann Gerd Wiesler), Sebastian Koch (Georg Dreyman), Ulrich Tukur (Oberstleutnant Anton Grubitz), Thomas Thieme (Minister Bruno Hempf).  Florian Henckel von Donnersmarck.  Germany.  German.  8.5 (278124 votes).  Gerd Wiesler is an officer with the Stasi, the East German secret police. The film begins in 1984 when Wiesler attends a play written by Georg Dreyman, who is considered by many to be the ultimate example of the loyal citizen. Wiesler has a gut feeling that Dreyman can't be as ideal as he seems, and believes surveillance is called for. The Minister of Culture agrees but only later does Wiesler learn that the Minister sees Dreyman as a rival and lusts after his partner Christa-Maria. The more time he spends listening in on them, the more he comes to care about them. The once rigid Stasi officer begins to intervene in their lives, in a positive way, protecting them whenever possible. Eventually, Wiesler's activities catch up to him and while there is no proof of wrongdoing, he finds himself in menial jobs - until the unbelievable happens.
 Nuovo Cinema Paradiso (1988)  Drama.  Giuseppe Tornatore.  Antonella Attili (Maria Di Vita - Younger), Enzo Cannavale (Spaccafico), Isa Danieli (Anna), Leo Gullotta (Usher), Marco Leonardi (Salvatore 'Totò' Di Vita - Teenager).  Giuseppe Tornatore, Giuseppe Tornatore, Vanna Paoli, Richard Epcar.  Italy, France.  Italian.  8.5 (165897 votes).  A boy who grew up in a native Sicilian Village returns home as a famous director after receiving news about the death of an old friend. Told in a flashback, Salvatore reminiscences about his childhood and his relationship with Alfredo, a projectionist at Cinema Paradiso. Under the fatherly influence of Alfredo, Salvatore fell in love with film making, with the duo spending many hours discussing about films and Alfredo painstakingly teaching Salvatore the skills that became a stepping stone for the young boy into the world of film making. The film brings the audience through the changes in cinema and the dying trade of traditional film making, editing and screening. It also explores a young boy's dream of leaving his little town to foray into the world outside.
 Shichinin no samurai (1954)  Adventure, Drama.  Akira Kurosawa.  Toshirô Mifune (Kikuchiyo), Takashi Shimura (Kambei Shimada), Keiko Tsushima (Shino), Yukiko Shimazaki (Wife), Kamatari Fujiwara (Farmer Manzo).  Akira Kurosawa, Shinobu Hashimoto, Hideo Oguni.  Japan.  Japanese.  8.7 (244938 votes).  A veteran samurai, who has fallen on hard times, answers a village's request for protection from bandits. He gathers 6 other samurai to help him, and they teach the townspeople how to defend themselves, and they supply the samurai with three small meals a day. The film culminates in a giant battle when 40 bandits attack the village.
 Se7en (1995)  Crime, Drama, Mystery, Thriller.  David Fincher.  Morgan Freeman (Somerset), Andrew Kevin Walker (Dead Man at 1st Crime Scene), Kevin Spacey (John Doe), Daniel Zacapa (Detective Taylor), Brad Pitt (Mills).  Andrew Kevin Walker.  USA.  English.  8.6 (1103064 votes).  A film about two homicide detectives' (Morgan Freeman and (Brad Pitt desperate hunt for a serial killer who justifies his crimes as absolution for the world's ignorance of the Seven Deadly Sins. The movie takes us from the tortured remains of one victim to the next as the sociopathic "John Doe" (Kevin Spacey) sermonizes to Detectives Somerset and Mills -- one sin at a time. The sin of Gluttony comes first and the murderer's terrible capacity is graphically demonstrated in the dark and subdued tones characteristic of film noir. The seasoned and cultured but jaded Somerset researches the Seven Deadly Sins in an effort to understand the killer's modus operandi while the bright but green and impulsive Detective Mills (Pitt) scoffs at his efforts to get inside the mind of a killer...
 Paths of Glory (1957)  Drama, War.  Stanley Kubrick.  Kirk Douglas (Col. Dax), Ralph Meeker (Cpl. Philippe Paris), Adolphe Menjou (Gen. George Broulard), George Macready (Gen. Paul Mireau), Wayne Morris (Lt. Roget).  Stanley Kubrick, Calder Willingham, Jim Thompson, Humphrey Cobb.  USA.  English, German, Latin.  8.5 (130703 votes).  The futility and irony of the war in the trenches in WWI is shown as a unit commander in the French army must deal with the mutiny of his men and a glory-seeking general after part of his force falls back under fire in an impossible attack.
 Pulp Fiction (1994)  Crime, Drama.  Quentin Tarantino.  Tim Roth (Pumpkin), Amanda Plummer (Honey Bunny), Laura Lovelace (Waitress), John Travolta (Vincent Vega), Samuel L. Jackson (Jules Winnfield).  Quentin Tarantino, Roger Avary, Quentin Tarantino.  USA.  English, Spanish, French.  8.9 (1418706 votes).  Jules Winnfield (Samuel L. Jackson) and Vincent Vega (John Travolta) are two hit men who are out to retrieve a suitcase stolen from their employer, mob boss Marsellus Wallace (Ving Rhames). Wallace has also asked Vincent to take his wife Mia (Uma Thurman) out a few days later when Wallace himself will be out of town. Butch Coolidge (Bruce Willis) is an aging boxer who is paid by Wallace to lose his fight. The lives of these seemingly unrelated people are woven together comprising of a series of funny, bizarre and uncalled-for incidents.
 Django Unchained (2012)  Drama, Western.  Quentin Tarantino.  Jamie Foxx (Django), Christoph Waltz (Dr. King Schultz), Leonardo DiCaprio (Calvin Candie), Kerry Washington (Broomhilda von Shaft), Samuel L. Jackson (Stephen).  Quentin Tarantino.  USA.  English, German, French, Italian.  8.4 (1039218 votes).  A German dentist buys the freedom of a slave and trains him with the intent to make him his deputy bounty hunter. Instead, he is led to the site of the slave's wife who belongs to a ruthless plantation owner.
 Pianist, The (2002)  Biography, Drama, War.  Roman Polanski.  Adrien Brody (Wladyslaw Szpilman), Emilia Fox (Dorota), Michal Zebrowski (Jurek), Ed Stoppard (Henryk), Maureen Lipman (Mother).  Ronald Harwood, Wladyslaw Szpilman.  France, Poland, Germany, UK.  English, German, Russian.  8.5 (541568 votes).  In this adaptation of the autobiography "The Pianist The Extraordinary True Story of One Man's Survival in Warsaw, 1939-1945," Wladyslaw Szpilman, a Polish Jewish radio station pianist, sees Warsaw change gradually as World War II begins. Szpilman is forced into the Warsaw Ghetto, but is later separated from his family during Operation Reinhard. From this time until the concentration camp prisoners are released, Szpilman hides in various locations among the ruins of Warsaw.
 Star Wars (1977)  Action, Adventure, Fantasy, Sci-Fi.  George Lucas.  Mark Hamill (Luke Skywalker), Harrison Ford (Han Solo), Carrie Fisher (Princess Leia Organa), Peter Cushing (Grand Moff Tarkin), Alec Guinness (Ben Obi-Wan Kenobi).  George Lucas.  USA.  English.  8.7 (978421 votes).  The Imperial Forces, under orders from cruel Darth Vader, hold Princess Leia hostage in their efforts to quell the rebellion against the Galactic Empire. Luke Skywalker and Han Solo, captain of the Millennium Falcon, work together with the companionable droid duo R2-D2 and C-3PO to rescue the beautiful princess, help the Rebel Alliance and restore freedom and justice to the Galaxy.
 Back to the Future (1985)  Adventure, Comedy, Sci-Fi.  Robert Zemeckis.  Michael J. Fox (Marty McFly), Christopher Lloyd (Dr. Emmett Brown), Lea Thompson (Lorraine Baines), Crispin Glover (George McFly), Thomas F. Wilson (Biff Tannen).  Robert Zemeckis, Bob Gale.  USA.  English.  8.5 (792473 votes).  Marty McFly, a typical American teenager of the Eighties, is accidentally sent back to 1955 in a plutonium-powered DeLorean "time machine" invented by a slightly mad scientist. During his often hysterical, always amazing trip back in time, Marty must make certain his teenage parents-to-be meet and fall in love - so he can get back to the future.



In [27]:

    
res = es.search(index="prueba-index", body={"query": {"match_all": {}}})
print("Got %d Hits:" % res['hits']['total'])

for hit in res['hits']['hits']:
    print("%(Title)s" % hit["_source"])









    



Got 250 Hits:
 Leben der Anderen, Das (2006)
 Nuovo Cinema Paradiso (1988)
 Shichinin no samurai (1954)
 Se7en (1995)
 Paths of Glory (1957)
 Pulp Fiction (1994)
 Django Unchained (2012)
 Pianist, The (2002)
 Star Wars (1977)
 Back to the Future (1985)



In [26]:

    
res = es.search(index="prueba-index", body={"query": {"match_all": {}}})
res









    Out[26]:





{u'_shards': {u'failed': 0, u'successful': 5, u'total': 5},
 u'hits': {u'hits': [{u'_id': u'AVxB2MAL6RBRcVMNlbJ2',
    u'_index': u'prueba-index',
    u'_score': 1.0,
    u'_source': {u'Cast': u' Martina Gedeck (Christa-Maria Sieland), Ulrich M\xfche (Hauptmann Gerd Wiesler), Sebastian Koch (Georg Dreyman), Ulrich Tukur (Oberstleutnant Anton Grubitz), Thomas Thieme (Minister Bruno Hempf).',
     u'Country': u' Germany.',
     u'Director': u' Florian Henckel von Donnersmarck.',
     u'Genres': u' Drama, Thriller.',
     u'Language': u' German.',
     u'Plot': u" Gerd Wiesler is an officer with the Stasi, the East German secret police. The film begins in 1984 when Wiesler attends a play written by Georg Dreyman, who is considered by many to be the ultimate example of the loyal citizen. Wiesler has a gut feeling that Dreyman can't be as ideal as he seems, and believes surveillance is called for. The Minister of Culture agrees but only later does Wiesler learn that the Minister sees Dreyman as a rival and lusts after his partner Christa-Maria. The more time he spends listening in on them, the more he comes to care about them. The once rigid Stasi officer begins to intervene in their lives, in a positive way, protecting them whenever possible. Eventually, Wiesler's activities catch up to him and while there is no proof of wrongdoing, he finds himself in menial jobs - until the unbelievable happens.",
     u'Rating': u' 8.5 (278124 votes).',
     u'Runtime': u' 137.',
     u'Title': u' Leben der Anderen, Das (2006)',
     u'Writer': u' Florian Henckel von Donnersmarck.'},
    u'_type': u'text'},
   {u'_id': u'AVxB2Lcr6RBRcVMNlbJ1',
    u'_index': u'prueba-index',
    u'_score': 1.0,
    u'_source': {u'Cast': u" Antonella Attili (Maria Di Vita - Younger), Enzo Cannavale (Spaccafico), Isa Danieli (Anna), Leo Gullotta (Usher), Marco Leonardi (Salvatore 'Tot\xf2' Di Vita - Teenager).",
     u'Country': u' Italy, France.',
     u'Director': u' Giuseppe Tornatore.',
     u'Genres': u' Drama.',
     u'Language': u' Italian.',
     u'Plot': u" A boy who grew up in a native Sicilian Village returns home as a famous director after receiving news about the death of an old friend. Told in a flashback, Salvatore reminiscences about his childhood and his relationship with Alfredo, a projectionist at Cinema Paradiso. Under the fatherly influence of Alfredo, Salvatore fell in love with film making, with the duo spending many hours discussing about films and Alfredo painstakingly teaching Salvatore the skills that became a stepping stone for the young boy into the world of film making. The film brings the audience through the changes in cinema and the dying trade of traditional film making, editing and screening. It also explores a young boy's dream of leaving his little town to foray into the world outside.",
     u'Rating': u' 8.5 (165897 votes).',
     u'Runtime': u' 155, Italy173(Europa Cinema Festival), 124(cut theatrical version).',
     u'Title': u' Nuovo Cinema Paradiso (1988)',
     u'Writer': u' Giuseppe Tornatore, Giuseppe Tornatore, Vanna Paoli, Richard Epcar.'},
    u'_type': u'text'},
   {u'_id': u'AVxB1wNE6RBRcVMNlbJR',
    u'_index': u'prueba-index',
    u'_score': 1.0,
    u'_source': {u'Cast': u' Toshir\xf4 Mifune (Kikuchiyo), Takashi Shimura (Kambei Shimada), Keiko Tsushima (Shino), Yukiko Shimazaki (Wife), Kamatari Fujiwara (Farmer Manzo).',
     u'Country': u' Japan.',
     u'Director': u' Akira Kurosawa.',
     u'Genres': u' Adventure, Drama.',
     u'Language': u' Japanese.',
     u'Plot': u" A veteran samurai, who has fallen on hard times, answers a village's request for protection from bandits. He gathers 6 other samurai to help him, and they teach the townspeople how to defend themselves, and they supply the samurai with three small meals a day. The film culminates in a giant battle when 40 bandits attack the village.",
     u'Rating': u' 8.7 (244938 votes).',
     u'Runtime': u' 207, 160(international version), Argentina163, Sweden202(2002 re-release), UK150(original version), UK190(1991 re-release), USA158(original version) (cut), USA203(re-release), USA207(restored version), Spain202(DVD edition).',
     u'Title': u' Shichinin no samurai (1954)',
     u'Writer': u' Akira Kurosawa, Shinobu Hashimoto, Hideo Oguni.'},
    u'_type': u'text'},
   {u'_id': u'AVxB1yad6RBRcVMNlbJU',
    u'_index': u'prueba-index',
    u'_score': 1.0,
    u'_source': {u'Cast': u' Morgan Freeman (Somerset), Andrew Kevin Walker (Dead Man at 1st Crime Scene), Kevin Spacey (John Doe), Daniel Zacapa (Detective Taylor), Brad Pitt (Mills).',
     u'Country': u' USA.',
     u'Director': u' David Fincher.',
     u'Genres': u' Crime, Drama, Mystery, Thriller.',
     u'Language': u' English.',
     u'Plot': u' A film about two homicide detectives\' (Morgan Freeman and (Brad Pitt desperate hunt for a serial killer who justifies his crimes as absolution for the world\'s ignorance of the Seven Deadly Sins. The movie takes us from the tortured remains of one victim to the next as the sociopathic "John Doe" (Kevin Spacey) sermonizes to Detectives Somerset and Mills -- one sin at a time. The sin of Gluttony comes first and the murderer\'s terrible capacity is graphically demonstrated in the dark and subdued tones characteristic of film noir. The seasoned and cultured but jaded Somerset researches the Seven Deadly Sins in an effort to understand the killer\'s modus operandi while the bright but green and impulsive Detective Mills (Pitt) scoffs at his efforts to get inside the mind of a killer...',
     u'Rating': u' 8.6 (1103064 votes).',
     u'Runtime': u' 127.',
     u'Title': u' Se7en (1995)',
     u'Writer': u' Andrew Kevin Walker.'},
    u'_type': u'text'},
   {u'_id': u'AVxB2PyN6RBRcVMNlbJ4',
    u'_index': u'prueba-index',
    u'_score': 1.0,
    u'_source': {u'Cast': u' Kirk Douglas (Col. Dax), Ralph Meeker (Cpl. Philippe Paris), Adolphe Menjou (Gen. George Broulard), George Macready (Gen. Paul Mireau), Wayne Morris (Lt. Roget).',
     u'Country': u' USA.',
     u'Director': u' Stanley Kubrick.',
     u'Genres': u' Drama, War.',
     u'Language': u' English, German, Latin.',
     u'Plot': u' The futility and irony of the war in the trenches in WWI is shown as a unit commander in the French army must deal with the mutiny of his men and a glory-seeking general after part of his force falls back under fire in an impossible attack.',
     u'Rating': u' 8.5 (130703 votes).',
     u'Runtime': u' 88.',
     u'Title': u' Paths of Glory (1957)',
     u'Writer': u' Stanley Kubrick, Calder Willingham, Jim Thompson, Humphrey Cobb.'},
    u'_type': u'text'},
   {u'_id': u'AVxB1YlE6RBRcVMNlbJF',
    u'_index': u'prueba-index',
    u'_score': 1.0,
    u'_source': {u'Cast': u' Tim Roth (Pumpkin), Amanda Plummer (Honey Bunny), Laura Lovelace (Waitress), John Travolta (Vincent Vega), Samuel L. Jackson (Jules Winnfield).',
     u'Country': u' USA.',
     u'Director': u' Quentin Tarantino.',
     u'Genres': u' Crime, Drama.',
     u'Language': u' English, Spanish, French.',
     u'Plot': u' Jules Winnfield (Samuel L. Jackson) and Vincent Vega (John Travolta) are two hit men who are out to retrieve a suitcase stolen from their employer, mob boss Marsellus Wallace (Ving Rhames). Wallace has also asked Vincent to take his wife Mia (Uma Thurman) out a few days later when Wallace himself will be out of town. Butch Coolidge (Bruce Willis) is an aging boxer who is paid by Wallace to lose his fight. The lives of these seemingly unrelated people are woven together comprising of a series of funny, bizarre and uncalled-for incidents.',
     u'Rating': u' 8.9 (1418706 votes).',
     u'Runtime': u' 154, 178(original cut).',
     u'Title': u' Pulp Fiction (1994)',
     u'Writer': u' Quentin Tarantino, Roger Avary, Quentin Tarantino.'},
    u'_type': u'text'},
   {u'_id': u'AVxB2QwF6RBRcVMNlbJ5',
    u'_index': u'prueba-index',
    u'_score': 1.0,
    u'_source': {u'Cast': u' Jamie Foxx (Django), Christoph Waltz (Dr. King Schultz), Leonardo DiCaprio (Calvin Candie), Kerry Washington (Broomhilda von Shaft), Samuel L. Jackson (Stephen).',
     u'Country': u' USA.',
     u'Director': u' Quentin Tarantino.',
     u'Genres': u' Drama, Western.',
     u'Language': u' English, German, French, Italian.',
     u'Plot': u" A German dentist buys the freedom of a slave and trains him with the intent to make him his deputy bounty hunter. Instead, he is led to the site of the slave's wife who belongs to a ruthless plantation owner.",
     u'Rating': u' 8.4 (1039218 votes).',
     u'Runtime': u' 165.',
     u'Title': u' Django Unchained (2012)',
     u'Writer': u' Quentin Tarantino.'},
    u'_type': u'text'},
   {u'_id': u'AVxB2APi6RBRcVMNlbJn',
    u'_index': u'prueba-index',
    u'_score': 1.0,
    u'_source': {u'Cast': u' Adrien Brody (Wladyslaw Szpilman), Emilia Fox (Dorota), Michal Zebrowski (Jurek), Ed Stoppard (Henryk), Maureen Lipman (Mother).',
     u'Country': u' France, Poland, Germany, UK.',
     u'Director': u' Roman Polanski.',
     u'Genres': u' Biography, Drama, War.',
     u'Language': u' English, German, Russian.',
     u'Plot': u' In this adaptation of the autobiography "The Pianist The Extraordinary True Story of One Man\'s Survival in Warsaw, 1939-1945," Wladyslaw Szpilman, a Polish Jewish radio station pianist, sees Warsaw change gradually as World War II begins. Szpilman is forced into the Warsaw Ghetto, but is later separated from his family during Operation Reinhard. From this time until the concentration camp prisoners are released, Szpilman hides in various locations among the ruins of Warsaw.',
     u'Rating': u' 8.5 (541568 votes).',
     u'Runtime': u' 150.',
     u'Title': u' Pianist, The (2002)',
     u'Writer': u' Ronald Harwood, Wladyslaw Szpilman.'},
    u'_type': u'text'},
   {u'_id': u'AVxB1xNo6RBRcVMNlbJS',
    u'_index': u'prueba-index',
    u'_score': 1.0,
    u'_source': {u'Cast': u' Mark Hamill (Luke Skywalker), Harrison Ford (Han Solo), Carrie Fisher (Princess Leia Organa), Peter Cushing (Grand Moff Tarkin), Alec Guinness (Ben Obi-Wan Kenobi).',
     u'Country': u' USA.',
     u'Director': u' George Lucas.',
     u'Genres': u' Action, Adventure, Fantasy, Sci-Fi.',
     u'Language': u' English.',
     u'Plot': u' The Imperial Forces, under orders from cruel Darth Vader, hold Princess Leia hostage in their efforts to quell the rebellion against the Galactic Empire. Luke Skywalker and Han Solo, captain of the Millennium Falcon, work together with the companionable droid duo R2-D2 and C-3PO to rescue the beautiful princess, help the Rebel Alliance and restore freedom and justice to the Galaxy.',
     u'Rating': u' 8.7 (978421 votes).',
     u'Runtime': u' 121, 125(special edition).',
     u'Title': u' Star Wars (1977)',
     u'Writer': u' George Lucas.'},
    u'_type': u'text'},
   {u'_id': u'AVxB2DIf6RBRcVMNlbJq',
    u'_index': u'prueba-index',
    u'_score': 1.0,
    u'_source': {u'Cast': u' Michael J. Fox (Marty McFly), Christopher Lloyd (Dr. Emmett Brown), Lea Thompson (Lorraine Baines), Crispin Glover (George McFly), Thomas F. Wilson (Biff Tannen).',
     u'Country': u' USA.',
     u'Director': u' Robert Zemeckis.',
     u'Genres': u' Adventure, Comedy, Sci-Fi.',
     u'Language': u' English.',
     u'Plot': u' Marty McFly, a typical American teenager of the Eighties, is accidentally sent back to 1955 in a plutonium-powered DeLorean "time machine" invented by a slightly mad scientist. During his often hysterical, always amazing trip back in time, Marty must make certain his teenage parents-to-be meet and fall in love - so he can get back to the future.',
     u'Rating': u' 8.5 (792473 votes).',
     u'Runtime': u' 116.',
     u'Title': u' Back to the Future (1985)',
     u'Writer': u' Robert Zemeckis, Bob Gale.'},
    u'_type': u'text'}],
  u'max_score': 1.0,
  u'total': 250},
 u'timed_out': False,
 u'took': 399}



In [56]:

    
res = es.search(index="prueba-index", body={
    "query": 
        {"match" : {'Director': 'Christopher Nolan'}
    },
    {
        "highlight" : {
            "fields" : {
                "Language" : {}
            }
        }
    }
})
res









    



  File "<ipython-input-56-6969bc6a6e45>", line 12
    })
    ^
SyntaxError: invalid syntax

Query sin fuzziness

No funciona si le quitas una letra, la query de abajo si al ser fuzzy



In [57]:

    
res = es.search(index="prueba-index", body={"query": {"match" : {'Director': 'Christophe Nola'}}})
print("Got %d Hits:" % res['hits']['total'])
for hit in res['hits']['hits']:
    print("%(Title)s" % hit["_source"])









    



Got 0 Hits:

Query con fuzziness añadida



In [104]:

    
bodyQuery = {
    "query": {
       "multi_match" : {
            "query" : "Int",
            "fields": ["Plot", "Title"],
            "fuzziness": "2"
        }
    }
}
res = es.search(index="prueba-index", body=bodyQuery)
#print res
#print("Got %d Hits:" % res['hits']['total'])
for hit in res['hits']['hits']:
    print("%(Title)s" % hit["_source"])









    



 Monsters, Inc. (2001)
 Into the Wild (2007)
 Some Like It Hot (1959)
 It Happened One Night (1934)
 Mou gaan dou (2002)
 Dr. Strangelove or How I Learned to Stop Worrying and Love the Bomb (1964)
 Per qualche dollaro in più (1965)
 Lion King, The (1994)
 sjunde inseglet, Det (1957)
 Faa yeung nin wa (2000)



In [102]:

    
bodyQuery = {
    "query": {
       "regexp":{
            "Title": "wonder.*"
        }
    }
}
res = es.search(index="prueba-index", body=bodyQuery)
#print res
#print("Got %d Hits:" % res['hits']['total'])
for hit in res['hits']['hits']:
    print("%(Title)s" % hit["_source"])









    



 It's a Wonderful Life (1946)

Query 2 con highlight de distintos campos y la forma de mostrarlo



In [93]:

    
bodyQuery2 = {
    "query": {
        "match" : {
            "Title" : {
                
                "query" : "wond",
                "operator" : "and",
                "zero_terms_query": "all"
            }
        }
    },
    "highlight" : {
        "fields" : {
            "Title" : {},
            "Plot" : {"fragment_size" : 150, "number_of_fragments" : 3}
        },
        #Permite el hightlight sobre campos que no se han hecho query
        #como Plot en este ejemplo
        "require_field_match" : False
    }
}
res = es.search(index="prueba-index", body=bodyQuery2)
print("Got %d Hits:" % res['hits']['total'])
# Uso el [0] porque solo hay 1 hit, si hubiese mas, pues habria mas campos
# de la lista, habria que usar el for de arriba para sacar el highlight de
# cada uno de la lista
#print res['hits']['hits'][0]['highlight']
for hit in res['hits']['hits']:
    print(hit)









    



Got 0 Hits:



In [114]:

    
bodyQuery2 = {
    "query": {
    "bool": {
  "should": [
     {   "match": {
            "Title": {
                "query": "wonder" + ".*",
                "fuzziness": "AUTO",
                "prefix_length" : 1,
                "operator": "and"

            }
        }},
     {   "match": {
            "Plot": {
                    "query": "wonder" + ".*",
                    "fuzziness": 2,
                    "prefix_length" : 1,
                    "operator": "and"
                }
        }
     },
     {   "match": {
            "Genres": {
                "query": "wonder" + ".*",
                "fuzziness": "AUTO",
                "prefix_length" : 1,
                "operator": "and"

            }
        }},
      {   "match": {
            "Director": {
                "query": "wonder" + ".*",
                "fuzziness": "AUTO",
                "prefix_length" : 1,
                "operator": "and"

            }
        }},
       {   "match": {
            "Writer": {
                "query": "wonder" + ".*",
                "fuzziness": "AUTO",
                "prefix_length" : 1,
                "operator": "and"

            }
        }},
       {   "match": {
            "Cast": {
                "query": "wonder" + ".*",
                "fuzziness": "AUTO",
                "prefix_length" : 1,
                "operator": "and"

            }
        }},
        {   "match": {
            "Country": {
                "query": "wonder" + ".*",
                "fuzziness": "AUTO",
                "prefix_length" : 1,
                "operator": "and"

            }
        }},
        {   "match": {
            "Language": {
                "query": "wonder" + ".*",
                "fuzziness": "AUTO",
                "prefix_length" : 1,
                "operator": "and"

            }
        }},
        {   "match": {
            "Rating": {
                "query": "wonder" + ".*",
                "fuzziness": "AUTO",
                "prefix_length" : 1,
                "operator": "and"

            }
        }},

     ]
    }
},
 "highlight": {
        "fields": {
            "Title": {},
            "Plot": {},
            "Director": {}
        },
        # Permite el hightlight sobre campos que no se han hecho query
        # como Plot en este ejemplo
        "require_field_match": False
}
}
'''
    "query": {
        "match": {
            "Title": {
                "query": buscado,
                "fuzziness": "AUTO",
                "boost" :         2.0,
                "prefix_length" : 1,
                "max_expansions": 100,
                #"minimum_should_match" : 10,

                "operator": "and"
            }

        }
    },
    "highlight": {
        "fields": {
            "Title": {},
            "Plot": {"fragment_size": 300, "number_of_fragments": 3}
        },
        # Permite el hightlight sobre campos que no se han hecho query
        # como Plot en este ejemplo
        "require_field_match": False
    }
'''
res = es.search(index="prueba-index", body= bodyQuery2)
print("Got %d Hits:" % res['hits']['total'])
# Uso el [0] porque solo hay 1 hit, si hubiese mas, pues habria mas campos
# de la lista, habria que usar el for de arriba para sacar el highlight de
# cada uno de la lista
# print res['hits']['hits'][0]['highlight']

resultado = []
for hit in res['hits']['hits']:
    resultado.append(hit)
print resultado[10]['_source']['Title']









    



Got 18 Hits:






    




IndexErrorTraceback (most recent call last)
<ipython-input-114-334dd3e62403> in <module>()
    136 for hit in res['hits']['hits']:
    137     resultado.append(hit)
--> 138 print resultado[10]['_source']['Title']

IndexError: list index out of range

Borrar datos



In [25]:

    
es.delete(index='prueba-index', doc_type='text', id=1)









    



---------------------------------------------------------------------------
NotFoundError                             Traceback (most recent call last)
<ipython-input-25-9a812aed628e> in <module>()
----> 1 es.delete(index='prueba-index', doc_type='text', id=1)

C:\Users\cr\Anaconda2\lib\site-packages\elasticsearch\client\utils.pyc in _wrapped(*args, **kwargs)
     71                 if p in kwargs:
     72                     params[p] = kwargs.pop(p)
---> 73             return func(*args, params=params, **kwargs)
     74         return _wrapped
     75     return _wrapper

C:\Users\cr\Anaconda2\lib\site-packages\elasticsearch\client\__init__.pyc in delete(self, index, doc_type, id, params)
   1076                 raise ValueError("Empty value passed for a required argument.")
   1077         return self.transport.perform_request('DELETE', _make_path(index,
-> 1078             doc_type, id), params=params)
   1079 
   1080     @query_params('allow_no_indices', 'analyze_wildcard', 'analyzer',

C:\Users\cr\Anaconda2\lib\site-packages\elasticsearch\transport.pyc in perform_request(self, method, url, params, body)
    310 
    311             try:
--> 312                 status, headers, data = connection.perform_request(method, url, params, body, ignore=ignore, timeout=timeout)
    313 
    314             except TransportError as e:

C:\Users\cr\Anaconda2\lib\site-packages\elasticsearch\connection\http_urllib3.pyc in perform_request(self, method, url, params, body, timeout, ignore)
    126         if not (200 <= response.status < 300) and response.status not in ignore:
    127             self.log_request_fail(method, full_url, url, body, duration, response.status, raw_data)
--> 128             self._raise_error(response.status, raw_data)
    129 
    130         self.log_request_success(method, full_url, url, body, response.status,

C:\Users\cr\Anaconda2\lib\site-packages\elasticsearch\connection\base.pyc in _raise_error(self, status_code, raw_data)
    123             logger.warning('Undecodable raw error response from server: %s', err)
    124 
--> 125         raise HTTP_EXCEPTIONS.get(status_code, TransportError)(status_code, error_message, additional_info)
    126 
    127 

NotFoundError: TransportError(404, u'{"found":false,"_index":"prueba-index","_type":"text","_id":"1","_version":1,"result":"not_found","_shards":{"total":2,"successful":1,"failed":0}}')