notebook.community

Edit and run



In [7]:

    
import pandas as pd
import nltk
import re



In [8]:

    
# English stop-word list taken from NLTK's stopwords corpus.
# NOTE(review): no other visible cell reads `stopwords`; the TfidfVectorizer
# cell passes stop_words='english' instead -- confirm whether this is still needed.
stopwords = nltk.corpus.stopwords.words('english')



In [9]:

    
from nltk.stem.snowball import EnglishStemmer
# Single shared stemmer instance; tokenize_and_stem() calls stemmer.stem on every token.
stemmer = EnglishStemmer()



In [10]:

    
# NOTE(review): SpaceTokenizer is imported but not used in any visible cell.
from nltk.tokenize import TreebankWordTokenizer, SpaceTokenizer
# Single shared tokenizer instance; tokenize_only() calls tokenizer.tokenize on each text.
tokenizer = TreebankWordTokenizer()



In [11]:

    
# Sanity check on a sample sentence: punctuation and brackets come out as
# separate tokens, which tokenize_only() later filters away.
tokenizer.tokenize("Hello, My Name i`s Guy (Really!)")









    Out[11]:





['Hello', ',', 'My', 'Name', 'i`s', 'Guy', '(', 'Really', '!', ')']



In [12]:

    
def tokenize_only(text):
    """Tokenize ``text`` and keep only the tokens that contain a letter.

    The text is split with the module-level ``tokenizer``; any token without
    at least one ASCII letter (numbers, bare punctuation) is discarded.

    Returns a list of the surviving tokens, in their original order and case.
    """
    kept = []
    for candidate in tokenizer.tokenize(text):
        # A token qualifies as soon as it has one letter anywhere in it.
        if re.search('[a-zA-Z]', candidate):
            kept.append(candidate)
    return kept

def tokenize_and_stem(text):
    """Tokenize ``text`` with tokenize_only() and stem every token.

    Each token is passed through the module-level ``stemmer``, so the result
    has exactly one stem per token returned by tokenize_only(), in the same
    order.

    Returns a list of stems.
    """
    tokens = tokenize_only(text)
    # Build a real list: on Python 3 a bare map() is a lazy, one-shot iterator,
    # which would display as <map object> and be empty on a second pass.
    stems = [stemmer.stem(token) for token in tokens]
    return stems



In [13]:

    
tokenize_and_stem("Hello, My Name is Guy (Really!)")









    Out[13]:





[u'hello', 'my', u'name', 'is', u'guy', u'realli']

Below I use my tokenize-and-stem and tokenize-only functions to iterate over the document texts and build two vocabularies: one stemmed and one only tokenized.



In [14]:

    
# Load the gzip-compressed article table. Later cells read its `name` and `text` columns.
df = pd.read_csv('../data/wiki/wiki.csv.gz', encoding='utf8', index_col=None)



In [15]:

    
# Keep only the first 3000 characters of every document's text.
df['text'] = df['text'].str.slice(stop=3000)



In [16]:

    
# Walk every document once and accumulate two parallel token lists:
# the raw tokens and their stems. tokenize_and_stem() stems exactly the tokens
# tokenize_only() returns, so the two lists stay aligned position by position.
totalvocab_stemmed = []
totalvocab_tokenized = []
for article_text in df['text']:
    # un-stemmed tokens of this document
    totalvocab_tokenized += tokenize_only(article_text)
    # matching stems of the same tokens
    totalvocab_stemmed += tokenize_and_stem(article_text)



In [19]:

    
# Lookup table from stem (index) to the original token that produced it (column 'words').
# The index is not unique: a stem appears once for every occurrence of one of its tokens.
vocab_frame = pd.DataFrame({'words': totalvocab_tokenized}, index = totalvocab_stemmed)



In [20]:

    
# Preview: the index shows the stem, 'words' shows the original token.
vocab_frame.head(10)









    Out[20]:






  
    
      
      words
    
  
  
    
      may
      may
    
    
      refer
      refer
    
    
      to
      to
    
    
      number
      number
    
    
      the
      the
    
    
      integ
      integer
    
    
      between
      between
    
    
      and
      and
    
    
      year
      year
    
    
      a
      a



In [21]:

    
# Number of rows, i.e. one per token occurrence collected across all documents.
len(vocab_frame)









    Out[21]:





358716



In [ ]:

Tf-idf and document similarity



In [24]:

    
from sklearn.feature_extraction.text import TfidfVectorizer

# Vectorizer settings:
#   max_df=0.01        -> ignore terms found in more than 1% of the documents
#   min_df=3           -> ignore terms found in fewer than 3 documents
#   tokenizer          -> use the notebook's tokenize_and_stem(), so features are stems
#   ngram_range=(1,3)  -> build unigrams, bigrams and trigrams
# NOTE(review): stop_words='english' is sklearn's built-in list of unstemmed
# words, while the tokenizer emits stems -- confirm the stop-word filtering
# behaves as intended for words whose stem differs from the word itself.
tfidf_vectorizer = TfidfVectorizer(max_df=0.01, min_df=3, 
                                   stop_words='english',
                                   tokenizer=tokenize_and_stem, ngram_range=(1,3))

# Fit on the truncated document texts; the result has one row per document
# and one column per retained term.
%time tfidf_matrix = tfidf_vectorizer.fit_transform(df.text)

print(tfidf_matrix.shape)









    



Wall time: 12.9 s
(835, 10574)



In [25]:

    
# Feature names in column order of tfidf_matrix.
# get_feature_names() was removed in scikit-learn 1.2, so prefer
# get_feature_names_out() when the installed version provides it and fall
# back to the old method otherwise. list() keeps `terms` a plain list either way.
if hasattr(tfidf_vectorizer, 'get_feature_names_out'):
    terms = list(tfidf_vectorizer.get_feature_names_out())
else:
    terms = list(tfidf_vectorizer.get_feature_names())



In [ ]:



In [ ]:



In [26]:

    
# Same row count as before the vectorizer ran: vocab_frame is not modified in between.
len(vocab_frame)









    Out[26]:





358716



In [27]:

    
# Feature names in column order of tfidf_matrix.
# get_feature_names() was removed in scikit-learn 1.2, so prefer
# get_feature_names_out() when the installed version provides it and fall
# back to the old method otherwise. list() keeps `terms` a plain list either way.
if hasattr(tfidf_vectorizer, 'get_feature_names_out'):
    terms = list(tfidf_vectorizer.get_feature_names_out())
else:
    terms = list(tfidf_vectorizer.get_feature_names())



In [28]:

    
# Arbitrary feature index used by the following cells to spot-check the vocabulary lookup.
idx = 1000
terms[idx]









    Out[28]:





u'barn'



In [29]:

    
# Spot-check a second feature.
# NOTE(review): features can contain spaces (ngram_range goes up to 3 and the
# cluster-naming cell splits terms on ' ') -- presumably space-joined stems.
terms[2001]









    Out[29]:





u'concep t'



In [30]:

    
# All original words recorded for the stem terms[idx].
# Uses label-based .loc: the .ix indexer was removed in pandas 1.0.
vocab_frame.loc[terms[idx]].head(5)









    Out[30]:






  
    
      
      words
    
  
  
    
      barn
      Barnes
    
    
      barn
      Barnes
    
    
      barn
      Barne
    
    
      barn
      Barnes
    
    
      barn
      Barnes



In [ ]:



In [ ]:

Let's cluster!



In [31]:

    
from sklearn.cluster import KMeans

num_clusters =30

km = KMeans(n_clusters=num_clusters)

%time km.fit(tfidf_matrix)

clusters = km.labels_.tolist()









    



Wall time: 1.44 s



In [32]:

    
# One label per document: should equal the number of rows of tfidf_matrix.
len(clusters)









    Out[32]:





835



In [ ]:

But what did we get?



In [33]:

    
# Attach each document's cluster id as a new 'cluster' column.
# The join is index-based; both frames carry the default 0..n-1 index.
cluster_column = pd.DataFrame({'cluster': clusters})
clustered = df.join(cluster_column)



In [34]:

    
# Preview the documents together with their assigned cluster id.
clustered.head()









    Out[34]:






  
    
      
      name
      text
      cluster
    
  
  
    
      0
      0
      0  may refer to:  0 (number) , the integer b...
      20
    
    
      1
      11.22.63
      11.22.63  is an American  science fiction ...
      9
    
    
      2
      2016
      2016 has been designated as:  Inte...
      8
    
    
      3
      4chan
      4chan  is an  English-language   image...
      24
    
    
      4
      8chan
      8chan,  also called  Infinitechan,  is a...
      28



In [ ]:



In [35]:

    
# Fitted centroids: a 2-D array with one row per cluster and one column per tf-idf feature.
km.cluster_centers_









    Out[35]:





array([[ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.00282989,  0.        ,  0.01752607, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.00734043,  0.        ,  0.        , ...,  0.00248845,
         0.        ,  0.        ],
       ..., 
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]])



In [ ]:



In [36]:

    
# For every centroid, the feature indices sorted by ascending weight
# (argsort works along the last axis), so the heaviest terms of a cluster
# sit at the END of its row -- the naming cell below reads the last six.
order_centroids = km.cluster_centers_.argsort()



In [37]:

    
# Inspect the sorted feature indices (one row per cluster).
order_centroids









    Out[37]:





array([[    0,  6917,  6918, ...,  4107,  1690,   898],
       [ 5286,  6824,  6825, ...,  1018,  7370,  4067],
       [ 5286,  6831,  6832, ...,  6916,  3260,  6915],
       ..., 
       [    0,  6938,  6940, ...,  2304, 10567,  3957],
       [    0,  6859,  6860, ...,  8891,  5964, 10326],
       [    0,  6950,  6951, ...,  1810,  9918,  6581]], dtype=int64)



In [ ]:



In [ ]:



In [ ]:



In [38]:

    
# Rows of vocab_frame whose stem equals terms[idx].
# Uses label-based .loc: the .ix indexer was removed in pandas 1.0.
term_words = vocab_frame.loc[terms[idx]]



In [39]:

    
# Most frequent original word behind the stem: value_counts() orders its
# result by descending count, so the first index entry is the winner.
word_counts = term_words['words'].value_counts()
word_counts.index[0]









    Out[39]:





u'Barnes'



In [ ]:



In [40]:

    
# Build a human-readable name for every cluster from its six heaviest terms.
cluster_names = []

for cluster_centeroids in order_centroids:
    words = []
    # Rows of order_centroids are sorted ascending, so the last six entries
    # are the six highest-weighted features of this centroid.
    for ind in cluster_centeroids[-6:]:
        # A feature may be an n-gram of space-joined stems: look up every
        # component stem, skipping any that never made it into vocab_frame
        # (label-based .loc raises KeyError on missing labels).
        stems = [stem for stem in terms[ind].split(' ') if stem in vocab_frame.index]
        # .loc replaces the .ix indexer, which was removed in pandas 1.0.
        word_counts = vocab_frame.loc[stems, 'words'].value_counts()

        # value_counts() is sorted by descending frequency; take the most
        # common original word, or fall back to the raw term if nothing matched.
        best_word = word_counts.index[0] if len(word_counts) else terms[ind]

        words.append(best_word)

    cluster_names.append(', '.join(words))



In [41]:

    
# Show the generated names; list position == cluster id.
cluster_names









    Out[41]:





[u'Republican, Julius, Senate, Hamlet, Cicero, Augustus',
 u'Eminem, punk, band, bassist, rabbit, guitar',
 u"Aristotle, ontology, metaphysics, 's, epistemology, Plato",
 u'psychologist, Netflix, Kosovo, schizophrenia, person, benzodiazepines',
 u'ether, methamphetamine, transgender, morphine, serotonin, amphetamine',
 u'Portugal, Latvia, Tunisia, Greenland, Finland, Norway',
 u'Nunavut, leopard, Uganda, Nile, Tanzania, Kenya',
 u'erosion, allotrope, nitrogen, aluminium, copper, sand',
 u'morning, sanctions, bin, Qatar, Afghan, sharia',
 u'Premis, Manhattan, algorithm, series, TV, syndicated',
 u'Michigan, Philadelphia, Boston, Massachusetts, Oregon, Colorado',
 u'Mali, caffeine, calculus, glucose, maize, Nigeria',
 u'Hollywood, Volkswagen, Bengal, bullying, Compton, gang',
 u'pig, pH, shark, horses, whale, elephants',
 u'Viking, Vancouver, eugenics, Guyana, Walmart, Seattle',
 u'Anthrax, cough, Panama, Pornhub, syphilis, pneumonia',
 u'Venus, Neptune, bisexual, Moses, Amsterdam, Hulk',
 u'FIFA, world, Ronaldo, WWE, Championship, Wrestling',
 u"Moscow, ROC, 's, Taiwan, prefecture, samurai",
 u'fox, kangaroo, jaguar, Tasmania, marsupial, dinosaurs',
 u'HDMI, USB, processor, connector, iPhone, REDIRECT',
 u'Kazakhstan, Vienna, Slovakia, Hungarian, Croatia, Prussia',
 u'Gmail, utility, database, Skype, HTML, JavaScript',
 u'Lebanon, Mauritius, eternal, Nepal, Hinduism, Judaism',
 u"film, appeal, Awards, 4chan, Burlesque, 's",
 u'Greek, helium, dragon, Athena, Artemis, Zeus',
 u'Jupiter, Saturn, fauna, Biosynthesi, ratio, ammonia',
 u'bonobo, alligator, chimpanzee, crocodile, Zoo, gorilla',
 u'X-Force, Batman, Crusade, Spider-Man, mutants, X-Men',
 u'masturbation, vulva, penis, clitoris, vaginal, orgasm']



In [44]:

    
# Translate every row's numeric cluster id into its generated name.
clustered['cluster_name'] = [cluster_names[label] for label in clustered['cluster']]



In [45]:

    
# Eyeball the first 60 documents next to the name of the cluster they landed in.
clustered.head(60)









    Out[45]:






  
    
      
      name
      text
      cluster
      cluster_name
    
  
  
    
      0
      0
      0  may refer to:  0 (number) , the integer b...
      20
      HDMI, USB, processor, connector, iPhone, REDIRECT
    
    
      1
      11.22.63
      11.22.63  is an American  science fiction ...
      9
      Premis, Manhattan, algorithm, series, TV, synd...
    
    
      2
      2016
      2016 has been designated as:  Inte...
      8
      morning, sanctions, bin, Qatar, Afghan, sharia
    
    
      3
      4chan
      4chan  is an  English-language   image...
      24
      film, appeal, Awards, 4chan, Burlesque, 's
    
    
      4
      8chan
      8chan,  also called  Infinitechan,  is a...
      28
      X-Force, Batman, Crusade, Spider-Man, mutants,...
    
    
      5
      Aaliyah
      Aaliyah Dana Haughton  ( ; January 16,...
      24
      film, appeal, Awards, 4chan, Burlesque, 's
    
    
      6
      ABBA
      ABBA  (stylised  ᗅᗺᗷᗅ ;  ) were a Swed...
      1
      Eminem, punk, band, bassist, rabbit, guitar
    
    
      7
      Abraham
      Abraham  (   ;  ,  ), originally  Abra...
      23
      Lebanon, Mauritius, eternal, Nepal, Hinduism, ...
    
    
      8
      Achilles
      Achilles and the  Nereid  Cymothoe: Attic ...
      18
      Moscow, ROC, 's, Taiwan, prefecture, samurai
    
    
      9
      Adderall
      Adderall  is a  co mbination drug  con...
      4
      ether, methamphetamine, transgender, morphine,...
    
    
      10
      Adele
      Adele Laurie Blue Adkins <ref name...
      24
      film, appeal, Awards, 4chan, Burlesque, 's
    
    
      11
      Aerosmith
      Aerosmith  is an American  rock  band, som...
      1
      Eminem, punk, band, bassist, rabbit, guitar
    
    
      12
      Afghanistan
      fobox country   Afghanistan    ( Pasht...
      8
      morning, sanctions, bin, Qatar, Afghan, sharia
    
    
      13
      Africa
      Map of Africa   Africa  is the world's s...
      11
      Mali, caffeine, calculus, glucose, maize, Nigeria
    
    
      14
      Agnosticism
      Agnosticism  is the view that the ...
      2
      Aristotle, ontology, metaphysics, 's, epistemo...
    
    
      15
      Airbnb
      Airbnb  is an  online marketplace   that ena...
      22
      Gmail, utility, database, Skype, HTML, JavaScript
    
    
      16
      AK-47
      The  AK-47  (also known as the  Kalashniko...
      12
      Hollywood, Volkswagen, Bengal, bullying, Compt...
    
    
      17
      Akihito
      ( ) is the reigning  , the 125th Empe...
      18
      Moscow, ROC, 's, Taiwan, prefecture, samurai
    
    
      18
      Akon
      Aliaume Damala Badara Akon Thiam  (born ...
      27
      bonobo, alligator, chimpanzee, crocodile, Zoo,...
    
    
      19
      Al-Qaeda
      Al-Qaeda  (  or  IPAc-en ;    ,  ,...
      8
      morning, sanctions, bin, Qatar, Afghan, sharia
    
    
      20
      Alabama
      Alabama  ( ) is a  state  located ...
      10
      Michigan, Philadelphia, Boston, Massachusetts,...
    
    
      21
      Alaska
      Templa te:Coord     Alaska  ( ) is a...
      10
      Michigan, Philadelphia, Boston, Massachusetts,...
    
    
      22
      Albania
      Albania  ( ,  ;  ;  ), officially the ...
      10
      Michigan, Philadelphia, Boston, Massachusetts,...
    
    
      23
      Alcohol
      180px| Ball-and-stick model  of the hy...
      11
      Mali, caffeine, calculus, glucose, maize, Nigeria
    
    
      24
      Algeria
      fobox country     Algeria  (   ;    ...
      5
      Portugal, Latvia, Tunisia, Greenland, Finland,...
    
    
      25
      Algorithm
      Flow chart  of an algorithm ( Euclid's a...
      9
      Premis, Manhattan, algorithm, series, TV, synd...
    
    
      26
      Alligator
      An  alligator  is a  crocodilian  in the...
      27
      bonobo, alligator, chimpanzee, crocodile, Zoo,...
    
    
      27
      Alprazolam
      Alprazolam    or  , available under the tr...
      3
      psychologist, Netflix, Kosovo, schizophrenia, ...
    
    
      28
      Aluminium
      Aluminium  (in  Common wealth English ...
      7
      erosion, allotrope, nitrogen, aluminium, coppe...
    
    
      29
      Amazon.com
      Amazon.com, Inc.  (  or  ), often referr...
      14
      Viking, Vancouver, eugenics, Guyana, Walmart, ...
    
    
      30
      Ammonia
      Ammonia  or  azane  is a  compound  of...
      26
      Jupiter, Saturn, fauna, Biosynthesi, ratio, am...
    
    
      31
      Amphetamine
      Amphetamine  (contracted from  ) is ...
      4
      ether, methamphetamine, transgender, morphine,...
    
    
      32
      Amsterdam
      Amsterdam  ( ;     ) is the  capital  ...
      16
      Venus, Neptune, bisexual, Moses, Amsterdam, Hulk
    
    
      33
      Anberlin
      Anberlin   was an American rock band forme...
      1
      Eminem, punk, band, bassist, rabbit, guitar
    
    
      34
      Andorra
      Andorra  ( ;  ,  ), officially the...
      5
      Portugal, Latvia, Tunisia, Greenland, Finland,...
    
    
      35
      Angola
      Angola   , officially the  Republic ...
      6
      Nunavut, leopard, Uganda, Nile, Tanzania, Kenya
    
    
      36
      Anhedonia
      Anhedonia  (   ;  Greek :  ἀ ν-  an -, "wi...
      3
      psychologist, Netflix, Kosovo, schizophrenia, ...
    
    
      37
      Animal
      Animals  are  multicellular ,  euk...
      13
      pig, pH, shark, horses, whale, elephants
    
    
      38
      Anime
      is  Japanese  hand-drawn or compute...
      9
      Premis, Manhattan, algorithm, series, TV, synd...
    
    
      39
      Antarctica
      Antarctica  (US English  , UK Englis...
      26
      Jupiter, Saturn, fauna, Biosynthesi, ratio, am...
    
    
      40
      Anthrax
      Anthrax  is an infection caused by the b...
      15
      Anthrax, cough, Panama, Pornhub, syphilis, pne...
    
    
      41
      Anthropology
      thumb|5 volume Encyclopedia of Anthropol...
      2
      Aristotle, ontology, metaphysics, 's, epistemo...
    
    
      42
      Anubis
      Anubis  (  or  ;    ) is the  Greek  nam...
      19
      fox, kangaroo, jaguar, Tasmania, marsupial, di...
    
    
      43
      Apartheid
      Templat e:Segregation     Apartheid  (...
      5
      Portugal, Latvia, Tunisia, Greenland, Finland,...
    
    
      44
      Aphrodite
      Aphrodite  ( Template :IPAc-en  ...
      25
      Greek, helium, dragon, Athena, Artemis, Zeus
    
    
      45
      Apollo
      Apollo  ( Attic ,  Ionic , and...
      25
      Greek, helium, dragon, Athena, Artemis, Zeus
    
    
      46
      Apple
      The  apple tree  ( Malus domestic...
      11
      Mali, caffeine, calculus, glucose, maize, Nigeria
    
    
      47
      AR-15
      thumb|Modified AR-15  The  AR-15  is a l...
      11
      Mali, caffeine, calculus, glucose, maize, Nigeria
    
    
      48
      Archimedes
      Archimedes of Syracuse  ( ;    ;   BC ...
      0
      Republican, Julius, Senate, Hamlet, Cicero, Au...
    
    
      49
      Argentina
      Argentina  ( ;  ), officially the  A...
      5
      Portugal, Latvia, Tunisia, Greenland, Finland,...
    
    
      50
      Aristotle
      Aristotle  ( ;      ,  Aristotélēs ;...
      2
      Aristotle, ontology, metaphysics, 's, epistemo...
    
    
      51
      Arizona
      tate symbols   thumb| Saguaro  cactus ...
      10
      Michigan, Philadelphia, Boston, Massachusetts,...
    
    
      52
      Armadillo
      thumb| Nine-banded armadillo  skeleton. ...
      19
      fox, kangaroo, jaguar, Tasmania, marsupial, di...
    
    
      53
      Armenia
      Armenia  ( Ac-en ,  ;    ,  tr.   Haya...
      21
      Kazakhstan, Vienna, Slovakia, Hungarian, Croat...
    
    
      54
      Arrowverse
      The  Arrowverse  is a  shared   fictional un...
      20
      HDMI, USB, processor, connector, iPhone, REDIRECT
    
    
      55
      Art
      Clockwise from upper left: a   self-po...
      19
      fox, kangaroo, jaguar, Tasmania, marsupial, di...
    
    
      56
      Artemis
      Temp late:Ancient Greek religion   Art...
      25
      Greek, helium, dragon, Athena, Artemis, Zeus
    
    
      57
      Arthropod
      An  arthropod  (from Greek  arthro- , joint +...
      29
      masturbation, vulva, penis, clitoris, vaginal,...
    
    
      58
      Aruba
      Aruba  (   ;  ) is a  constituent coun...
      6
      Nunavut, leopard, Uganda, Nile, Tanzania, Kenya
    
    
      59
      Asbestos
      right|thumb|Asbestos   thumb|Asbestos ...
      7
      erosion, allotrope, nitrogen, aluminium, coppe...



In [ ]:



In [ ]:



In [ ]:



In [46]:

    
from sklearn.metrics.pairwise import cosine_similarity
# Pairwise cosine distance between all documents (1 - similarity);
# the MDS cell consumes it as a precomputed dissimilarity matrix.
dist = 1 - cosine_similarity(tfidf_matrix)



In [47]:

    
import os  # for os.path.basename

import matplotlib.pyplot as plt
import matplotlib as mpl

from sklearn.manifold import MDS

# Project the documents onto a plane for plotting:
# - two components, as we're plotting points in a two-dimensional plane
# - "precomputed" because we provide a distance matrix
# - `random_state` is fixed so the plot is reproducible.
mds = MDS(n_components=2, dissimilarity="precomputed", random_state=1)

pos = mds.fit_transform(dist)  # shape (n_samples, n_components)

# Column 0 holds the x coordinates, column 1 the y coordinates.
xs, ys = pos[:, 0], pos[:, 1]



In [48]:

    
import seaborn as sns



In [49]:

    
# One colour per cluster, as a list indexed by cluster id.
# The palette is sized by the number of clusters (not by len(clusters), which
# is the number of documents), and 'husl' is used because it yields evenly
# spaced distinct hues, whereas the default palette cycles through a handful
# of colours and would give several clusters the same colour.
cluster_colors = sns.color_palette('husl', n_colors=num_clusters)



In [50]:

    
%matplotlib inline



In [51]:

    
# Data frame with the MDS coordinates plus each document's cluster id and title.
# It gets its own name so the source `df` is not overwritten; rebinding `df`
# here made the cell fail on `df.name` when run a second time.
plot_df = pd.DataFrame(dict(x=xs, y=ys, label=clusters, title=df.name))

# group by cluster
groups = plot_df.groupby('label')

# set up plot
fig, ax = plt.subplots(figsize=(30, 30))  # set size
ax.margins(0.05)  # Optional, just adds 5% padding to the autoscaling
ax.set_aspect('auto')

# Hide ticks and tick labels on both axes. This is done once, outside the
# loop, and with real booleans: current matplotlib no longer interprets the
# string 'off' as False.
ax.tick_params(
    axis='x',           # changes apply to the x-axis
    which='both',       # both major and minor ticks are affected
    bottom=False,       # ticks along the bottom edge are off
    top=False,          # ticks along the top edge are off
    labelbottom=False)  # no tick labels along the bottom edge
ax.tick_params(
    axis='y',           # changes apply to the y-axis
    which='both',       # both major and minor ticks are affected
    left=False,         # ticks along the left edge are off
    right=False,        # ticks along the right edge are off
    labelleft=False)    # no tick labels along the left edge

# One plot layer per cluster; the group key is the cluster id, which indexes
# both the cluster_names and the cluster_colors lists.
for name, group in groups:
    ax.plot(group.x, group.y, marker='o', linestyle='', ms=12,
            label=cluster_names[name], color=cluster_colors[name], mec='none')

ax.legend(numpoints=1)  # show legend with only 1 point

# Label every point with its document title.
for x, y, title in zip(plot_df['x'], plot_df['y'], plot_df['title']):
    ax.text(x, y, title, size=8)

#plt.show() #show the plot

# Save the figure to disk.
plt.savefig('clusters_small_noaxes1.png', dpi=300)



In [ ]:

	words
may	may
refer	refer
to	to
number	number
the	the
integ	integer
between	between
and	and
year	year
a	a

	name	text	cluster
0	0	0 may refer to: 0 (number) , the integer b...	20
1	11.22.63	11.22.63 is an American science fiction ...	9
2	2016	2016 has been designated as: Inte...	8
3	4chan	4chan is an English-language image...	24
4	8chan	8chan, also called Infinitechan, is a...	28

	name	text	cluster	cluster_name
0	0	0 may refer to: 0 (number) , the integer b...	20	HDMI, USB, processor, connector, iPhone, REDIRECT
1	11.22.63	11.22.63 is an American science fiction ...	9	Premis, Manhattan, algorithm, series, TV, synd...
2	2016	2016 has been designated as: Inte...	8	morning, sanctions, bin, Qatar, Afghan, sharia
3	4chan	4chan is an English-language image...	24	film, appeal, Awards, 4chan, Burlesque, 's
4	8chan	8chan, also called Infinitechan, is a...	28	X-Force, Batman, Crusade, Spider-Man, mutants,...
5	Aaliyah	Aaliyah Dana Haughton ( ; January 16,...	24	film, appeal, Awards, 4chan, Burlesque, 's
6	ABBA	ABBA (stylised ᗅᗺᗷᗅ ; ) were a Swed...	1	Eminem, punk, band, bassist, rabbit, guitar
7	Abraham	Abraham ( ; , ), originally Abra...	23	Lebanon, Mauritius, eternal, Nepal, Hinduism, ...
8	Achilles	Achilles and the Nereid Cymothoe: Attic ...	18	Moscow, ROC, 's, Taiwan, prefecture, samurai
9	Adderall	Adderall is a co mbination drug con...	4	ether, methamphetamine, transgender, morphine,...
10	Adele	Adele Laurie Blue Adkins <ref name...	24	film, appeal, Awards, 4chan, Burlesque, 's
11	Aerosmith	Aerosmith is an American rock band, som...	1	Eminem, punk, band, bassist, rabbit, guitar
12	Afghanistan	fobox country Afghanistan ( Pasht...	8	morning, sanctions, bin, Qatar, Afghan, sharia
13	Africa	Map of Africa Africa is the world's s...	11	Mali, caffeine, calculus, glucose, maize, Nigeria
14	Agnosticism	Agnosticism is the view that the ...	2	Aristotle, ontology, metaphysics, 's, epistemo...
15	Airbnb	Airbnb is an online marketplace that ena...	22	Gmail, utility, database, Skype, HTML, JavaScript
16	AK-47	The AK-47 (also known as the Kalashniko...	12	Hollywood, Volkswagen, Bengal, bullying, Compt...
17	Akihito	( ) is the reigning , the 125th Empe...	18	Moscow, ROC, 's, Taiwan, prefecture, samurai
18	Akon	Aliaume Damala Badara Akon Thiam (born ...	27	bonobo, alligator, chimpanzee, crocodile, Zoo,...
19	Al-Qaeda	Al-Qaeda ( or IPAc-en ; , ,...	8	morning, sanctions, bin, Qatar, Afghan, sharia
20	Alabama	Alabama ( ) is a state located ...	10	Michigan, Philadelphia, Boston, Massachusetts,...
21	Alaska	Templa te:Coord Alaska ( ) is a...	10	Michigan, Philadelphia, Boston, Massachusetts,...
22	Albania	Albania ( , ; ; ), officially the ...	10	Michigan, Philadelphia, Boston, Massachusetts,...
23	Alcohol	180px\| Ball-and-stick model of the hy...	11	Mali, caffeine, calculus, glucose, maize, Nigeria
24	Algeria	fobox country Algeria ( ; ...	5	Portugal, Latvia, Tunisia, Greenland, Finland,...
25	Algorithm	Flow chart of an algorithm ( Euclid's a...	9	Premis, Manhattan, algorithm, series, TV, synd...
26	Alligator	An alligator is a crocodilian in the...	27	bonobo, alligator, chimpanzee, crocodile, Zoo,...
27	Alprazolam	Alprazolam or , available under the tr...	3	psychologist, Netflix, Kosovo, schizophrenia, ...
28	Aluminium	Aluminium (in Common wealth English ...	7	erosion, allotrope, nitrogen, aluminium, coppe...
29	Amazon.com	Amazon.com, Inc. ( or ), often referr...	14	Viking, Vancouver, eugenics, Guyana, Walmart, ...
30	Ammonia	Ammonia or azane is a compound of...	26	Jupiter, Saturn, fauna, Biosynthesi, ratio, am...
31	Amphetamine	Amphetamine (contracted from ) is ...	4	ether, methamphetamine, transgender, morphine,...
32	Amsterdam	Amsterdam ( ; ) is the capital ...	16	Venus, Neptune, bisexual, Moses, Amsterdam, Hulk
33	Anberlin	Anberlin was an American rock band forme...	1	Eminem, punk, band, bassist, rabbit, guitar
34	Andorra	Andorra ( ; , ), officially the...	5	Portugal, Latvia, Tunisia, Greenland, Finland,...
35	Angola	Angola , officially the Republic ...	6	Nunavut, leopard, Uganda, Nile, Tanzania, Kenya
36	Anhedonia	Anhedonia ( ; Greek : ἀ ν- an -, "wi...	3	psychologist, Netflix, Kosovo, schizophrenia, ...
37	Animal	Animals are multicellular , euk...	13	pig, pH, shark, horses, whale, elephants
38	Anime	is Japanese hand-drawn or compute...	9	Premis, Manhattan, algorithm, series, TV, synd...
39	Antarctica	Antarctica (US English , UK Englis...	26	Jupiter, Saturn, fauna, Biosynthesi, ratio, am...
40	Anthrax	Anthrax is an infection caused by the b...	15	Anthrax, cough, Panama, Pornhub, syphilis, pne...
41	Anthropology	thumb\|5 volume Encyclopedia of Anthropol...	2	Aristotle, ontology, metaphysics, 's, epistemo...
42	Anubis	Anubis ( or ; ) is the Greek nam...	19	fox, kangaroo, jaguar, Tasmania, marsupial, di...
43	Apartheid	Templat e:Segregation Apartheid (...	5	Portugal, Latvia, Tunisia, Greenland, Finland,...
44	Aphrodite	Aphrodite ( Template :IPAc-en ...	25	Greek, helium, dragon, Athena, Artemis, Zeus
45	Apollo	Apollo ( Attic , Ionic , and...	25	Greek, helium, dragon, Athena, Artemis, Zeus
46	Apple	The apple tree ( Malus domestic...	11	Mali, caffeine, calculus, glucose, maize, Nigeria
47	AR-15	thumb\|Modified AR-15 The AR-15 is a l...	11	Mali, caffeine, calculus, glucose, maize, Nigeria
48	Archimedes	Archimedes of Syracuse ( ; ; BC ...	0	Republican, Julius, Senate, Hamlet, Cicero, Au...
49	Argentina	Argentina ( ; ), officially the A...	5	Portugal, Latvia, Tunisia, Greenland, Finland,...
50	Aristotle	Aristotle ( ; , Aristotélēs ;...	2	Aristotle, ontology, metaphysics, 's, epistemo...
51	Arizona	tate symbols thumb\| Saguaro cactus ...	10	Michigan, Philadelphia, Boston, Massachusetts,...
52	Armadillo	thumb\| Nine-banded armadillo skeleton. ...	19	fox, kangaroo, jaguar, Tasmania, marsupial, di...
53	Armenia	Armenia ( Ac-en , ; , tr. Haya...	21	Kazakhstan, Vienna, Slovakia, Hungarian, Croat...
54	Arrowverse	The Arrowverse is a shared fictional un...	20	HDMI, USB, processor, connector, iPhone, REDIRECT
55	Art	Clockwise from upper left: a self-po...	19	fox, kangaroo, jaguar, Tasmania, marsupial, di...
56	Artemis	Temp late:Ancient Greek religion Art...	25	Greek, helium, dragon, Athena, Artemis, Zeus
57	Arthropod	An arthropod (from Greek arthro- , joint +...	29	masturbation, vulva, penis, clitoris, vaginal,...
58	Aruba	Aruba ( ; ) is a constituent coun...	6	Nunavut, leopard, Uganda, Nile, Tanzania, Kenya
59	Asbestos	right\|thumb\|Asbestos thumb\|Asbestos ...	7	erosion, allotrope, nitrogen, aluminium, coppe...