Artificial Intelligence Publications


In [1]:
# Print the author name, the run date, the Python/IPython versions and the
# installed plotly and pandas versions for this notebook run.
%load_ext watermark
%watermark -a 'Sebastian Raschka' -d -v -p plotly,pandas


Sebastian Raschka 08/31/2015 

CPython 3.4.3
IPython 4.0.0

plotly 1.8.3
pandas 0.16.2

Preparing the Data


In [2]:
import pandas as pd

# Country-name -> country-code lookup table. Only the two columns needed for
# the join are kept, renamed to the capitalisation used by the ranking tables.
templ = (
    pd.read_csv('./data/template.csv')[['COUNTRY', 'CODE']]
    .rename(columns={'COUNTRY': 'Country', 'CODE': 'Code'})
)
templ.head()


Out[2]:
Country Code
0 Afghanistan AFG
1 Albania ALB
2 Algeria DZA
3 American Samoa ASM
4 Andorra AND

In [3]:
# Per-country ranking table for AI publications (a SCImago export, judging by
# the file name).
ai_rank_csv = './data/scimagojr_ai_countryrank.csv'
rank_ai = pd.read_csv(ai_rank_csv)
rank_ai.head()


Out[3]:
Rank Country Documents Citable documents Citations Self-Citations Citations per Document H index
0 1 China 80685 0 286809 179188 11.22 150
1 2 United States 65565 0 1063805 339426 23.22 345
2 3 Japan 27049 0 139799 45035 8.44 116
3 4 United Kingdom 22460 0 266440 55920 18.00 174
4 5 Germany 17364 0 146713 30620 14.99 142

In [4]:
# Citations per document after removing each country's self-citations.
external_citations = rank_ai['Citations'] - rank_ai['Self-Citations']
rank_ai['Clean Citation/Doc'] = external_citations / rank_ai['Documents']

# Narrow the table to the columns used by the later merge and plots.
kept_columns = [
    'Country',
    'Documents',
    'Citations per Document',
    'Clean Citation/Doc',
]
rank_ai = rank_ai[kept_columns]
rank_ai.head()


Out[4]:
Country Documents Citations per Document Clean Citation/Doc
0 China 80685 11.22 1.333841
1 United States 65565 23.22 11.048257
2 Japan 27049 8.44 3.503420
3 United Kingdom 22460 18.00 9.373108
4 Germany 17364 14.99 6.685844

In [5]:
# Document counts per country from the all-subjects ranking file; the count
# column is renamed so it does not collide with the AI-only 'Documents' column.
rank_all = (
    pd.read_csv('./data/scimagojr_all_countryrank.csv')[['Country', 'Documents']]
    .rename(columns={'Documents': 'Documents_all'})
)

# Join the AI ranking with the overall counts and express AI output as a
# percentage of each country's total output.
rank = pd.merge(rank_ai, rank_all, on='Country')
ai_fraction = rank['Documents'] / rank['Documents_all']
rank['AI/All Documents'] = ai_fraction * 100

rank.head()


Out[5]:
Country Documents Citations per Document Clean Citation/Doc Documents_all AI/All Documents
0 China 80685 11.22 1.333841 3617355 2.230497
1 United States 65565 23.22 11.048257 8626193 0.760069
2 Japan 27049 8.44 3.503420 2074872 1.303647
3 United Kingdom 22460 18.00 9.373108 2397817 0.936685
4 Germany 17364 14.99 6.685844 2176860 0.797663

In [6]:
# Attach the country code to every ranked country and persist the result.
# The merge is an inner join on the country name, so a country whose name is
# spelled differently in the two sources is dropped from `df`. Report those
# names instead of losing them silently; the merged table itself is unchanged.
missing_code = rank.loc[~rank['Country'].isin(templ['Country']), 'Country'].tolist()
if missing_code:
    print('No country code found (dropped from the merged table):', missing_code)

df = rank.merge(templ, on='Country')
df.to_csv('./data/citations.csv', index=False)

Number of "Artificial Intelligence" Publications from 1996 to 2014


In [7]:
import plotly.plotly as py
from plotly.graph_objs import Annotation, Annotations
import pandas as pd

# Read the merged country table back from the CSV file.
df = pd.read_csv('./data/citations.csv')

# Colour ramp given as [position, colour] stops; `reversescale` on the trace
# flips its direction.
blue_stops = [
    [0, "rgb(5, 10, 172)"],
    [0.35, "rgb(40, 60, 190)"],
    [0.5, "rgb(70, 100, 245)"],
    [0.6, "rgb(90, 120, 245)"],
    [0.7, "rgb(106, 137, 247)"],
    [1, "rgb(220, 220, 220)"],
]

# Thin grey outline drawn around every country.
country_outline = {'line': {'color': 'rgb(180,180,180)', 'width': 0.5}}

publications_bar = {
    'autotick': False,
    'tickprefix': '',
    'title': 'Number of Publications',
}

# One choropleth trace: countries are located by code, coloured by the number
# of AI documents and labelled with the country name.
# NOTE(review): tick0/dtick are set on the trace rather than inside `colorbar`;
# the figure is sent with validate=False, so confirm they have any effect.
documents_trace = {
    'type': 'choropleth',
    'locations': df['Code'],
    'z': df['Documents'],
    'text': df['Country'],
    'colorscale': blue_stops,
    'autocolorscale': False,
    'reversescale': True,
    'marker': country_outline,
    'tick0': 0,
    'zmin': 0,
    'dtick': 1000,
    'colorbar': publications_bar,
}
data = [documents_trace]

# Source/author credit pinned to the bottom-left corner of the paper area.
credit = Annotation(
    text=('Data source: http://www.scimagojr.com<br>'
          'Author: Sebastian Raschka<br>'
          '(sebastianraschka.com, @rasbt)'),
    xref='paper',
    yref='paper',
    align='left',
    x=0,
    y=0,
    yanchor='bottom',
    showarrow=False)

layout = {
    'title': 'Number of "Artificial Intelligence" Publications from 1996 to 2014',
    'geo': {
        'showframe': False,
        'showcoastlines': False,
        'projection': {'type': 'Mercator'},
    },
    'annotations': Annotations([credit]),
}

fig = {'data': data, 'layout': layout}
#py.image.save_as({'data': data}, './images/ai_publications_1.svg')
py.iplot(fig, validate=False, filename='ai-publications-chloropleth-1')


Out[7]:

Number of Citations/Publication (self-citations included)


In [8]:
# World map coloured by citations per AI publication, self-citations included.
# NOTE(review): tick0/dtick are set on the trace rather than inside `colorbar`;
# the figure is sent with validate=False, so confirm they have any effect.
data = [dict(
        type='choropleth',
        locations=df['Code'],
        z=df['Citations per Document'],
        text=df['Country'],
        colorscale=[[0,"rgb(5, 10, 172)"],
                    [0.35,"rgb(40, 60, 190)"],
                    [0.5,"rgb(70, 100, 245)"],
                    [0.6,"rgb(90, 120, 245)"],
                    [0.7,"rgb(106, 137, 247)"],
                    [1,"rgb(220, 220, 220)"]],
        autocolorscale=False,
        reversescale=True,
        marker=dict(line=dict(color='rgb(180,180,180)',width=0.5)),
        tick0=0,
        zmin=0,
        dtick=1000,
        colorbar=dict(
            autotick=False,
            tickprefix='',
            title='Number of Citations/Publication<br>(self-citations included)'))]

layout=dict(
    # Title wording fixed: "Number Citations" -> "Number of Citations".
    title='Number of Citations per Publication in "Artificial Intelligence" Research from 1996 to 2014'\
           '<br>(self-citations included)',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection=dict(type='Mercator')),
    annotations=Annotations([
        Annotation(
            text='Data source: http://www.scimagojr.com<br>'\
                  'Author: Sebastian Raschka<br>'\
                  '(sebastianraschka.com, @rasbt)',
            # Anchor both coordinates to the paper, matching the annotation
            # of the publications-count map, instead of relying on the
            # default x reference.
            xref='paper',
            yref='paper',
            align='left',
            x=0,
            y=0,
            yanchor='bottom',
            showarrow=False)]))

fig = dict(data=data, layout=layout)
#py.image.save_as({'data': data}, './images/ai_publications_2.svg')
py.iplot(fig, validate=False, filename='ai-publications-chloropleth-2')


Out[8]:

Number of Citations/Publication
(self-citations excluded)


In [9]:
# World map coloured by citations per AI publication, self-citations excluded.
# NOTE(review): tick0/dtick are set on the trace rather than inside `colorbar`;
# the figure is sent with validate=False, so confirm they have any effect.
data = [dict(
        type='choropleth',
        locations=df['Code'],
        z=df['Clean Citation/Doc'],
        text=df['Country'],
        colorscale=[[0,"rgb(5, 10, 172)"],
                    [0.35,"rgb(40, 60, 190)"],
                    [0.5,"rgb(70, 100, 245)"],
                    [0.6,"rgb(90, 120, 245)"],
                    [0.7,"rgb(106, 137, 247)"],
                    [1,"rgb(220, 220, 220)"]],
        autocolorscale=False,
        reversescale=True,
        marker=dict(line=dict(color='rgb(180,180,180)',width=0.5)),
        tick0=0,
        zmin=0,
        dtick=1000,
        colorbar=dict(
            autotick=False,
            tickprefix='',
            title='Number of Citations/Publication<br>(self-citations excluded)'))]

layout=dict(
    # Title wording fixed: "Number Citations" -> "Number of Citations".
    title='Number of Citations per Publication in "Artificial Intelligence" Research from 1996 to 2014'\
           '<br>(self-citations excluded)',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection=dict(type='Mercator')),
    annotations=Annotations([
        Annotation(
            text='Data source: http://www.scimagojr.com<br>'\
                  'Author: Sebastian Raschka<br>'\
                  '(sebastianraschka.com, @rasbt)',
            # Anchor both coordinates to the paper, matching the annotation
            # of the publications-count map, instead of relying on the
            # default x reference.
            xref='paper',
            yref='paper',
            align='left',
            x=0,
            y=0,
            yanchor='bottom',
            showarrow=False)]))

fig = dict(data=data, layout=layout)
#py.image.save_as({'data': data}, './images/ai_publications_3.svg')
py.iplot(fig, validate=False, filename='ai-publications-chloropleth-3')


Out[9]:

AI Publications per Total Publications


In [10]:
# World map coloured by the percentage of each country's publications that
# are AI publications.
# NOTE(review): tick0/dtick are set on the trace rather than inside `colorbar`;
# the figure is sent with validate=False, so confirm they have any effect.
data = [dict(
        type='choropleth',
        locations=df['Code'],
        z=df['AI/All Documents'],
        text=df['Country'],
        colorscale=[[0,"rgb(5, 10, 172)"],
                    [0.35,"rgb(40, 60, 190)"],
                    [0.5,"rgb(70, 100, 245)"],
                    [0.6,"rgb(90, 120, 245)"],
                    [0.7,"rgb(106, 137, 247)"],
                    [1,"rgb(220, 220, 220)"]],
        autocolorscale=False,
        reversescale=True,
        marker=dict(line=dict(color='rgb(180,180,180)',width=0.5)),
        tick0=0,
        zmin=0,
        dtick=1000,
        colorbar=dict(
            autotick=False,
            tickprefix='',
            title='AI/all publications in percent'))]

layout=dict(
    title='Percentage of Publications in "Artificial Intelligence" Research from 1996 to 2014',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection=dict(type='Mercator')),
    annotations=Annotations([
        Annotation(
            text='Data source: http://www.scimagojr.com<br>'\
                  'Author: Sebastian Raschka<br>'\
                  '(sebastianraschka.com, @rasbt)',
            # Anchor both coordinates to the paper, matching the annotation
            # of the publications-count map, instead of relying on the
            # default x reference.
            xref='paper',
            yref='paper',
            align='left',
            x=0,
            y=0,
            yanchor='bottom',
            showarrow=False)]))

fig = dict(data=data, layout=layout)
#py.image.save_as({'data': data}, './images/ai_publications_4.svg')
py.iplot(fig, validate=False, filename='ai-publications-chloropleth-4')


Out[10]:

Scatterplots


In [11]:
import plotly.plotly as py
from plotly.graph_objs import *

# Scatter plot of AI output against citation impact for the largest producers.
# One boolean mask selects countries with at least 10k AI documents and is
# applied to x, y AND the text labels, so every marker is labelled with its own
# country no matter how the rows of `df` are ordered.
large_producers = df['Documents'] >= 10000

trace0 = Scatter(
    x=df.loc[large_producers, 'Documents'],
    y=df.loc[large_producers, 'Clean Citation/Doc'],
    mode='markers+text',
    text=df.loc[large_producers, 'Country'],
    textposition='top center',
    textfont=Font(family='Arial'),
    marker=Marker(size=12),)


data = Data([trace0])
layout = Layout(
    xaxis=XAxis(autorange=True, title='Number of A.I. documents published'),
    # The plotted value is citations per document; the label now says so and
    # has its closing parenthesis.
    yaxis=YAxis(autorange=True,
                title='Number of citations per document (self-citations excluded)'),
    legend=Legend(
        y=0.5,
        yref='paper',
        font=Font(size=18)),
    annotations=Annotations([
        # Source/author credit.
        # NOTE(review): no xref is given here (the second annotation sets
        # xref='paper'), so x=-0.5 may be read in x-axis data coordinates --
        # confirm the intended placement.
        Annotation(
            text='Data source: http://www.scimagojr.com<br>'\
                  'Author: Sebastian Raschka<br>'\
                  '(sebastianraschka.com, @rasbt)',
            yref='paper',
            align='left',
            x=-0.5,
            y=-0.2,
            yanchor='bottom',
            showarrow=False,
            font={'size': 7}),

        # Footnote for the asterisk in the title, centred above the plot.
        Annotation(
            text='*Countries with < 10k publications excluded.',
            yref='paper',
            xref='paper',
            align='center',
            y=1.05,
            x=0.5,
            yanchor='bottom',
            showarrow=False,
            font={'size': 12})]),
    title='A.I. Publications By Country* from 1996-2014',)

fig = Figure(data=data, layout=layout)
py.iplot(fig, filename='ai-publications-scatter-1')


Out[11]:

In [ ]:


In [ ]: