In [1]:
from datetime import datetime
# Wall-clock (UTC) timestamp taken before any work starts; the final cell
# subtracts it from the current time to report the total processing time.
start = datetime.utcnow()
In [2]:
import json
import os
from urllib.request import urlopen

import numpy as np
import pandas as pd
/home/ednilson/.virtualenvs/jupyter/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
return f(*args, **kwds)
In [3]:
# ArticleMeta endpoint that lists the identifiers of the SciELO collections.
AMC_URL = "http://articlemeta.scielo.org/api/v1/collection/identifiers/"
# Use the response as a context manager so the HTTP connection is closed
# deterministically instead of being left open until garbage collection.
with urlopen(AMC_URL) as amc_response:
    amc_data = pd.DataFrame(json.load(amc_response))
# NOTE(review): this prints one more than the number of rows fetched
# (shape[0] + 1) -- confirm whether the "+1" is intended here.
print("Number of collections: " + str(amc_data.shape[0]+1))
amc_data.head(2)
Number of collections: 34
Out[3]:
acron
acron2
code
document_count
domain
has_analytics
is_active
journal_count
name
original_name
status
type
0
arg
ar
arg
39006.0
www.scielo.org.ar
True
True
{'deceased': 22, 'current': 125}
{'en': 'Argentina', 'pt': 'Argentina', 'es': '...
Argentina
certified
journals
1
chl
cl
chl
63467.0
www.scielo.cl
True
True
{'deceased': 13, 'suspended': 1, 'current': 105}
{'en': 'Chile', 'pt': 'Chile', 'es': 'Chile'}
Chile
certified
journals
Some collections won't be analyzed, mainly to avoid duplicates
(some articles appear in more than one collection).
The spa
(Public Health) collection should be partially kept in the result,
even though it is not a collection
whose journals/articles are assigned to a single country.
The collections below are each linked to a single country:
In [4]:
# Collection codes that are deliberately left out of the analysis.
dont_evaluate = ["bio", "cci", "cic", "ecu", "psi", "pry", "rve", "rvo", "rvt", "sss", "spa", "wid"]
# The "code" column is renamed so it can be joined with the documents table.
amc_names_map = {"code": "collection"}
# Keep the rows whose "acron2" has exactly two characters and whose code is
# NOT listed in dont_evaluate (the "~" negates the isin() mask).
# The chain is parenthesised so no trailing backslash is needed.
amc_pairs = (
    amc_data[
        (amc_data["acron2"].str.len() == 2)
        & ~amc_data["code"].isin(dont_evaluate)
    ]
    .rename(columns=amc_names_map)
)
# NOTE(review): the "+1" appears to account for "spa", which is excluded above
# but merged back by ISSN in a later cell -- confirm.
print("Number of collections: " + str(amc_pairs.shape[0]+1))
# Single-column frame used as the right-hand side of the inner merge below.
collections = amc_pairs[['collection']].copy()
collections
Number of collections: 15
Out[4]:
collection
0
arg
1
chl
2
col
3
cub
4
esp
5
mex
6
prt
8
scl
11
sza
12
ven
14
bol
15
cri
16
per
19
ury
The following journals of the spa
collection, listed by ISSN, are the ones kept in the analysis (the table holds only the ISSN; no country column is defined here):
In [5]:
# ISSNs of the "spa" journals that are kept; built column-wise so it is
# obvious that the frame has a single "issn" column of plain strings.
spa_issn_country = pd.DataFrame({
    "issn": [
        "0021-2571",
        "0042-9686",
        "1020-4989",
        "1555-7960",
    ],
})
spa_issn_country  # For collection = "spa", only!
Out[5]:
issn
0
0021-2571
1
0042-9686
2
1020-4989
3
1555-7960
This dataset is the Network spreadsheet/CSV pack, which can be found on the SciELO Analytics report web page. Its first two rows are shown below, after the file is extracted and loaded:
In [6]:
import zipfile
# The Zip file lives in jcatalog/data/scielo; only the documents/languages
# table is needed, so just that member is extracted into ./csv_files.
with zipfile.ZipFile('../../data/scielo/tabs_network_190210.zip') as zip_ref:
    zip_ref.extract(member='documents_languages.csv', path='csv_files')
In [7]:
# Load the extracted table. keep_default_na=False stops pandas from turning
# its default NA markers (e.g. empty cells) into NaN.
dataset = pd.read_csv(
    "csv_files/documents_languages.csv",
    keep_default_na=False,
)
dataset.shape
Out[7]:
(877068, 26)
In [8]:
# CSV column header -> short column name used in the rest of the notebook.
names_map = {
    "ISSN SciELO":                         "issn",
    "collection":                          "collection",
    "title at SciELO":                     "title",
    "document publishing ID (PID SciELO)": "docs",
    "document type":                       "type",
    "document languages":                  "languages",
    "document is citable":                 "is_citable",
    "document publishing year":            "year",
    "document pt":                         "document_pt",
    "document es":                         "document_es",
    "document en":                         "document_en",
    "document other languages":            "document_other_languages",
}
# Iterating a dict yields its keys, so this selects exactly the mapped
# columns (in mapping order) before renaming them.
df0 = dataset.loc[:, list(names_map)].rename(columns=names_map)
df0.head(2)
Out[8]:
issn
collection
title
docs
type
languages
is_citable
year
document_pt
document_es
document_en
document_other_languages
0
0100-879X
scl
Brazilian Journal of Medical and Biological Re...
S0100-879X1998000800006
research-article
en
1
1998
0
0
1
0
1
0100-879X
scl
Brazilian Journal of Medical and Biological Re...
S0100-879X1998000800011
rapid-communication
en
1
1998
0
0
1
0
In [9]:
# Stack two filtered parts of df0:
#   1. rows outside "spa" whose collection is one of the selected collections;
#   2. "spa" rows whose ISSN is one of the explicitly listed journals.
# concat is called without ignore_index, so each part keeps its own row labels.
df = pd.concat([
    df0[df0["collection"].ne("spa")].merge(collections, how="inner", on="collection"),
    df0[df0["collection"].eq("spa")].merge(spa_issn_country, how="inner", on="issn"),
])
In [10]:
df.head(2)
Out[10]:
issn
collection
title
docs
type
languages
is_citable
year
document_pt
document_es
document_en
document_other_languages
0
0100-879X
scl
Brazilian Journal of Medical and Biological Re...
S0100-879X1998000800006
research-article
en
1
1998
0
0
1
0
1
0100-879X
scl
Brazilian Journal of Medical and Biological Re...
S0100-879X1998000800011
rapid-communication
en
1
1998
0
0
1
0
In [11]:
# Size of the unfiltered table; compare with df.shape (next cell) to see how
# many rows the collection/ISSN filtering removed.
df0.shape
Out[11]:
(877068, 12)
In [12]:
df.shape
Out[12]:
(793648, 12)
In [13]:
# Distinct collection codes that remain in df after the filtering merges.
set(df.collection)
Out[13]:
{'arg',
'bol',
'chl',
'col',
'cri',
'cub',
'esp',
'mex',
'per',
'prt',
'scl',
'spa',
'sza',
'ury',
'ven'}
In [14]:
# Bucket every publication year up to and including 1996 under the single
# label 'anterior' (Portuguese for "earlier"); later years keep their value.
df["pub_year"] = np.where(df["year"].le(1996), 'anterior', df["year"])
In [15]:
# 1 when the document type is exactly "review-article", otherwise 0.
df["tipo_review"] = np.where(df["type"].eq("review-article"), 1, 0)
In [16]:
# Per-language "citable" flags: 1 only when the document has the given
# language flag set AND is marked as citable, otherwise 0.
df["citable_pt"] = np.where(
    df["document_pt"].eq(1) & df["is_citable"].eq(1), 1, 0)
df["citable_es"] = np.where(
    df["document_es"].eq(1) & df["is_citable"].eq(1), 1, 0)
df["citable_en"] = np.where(
    df["document_en"].eq(1) & df["is_citable"].eq(1), 1, 0)
df["citable_other_lang"] = np.where(
    df["document_other_languages"].eq(1) & df["is_citable"].eq(1), 1, 0)
In [17]:
# Row-wise total of the four language flags, computed on the raw values so the
# result is a plain array assigned by position.
df["sum_to_2_more_lang"] = df[
    ["document_en", "document_pt", "document_es", "document_other_languages"]
].values.sum(axis=1)
# Inspect the documents whose flags add up to exactly three.
df[df["sum_to_2_more_lang"].eq(3)].T
Out[17]:
31760
32728
34448
36347
37268
37270
37272
39283
42191
42453
...
681017
681019
681020
681025
681028
681029
2915
3247
3249
3257
issn
0104-1169
0104-1169
0104-1169
0104-1169
0104-1169
0104-1169
0104-1169
0104-1169
0104-1169
0104-1169
...
2007-6835
2007-6835
2007-6835
2007-6835
2007-6835
2007-6835
1020-4989
1020-4989
1020-4989
1020-4989
collection
scl
scl
scl
scl
scl
scl
scl
scl
scl
scl
...
mex
mex
mex
mex
mex
mex
spa
spa
spa
spa
title
Revista Latino-Americana de Enfermagem
Revista Latino-Americana de Enfermagem
Revista Latino-Americana de Enfermagem
Revista Latino-Americana de Enfermagem
Revista Latino-Americana de Enfermagem
Revista Latino-Americana de Enfermagem
Revista Latino-Americana de Enfermagem
Revista Latino-Americana de Enfermagem
Revista Latino-Americana de Enfermagem
Revista Latino-Americana de Enfermagem
...
Revista ALCONPAT
Revista ALCONPAT
Revista ALCONPAT
Revista ALCONPAT
Revista ALCONPAT
Revista ALCONPAT
Revista Panamericana de Salud Pública
Revista Panamericana de Salud Pública
Revista Panamericana de Salud Pública
Revista Panamericana de Salud Pública
docs
S0104-11692003000400001
S0104-11692003000600001
S0104-11692004000200001
S0104-11692003000500001
S0104-11692004000700001
S0104-11692004000700002
S0104-11692004000700003
S0104-11692004000400001
S0104-11692004000500001
S0104-11692004000600001
...
S2007-68352015000200138
S2007-68352015000300162
S2007-68352015000300190
S2007-68352015000300203
S2007-68352015000200097
S2007-68352015000200151
S1020-49892012001000007
S1020-49892016000800080
S1020-49892016000800076
S1020-49892016000800078
type
editorial
editorial
editorial
editorial
editorial
research-article
research-article
editorial
editorial
editorial
...
review-article
research-article
research-article
research-article
research-article
review-article
research-article
undefined
editorial
editorial
languages
en;pt;es
en;pt;es
en;pt;es
en;pt;es
en;pt;es
en;pt;es
en;pt;es
en;pt;es
en;pt;es
en;pt;es
...
en;es;pt
en;es;pt
en;es;pt
en;es;pt
en;es;pt
en;es;pt
en;pt;es
fr;en;es
fr;en;es
fr;en;es
is_citable
0
0
0
0
0
1
1
0
0
0
...
1
1
1
1
1
1
1
0
0
0
year
2003
2003
2004
2003
2004
2004
2004
2004
2004
2004
...
2015
2015
2015
2015
2015
2015
2012
2016
2016
2016
document_pt
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
0
0
0
document_es
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
document_en
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
document_other_languages
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
1
1
1
pub_year
2003
2003
2004
2003
2004
2004
2004
2004
2004
2004
...
2015
2015
2015
2015
2015
2015
2012
2016
2016
2016
tipo_review
0
0
0
0
0
0
0
0
0
0
...
1
0
0
0
0
1
0
0
0
0
citable_pt
0
0
0
0
0
1
1
0
0
0
...
1
1
1
1
1
1
1
0
0
0
citable_es
0
0
0
0
0
1
1
0
0
0
...
1
1
1
1
1
1
1
0
0
0
citable_en
0
0
0
0
0
1
1
0
0
0
...
1
1
1
1
1
1
1
0
0
0
citable_other_lang
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
sum_to_2_more_lang
3
3
3
3
3
3
3
3
3
3
...
3
3
3
3
3
3
3
3
3
3
19 rows × 4327 columns
In [18]:
# Flag documents whose language-flag total is two or more.
df["doc_2_more_lang"] = np.where(df["sum_to_2_more_lang"].ge(2), 1, 0)
# Same flag, restricted to documents marked as citable.
df["citable_doc_2_more_lang"] = np.where(
    df["sum_to_2_more_lang"].ge(2) & df["is_citable"].eq(1), 1, 0
)
# The helper total is no longer needed once both flags exist.
# NOTE: because the column is deleted here, re-running this cell on its own
# raises KeyError; re-run the cell that builds 'sum_to_2_more_lang' first.
del df["sum_to_2_more_lang"]
df[df["doc_2_more_lang"].eq(1)].T
Out[18]:
11706
11710
11711
11713
11718
11721
11725
31273
31274
31275
...
3135
3247
3249
3256
3257
3320
3325
3479
3485
3668
issn
0102-311X
0102-311X
0102-311X
0102-311X
0102-311X
0102-311X
0102-311X
0066-782X
0066-782X
0066-782X
...
1020-4989
1020-4989
1020-4989
1020-4989
1020-4989
1020-4989
1020-4989
1020-4989
1020-4989
1020-4989
collection
scl
scl
scl
scl
scl
scl
scl
scl
scl
scl
...
spa
spa
spa
spa
spa
spa
spa
spa
spa
spa
title
Cadernos de Saúde Pública
Cadernos de Saúde Pública
Cadernos de Saúde Pública
Cadernos de Saúde Pública
Cadernos de Saúde Pública
Cadernos de Saúde Pública
Cadernos de Saúde Pública
Arquivos Brasileiros de Cardiologia
Arquivos Brasileiros de Cardiologia
Arquivos Brasileiros de Cardiologia
...
Revista Panamericana de Salud Pública
Revista Panamericana de Salud Pública
Revista Panamericana de Salud Pública
Revista Panamericana de Salud Pública
Revista Panamericana de Salud Pública
Revista Panamericana de Salud Pública
Revista Panamericana de Salud Pública
Revista Panamericana de Salud Pública
Revista Panamericana de Salud Pública
Revista Panamericana de Salud Pública
docs
S0102-311X1998000200011
S0102-311X1998000200015
S0102-311X1998000200016
S0102-311X1998000200018
S0102-311X1998000200024
S0102-311X1998000200003
S0102-311X1998000200007
S0066-782X2003001400001
S0066-782X2003001400004
S0066-782X2003001400005
...
S1020-49892015000600001
S1020-49892016000800080
S1020-49892016000800076
S1020-49892016000800085
S1020-49892016000800078
S1020-49892016000500215
S1020-49892016000500213
S1020-49892018000100101
S1020-49892018000100100
S1020-49892018000100106
type
research-article
research-article
research-article
brief-report
article-commentary
research-article
research-article
research-article
research-article
research-article
...
research-article
undefined
editorial
undefined
editorial
undefined
editorial
editorial
editorial
editorial
languages
en;pt
pt;es
pt;es
en;pt
es;pt
en;pt
pt;es
en;pt
en;pt
en;pt
...
en;es
fr;en;es
fr;en;es
en;es
fr;en;es
en;es
en;es
en;es
en;es
en;es
is_citable
1
1
1
1
1
1
1
1
1
1
...
1
0
0
0
0
0
0
0
0
0
year
1998
1998
1998
1998
1998
1998
1998
2003
2003
2003
...
2015
2016
2016
2016
2016
2016
2016
2018
2018
2018
document_pt
1
1
1
1
1
1
1
1
1
1
...
0
0
0
0
0
0
0
0
0
0
document_es
0
1
1
0
1
0
1
0
0
0
...
1
1
1
1
1
1
1
1
1
1
document_en
1
0
0
1
0
1
0
1
1
1
...
1
1
1
1
1
1
1
1
1
1
document_other_languages
0
0
0
0
0
0
0
0
0
0
...
0
1
1
0
1
0
0
0
0
0
pub_year
1998
1998
1998
1998
1998
1998
1998
2003
2003
2003
...
2015
2016
2016
2016
2016
2016
2016
2018
2018
2018
tipo_review
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_pt
1
1
1
1
1
1
1
1
1
1
...
0
0
0
0
0
0
0
0
0
0
citable_es
0
1
1
0
1
0
1
0
0
0
...
1
0
0
0
0
0
0
0
0
0
citable_en
1
0
0
1
0
1
0
1
1
1
...
1
0
0
0
0
0
0
0
0
0
citable_other_lang
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
doc_2_more_lang
1
1
1
1
1
1
1
1
1
1
...
1
1
1
1
1
1
1
1
1
1
citable_doc_2_more_lang
1
1
1
1
1
1
1
1
1
1
...
1
0
0
0
0
0
0
0
0
0
20 rows × 50703 columns
In [ ]:
In [19]:
df.columns
Out[19]:
Index(['issn', 'collection', 'title', 'docs', 'type', 'languages',
'is_citable', 'year', 'document_pt', 'document_es', 'document_en',
'document_other_languages', 'pub_year', 'tipo_review', 'citable_pt',
'citable_es', 'citable_en', 'citable_other_lang', 'doc_2_more_lang',
'citable_doc_2_more_lang'],
dtype='object')
In [20]:
# Columns aggregated per journal (ISSN) and publication-year bucket.
values_list = [
    "docs",
    "is_citable",
    "tipo_review",
    "document_pt",
    "document_es",
    "document_en",
    "document_other_languages",
    "doc_2_more_lang",
    "citable_pt",
    "citable_es",
    "citable_en",
    "citable_other_lang",
    "citable_doc_2_more_lang",
]
# One row per ISSN, one (metric, pub_year) column per combination.
# count_nonzero counts the non-zero entries of each group; combinations with
# no rows are filled with 0.
# NOTE(review): for "docs" (PID strings) this presumably counts the non-empty
# identifiers, i.e. the number of documents -- confirm.
td = pd.pivot_table(
    df,
    values=values_list,
    index="issn",
    columns="pub_year",
    aggfunc=np.count_nonzero,
    fill_value=0,
)
In [21]:
td[:12].T
Out[21]:
issn
0001-3714
0001-3765
0001-6002
0001-6365
0002-0591
0002-192X
0002-7014
0003-2573
0004-0592
0004-0614
0004-0622
0004-0649
pub_year
citable_doc_2_more_lang
1997
0
0
0
0
0
0
0
0
0
0
0
0
1998
0
0
0
0
0
0
0
0
0
0
0
0
1999
0
0
0
0
0
0
0
0
0
0
0
0
2000
0
0
0
0
0
0
0
0
0
0
0
0
2001
0
0
0
0
0
0
0
0
0
0
0
0
2002
0
0
0
0
0
0
0
0
0
0
0
0
2003
0
0
0
0
0
0
0
0
0
0
0
0
2004
0
0
0
0
0
0
0
0
0
0
0
0
2005
0
0
0
0
0
0
0
0
0
0
0
0
2006
0
0
0
0
0
0
0
0
0
0
0
0
2007
0
0
0
0
0
0
0
0
0
0
0
0
2008
0
0
0
0
0
0
0
0
0
0
0
0
2009
0
0
0
0
0
0
0
0
0
0
0
0
2010
0
0
0
0
0
0
0
0
0
0
0
0
2011
0
0
7
0
0
0
0
0
0
0
0
0
2012
0
0
26
0
0
0
0
0
0
0
0
0
2013
0
0
14
0
0
0
0
0
0
0
0
0
2014
0
0
0
0
0
0
0
0
0
0
0
0
2015
0
0
0
0
0
0
0
0
0
0
0
0
2016
0
0
0
0
0
0
0
0
0
0
0
0
2017
0
0
0
0
0
0
0
0
0
0
0
0
2018
0
0
0
0
0
0
0
0
0
0
0
0
2019
0
0
0
0
0
0
0
0
0
0
0
0
anterior
0
0
0
0
0
0
0
0
0
0
0
0
citable_en
1997
0
0
0
0
0
0
0
0
0
0
0
0
1998
33
0
0
0
0
0
0
0
0
0
0
0
1999
64
0
0
0
0
0
0
0
0
0
0
0
2000
0
64
0
0
0
0
0
0
0
0
7
0
2001
0
74
0
0
0
0
0
0
0
0
22
0
2002
0
51
0
0
0
0
0
0
0
0
4
0
...
...
...
...
...
...
...
...
...
...
...
...
...
...
is_citable
2015
0
185
36
0
0
0
0
31
0
0
31
23
2016
0
200
29
0
0
0
0
29
0
0
40
22
2017
0
286
25
0
0
0
0
25
0
0
19
10
2018
0
287
17
0
0
0
0
25
0
0
0
0
2019
0
1
4
0
0
0
0
0
0
0
0
0
anterior
0
0
0
0
0
0
0
0
0
0
0
0
tipo_review
1997
0
0
0
0
0
0
0
0
0
0
0
0
1998
1
0
0
0
0
0
0
0
0
0
0
0
1999
2
0
0
0
0
0
0
0
0
0
0
0
2000
0
0
3
0
0
0
0
0
0
0
0
0
2001
0
0
5
19
0
0
0
0
0
0
0
0
2002
0
0
7
14
0
0
0
0
0
0
0
0
2003
0
0
7
0
0
0
0
0
0
0
0
0
2004
0
0
9
7
0
0
0
21
0
0
0
0
2005
0
0
4
8
0
0
0
3
0
4
0
0
2006
0
0
8
8
0
0
0
7
0
1
0
0
2007
0
0
8
3
0
0
0
12
0
13
0
2
2008
0
0
8
4
0
0
1
19
0
32
0
1
2009
0
0
6
20
0
0
0
5
0
0
0
1
2010
0
0
3
7
0
0
0
0
0
13
0
0
2011
0
0
6
0
0
0
0
6
0
0
0
0
2012
0
0
6
0
0
0
0
2
0
0
0
0
2013
0
0
4
0
0
0
0
0
0
0
0
0
2014
0
0
2
0
0
0
0
0
0
0
0
0
2015
0
0
4
0
0
0
0
0
0
0
0
2
2016
0
0
2
0
0
0
0
0
0
0
0
0
2017
0
0
5
0
0
0
0
0
0
0
0
2
2018
0
0
3
0
0
0
0
0
0
0
0
0
2019
0
0
2
0
0
0
0
0
0
0
0
0
anterior
0
0
0
0
0
0
0
0
0
0
0
0
312 rows × 12 columns
In [22]:
td.columns.levels
Out[22]:
FrozenList([['citable_doc_2_more_lang', 'citable_en', 'citable_es', 'citable_other_lang', 'citable_pt', 'doc_2_more_lang', 'docs', 'document_en', 'document_es', 'document_other_languages', 'document_pt', 'is_citable', 'tipo_review'], ['1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', 'anterior']])
In [23]:
# Print every (metric, pub_year) column key of the pivot table, one per line.
# The former bare `td.keys()` statement was removed: its value was discarded
# because it was not the last expression of the cell.
for k in td.keys():
    print(k)
('citable_doc_2_more_lang', '1997')
('citable_doc_2_more_lang', '1998')
('citable_doc_2_more_lang', '1999')
('citable_doc_2_more_lang', '2000')
('citable_doc_2_more_lang', '2001')
('citable_doc_2_more_lang', '2002')
('citable_doc_2_more_lang', '2003')
('citable_doc_2_more_lang', '2004')
('citable_doc_2_more_lang', '2005')
('citable_doc_2_more_lang', '2006')
('citable_doc_2_more_lang', '2007')
('citable_doc_2_more_lang', '2008')
('citable_doc_2_more_lang', '2009')
('citable_doc_2_more_lang', '2010')
('citable_doc_2_more_lang', '2011')
('citable_doc_2_more_lang', '2012')
('citable_doc_2_more_lang', '2013')
('citable_doc_2_more_lang', '2014')
('citable_doc_2_more_lang', '2015')
('citable_doc_2_more_lang', '2016')
('citable_doc_2_more_lang', '2017')
('citable_doc_2_more_lang', '2018')
('citable_doc_2_more_lang', '2019')
('citable_doc_2_more_lang', 'anterior')
('citable_en', '1997')
('citable_en', '1998')
('citable_en', '1999')
('citable_en', '2000')
('citable_en', '2001')
('citable_en', '2002')
('citable_en', '2003')
('citable_en', '2004')
('citable_en', '2005')
('citable_en', '2006')
('citable_en', '2007')
('citable_en', '2008')
('citable_en', '2009')
('citable_en', '2010')
('citable_en', '2011')
('citable_en', '2012')
('citable_en', '2013')
('citable_en', '2014')
('citable_en', '2015')
('citable_en', '2016')
('citable_en', '2017')
('citable_en', '2018')
('citable_en', '2019')
('citable_en', 'anterior')
('citable_es', '1997')
('citable_es', '1998')
('citable_es', '1999')
('citable_es', '2000')
('citable_es', '2001')
('citable_es', '2002')
('citable_es', '2003')
('citable_es', '2004')
('citable_es', '2005')
('citable_es', '2006')
('citable_es', '2007')
('citable_es', '2008')
('citable_es', '2009')
('citable_es', '2010')
('citable_es', '2011')
('citable_es', '2012')
('citable_es', '2013')
('citable_es', '2014')
('citable_es', '2015')
('citable_es', '2016')
('citable_es', '2017')
('citable_es', '2018')
('citable_es', '2019')
('citable_es', 'anterior')
('citable_other_lang', '1997')
('citable_other_lang', '1998')
('citable_other_lang', '1999')
('citable_other_lang', '2000')
('citable_other_lang', '2001')
('citable_other_lang', '2002')
('citable_other_lang', '2003')
('citable_other_lang', '2004')
('citable_other_lang', '2005')
('citable_other_lang', '2006')
('citable_other_lang', '2007')
('citable_other_lang', '2008')
('citable_other_lang', '2009')
('citable_other_lang', '2010')
('citable_other_lang', '2011')
('citable_other_lang', '2012')
('citable_other_lang', '2013')
('citable_other_lang', '2014')
('citable_other_lang', '2015')
('citable_other_lang', '2016')
('citable_other_lang', '2017')
('citable_other_lang', '2018')
('citable_other_lang', '2019')
('citable_other_lang', 'anterior')
('citable_pt', '1997')
('citable_pt', '1998')
('citable_pt', '1999')
('citable_pt', '2000')
('citable_pt', '2001')
('citable_pt', '2002')
('citable_pt', '2003')
('citable_pt', '2004')
('citable_pt', '2005')
('citable_pt', '2006')
('citable_pt', '2007')
('citable_pt', '2008')
('citable_pt', '2009')
('citable_pt', '2010')
('citable_pt', '2011')
('citable_pt', '2012')
('citable_pt', '2013')
('citable_pt', '2014')
('citable_pt', '2015')
('citable_pt', '2016')
('citable_pt', '2017')
('citable_pt', '2018')
('citable_pt', '2019')
('citable_pt', 'anterior')
('doc_2_more_lang', '1997')
('doc_2_more_lang', '1998')
('doc_2_more_lang', '1999')
('doc_2_more_lang', '2000')
('doc_2_more_lang', '2001')
('doc_2_more_lang', '2002')
('doc_2_more_lang', '2003')
('doc_2_more_lang', '2004')
('doc_2_more_lang', '2005')
('doc_2_more_lang', '2006')
('doc_2_more_lang', '2007')
('doc_2_more_lang', '2008')
('doc_2_more_lang', '2009')
('doc_2_more_lang', '2010')
('doc_2_more_lang', '2011')
('doc_2_more_lang', '2012')
('doc_2_more_lang', '2013')
('doc_2_more_lang', '2014')
('doc_2_more_lang', '2015')
('doc_2_more_lang', '2016')
('doc_2_more_lang', '2017')
('doc_2_more_lang', '2018')
('doc_2_more_lang', '2019')
('doc_2_more_lang', 'anterior')
('docs', '1997')
('docs', '1998')
('docs', '1999')
('docs', '2000')
('docs', '2001')
('docs', '2002')
('docs', '2003')
('docs', '2004')
('docs', '2005')
('docs', '2006')
('docs', '2007')
('docs', '2008')
('docs', '2009')
('docs', '2010')
('docs', '2011')
('docs', '2012')
('docs', '2013')
('docs', '2014')
('docs', '2015')
('docs', '2016')
('docs', '2017')
('docs', '2018')
('docs', '2019')
('docs', 'anterior')
('document_en', '1997')
('document_en', '1998')
('document_en', '1999')
('document_en', '2000')
('document_en', '2001')
('document_en', '2002')
('document_en', '2003')
('document_en', '2004')
('document_en', '2005')
('document_en', '2006')
('document_en', '2007')
('document_en', '2008')
('document_en', '2009')
('document_en', '2010')
('document_en', '2011')
('document_en', '2012')
('document_en', '2013')
('document_en', '2014')
('document_en', '2015')
('document_en', '2016')
('document_en', '2017')
('document_en', '2018')
('document_en', '2019')
('document_en', 'anterior')
('document_es', '1997')
('document_es', '1998')
('document_es', '1999')
('document_es', '2000')
('document_es', '2001')
('document_es', '2002')
('document_es', '2003')
('document_es', '2004')
('document_es', '2005')
('document_es', '2006')
('document_es', '2007')
('document_es', '2008')
('document_es', '2009')
('document_es', '2010')
('document_es', '2011')
('document_es', '2012')
('document_es', '2013')
('document_es', '2014')
('document_es', '2015')
('document_es', '2016')
('document_es', '2017')
('document_es', '2018')
('document_es', '2019')
('document_es', 'anterior')
('document_other_languages', '1997')
('document_other_languages', '1998')
('document_other_languages', '1999')
('document_other_languages', '2000')
('document_other_languages', '2001')
('document_other_languages', '2002')
('document_other_languages', '2003')
('document_other_languages', '2004')
('document_other_languages', '2005')
('document_other_languages', '2006')
('document_other_languages', '2007')
('document_other_languages', '2008')
('document_other_languages', '2009')
('document_other_languages', '2010')
('document_other_languages', '2011')
('document_other_languages', '2012')
('document_other_languages', '2013')
('document_other_languages', '2014')
('document_other_languages', '2015')
('document_other_languages', '2016')
('document_other_languages', '2017')
('document_other_languages', '2018')
('document_other_languages', '2019')
('document_other_languages', 'anterior')
('document_pt', '1997')
('document_pt', '1998')
('document_pt', '1999')
('document_pt', '2000')
('document_pt', '2001')
('document_pt', '2002')
('document_pt', '2003')
('document_pt', '2004')
('document_pt', '2005')
('document_pt', '2006')
('document_pt', '2007')
('document_pt', '2008')
('document_pt', '2009')
('document_pt', '2010')
('document_pt', '2011')
('document_pt', '2012')
('document_pt', '2013')
('document_pt', '2014')
('document_pt', '2015')
('document_pt', '2016')
('document_pt', '2017')
('document_pt', '2018')
('document_pt', '2019')
('document_pt', 'anterior')
('is_citable', '1997')
('is_citable', '1998')
('is_citable', '1999')
('is_citable', '2000')
('is_citable', '2001')
('is_citable', '2002')
('is_citable', '2003')
('is_citable', '2004')
('is_citable', '2005')
('is_citable', '2006')
('is_citable', '2007')
('is_citable', '2008')
('is_citable', '2009')
('is_citable', '2010')
('is_citable', '2011')
('is_citable', '2012')
('is_citable', '2013')
('is_citable', '2014')
('is_citable', '2015')
('is_citable', '2016')
('is_citable', '2017')
('is_citable', '2018')
('is_citable', '2019')
('is_citable', 'anterior')
('tipo_review', '1997')
('tipo_review', '1998')
('tipo_review', '1999')
('tipo_review', '2000')
('tipo_review', '2001')
('tipo_review', '2002')
('tipo_review', '2003')
('tipo_review', '2004')
('tipo_review', '2005')
('tipo_review', '2006')
('tipo_review', '2007')
('tipo_review', '2008')
('tipo_review', '2009')
('tipo_review', '2010')
('tipo_review', '2011')
('tipo_review', '2012')
('tipo_review', '2013')
('tipo_review', '2014')
('tipo_review', '2015')
('tipo_review', '2016')
('tipo_review', '2017')
('tipo_review', '2018')
('tipo_review', '2019')
('tipo_review', 'anterior')
In [24]:
# Flatten each (metric, pub_year) column key into a single "metric_year" label.
newlabel = [metric + '_' + year for metric, year in td.keys()]
In [25]:
newlabel
Out[25]:
['citable_doc_2_more_lang_1997',
'citable_doc_2_more_lang_1998',
'citable_doc_2_more_lang_1999',
'citable_doc_2_more_lang_2000',
'citable_doc_2_more_lang_2001',
'citable_doc_2_more_lang_2002',
'citable_doc_2_more_lang_2003',
'citable_doc_2_more_lang_2004',
'citable_doc_2_more_lang_2005',
'citable_doc_2_more_lang_2006',
'citable_doc_2_more_lang_2007',
'citable_doc_2_more_lang_2008',
'citable_doc_2_more_lang_2009',
'citable_doc_2_more_lang_2010',
'citable_doc_2_more_lang_2011',
'citable_doc_2_more_lang_2012',
'citable_doc_2_more_lang_2013',
'citable_doc_2_more_lang_2014',
'citable_doc_2_more_lang_2015',
'citable_doc_2_more_lang_2016',
'citable_doc_2_more_lang_2017',
'citable_doc_2_more_lang_2018',
'citable_doc_2_more_lang_2019',
'citable_doc_2_more_lang_anterior',
'citable_en_1997',
'citable_en_1998',
'citable_en_1999',
'citable_en_2000',
'citable_en_2001',
'citable_en_2002',
'citable_en_2003',
'citable_en_2004',
'citable_en_2005',
'citable_en_2006',
'citable_en_2007',
'citable_en_2008',
'citable_en_2009',
'citable_en_2010',
'citable_en_2011',
'citable_en_2012',
'citable_en_2013',
'citable_en_2014',
'citable_en_2015',
'citable_en_2016',
'citable_en_2017',
'citable_en_2018',
'citable_en_2019',
'citable_en_anterior',
'citable_es_1997',
'citable_es_1998',
'citable_es_1999',
'citable_es_2000',
'citable_es_2001',
'citable_es_2002',
'citable_es_2003',
'citable_es_2004',
'citable_es_2005',
'citable_es_2006',
'citable_es_2007',
'citable_es_2008',
'citable_es_2009',
'citable_es_2010',
'citable_es_2011',
'citable_es_2012',
'citable_es_2013',
'citable_es_2014',
'citable_es_2015',
'citable_es_2016',
'citable_es_2017',
'citable_es_2018',
'citable_es_2019',
'citable_es_anterior',
'citable_other_lang_1997',
'citable_other_lang_1998',
'citable_other_lang_1999',
'citable_other_lang_2000',
'citable_other_lang_2001',
'citable_other_lang_2002',
'citable_other_lang_2003',
'citable_other_lang_2004',
'citable_other_lang_2005',
'citable_other_lang_2006',
'citable_other_lang_2007',
'citable_other_lang_2008',
'citable_other_lang_2009',
'citable_other_lang_2010',
'citable_other_lang_2011',
'citable_other_lang_2012',
'citable_other_lang_2013',
'citable_other_lang_2014',
'citable_other_lang_2015',
'citable_other_lang_2016',
'citable_other_lang_2017',
'citable_other_lang_2018',
'citable_other_lang_2019',
'citable_other_lang_anterior',
'citable_pt_1997',
'citable_pt_1998',
'citable_pt_1999',
'citable_pt_2000',
'citable_pt_2001',
'citable_pt_2002',
'citable_pt_2003',
'citable_pt_2004',
'citable_pt_2005',
'citable_pt_2006',
'citable_pt_2007',
'citable_pt_2008',
'citable_pt_2009',
'citable_pt_2010',
'citable_pt_2011',
'citable_pt_2012',
'citable_pt_2013',
'citable_pt_2014',
'citable_pt_2015',
'citable_pt_2016',
'citable_pt_2017',
'citable_pt_2018',
'citable_pt_2019',
'citable_pt_anterior',
'doc_2_more_lang_1997',
'doc_2_more_lang_1998',
'doc_2_more_lang_1999',
'doc_2_more_lang_2000',
'doc_2_more_lang_2001',
'doc_2_more_lang_2002',
'doc_2_more_lang_2003',
'doc_2_more_lang_2004',
'doc_2_more_lang_2005',
'doc_2_more_lang_2006',
'doc_2_more_lang_2007',
'doc_2_more_lang_2008',
'doc_2_more_lang_2009',
'doc_2_more_lang_2010',
'doc_2_more_lang_2011',
'doc_2_more_lang_2012',
'doc_2_more_lang_2013',
'doc_2_more_lang_2014',
'doc_2_more_lang_2015',
'doc_2_more_lang_2016',
'doc_2_more_lang_2017',
'doc_2_more_lang_2018',
'doc_2_more_lang_2019',
'doc_2_more_lang_anterior',
'docs_1997',
'docs_1998',
'docs_1999',
'docs_2000',
'docs_2001',
'docs_2002',
'docs_2003',
'docs_2004',
'docs_2005',
'docs_2006',
'docs_2007',
'docs_2008',
'docs_2009',
'docs_2010',
'docs_2011',
'docs_2012',
'docs_2013',
'docs_2014',
'docs_2015',
'docs_2016',
'docs_2017',
'docs_2018',
'docs_2019',
'docs_anterior',
'document_en_1997',
'document_en_1998',
'document_en_1999',
'document_en_2000',
'document_en_2001',
'document_en_2002',
'document_en_2003',
'document_en_2004',
'document_en_2005',
'document_en_2006',
'document_en_2007',
'document_en_2008',
'document_en_2009',
'document_en_2010',
'document_en_2011',
'document_en_2012',
'document_en_2013',
'document_en_2014',
'document_en_2015',
'document_en_2016',
'document_en_2017',
'document_en_2018',
'document_en_2019',
'document_en_anterior',
'document_es_1997',
'document_es_1998',
'document_es_1999',
'document_es_2000',
'document_es_2001',
'document_es_2002',
'document_es_2003',
'document_es_2004',
'document_es_2005',
'document_es_2006',
'document_es_2007',
'document_es_2008',
'document_es_2009',
'document_es_2010',
'document_es_2011',
'document_es_2012',
'document_es_2013',
'document_es_2014',
'document_es_2015',
'document_es_2016',
'document_es_2017',
'document_es_2018',
'document_es_2019',
'document_es_anterior',
'document_other_languages_1997',
'document_other_languages_1998',
'document_other_languages_1999',
'document_other_languages_2000',
'document_other_languages_2001',
'document_other_languages_2002',
'document_other_languages_2003',
'document_other_languages_2004',
'document_other_languages_2005',
'document_other_languages_2006',
'document_other_languages_2007',
'document_other_languages_2008',
'document_other_languages_2009',
'document_other_languages_2010',
'document_other_languages_2011',
'document_other_languages_2012',
'document_other_languages_2013',
'document_other_languages_2014',
'document_other_languages_2015',
'document_other_languages_2016',
'document_other_languages_2017',
'document_other_languages_2018',
'document_other_languages_2019',
'document_other_languages_anterior',
'document_pt_1997',
'document_pt_1998',
'document_pt_1999',
'document_pt_2000',
'document_pt_2001',
'document_pt_2002',
'document_pt_2003',
'document_pt_2004',
'document_pt_2005',
'document_pt_2006',
'document_pt_2007',
'document_pt_2008',
'document_pt_2009',
'document_pt_2010',
'document_pt_2011',
'document_pt_2012',
'document_pt_2013',
'document_pt_2014',
'document_pt_2015',
'document_pt_2016',
'document_pt_2017',
'document_pt_2018',
'document_pt_2019',
'document_pt_anterior',
'is_citable_1997',
'is_citable_1998',
'is_citable_1999',
'is_citable_2000',
'is_citable_2001',
'is_citable_2002',
'is_citable_2003',
'is_citable_2004',
'is_citable_2005',
'is_citable_2006',
'is_citable_2007',
'is_citable_2008',
'is_citable_2009',
'is_citable_2010',
'is_citable_2011',
'is_citable_2012',
'is_citable_2013',
'is_citable_2014',
'is_citable_2015',
'is_citable_2016',
'is_citable_2017',
'is_citable_2018',
'is_citable_2019',
'is_citable_anterior',
'tipo_review_1997',
'tipo_review_1998',
'tipo_review_1999',
'tipo_review_2000',
'tipo_review_2001',
'tipo_review_2002',
'tipo_review_2003',
'tipo_review_2004',
'tipo_review_2005',
'tipo_review_2006',
'tipo_review_2007',
'tipo_review_2008',
'tipo_review_2009',
'tipo_review_2010',
'tipo_review_2011',
'tipo_review_2012',
'tipo_review_2013',
'tipo_review_2014',
'tipo_review_2015',
'tipo_review_2016',
'tipo_review_2017',
'tipo_review_2018',
'tipo_review_2019',
'tipo_review_anterior']
In [26]:
# Stride of 24 = number of pub_year buckets per metric (1997-2019 plus
# 'anterior'), so this shows the first flattened label of each metric.
newlabel[::24]
Out[26]:
['citable_doc_2_more_lang_1997',
'citable_en_1997',
'citable_es_1997',
'citable_other_lang_1997',
'citable_pt_1997',
'doc_2_more_lang_1997',
'docs_1997',
'document_en_1997',
'document_es_1997',
'document_other_languages_1997',
'document_pt_1997',
'is_citable_1997',
'tipo_review_1997']
In [27]:
# Replace the two-level (metric, pub_year) column index with the flat labels.
# NOTE: this mutates td in place; re-running the cell that builds `newlabel`
# afterwards would iterate over plain strings instead of 2-tuples, so rebuild
# td from the pivot cell before repeating these steps.
td.columns = newlabel
In [28]:
td.T
Out[28]:
issn
0001-3714
0001-3765
0001-6002
0001-6365
0002-0591
0002-192X
0002-7014
0003-2573
0004-0592
0004-0614
...
2504-3145
2518-4431
2520-9868
2526-8910
2531-0488
2531-1379
2545-7756
2594-1321
2595-3192
2619-6573
citable_doc_2_more_lang_1997
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_1998
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_1999
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_2000
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_2001
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_2002
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_2003
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_2004
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_2005
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_2006
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_2007
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_2008
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_2009
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_2010
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_2011
0
0
7
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_2012
0
0
26
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_2013
0
0
14
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_2014
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_2015
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_2016
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_2017
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_2018
0
0
0
0
0
0
0
0
0
0
...
0
0
0
37
0
0
0
0
57
0
citable_doc_2_more_lang_2019
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_doc_2_more_lang_anterior
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_en_1997
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_en_1998
33
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_en_1999
64
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_en_2000
0
64
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_en_2001
0
74
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
citable_en_2002
0
51
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
is_citable_2015
0
185
36
0
0
0
0
31
0
0
...
0
12
0
0
0
0
6
0
0
0
is_citable_2016
0
200
29
0
0
0
0
29
0
0
...
0
14
0
0
0
0
6
0
0
0
is_citable_2017
0
286
25
0
0
0
0
25
0
0
...
21
8
26
0
0
0
8
0
0
17
is_citable_2018
0
287
17
0
0
0
0
25
0
0
...
0
0
13
57
42
63
5
8
66
10
is_citable_2019
0
1
4
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
is_citable_anterior
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
tipo_review_1997
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
tipo_review_1998
1
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
tipo_review_1999
2
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
tipo_review_2000
0
0
3
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
tipo_review_2001
0
0
5
19
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
tipo_review_2002
0
0
7
14
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
tipo_review_2003
0
0
7
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
tipo_review_2004
0
0
9
7
0
0
0
21
0
0
...
0
0
0
0
0
0
0
0
0
0
tipo_review_2005
0
0
4
8
0
0
0
3
0
4
...
0
0
0
0
0
0
0
0
0
0
tipo_review_2006
0
0
8
8
0
0
0
7
0
1
...
0
0
0
0
0
0
0
0
0
0
tipo_review_2007
0
0
8
3
0
0
0
12
0
13
...
0
0
0
0
0
0
0
0
0
0
tipo_review_2008
0
0
8
4
0
0
1
19
0
32
...
0
0
0
0
0
0
0
0
0
0
tipo_review_2009
0
0
6
20
0
0
0
5
0
0
...
0
0
0
0
0
0
0
0
0
0
tipo_review_2010
0
0
3
7
0
0
0
0
0
13
...
0
0
0
0
0
0
0
0
0
0
tipo_review_2011
0
0
6
0
0
0
0
6
0
0
...
0
0
0
0
0
0
0
0
0
0
tipo_review_2012
0
0
6
0
0
0
0
2
0
0
...
0
0
0
0
0
0
0
0
0
0
tipo_review_2013
0
0
4
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
tipo_review_2014
0
0
2
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
tipo_review_2015
0
0
4
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
tipo_review_2016
0
0
2
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
tipo_review_2017
0
0
5
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
tipo_review_2018
0
0
3
0
0
0
0
0
0
0
...
0
0
0
9
1
3
0
0
15
0
tipo_review_2019
0
0
2
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
tipo_review_anterior
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
312 rows × 1540 columns
In [29]:
# Nothing visible in this notebook creates the target directory, and to_csv
# does not create missing parent directories, so make sure it exists first.
os.makedirs("output", exist_ok=True)
td.to_csv("output/td_documents_languages_network.csv")
# td.to_csv("output/td_documents_languages_bra_190123.csv")
In [30]:
# Elapsed wall-clock time since `start` was captured in the first cell.
print(f"Notebook processing duration: {datetime.utcnow() - start}")
Notebook processing duration: 0:00:09.775706
Content source: scieloorg/journals-catalog
Similar notebooks: