Affiliations between authors and journals in the 2-mode NASA Astrophysics Data Systems dataset.

Find small sub-networks of important authors that are frequently collaborating together. In doing so we can also see which journals stand out as focal points for these types of collaborations.


In [44]:
import networkx as nx
import os
import ads as ads 
import json as json
import matplotlib.pyplot as plt
import pandas as pd
from networkx.algorithms import bipartite as bi
from networkx.readwrite import json_graph

In [3]:
os.environ["ADS_DEV_KEY"] = "kNUoTurJ5TXV9hsw9KQN1k8wH4U0D7Oy0CJoOvyw"

In [4]:
ads.config.token = 'ADS_DEV_KEY'

In [5]:
#Search for papers (200 most cited) on asteroseismology
papers1 = list(ads.SearchQuery(q= "asteroseismology", sort="citation_count", max_pages=4 ))

In [6]:
# find author names
a = []
for i in papers1:
    authors1 = i.author
    a.append(authors1)
author_names = a


//anaconda/lib/python3.5/site-packages/ads/utils.py:23: UserWarning: You are lazy loading attributes via 'author', and so are making multiple calls to the API. This will impact your overall rate limits.
  UserWarning,

In [7]:
# find the journals
j = []
for i in papers1:
    journals1 = i.pub
    j.append(journals1)
journals = j


//anaconda/lib/python3.5/site-packages/ads/utils.py:23: UserWarning: You are lazy loading attributes via 'pub', and so are making multiple calls to the API. This will impact your overall rate limits.
  UserWarning,

In [8]:
# create an initial df
df = pd.DataFrame({'Author_Names' : author_names,
 'Journal':journals
  })

In [9]:
# Expand the df with melt
s1 = df.apply(lambda x: pd.Series(x['Author_Names']),axis=1).stack().reset_index(level=1, drop=True)
s1.name = 'Author_Name'
df_m = df.drop('Author_Names', axis=1).join(s1)
df_m.head()


Out[9]:
Journal Author_Name
0 Annual Review of Astronomy and Astrophysics Asplund, Martin
0 Annual Review of Astronomy and Astrophysics Grevesse, Nicolas
0 Annual Review of Astronomy and Astrophysics Sauval, A. Jacques
0 Annual Review of Astronomy and Astrophysics Scott, Pat
1 Solar Physics Scherrer, P. H.

In [10]:
df_m.Journal.unique()


Out[10]:
array(['Annual Review of Astronomy and Astrophysics', 'Solar Physics',
       'Cosmic Abundances as Records of Stellar Evolution and Nucleosynthesis',
       'Instrumentation in Astronomy VIII',
       'Communications in Asteroseismology',
       'Monthly Notices of the Royal Astronomical Society',
       'The Astrophysical Journal',
       'Astronomy and Astrophysics Supplement Series',
       'Publications of the Astronomical Society of the Pacific',
       'Living Reviews in Relativity',
       'The Astrophysical Journal Supplement Series',
       'Astronomy and Astrophysics', 'Astronomy and Astrophysics Review',
       'Classical and Quantum Gravity', 'Physics Reports',
       'The Astronomical Journal', 'Nature', 'ArXiv e-prints',
       'Reviews of Modern Physics', None, 'Space Science Reviews',
       'Acta Astronomica', 'Astrophysics and Space Science',
       'Experimental Astronomy', 'Icarus', 'Science',
       'Annual Review of Nuclear and Particle Science',
       'Proceedings of the National Academy of Science',
       'The CoRoT Mission Pre-Launch Status - Stellar Seismology and Planet Finding',
       'Living Reviews in Solar Physics', 'Delta Scuti and Related Stars',
       'Journal of Computational Physics',
       'A Giant Step: from Milli- to Micro-arcsecond Astrometry',
       'International Journal of Modern Physics A'], dtype=object)

In [11]:
author_nodes = pd.DataFrame(df_m.Author_Name.unique(),columns=['Author_Name'])
author_nodes['node_type'] = 'Author_Name'
journal_nodes = pd.DataFrame(df_m.Journal.unique(), columns=['Journal'])
journal_nodes['node_type'] = 'Journal'

In [12]:
# Build the graph from the node sets and edges
# set bipartite attribute to ensure weighted projection will work
a_nodes = list(author_nodes['Author_Name'])
j_nodes = list(journal_nodes['Journal'])
edge_bunch = [tuple(i) for i in df_m.values]

g = nx.Graph()
g.add_nodes_from(a_nodes,node_type='Author_Name', bipartite=0)
g.add_nodes_from(j_nodes,node_type='Journal', bipartite=1)
g.add_edges_from(edge_bunch)

In [14]:
# find the largest most connected graph
big_subg = [i for i in nx.connected_component_subgraphs(g) if len(i) > 200]
sg_largest = big_subg[0]

In [15]:
# weighted_projections can be applied to this subgraph to separate the two components
Journals,Author_Names = bi.sets(sg_largest)  # split into bipartites

In [16]:
j_proj_sg_largest = bi.weighted_projected_graph(sg_largest, Journals)

In [17]:
a_proj_sg_largest = bi.weighted_projected_graph(sg_largest, Author_Names)

In [18]:
# Island Method 
j = j_proj_sg_largest.edges(data=True) 
a = a_proj_sg_largest.edges(data=True)

In [19]:
# With a min threshold of edge weight = 1, find the nodes with strong relationships within the sub-graphs -SNAS Ch. 4
def trim(g, weight):
    g_temp = nx.Graph()
    edge_bunch2 = [i for i in g.edges(data=True) if i[2]['weight'] > weight]    
    g_temp.add_edges_from(edge_bunch2)
    return g_temp

In [20]:
a_sg_island =  trim(a_proj_sg_largest, 1)
j_sg_island = trim(j_proj_sg_largest,1)

In [21]:
# We now have two islands of the projected authors and journals. 
# degree centrality will help show which nodes are the key to the networks.
a_degree = nx.degree_centrality(a_sg_island)
j_degree = nx.degree_centrality(j_sg_island)
pd.DataFrame.from_dict(a_degree,orient='index').sort_values(0,ascending=False)


Out[21]:
0
The Astrophysical Journal 0.764706
The Astrophysical Journal Supplement Series 0.588235
Astronomy and Astrophysics 0.529412
Science 0.529412
Experimental Astronomy 0.470588
Publications of the Astronomical Society of the Pacific 0.411765
Nature 0.411765
Monthly Notices of the Royal Astronomical Society 0.352941
The Astronomical Journal 0.352941
Annual Review of Astronomy and Astrophysics 0.294118
Solar Physics 0.235294
NaN 0.176471
ArXiv e-prints 0.176471
The CoRoT Mission Pre-Launch Status - Stellar Seismology and Planet Finding 0.117647
Physics Reports 0.117647
Astronomy and Astrophysics Supplement Series 0.117647
Astronomy and Astrophysics Review 0.117647
Proceedings of the National Academy of Science 0.117647

In [22]:
pd.DataFrame.from_dict(j_degree,orient='index').sort_values(0,ascending=False).head()


Out[22]:
0
García, R. A. 0.784946
Appourchaux, T. 0.774194
Miglio, A. 0.774194
Mathur, S. 0.774194
Christensen-Dalsgaard, J. 0.774194

In [23]:
# examine the connected subgraphs of islands
j_connected = [i for i in nx.connected_component_subgraphs(j_proj_sg_largest) if len(i) > 1]
a_connected = [i for i in nx.connected_component_subgraphs(a_proj_sg_largest) if len(i) > 1]

In [24]:
print ("Author Components:", [len(i) for i in a_connected]) 
print ("Journal Components:", [len(i) for i in j_connected])


Author Components: [31]
Journal Components: [1648]

In [25]:
# combining the graphs 
def merge_graph(connected_g):
    g = nx.Graph()
    for h in connected_g:
        g = nx.compose(g,h)
    return g

a_islands = merge_graph(a_connected)
j_islands = merge_graph(j_connected)

In [27]:
%matplotlib inline 
#ugly plots
nx.draw(a_islands, with_labels=True)



In [28]:
nx.draw(j_islands, with_labels=False) # Extra not useful graph



In [41]:
# creating json data
j_json_data = json_graph.node_link_data(j_islands)
a_json_data = json_graph.node_link_data(a_islands)

In [55]:
a_json_data


Out[55]:
{'directed': False,
 'graph': {'name': 'compose( ,  )'},
 'links': [{'source': 0, 'target': 15, 'weight': 1},
  {'source': 18, 'target': 4, 'weight': 2},
  {'source': 18, 'target': 8, 'weight': 2},
  {'source': 18, 'target': 19, 'weight': 2},
  {'source': 18, 'target': 7, 'weight': 1},
  {'source': 18, 'target': 20, 'weight': 4},
  {'source': 18, 'target': 15, 'weight': 1},
  {'source': 18, 'target': 24, 'weight': 1},
  {'source': 1, 'target': 20, 'weight': 1},
  {'source': 1, 'target': 29, 'weight': 1},
  {'source': 20, 'target': 25, 'weight': 1},
  {'source': 20, 'target': 30, 'weight': 12},
  {'source': 20, 'target': 3, 'weight': 4},
  {'source': 20, 'target': 28, 'weight': 1},
  {'source': 20, 'target': 21, 'weight': 1},
  {'source': 20, 'target': 4, 'weight': 34},
  {'source': 20, 'target': 22, 'weight': 1},
  {'source': 20, 'target': 29, 'weight': 1},
  {'source': 20, 'target': 19, 'weight': 30},
  {'source': 20, 'target': 7, 'weight': 17},
  {'source': 20, 'target': 23, 'weight': 4},
  {'source': 20, 'target': 15, 'weight': 29},
  {'source': 20, 'target': 24, 'weight': 21},
  {'source': 2, 'target': 8, 'weight': 1},
  {'source': 2, 'target': 9, 'weight': 6},
  {'source': 2, 'target': 10, 'weight': 1},
  {'source': 2, 'target': 11, 'weight': 16},
  {'source': 2, 'target': 12, 'weight': 1},
  {'source': 2, 'target': 21, 'weight': 2},
  {'source': 2, 'target': 4, 'weight': 37},
  {'source': 2, 'target': 5, 'weight': 3},
  {'source': 2, 'target': 16, 'weight': 1},
  {'source': 2, 'target': 26, 'weight': 1},
  {'source': 2, 'target': 7, 'weight': 32},
  {'source': 2, 'target': 15, 'weight': 1},
  {'source': 2, 'target': 24, 'weight': 20},
  {'source': 2, 'target': 17, 'weight': 1},
  {'source': 3, 'target': 30, 'weight': 2},
  {'source': 19, 'target': 4, 'weight': 18},
  {'source': 19, 'target': 30, 'weight': 3},
  {'source': 19, 'target': 7, 'weight': 10},
  {'source': 19, 'target': 28, 'weight': 1},
  {'source': 19, 'target': 15, 'weight': 10},
  {'source': 19, 'target': 24, 'weight': 11},
  {'source': 19, 'target': 23, 'weight': 2},
  {'source': 21, 'target': 4, 'weight': 6},
  {'source': 21, 'target': 9, 'weight': 2},
  {'source': 21, 'target': 14, 'weight': 1},
  {'source': 21, 'target': 11, 'weight': 2},
  {'source': 21, 'target': 15, 'weight': 1},
  {'source': 21, 'target': 7, 'weight': 4},
  {'source': 21, 'target': 24, 'weight': 1},
  {'source': 4, 'target': 8, 'weight': 2},
  {'source': 4, 'target': 9, 'weight': 5},
  {'source': 4, 'target': 11, 'weight': 14},
  {'source': 4, 'target': 12, 'weight': 1},
  {'source': 4, 'target': 5, 'weight': 1},
  {'source': 4, 'target': 6, 'weight': 1},
  {'source': 4, 'target': 26, 'weight': 2},
  {'source': 4, 'target': 7, 'weight': 131},
  {'source': 4, 'target': 16, 'weight': 2},
  {'source': 4, 'target': 15, 'weight': 32},
  {'source': 4, 'target': 24, 'weight': 72},
  {'source': 4, 'target': 17, 'weight': 1},
  {'source': 5, 'target': 11, 'weight': 2},
  {'source': 5, 'target': 26, 'weight': 1},
  {'source': 5, 'target': 12, 'weight': 1},
  {'source': 5, 'target': 7, 'weight': 1},
  {'source': 5, 'target': 24, 'weight': 2},
  {'source': 5, 'target': 17, 'weight': 1},
  {'source': 6, 'target': 11, 'weight': 1},
  {'source': 6, 'target': 7, 'weight': 1},
  {'source': 6, 'target': 24, 'weight': 1},
  {'source': 7, 'target': 8, 'weight': 1},
  {'source': 7, 'target': 9, 'weight': 7},
  {'source': 7, 'target': 12, 'weight': 1},
  {'source': 7, 'target': 26, 'weight': 1},
  {'source': 7, 'target': 11, 'weight': 20},
  {'source': 7, 'target': 15, 'weight': 25},
  {'source': 7, 'target': 16, 'weight': 2},
  {'source': 7, 'target': 24, 'weight': 66},
  {'source': 7, 'target': 17, 'weight': 1},
  {'source': 24, 'target': 8, 'weight': 1},
  {'source': 24, 'target': 9, 'weight': 4},
  {'source': 24, 'target': 12, 'weight': 1},
  {'source': 24, 'target': 26, 'weight': 1},
  {'source': 24, 'target': 11, 'weight': 17},
  {'source': 24, 'target': 15, 'weight': 29},
  {'source': 24, 'target': 16, 'weight': 1},
  {'source': 24, 'target': 17, 'weight': 1},
  {'source': 8, 'target': 27, 'weight': 1},
  {'source': 9, 'target': 11, 'weight': 4},
  {'source': 26, 'target': 11, 'weight': 1},
  {'source': 26, 'target': 15, 'weight': 2},
  {'source': 26, 'target': 12, 'weight': 1},
  {'source': 26, 'target': 17, 'weight': 1},
  {'source': 12, 'target': 11, 'weight': 1},
  {'source': 12, 'target': 17, 'weight': 1},
  {'source': 13, 'target': 15, 'weight': 1},
  {'source': 30, 'target': 15, 'weight': 1},
  {'source': 11, 'target': 17, 'weight': 1}],
 'multigraph': False,
 'nodes': [{'bipartite': 1,
   'id': 'Living Reviews in Relativity',
   'node_type': 'Journal'},
  {'bipartite': 1,
   'id': 'Communications in Asteroseismology',
   'node_type': 'Journal'},
  {'bipartite': 1,
   'id': 'Publications of the Astronomical Society of the Pacific',
   'node_type': 'Journal'},
  {'bipartite': 1,
   'id': 'Astronomy and Astrophysics Supplement Series',
   'node_type': 'Journal'},
  {'bipartite': 1, 'id': 'The Astrophysical Journal', 'node_type': 'Journal'},
  {'bipartite': 1, 'id': None, 'node_type': 'Journal'},
  {'bipartite': 1, 'id': 'Icarus', 'node_type': 'Journal'},
  {'bipartite': 1,
   'id': 'The Astrophysical Journal Supplement Series',
   'node_type': 'Journal'},
  {'bipartite': 1, 'id': 'Physics Reports', 'node_type': 'Journal'},
  {'bipartite': 1, 'id': 'The Astronomical Journal', 'node_type': 'Journal'},
  {'bipartite': 1,
   'id': 'Instrumentation in Astronomy VIII',
   'node_type': 'Journal'},
  {'bipartite': 1, 'id': 'Nature', 'node_type': 'Journal'},
  {'bipartite': 1, 'id': 'Reviews of Modern Physics', 'node_type': 'Journal'},
  {'bipartite': 1, 'id': 'Acta Astronomica', 'node_type': 'Journal'},
  {'bipartite': 1,
   'id': 'Living Reviews in Solar Physics',
   'node_type': 'Journal'},
  {'bipartite': 1,
   'id': 'Monthly Notices of the Royal Astronomical Society',
   'node_type': 'Journal'},
  {'bipartite': 1,
   'id': 'Proceedings of the National Academy of Science',
   'node_type': 'Journal'},
  {'bipartite': 1,
   'id': 'Astrophysics and Space Science',
   'node_type': 'Journal'},
  {'bipartite': 1, 'id': 'Solar Physics', 'node_type': 'Journal'},
  {'bipartite': 1, 'id': 'Experimental Astronomy', 'node_type': 'Journal'},
  {'bipartite': 1, 'id': 'Astronomy and Astrophysics', 'node_type': 'Journal'},
  {'bipartite': 1,
   'id': 'Annual Review of Astronomy and Astrophysics',
   'node_type': 'Journal'},
  {'bipartite': 1,
   'id': 'A Giant Step: from Milli- to Micro-arcsecond Astrometry',
   'node_type': 'Journal'},
  {'bipartite': 1,
   'id': 'The CoRoT Mission Pre-Launch Status - Stellar Seismology and Planet Finding',
   'node_type': 'Journal'},
  {'bipartite': 1, 'id': 'Science', 'node_type': 'Journal'},
  {'bipartite': 1,
   'id': 'Journal of Computational Physics',
   'node_type': 'Journal'},
  {'bipartite': 1,
   'id': 'Astronomy and Astrophysics Review',
   'node_type': 'Journal'},
  {'bipartite': 1,
   'id': 'Annual Review of Nuclear and Particle Science',
   'node_type': 'Journal'},
  {'bipartite': 1,
   'id': 'Cosmic Abundances as Records of Stellar Evolution and Nucleosynthesis',
   'node_type': 'Journal'},
  {'bipartite': 1,
   'id': 'Delta Scuti and Related Stars',
   'node_type': 'Journal'},
  {'bipartite': 1, 'id': 'ArXiv e-prints', 'node_type': 'Journal'}]}

In [50]:
# write the files
# a_islands
with open('a_json_data.json', 'w') as outfile1:
    outfile1.write(json.dumps(json_graph.node_link_data(a_islands)))
# j_islands
with open('j_json_data.json', 'w') as outfile1:
    outfile1.write(json.dumps(json_graph.node_link_data(j_islands)))

In [57]:
# I want to add to the nodes:
# "x": 0,
# "y": 0,
# "size": 3  
# "centrality": 

# Try adding x
with open('a_json_data.json') as json_file:
    json_data = json.load(json_file)
links = json_data['links']
for link in links:
    links['x'] = '0'
with tempfile.NamedTemporaryFile(dir='.', delete=False) as temp_file:
    json.dump(temp_file, json_data)
os.replace(temp_file.name, 'a_json_data.json')


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-57-7a1c503b53e8> in <module>()
     11 links = json_data['links']
     12 for link in links:
---> 13     links['x'] = '0'
     14 with tempfile.NamedTemporaryFile(dir='.', delete=False) as temp_file:
     15     json.dump(temp_file, json_data)

TypeError: list indices must be integers or slices, not str

In [ ]:


In [ ]:


In [ ]: