In [1]:
import numpy as np
import scipy as sc
import pandas as pd
import json
import blaze
import os
import glob
from prettyprint import pp

In [2]:
all_json_dirs = glob.glob('JSONs/*.json')

In [3]:
all_jsons = []
for jdir in all_json_dirs:
    with open(jdir, 'r') as f:
        jsn = json.load(f)
        all_jsons.append(jsn)
print len(all_jsons)


485

In [4]:
pp(all_jsons[0])


{
    "authors": [
        {
            "name": "Heinz Guderian", 
            "url": "http://www.goodreads.com/author/show/148433.Heinz_Guderian"
        }
    ], 
    "average": 3.94, 
    "cover": "http://d.gr-assets.com/books/1180640987l/1060412.jpg", 
    "description": ""Heinz Guderian's most famous book is really about armored fighting vehicles, but its enduring value comes from its approach....Guderian looked at armor from a variety of perspectives....The text is remarkably accessible....The many photographs included in the 1937 German edition are included, as are the excellent maps and, perhaps best of all, the little silhouettes used in the original to end each chapter."-- "Marine Corps Gazette. "No armored enthusiast should be without a copy of this book."-- "Aus Europa.208 pages, 40 b/w illus., 6 1/4 x 9 1/4.", 
    "outlinks": [
        "http://www.goodreads.com/book/show/1558779.Panzer_Battles", 
        "http://www.goodreads.com/book/show/152175.Lost_Victories", 
        "http://www.goodreads.com/book/show/1226071.The_Battle_of_Kursk", 
        "http://www.goodreads.com/book/show/1054645.The_Franco_Prussian_War", 
        "http://www.goodreads.com/book/show/958743.Panzer_Commander", 
        "http://www.goodreads.com/book/show/671657.Black_Edelweiss", 
        "http://www.goodreads.com/book/show/377287.Sniper_on_the_Eastern_Front", 
        "http://www.goodreads.com/book/show/796809.Tigers_In_The_Mud", 
        "http://www.goodreads.com/book/show/618453.In_Deadly_Combat", 
        "http://www.goodreads.com/book/show/250838.Infantry_Attacks", 
        "http://www.goodreads.com/book/show/1510241.Hitler_Moves_East_1941_1943", 
        "http://www.goodreads.com/book/show/389341.The_Campaigns_of_Napoleon", 
        "http://www.goodreads.com/book/show/115133.A_Stranger_to_Myself", 
        "http://www.goodreads.com/book/show/1508654.Decision_in_Normandy", 
        "http://www.goodreads.com/book/show/482109.Blitzkrieg", 
        "http://www.goodreads.com/book/show/748526.Barbarossa", 
        "http://www.goodreads.com/book/show/1498079.To_Lose_a_Battle", 
        "http://www.goodreads.com/book/show/55649.Soldat", 
        "http://www.goodreads.com/book/show/254024.Panzer_Leader", 
        "http://www.goodreads.com/book/show/1060413.Erinnerungen_Eines_Soldaten", 
        "http://www.goodreads.com/book/show/9566105-blitzkreig-in-their-own-words", 
        "http://www.goodreads.com/book/show/18945171-heinz-guderian", 
        "http://www.goodreads.com/book/show/21851367-guderian-panzer-leader"
    ], 
    "ratings": 318, 
    "reviews": 22, 
    "title": "Achtung-Panzer!: The Development of Armoured Forces, Their Tactics and Operational Potential", 
    "url": "http://www.goodreads.com/book/show/1060412.Achtung_Panzer_", 
    "userreviews": [
        {
            "userName": "Olethros", 
            "userReview": "-En palabras del propio autor, “narrar el desarrollo del tanque desde el punto de vista del soldado que lo utiliza”.-Género. Ensayo.Lo que nos cuenta. Primero, una revisión de la concepción, actividad operativa en el campo de batalla y desarrollo de los tanques durante la Gran Guerra. Después, análisis de las innovaciones técnicas y tácticas del arma blindada (y de sus contramedidas) durante la postguerra. A continuación, una revisión de las fuerzas motorizadas y blindadas alemanas bajo las condiciones del Diktat de Versalles. Por último, exposición de postulados sobre la forma de combate de las tropas blindadas (analizando situaciones reales pasadas), su combinación con otras armas y la valoración de potenciales situaciones contemporáneas al autor.¿Quiere saber más de este libro, sin spoilers? Visite:http://librosdeolethros.blogspot.com/...", 
            "userReviewDate": "Nov 14, 2014", 
            "userURL": "http://www.goodreads.com/user/show/18503124-olethros"
        }, 
        {
            "userName": "Christian", 
            "userReview": "Not bad. Essentially, a tactician's textbook. Written pre-WWII, when tanks were still very much in their infancy. Guderian was ahead of all the leading foreign military "armoured specialists", in the way armour was to be employed in the field, and went on to prove it in Germany's early successes of the war. Reading through some of the theories and strategies could, at times, be a little tedious. Being a history buff, I found the best parts to be the actual recounting of tank battles in WWI, how they failed/succeeded, and what could be learnt from them. What struck me most about this work is that it was written in a time when tanks were, at least by some, considered secondary to infantry. Tanks had only been used in relateively small numbers during WWI (compared with the actions that were to be seen in the following war), and were introduced later in the war (August, 1916). And these first tanks were slow, prone to mechanical failure, and difficult to handle over rough terrain. Consequently, the interwar years tanks were an untried weapon leading up to the outbreak of WWII, and the "Blitzkrieg" campaigns owed much of their success to (apart from Guderian) these fast, mechanically reliable, versitile machines, and the tank was never doubted again.", 
            "userReviewDate": "Aug 04, 2011", 
            "userURL": "http://www.goodreads.com/user/show/5957700-christian"
        }, 
        {
            "userName": "Mike Harbert", 
            "userReview": "This book is not for the casual reader of World War II history.  Written between the wars,  this is Guderian's account of the development of German armored tactics which would later be labeled as "Blitzkreig" For the more serious WWII scholar,  Guderian's book is essential reading that should sit beside Rommel's "Infantry Attacks".", 
            "userReviewDate": "Aug 18, 2012", 
            "userURL": "http://www.goodreads.com/user/show/11624072-mike-harbert"
        }, 
        {
            "userName": "Larry", 
            "userReview": "If ever you find a tank parked in your driveway and suddenly develop a need to invade a small country.. this is the book for you.", 
            "userReviewDate": "Feb 13, 2011", 
            "userURL": "http://www.goodreads.com/user/show/3440402-larry"
        }, 
        {
            "userName": "Mjl", 
            "userReview": "The Great War examples were astonishing and clearly pointed out how and why the tanks were invented and how they were misused at that point. This then reveals the thought process behind the combined arms approach to (tank) warfare.In the "what goes on now" part one can see pretty clearly some of the forecoming developments, vehicle-wise at least.A very, very interesting read. I had a good amount of "of course that's the way it should work!" moments while reading about the "modern panzer tactics" and I guess that's just proof of how advanced Guderian's work was, as it pretty much holds true today. As far as I know, that is.", 
            "userReviewDate": "Aug 05, 2014", 
            "userURL": "http://www.goodreads.com/user/show/3412476-mjl"
        }, 
        {
            "userName": "Witek", 
            "userReview": "A very far-sighted analysis of the panzer weapons capabilities. One of the most talented military commanders of the 20th century begins with thorough dissection of WW1 positional warfare, proceeds to analyse allied tank attacks at the Somme, Cambrai and Soisson and then prognoses, how the future of tanks and war in general looks. His insights were proved very soon after this book was published- and when german tanks overran Poland, France, and a big chunk of USSR, Germany had mainly Guderian to thank for it.", 
            "userReviewDate": "Jan 21, 2015", 
            "userURL": "http://www.goodreads.com/user/show/14170317-witek"
        }, 
        {
            "userName": "Hugo De oliveira", 
            "userReview": "I was inside the mind of a genious.This isn´t an easy book to read, I tought that it is a bit boring, specially for someone that doesn´t like military strategy and it isn´t familiarize with the theme. Also it would be great to update the maps of the battles.Unfortunately for mankind, Guderian created, at the time, the best army in the world thanks to it´s revolutionary theories about the use of tanks and the creation of a mecanized independent tank brigade and the close cooperation between the panzer groups and the other military forces, creating the famous Blitzkrieg tatics that allowed german army to defeat the French army.Guderian made a superior analisys of the reasons for the collapse of the WW I german frontlines after the introduction of tanks by France, UK and USA, and explained why some of the advantages created by their use were not use in the best way, specially because the tank forces were subordinate to the infantry, were used only as a close support for infantry, conducted by officers from other army forces that were not experienced in their comand, loosing all the offensive push and tactical advantage and the fact that they were, at the begining, use in small numbers.Despite this, they were decisive in the battlefields, specially in the battles of Cambrai and Amiens in 1918, that forced Germany to request the armistice and ended the war.He also pointed out that despite the evidences in 1916 that the German army wasn´t prepare to face this new weapon, they did nothing to prevent it, specially in what concerned the creation of counter measures to face this new threat (anti-tank weapons, direct artillery fire, tanks) mostly because of the german army doctrine (and also in some allied forces) that the king of the battlefield was the infantry, and the persistence in using cavalry, when they were, at the time, completely outdated thanks to the machine guns.Thanks to Guderian revolutionary analysis and ideas, the german army was umbeatable in the battlefields until they invaded USSR in Operation Barbarossa, in 22 June 1941 and the declaration of war to the USA in December of the same year.", 
            "userReviewDate": "Aug 12, 2014", 
            "userURL": "http://www.goodreads.com/user/show/16660371-hugo-de-oliveira"
        }, 
        {
            "userName": "J.G. Cully", 
            "userReview": "It is interesting reading a book about history, that is itself, history.  Published just before the second world war, this exceptional book is Heinz Guderian's play book for armoured warfare.  He in the text he first of all goes into exact detail on a number of battles from the First World War.   Not necessarily the famous one's but battles that slowly build up a picture of the First World War methods used by the then infant tank troops.  Then, he takes us through the developments after the First World War, concentrating on how Britian, France and Russia adapted the armoured warfare concept.  Finally, he states the way he believes the next war should be fought.  That of using the tank as the primary weapon to achieve victory.It is very prophetic reading through his proposals and seeing just how many were applied by the Panzer forces, almost to the letter.  In turn, some of his predictions on how enemies would react are spot-on.  It is no wonder, reading through this book, that the Germany tank forces and indeed military as a whole were so successful during the opening years of the Second World War.For military buff's, this is a must read and exceptionally well translated.  An excellent book.", 
            "userReviewDate": "Oct 15, 2014", 
            "userURL": "http://www.goodreads.com/user/show/24154806-j-g-cully"
        }, 
        {
            "userName": "Charles", 
            "userReview": "Written in 1937, this book is an interesting study of "what went wrong" in World War I, which is Guderian's focus throughout the text, as he probes different strategies that failed and succeeded. The text is certainly phrophetic with the rise of armoured vehicles' importance in modern warfare with their emphasis on speed, armour, and radios (communication and coordination), which is quite the leap from trenches and fixed artillery positions from years before. Not only that, but two years after this book was published World War II started and the same tactics Guderian outlines in his text are used. A lot of technical details make this read not necessarily a "pleasure" read, but scholars and history buffs will love Guderian's in-depth analysis (also, the ending of the book ends on an interesting flourish commonly found in ancient Greek texts, where the writer invokes the favour of the gods for victory, revealing Guderian's classical education, common for officers in that era, adding a personal touch to this purposeful study). This book is well worth it if you have an interest in World War I and World War II history.", 
            "userReviewDate": "Aug 30, 2013", 
            "userURL": "http://www.goodreads.com/user/show/23376658-charles"
        }, 
        {
            "userName": "Jacob", 
            "userReview": "I forced myself to read this before reading Guderian's 'Panzer Leader', just to give myself a little more background or insight into the author. I'm very happy that I did.'Achtung Panzer' shows how Guderian brings about his concept of how to use armour (or any new technology to gain the upper hand) for future conflicts; the blueprint for the so-called 'blitzkrieg' or combined arms approach, with an emphasis on armoured/mechanized units leading the way.It starts off rather slow, but, I guess, necessarily, with accounts of the Great War, interspersed with comments on how things should have been done/could have been done better.It's perhaps the last 4th of the book that contains the more straightforward discussion of what is needed to create such a force and how to employ it (using surprise and terrain, and deploying the force 'en masse' at a decisive point, and his insistence on the use of radios, being the most memorable ones for me).", 
            "userReviewDate": "Aug 13, 2008", 
            "userURL": "http://www.goodreads.com/user/show/1423784-jacob"
        }, 
        {
            "userName": "Dave Walls", 
            "userReview": "An excellent history and study of early tank warfare. Would have been four stars but for the helpful notes, context, and corrections by the English language editor. Guderian does a great job of examining WW 1 tank operations and explaining the lessons of what went right and wrong in those battles. He then applies those lessons to the then-current (mid-late 1930s) political situation to argue how armor warfare should be fought. Guderian's arguments are clear, concise, and prophetic. When the Germans followed his outline, they rolled over their enemies.", 
            "userReviewDate": "Jul 31, 2014", 
            "userURL": "http://www.goodreads.com/user/show/12676069-dave-walls"
        }, 
        {
            "userName": "Olethros", 
            "userReview": "-En palabras del propio autor, “narrar el desarrollo del tanque desde el punto de vista del soldado que lo utiliza”.-Género. Ensayo.Lo que nos cuenta. Primero, una revisión de la concepción, actividad operativa en el campo de batalla y desarrollo de los tanques durante la Gran Guerra. Después, análisis de las innovaciones técnicas y tácticas del arma blindada (y de sus contramedidas) durante la postguerra. A continuación, una revisión de las fuerzas motorizadas y blindadas alemanas bajo las condiciones del Diktat de Versalles. Por último, exposición de postulados sobre la forma de combate de las tropas blindadas (analizando situaciones reales pasadas), su combinación con otras armas y la valoración de potenciales situaciones contemporáneas al autor.¿Quiere saber más de este libro, sin spoilers? Visite:http://librosdeolethros.blogspot.com/...", 
            "userReviewDate": "May 23, 2013", 
            "userURL": "http://www.goodreads.com/user/show/18503124-olethros"
        }, 
        {
            "userName": "Chris", 
            "userReview": "Heinz Guderian was a pioneer in the development of armored warfare and the most important military strategist in the first half of the 20th century. When distilled down to its most basic components, Guderian was an innovator in the application of a relatively new technology (though we tend not to view these things through that lens). Achtung-Panzer! is a military classic and a good read. As a primmer, I would recommend "Kaiserschlacht 1918" by  Randal Gray from Osprey Publishing. The  Ludendorff Offensive represented a paradigm shift in war-fighting, and helped shape Guderian's views on the importance of armor, maneuver warfare, adaptability and initiative.  Heinz Guderian's autobiography is a thick read and should be avoided for all but the history and strategy buffs.", 
            "userReviewDate": "Aug 18, 2013", 
            "userURL": "http://www.goodreads.com/user/show/22367990-chris"
        }, 
        {
            "userName": "Paul", 
            "userReview": "A solid primary source for those interested in learning about the pre-WWII evolution of German armoured theory. However, enthusiasts should take note that much of the book centres on Guderian's analysis of Allied tank usage during the Great War, so be prepared for an historical lesson within an historical lesson. :-)Also, be aware that carrying this book around in public makes you look like a neo-nazi. Steal the book-cover off something else (preferably something socially acceptable like the newest addition to the YA fiction craze) if you're going to read this on the bus.", 
            "userReviewDate": "Nov 03, 2013", 
            "userURL": "http://www.goodreads.com/user/show/7053529-paul"
        }, 
        {
            "userName": "Dan", 
            "userReview": "Quite possibly the seminal book in 20th Century War Studies because out of it came the then-most fearful war machine in the history of the world.  The key to this book is in understanding Guderian's broad theory: mobilization, coordination and communication.  If you don't know anything about WWII, steer clear of this one.  If you do, then this might interest you.  If you're an armoured warfare buff, this is a must-read.", 
            "userReviewDate": "Nov 10, 2007", 
            "userURL": "http://www.goodreads.com/user/show/612689-dan"
        }, 
        {
            "userName": "Jean-Vincent", 
            "userReview": "I wouldn't say I "really liked" Guderian's Achtung Panzer, but this is such a fundamental book for anyone interested in the development of mechanized warfare in the inter war period that it deserves this 4-star mark.One can see there the core of Guderian's main ideas on C3, combined arms cooperation and mobility as the decisive factors in armored operations.", 
            "userReviewDate": "Apr 28, 2010", 
            "userURL": "http://www.goodreads.com/user/show/3641901-jean-vincent"
        }, 
        {
            "userName": "Martin Landry", 
            "userReview": "I read this book in English, the Brockhampton Press edition, and found it to be a fascinating read, every bit the book others promised it to be. A standard reference for more than half a century, this book must be on your bookshelf if you have any interest in armoured warfare.  It may even improve your skill at Avalon Hill panzer games.", 
            "userReviewDate": "May 29, 2013", 
            "userURL": "http://www.goodreads.com/user/show/19978607-martin-landry"
        }, 
        {
            "userName": "Terence Hiscock", 
            "userReview": "Guderian is very in depth of how armored forces were developed in World War 1, and even expresses the importance of armored divisions (rather than infantry combined with armored).Any military Historian should read this great book!", 
            "userReviewDate": "Nov 27, 2009", 
            "userURL": "http://www.goodreads.com/user/show/2986956-terence-hiscock"
        }, 
        {
            "userName": "Bill Potter", 
            "userReview": "A fantastic book only nominally about tank warfare. It is actually about integrating new technology. Fantastic. A must read.", 
            "userReviewDate": "Oct 18, 2014", 
            "userURL": "http://www.goodreads.com/user/show/20718790-bill-potter"
        }, 
        {
            "userName": "Jeroen", 
            "userReview": "Very good insights in how tanks were used in WW1 and how that influenced the tactics used by the germans in WW2.", 
            "userReviewDate": "Oct 05, 2009", 
            "userURL": "http://www.goodreads.com/user/show/2663890-jeroen"
        }, 
        {
            "userName": "Peter", 
            "userReview": "Guderian was a pretty good military writer, and a handsome devil. Too bad he fought for the Nazis and all.", 
            "userReviewDate": "Jun 05, 2009", 
            "userURL": "http://www.goodreads.com/user/show/1418009-peter"
        }, 
        {
            "userName": "Patrick", 
            "userReview": "a true must read for anyone interested in tank warfare.", 
            "userReviewDate": "Apr 20, 2008", 
            "userURL": "http://www.goodreads.com/user/show/1099833-patrick"
        }
    ]
}

In [5]:
links_set = set()
for js in all_jsons:
    links_set.add(js["url"])
    for l in js["outlinks"]:
        links_set.add(l)
print len(links_set)
links = list(links_set)


7303

In [6]:
adjacency = np.zeros((len(links_set), len(links_set)))
for js in all_jsons:
    node_idx = links.index(js["url"])
    for l in js["outlinks"]:
        out_idx = links.index(l)
        adjacency[node_idx, out_idx] += 1
print np.sum(adjacency)


12602.0

In [8]:
def normalize(adjmat, tele_const = 0.2):
        """
        This method will try and normalize the adjacency matrix, so that it will be suitable for the PageRank algorithm. Using the teleporting constant it will remove the effect of deadends in the PageRank algorithm.

        Parameters
        ----------
        adjmat: numpy array
                a square adjacency matrix
        tele_const: float
                    teleporting constant for the PageRank algorithm (P' = (1-alpha)*P + alpha*v)

        Returns
        -------
        mat: numpy array
             an square matrix of size equal to the adjmat matrix and normalized
        """

        mat = np.zeros(adjmat.shape)
        cols = adjmat.shape[0]
        deadend_const = 1.0 / cols
        for i in range(cols):
            s = np.sum(adjmat[i,:])
            if s == 0:
                mat[i,:] = deadend_const
            else:
                mat[i,:] = adjmat[i,:] / s
            mat[i,:] = mat[i,:] * (1 - tele_const) + deadend_const * tele_const
        return mat

In [9]:
normmat = normalize(adjacency)
sw, sv = sc.sparse.linalg.eigs(normmat.T, k=1, which='LR')
print np.sum(sv), sw, np.linalg.norm(sv)
print np.sum(normmat)
print sv
sv = np.abs(sv)
sv /= np.sum(sv)
print np.abs(sv[sv.argmax()])[0], sv.argmax()
print links[sv.argmax()]


(-85.031292218+0j) [ 1.+0.j] 1.0
7303.0
[[-0.01120042+0.j]
 [-0.01131719+0.j]
 [-0.01132907+0.j]
 ..., 
 [-0.01147637+0.j]
 [-0.01194146+0.j]
 [-0.01225460+0.j]]
0.000415273404696 7206
http://www.goodreads.com/book/show/1413465.Badger_in_the_Basement

In [10]:
rsv = np.abs(sv)
print rsv.sum()
rsv /= rsv.sum()
print rsv.sum()
print np.linalg.norm(rsv)


1.0
1.0
0.0117603763734

In [11]:
tsv = sv.T[0]
tsv[0]


Out[11]:
0.00013172114007759732