Targeted Data-Analysis

After exploring the data we aim to create qualitative and quantitative Information from it. Here we will show which Entities have relations to which organisation and show statistics for the entire collection.


In [2]:
#import of the needed libraries
from pymongo import MongoClient
from bson.objectid import ObjectId

#connection to the local mongodb
client = MongoClient()
#select the db lobbyradar 
db = client.lobbyradar

#the two needed collections
Entities = db.entities
Relations = db.relations

In [3]:
def show_relations_of_type(relation_type, display = 5):
    ''' 
    
    person or entity (organization) and 
    '''
    relation_cursor = Relations.find({'type': relation_type})
    general_count = 0
    person_to_org = 0
    org_to_org = 0
    errors = 0

    # iterates over all relations with the given type
    for relation in relation_cursor:
        # remove faulty relations (which have less than two entities):
        if len(relation['entities']) < 2:
            errors += 1
            continue
            
        source_id = relation['entities'][0]
        target_id = relation['entities'][1]
        source = Entities.find_one({'_id': ObjectId(source_id)})
        target = Entities.find_one({'_id': ObjectId(target_id)})
        # check the type of the subject and object of a relation and
        # counts the combinations: person to organization,  organization to organization
        # (the object should always be an organization)
        if source and target:
            if source['type'] == 'person' and target['type'] == 'entity':
                person_to_org += 1
            elif source['type'] == 'entity' and target['type'] == 'entity':
                org_to_org += 1
            else:
                errors += 1
            if general_count < display:
                print source['name'] + ' (' + source['type'] + ')' + " has relation " + relation_type + ' to ' + target['name'] + ' (' + target['type'] + ')'
                print "\n"
        general_count += 1
    # display a specifiable number of entries (default 5)
    print "statisics for relation type: " + relation_type
    print "count relation: " + str(general_count)
    print "\t person to organization: " + str(person_to_org)
    print "\t organization to organization: " + str(org_to_org)
    print "\t errors: " + str(errors)

In [4]:
show_relations_of_type('member', 4)


Katrin Albsteiger (person) has relation member to CDU/CSU-Fraktion (entity)


Stephan Albani (person) has relation member to CDU/CSU-Fraktion (entity)


Kerstin Andreae (person) has relation member to Bundestagsfraktion der Grünen (entity)


Peter Altmaier (person) has relation member to CDU/CSU-Fraktion (entity)


statisics for relation type: member
count relation: 4287
	 person to organization: 4098
	 organization to organization: 118
	 errors: 50