In [7]:
import psycopg2
from dateutil import parser

# Output column order for the CSV: publication identifiers, the three reader
# counts, then post counts per source, with the policy count last.
columns=['altmetric_id','DOI','ISSN','mendeley','citeulike','connotea','twitter','reddit','facebook','googleplus','blogs','news','video','wikipedia','q&a',
         'pinterest','weibo','peer_reviews','policy']

with open("D:\\NIU\\Data Mining 2\\Project\\Connection_Details.txt",'r') as con_file:
    conDetails=con_file.readlines()
    hostname=conDetails[0].strip('\n')
    username=conDetails[1].strip('\n')
    password=conDetails[2].strip('\n')
    database=conDetails[3].strip('\n')

myConnection = psycopg2.connect( host=hostname, user=username, password=password, dbname=database )

if myConnection:
    print("connection successful!!!")


connection successful!!!
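
For reference, Connection_Details.txt is expected to hold exactly four lines, in this
order: host, user, password, database name. The values below are placeholders, not
real credentials:

    my-db-host
    my-db-user
    my-db-password
    my-db-name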

In [8]:
def doQuery(conn):
    # Pull the altmetric id, DOI, ISSN list, reader counts and the full 'posts'
    # object out of the jsonb 'publication' column of the policy table.
    # Note: -> returns jsonb (which psycopg2 deserializes into Python dicts and
    # lists), while ->> returns text, so the DOI arrives as a plain string.
    cur = conn.cursor()
    cur.execute("SELECT publication -> 'altmetric_id', publication -> 'citation' ->> 'doi', publication -> 'citation' -> 'issns', publication -> 'counts' -> 'readers', "
                "publication -> 'posts' FROM policy")

    with open("D:\\NIU\\Data Mining 2\\Project\\PolicyData.csv", 'w+', encoding='utf-8') as newf:
        header = ''
        for col in columns:
            header += col + ','
        newf.write(header[:-1] + '\n')

        for alt_id, doi, issns, readers, posts in cur.fetchall():
            new_record = ''
            posts_counts = {}  # per-source post counts, reset for every record

            # Count the policy mentions and find the date of the earliest one.
            policy_count = 0
            policy_appeared_dates = []
            for p in posts['policy']:
                policy_count += 1
                policy_appeared_dates.append(parser.parse(p['posted_on']))
            first_mentioned_on_policy = min(policy_appeared_dates)

            # For every other source, count only the posts that appeared
            # before the first policy mention.
            for post in posts:
                if post != 'policy':
                    posts_counts[post] = 0
                    for p in posts[post]:
                        posted_date = parser.parse(p['posted_on'])
                        if posted_date < first_mentioned_on_policy:
                            posts_counts[post] += 1
            # Identity columns, then the ISSNs (';'-separated, keeping only
            # values of plausible length), then the three reader counts.
            issn_field = ';'.join(str(issn) for issn in issns if len(issn) >= 8)
            new_record += str(alt_id) + ',' + str(doi) + ',' + issn_field + ','
            new_record += str(readers['mendeley']) + ',' + str(readers['citeulike']) + ',' + str(readers['connotea']) + ','

            # Post counts for twitter .. peer_reviews in column order; sources
            # absent from this record are left blank. policy_count goes last.
            for i in range(6, len(columns) - 1):
                if columns[i] in posts_counts:
                    new_record += str(posts_counts[columns[i]]) + ','
                else:
                    new_record += ','
            new_record += str(policy_count) + '\n'

            newf.write(new_record)
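
The parsing above assumes each 'posts' value deserializes to a dict keyed by source name,
where each source maps to a list of post objects carrying a 'posted_on' timestamp. A
minimal, made-up illustration of that shape and of the counting rule (values are
placeholders, not real data):

    example_posts = {
        'policy':  [{'posted_on': '2016-03-01T10:00:00+00:00'}],
        'twitter': [{'posted_on': '2015-11-20T08:30:00+00:00'},
                    {'posted_on': '2016-05-02T12:00:00+00:00'}],
    }
    first_policy = min(parser.parse(p['posted_on']) for p in example_posts['policy'])
    early_tweets = sum(parser.parse(p['posted_on']) < first_policy
                       for p in example_posts['twitter'])
    print(early_tweets)  # 1 -- only the first tweet predates the first policy mention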

In [9]:
doQuery( myConnection )
myConnection.close()
print("data creation completed!")