In [7]:
import psycopg2
from dateutil import parser
posts_counts={}
columns=['altmetric_id','DOI','ISSN','mendeley','citeulike','connotea','twitter','reddit','facebook','googleplus','blogs','news','video','wikipedia','q&a',
'pinterest','weibo','peer_reviews','policy']
with open("D:\\NIU\\Data Mining 2\\Project\\Connection_Details.txt",'r') as con_file:
conDetails=con_file.readlines()
hostname=conDetails[0].strip('\n')
username=conDetails[1].strip('\n')
password=conDetails[2].strip('\n')
database=conDetails[3].strip('\n')
myConnection = psycopg2.connect( host=hostname, user=username, password=password, dbname=database )
if myConnection:
print("connection successful!!!")
In [8]:
def doQuery( conn ) :
cur = conn.cursor()
cur.execute("SELECT publication -> 'altmetric_id', publication -> 'citation' ->> 'doi', publication -> 'citation' -> 'issns', publication -> 'counts' -> 'readers', "
"publication -> 'posts' FROM policy")
with open("D:\\NIU\\Data Mining 2\\Project\\PolicyData.csv", 'w+', encoding='utf-8') as newf:
header = ''
for col in columns:
header += col + ','
newf.write(header[:-1] + '\n')
for alt_id,doi,issns,readers,posts in cur.fetchall():
new_record=''
policy_count=0
policy_appeared_dates=[]
for p in posts['policy']:
policy_count += 1
policy_appeared_dates.append(parser.parse(p['posted_on']))
first_mentioned_on_policy=min(policy_appeared_dates)
for post in posts:
if post!='policy':
posts_counts[post]=0
for p in posts[post]:
posted_date=parser.parse(p['posted_on'])
if posted_date<first_mentioned_on_policy:
posts_counts[post]+=1
new_record+=str(alt_id)+','+str(doi)+','
for issn in issns:
if(len(issn)>=8):
new_record+=str(issn)+';'
new_record = new_record[:-1]+ ',' + str(readers['mendeley']) + ',' + str(readers['citeulike']) + ',' + str(readers['connotea']) + ','
for i in range(6,len(columns)-1):
if columns[i] in posts_counts:
new_record += str(posts_counts[columns[i]])+','
else:
new_record += ','
new_record += str(policy_count)+'\n'
newf.write(new_record)
In [9]:
doQuery( myConnection )
myConnection.close()
print("data creation completed!")