notebook.community

Edit and run



In [7]:

    
import pandas as pd

# Import Stata files
volume = pd.read_stata('./data/VolumePaperData.dta')
cites = pd.read_stata('./data/AssessingCodedData.dta')

# Drop coder column
volume.drop('volCoder', axis=1, inplace=True)

# Create new dataframe with total no. of replications/extensions for each paper
volume_cites = cites[['volID', 'replication', 'extension']].groupby('volID').sum()

# Add new variable
volume_cites['Both'] = volume_cites['replication'] + volume_cites['extension']

# Merge dataframes together
merged = pd.merge(volume, volume_cites, how='left', left_on='volID', right_index=True)

# Rename columns
merged.columns = ['ID', 'Title', 'Web of Science', 'Top 200', 'Google Scholar', 'Field', 'Replications', 'Extensions', 'Both']

# Fill in missing values
merged[['Replications', 'Extensions', 'Both']] = merged[['Replications', 'Extensions', 'Both']].fillna(0)

# Export to CSV
merged.to_csv('data.csv', index=False)



In [8]:

    
merged.head()









    Out[8]:







  
    
      
      ID
      Title
      Web of Science
      Top 200
      Google Scholar
      Field
      Replications
      Extensions
      Both
    
  
  
    
      0
      2
      Learning about a New Technology: Pineapple in ...
      176
      108.0
      1246
      Development
      1.0
      10.0
      11.0
    
    
      1
      3
      Multiple-Product Firms and Product Switching
      111
      74.0
      764
      Labor/IO
      1.0
      2.0
      3.0
    
    
      2
      4
      Momma's Got the Pill": How Anthony Comstock an...
      23
      15.0
      98
      Labor/IO
      2.0
      0.0
      2.0
    
    
      3
      5
      Matching and Sorting in Online Dating
      75
      23.0
      326
      Labor/IO
      0.0
      5.0
      5.0
    
    
      4
      6
      Entry, Exit, and Investment-Specific Technical...
      9
      9.0
      38
      Macro/Int/Trade
      0.0
      0.0
      0.0

	ID	Title	Web of Science	Top 200	Google Scholar	Field	Replications	Extensions	Both
0	2	Learning about a New Technology: Pineapple in ...	176	108.0	1246	Development	1.0	10.0	11.0
1	3	Multiple-Product Firms and Product Switching	111	74.0	764	Labor/IO	1.0	2.0	3.0
2	4	Momma's Got the Pill": How Anthony Comstock an...	23	15.0	98	Labor/IO	2.0	0.0	2.0
3	5	Matching and Sorting in Online Dating	75	23.0	326	Labor/IO	0.0	5.0	5.0
4	6	Entry, Exit, and Investment-Specific Technical...	9	9.0	38	Macro/Int/Trade	0.0	0.0	0.0