Fetch Licenses (Windows Only)

This notebook produces a CSV of the packages installed with conda, along with their respective licenses.

License: MIT

License URL: https://opensource.org/licenses/MIT



In [1]:

    
import json
import os
import subprocess
import sys

import numpy as np
import pandas as pd



In [2]:

    
# Destination of the generated CSV: a file named output.csv placed directly
# in the current user's home directory.
home_dir = os.path.expanduser('~')
OUTPUT_PATH = os.path.join(home_dir, 'output.csv')



In [3]:

    
# Locate the Anaconda installation from the interpreter running this kernel.
# os.path.dirname is used instead of str.rstrip because rstrip removes any
# trailing run of the given *characters* (not the literal suffix), which can
# eat into the directory name itself or leave part of "pythonw.exe" behind.
anaconda_folder = os.path.dirname(sys.executable)
# Folder that is walked for package metadata; the trailing separator is kept
# so the value has the same shape as before for any code that concatenates it.
pkgs_folder = os.path.join(anaconda_folder, 'pkgs') + os.sep
# conda executable used to list the installed packages.
conda_path = os.path.join(anaconda_folder, 'Scripts', 'conda.exe')



In [4]:

    
# List the installed packages by running `conda list`.
# The command is passed as an argument list and conda_path is an executable,
# so it is launched directly rather than through a shell.
output = subprocess.check_output([conda_path, 'list'])
output_string = output.decode()

# Column names assigned to the whitespace-separated fields of each line.
list_columns = ['name', 'version', 'identifier', 'specialized']

# Turn every package line into a row of fields.
# - Lines starting with '#' are header comments and are skipped.
# - Blank lines are skipped so they do not become empty rows.
# - Rows with fewer fields than columns are padded with None so the frame can
#   always be built with the full set of column names.
data_lines = []
for item in output_string.splitlines():
    fields = item.split()
    if not fields or item.startswith('#'):
        continue
    fields.extend([None] * (len(list_columns) - len(fields)))
    data_lines.append(fields)

# Full parsed listing, one row per package line.
raw_df = pd.DataFrame(data_lines, columns=list_columns)

# Only the package name and version are needed downstream.
base_df = raw_df[['name', 'version']].copy()

base_df.head(5)









    Out[4]:







  
    
      
      name
      version
    
  
  
    
      0
      _ipyw_jlab_nb_ext_conf
      0.1.0
    
    
      1
      _license
      1.1
    
    
      2
      alabaster
      0.7.10
    
    
      3
      anaconda
      custom
    
    
      4
      anaconda-client
      1.6.5



In [5]:

    
# Iterate through packages and fetch metadata.
#
# Every directory named "info" found below pkgs_folder contributes one row:
#   name          <- "name" in index.json
#   license_type  <- "license" in about.json
#   license_url   <- "license_url" in about.json
#   license_text  <- full contents of LICENSE.txt
# Any value that cannot be obtained is recorded as NaN.


def _load_json_object(path):
    """Return the JSON object stored at ``path``, or ``{}`` when unavailable.

    A missing or unreadable file, invalid JSON, or a top-level value that is
    not an object all produce an empty dict, so callers can use ``.get``.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers both JSON syntax errors and undecodable bytes.
        return {}
    return data if isinstance(data, dict) else {}


def _load_text(path):
    """Return the text stored at ``path``, or NaN when it cannot be read."""
    try:
        # errors='replace' keeps a license readable even when it contains
        # bytes that are not valid UTF-8, instead of discarding the whole text.
        with open(path, 'r', encoding='utf-8', errors='replace') as f:
            return f.read()
    except OSError:
        return np.nan


# Fixed column list so the frame has these columns even when no row is found.
meta_columns = ['name', 'license_type', 'license_url', 'license_text']
rows = []

for root, _folders, _files in os.walk(pkgs_folder):
    if os.path.basename(root) != 'info':
        continue
    index_data = _load_json_object(os.path.join(root, 'index.json'))
    # about.json is parsed once and supplies both license fields.
    about_data = _load_json_object(os.path.join(root, 'about.json'))
    rows.append({
        'name': index_data.get('name', np.nan),
        'license_type': about_data.get('license', np.nan),
        'license_url': about_data.get('license_url', np.nan),
        'license_text': _load_text(os.path.join(root, 'LICENSE.txt')),
    })

meta_df = pd.DataFrame.from_records(rows, columns=meta_columns).drop_duplicates()
meta_df.head(5)









    Out[5]:







  
    
      
      license_text
      license_type
      license_url
      name
    
  
  
    
      0
      Copyright (c) 2017 Jeff Forcier.\n\nBased on o...
      BSD 3-Clause
      NaN
      alabaster
    
    
      1
      ===================================\nAnaconda ...
      BSD
      NaN
      anaconda
    
    
      2
      Copyright (c) 2016, Continuum Analytics, Inc.\...
      BSD 3-clause
      NaN
      anaconda-client
    
    
      4
      NaN
      proprietary - Continuum Analytics, Inc.
      NaN
      anaconda-navigator
    
    
      5
      NaN
      BSD 3-Clause
      NaN
      anaconda-project



In [6]:

    
# Attach the collected license metadata to the list of installed packages.
# A left join keeps every row of base_df (the `conda list` result), filling
# the license columns with NaN when no metadata matched, and leaves out
# metadata records whose name does not appear in the installed list.
final_df = base_df.merge(meta_df, how='left', on='name')
final_df.head(5)









    Out[6]:







  
    
      
      name
      version
      license_text
      license_type
      license_url
    
  
  
    
      0
      _ipyw_jlab_nb_ext_conf
      0.1.0
      NaN
      BSD
      NaN
    
    
      1
      _license
      1.1
      NaN
      NaN
      NaN
    
    
      2
      alabaster
      0.7.10
      Copyright (c) 2017 Jeff Forcier.\n\nBased on o...
      BSD 3-Clause
      NaN
    
    
      3
      anaconda
      custom
      ===================================\nAnaconda ...
      BSD
      NaN
    
    
      4
      anaconda-client
      1.6.5
      Copyright (c) 2016, Continuum Analytics, Inc.\...
      BSD 3-clause
      NaN



In [7]:

    
# Persist the merged table to OUTPUT_PATH as CSV, leaving out the index column.
final_df.to_csv(path_or_buf=OUTPUT_PATH, index=False)

	name	version
0	_ipyw_jlab_nb_ext_conf	0.1.0
1	_license	1.1
2	alabaster	0.7.10
3	anaconda	custom
4	anaconda-client	1.6.5

	license_text	license_type	license_url	name
0	Copyright (c) 2017 Jeff Forcier.\n\nBased on o...	BSD 3-Clause	NaN	alabaster
1	===================================\nAnaconda ...	BSD	NaN	anaconda
2	Copyright (c) 2016, Continuum Analytics, Inc.\...	BSD 3-clause	NaN	anaconda-client
4	NaN	proprietary - Continuum Analytics, Inc.	NaN	anaconda-navigator
5	NaN	BSD 3-Clause	NaN	anaconda-project