Fetch Licenses (Windows Only)

This notebook produces a CSV of the packages installed with conda, along with their respective licenses.

Copyright: Theo Naunheim, 2017

License: MIT

License URL: https://opensource.org/licenses/MIT


In [1]:
import json
import os
import subprocess
import sys

import numpy as np
import pandas as pd

In [2]:
# The report is written to 'output.csv' inside the current user's home folder.
_home_dir = os.path.expanduser('~')
OUTPUT_PATH = os.path.join(_home_dir, 'output.csv')

In [3]:
# Locate the Anaconda installation from the interpreter running this kernel.
# os.path.dirname drops exactly the last path component. str.rstrip must not be
# used here: it strips any trailing run of the *characters* in its argument,
# not a suffix, so it can also eat the end of the folder name itself.
anaconda_folder = os.path.dirname(sys.executable)
# The empty last component keeps the trailing path separator on the folder.
pkgs_folder = os.path.join(anaconda_folder, 'pkgs', '')
conda_path = os.path.join(anaconda_folder, 'Scripts', 'conda.exe')

In [4]:
# List the installed packages by running `conda list`.
# The command is given as an argument list, so no shell is involved and a
# conda path containing spaces needs no manual quoting.
output = subprocess.check_output([conda_path, 'list'])
output_string = output.decode()

# Each data line is whitespace-separated and is mapped onto these columns;
# lines starting with '#' are headers and blank lines carry no data.
list_columns = ['name', 'version', 'identifier', 'specialized']
data_lines = []
for item in output_string.splitlines():
    fields = item.split()
    if not fields or item.startswith('#'):
        continue
    # Pad short rows with None so the frame can be built even when no line
    # has a value for every column.
    missing = len(list_columns) - len(fields)
    data_lines.append(fields + [None] * missing)

# Full parsed listing, one row per package.
raw_df = pd.DataFrame(data_lines, columns=list_columns)

# Only the package name and version are needed downstream.
base_df = raw_df[['name', 'version']].copy()

base_df.head(5)


Out[4]:
name version
0 _ipyw_jlab_nb_ext_conf 0.1.0
1 _license 1.1
2 alabaster 0.7.10
3 anaconda custom
4 anaconda-client 1.6.5

In [5]:
# Walk pkgs_folder and, for every folder named 'info', collect the package
# name (index.json), the license type and URL (about.json) and the full
# license text (LICENSE.txt). Anything missing or unreadable becomes NaN.


def _read_text(path):
    """Return the text stored at ``path``, or None if the file cannot be read."""
    try:
        # open() without an encoding uses the locale's preferred encoding;
        # decode as UTF-8 and replace undecodable bytes so one odd character
        # does not discard the whole file.
        with open(path, 'r', encoding='utf-8', errors='replace') as f:
            return f.read()
    except OSError:
        return None


def _read_json_dict(path):
    """Return the JSON object stored at ``path``, or {} if missing or invalid."""
    text = _read_text(path)
    if text is None:
        return {}
    try:
        data = json.loads(text)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return {}
    # Only a JSON object can be looked up by key below.
    return data if isinstance(data, dict) else {}


meta_columns = ['name', 'license_type', 'license_url', 'license_text']
rows = []

for root, _dirs, _files in os.walk(pkgs_folder):
    if os.path.basename(root) != 'info':
        continue
    index = _read_json_dict(os.path.join(root, 'index.json'))
    # about.json is parsed once and used for both the license type and URL.
    about = _read_json_dict(os.path.join(root, 'about.json'))
    license_text = _read_text(os.path.join(root, 'LICENSE.txt'))
    # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
    rows.append({
        'name': index.get('name', np.nan),
        'license_type': about.get('license', np.nan),
        'license_url': about.get('license_url', np.nan),
        'license_text': np.nan if license_text is None else license_text,
    })

# Passing the columns explicitly keeps the frame well-formed (and mergeable on
# 'name') even when no 'info' folder was found.
meta_df = pd.DataFrame.from_records(rows, columns=meta_columns).drop_duplicates()
meta_df.head(5)


Out[5]:
license_text license_type license_url name
0 Copyright (c) 2017 Jeff Forcier.\n\nBased on o... BSD 3-Clause NaN alabaster
1 ===================================\nAnaconda ... BSD NaN anaconda
2 Copyright (c) 2016, Continuum Analytics, Inc.\... BSD 3-clause NaN anaconda-client
4 NaN proprietary - Continuum Analytics, Inc. NaN anaconda-navigator
5 NaN BSD 3-Clause NaN anaconda-project

In [6]:
# Attach the installed version to each metadata row, matching on package name.
# A right join keeps every row of meta_df; rows of base_df whose name has no
# match in meta_df are dropped.
# NOTE(review): if the report should list exactly the installed packages,
# how='left' may be the intended join - confirm before changing.
final_df = pd.merge(base_df, meta_df, how='right', on='name')
final_df.head(5)


Out[6]:
name version license_text license_type license_url
0 _ipyw_jlab_nb_ext_conf 0.1.0 NaN BSD NaN
1 _license 1.1 NaN NaN NaN
2 alabaster 0.7.10 Copyright (c) 2017 Jeff Forcier.\n\nBased on o... BSD 3-Clause NaN
3 anaconda custom ===================================\nAnaconda ... BSD NaN
4 anaconda-client 1.6.5 Copyright (c) 2016, Continuum Analytics, Inc.\... BSD 3-clause NaN

In [7]:
# Write the merged report to OUTPUT_PATH, leaving out the DataFrame index.
final_df.to_csv(path_or_buf=OUTPUT_PATH, index=False)