This notebook produces a CSV of the packages installed with conda, along with their respective licenses.
Copyright: Theo Naunheim, 2017
License: MIT
License URL: https://opensource.org/licenses/MIT
In [1]:
import json
import os
import subprocess
import sys
import numpy as np
import pandas as pd
In [2]:
# Destination of the generated report: a file named output.csv in the
# current user's home directory.
home_dir = os.path.expanduser('~')
OUTPUT_PATH = os.path.join(home_dir, 'output.csv')
In [3]:
# Directory that contains the running Python interpreter.
# NOTE: str.rstrip() takes a *set of characters*, not a suffix, so stripping
# '\\python.exe' could also eat trailing characters of the directory name
# (e.g. a folder ending in 'py' or 'Python'). os.path.dirname is exact.
# NOTE(review): presumably the notebook is run from the base Anaconda install
# on Windows, so that this directory holds both `pkgs` and `Scripts` - confirm.
anaconda_folder = os.path.dirname(sys.executable)

# Package cache folder; the empty last component keeps the trailing separator.
pkgs_folder = os.path.join(anaconda_folder, 'pkgs', '')

# Full path to the conda executable used to list installed packages.
conda_path = os.path.join(anaconda_folder, 'Scripts', 'conda.exe')
In [4]:
# Ask conda for the list of installed packages.
# The argument list is handed directly to the executable; `conda list` is not
# a shell built-in, so no intermediate shell (shell=True) is needed.
output = subprocess.check_output([conda_path, 'list'])
output_string = output.decode()

# Column names given to the whitespace-separated fields of each output line.
list_columns = ['name', 'version', 'identifier', 'specialized']

# Parse one row per package line.
data_lines = []
for item in output_string.splitlines():
    fields = item.split()
    # Skip blank lines and '#'-prefixed header/comment lines.
    if not fields or item.startswith('#'):
        continue
    # Pad (or trim) every row to exactly one value per column. Without this,
    # the DataFrame constructor raises when no line carries the last field.
    fields = (fields + [None] * len(list_columns))[:len(list_columns)]
    data_lines.append(fields)

# Full parsed listing.
raw_df = pd.DataFrame(data_lines, columns=list_columns)

# Only name and version are needed downstream.
base_df = raw_df[['name', 'version']].copy()
base_df.head(5)
Out[4]:
In [5]:
def _read_json(path):
    """Return the JSON object stored at ``path``, or ``{}`` if unavailable.

    A missing or unreadable file, invalid JSON, undecodable bytes and a
    top-level value that is not an object all yield an empty dict, so callers
    can use ``.get`` with a default.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {}
    return data if isinstance(data, dict) else {}


def _read_text(path):
    """Return the text content of ``path``, or NaN if it cannot be read."""
    try:
        # errors='replace' keeps a license file with stray non-UTF-8 bytes
        # instead of silently discarding the whole text.
        with open(path, 'r', encoding='utf-8', errors='replace') as f:
            return f.read()
    except OSError:
        return np.nan


# Columns of the metadata table, in output order.
meta_columns = ['name', 'license_type', 'license_url', 'license_text']

# Iterate through packages and fetch metadata from every `info` directory
# found under the package folder.
rows = []
for root, folders, files in os.walk(pkgs_folder):
    # Separator-independent equivalent of checking for a trailing '\\info'.
    if os.path.basename(root) != 'info':
        continue

    index_data = _read_json(os.path.join(root, 'index.json'))
    # about.json is parsed once and supplies both license fields.
    about_data = _read_json(os.path.join(root, 'about.json'))

    # np.nan (lowercase) is used because the np.NaN alias was removed in
    # NumPy 2.0.
    rows.append({
        'name': index_data.get('name', np.nan),
        'license_type': about_data.get('license', np.nan),
        'license_url': about_data.get('license_url', np.nan),
        'license_text': _read_text(os.path.join(root, 'LICENSE.txt')),
    })

# Explicit columns keep the `name` column present even when nothing was found.
meta_df = pd.DataFrame(rows, columns=meta_columns).drop_duplicates()
meta_df.head(5)
Out[5]:
In [6]:
# Attach license metadata to the packages reported by `conda list`.
# A left join keeps every row of base_df, with missing metadata left as NaN,
# and drops metadata rows whose name is not in base_df. The previous right
# join did the opposite, which does not match a report of *installed* packages.
# NOTE(review): a name that appears several times in meta_df yields several
# output rows for that package - confirm whether that is acceptable.
final_df = base_df.merge(meta_df, how='left', on='name')
final_df.head(5)
Out[6]:
In [7]:
# Write the merged table to OUTPUT_PATH as CSV, without the index column.
final_df.to_csv(
    path_or_buf=OUTPUT_PATH,
    index=False,
)