In [1]:
from conda_tools import (cache, environment)
from conda_tools import environment_utils as eu
from conda_tools import cache_utils as cu
import os
from os.path import join
from itertools import groupby, chain
from versio.version import Version
# adjust root to be your Miniconda prefix
root = r"C:\Users\Ryan\Miniconda3"
root_envs = join(root, 'envs')
root_pkgs = join(root, 'pkgs')
print(root_envs)
print(root_pkgs)
The two core components of the conda ecosystem are the package cache and the environment subfolders. These are abstracted with PackageInfo and Environment objects, respectively.

Here we create "pools" of PackageInfo and Environment objects. These objects permit easy, read-only access to various bits of metadata stored in the package cache and in each environment's conda-meta/ subfolder. We want to reuse the objects as much as we can to minimize disk I/O. All disk reads are currently cached on the objects, so the more objects you work with, the more RAM is required.
In [2]:
# Create pkg_cache and environments
pkg_cache = cache.packages(root_pkgs)
envs = environment.environments(root_envs)
print(pkg_cache[:5])
print()
print(envs[:5])
In [3]:
pi = pkg_cache[0]
pi.index # info/index.json
Out[3]:
In [4]:
# We can access fields of index.json directly from the object.
pi.name, pi.version, pi.build
Out[4]:
In [5]:
# Access to info/files
pi.files
Out[5]:
In [6]:
# The full spec of the package. This is always "name-version-build"
pi.full_spec
Out[6]:
In [7]:
# We can run queries against the information we have on packages.
# For example, find all MIT-licensed packages in the cache.
{pi.full_spec: pi.license for pi in pkg_cache if pi.license == 'MIT'}
Out[7]:
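Since groupby is already imported above, the same kind of query can summarize the whole cache by license. A minimal sketch (groupby requires its input to be sorted by the grouping key; str() guards against packages with no license field):

by_license = sorted(pkg_cache, key=lambda pi: str(pi.license))
{lic: [pi.full_spec for pi in grp] for lic, grp in groupby(by_license, key=lambda pi: str(pi.license))}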
In [8]:
e = envs[2]
e
Out[8]:
In [9]:
# We can discover the currently activated environment
{e.path: e.activated() for e in envs}
Out[9]:
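To pick out just the activated environment (if any) from the same pool, a one-line sketch:

active = next((e for e in envs if e.activated()), None)
active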
In [10]:
# We can see all the packages that claim to be linked into the environment, keyed by name
e.linked_packages
Out[10]:
In [11]:
# Linked packages are either hard-linked, symlinked, or copied into environments.
# The symmetric difference below should be empty if every linked package is accounted for.
set(chain(e.hard_linked, e.soft_linked, e.copy_linked)) ^ set(e.linked_packages.values())
Out[11]:
In [12]:
# The origin channel of each package
e.package_channels
Out[12]:
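Inverting that mapping gives a view of which packages came from each channel. A sketch, assuming package_channels maps package names to channel strings:

from collections import defaultdict
channels = defaultdict(list)
for pkg, chan in e.package_channels.items():
    channels[chan].append(pkg)
dict(channels)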
In [13]:
# We also have access to the history of the environment.
# The history object is an adaptation of conda's history parser.
# (note: The interface to this may change in the future)
e.history.object_log
Out[13]:
In [14]:
# Calculate potential collisions: packages claiming the same file paths will
# conflict if linked into the same environment. Quick and naive pairwise check.
for i, p1 in enumerate(pkg_cache):
    for p2 in pkg_cache[i+1:]:
        if p1.name == p2.name:
            continue
        x = p1.files.intersection(p2.files)
        if x:
            print("{} collides with {}".format(p1, p2))
            print("\tCollisions: ", x)
In [15]:
# cache_utils has some higher-order convenience functions.
# For example, see which environments a package is linked into.
# Note that this is an O(n) operation, where n is the sum of the installed packages in each environment you're checking.
# If you're running this for the first time, it has to read all the metadata for each environment.
# Also note that this creates new PackageInfo and Environment objects on each run, so each run
# prompts a full scan of both the package cache and all environments.
cu.linked_environments((pkg_cache[0],), envs)
Out[15]:
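Since every call triggers a rescan, batching several packages into one call amortizes the cost. For example, reusing the pools created above:

cu.linked_environments(tuple(pkg_cache[:3]), envs)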
In [16]:
# Find which environments outdated packages are linked into.
# This example uses Versio to parse and compare PEP 440 compliant version numbers,
# so packages with non-compliant versions (e.g. jpeg, openssl) are skipped.
# This loop simply creates Version objects so we can compare them later.
Versions = {}
for x in pkg_cache:
    try:
        if x.name in Versions:
            Versions[x.name].append(Version(x.version))
        else:
            Versions[x.name] = [Version(x.version)]
    except Exception:
        print("Skipping ", x.name, x.version)

# Sort each value list and keep everything but the latest version.
# To keep only the latest version instead:
# pversions = {k: str(sorted(v)[-1]) for k, v in Versions.items()}
pversions = {k: list(map(str, sorted(v)[:-1])) for k, v in Versions.items()}

# Find the environments that the older packages are linked into.
# (To ask which environments the most up-to-date packages are linked into,
# use the "latest version" variant of pversions above.)
older_pkgs = [x for x in pkg_cache if x.name in pversions and x.version in set(pversions[x.name])]

# Print the results nicely
{str(k): list(map(str, v)) for k, v in cu.linked_environments(older_pkgs, envs).items()}
Out[16]:
In [17]:
# All packages that are not linked to any environment
cu.unlinked_packages(pkg_cache, envs)
Out[17]:
In [18]:
# Construct an Environment directly from a path
e = environment.Environment(join(root_envs, 'env2'))
In [19]:
# Helper to filter *.pyc files out of a sequence of paths
filter_pyc = lambda f: filter(lambda x: not x.endswith('.pyc'), f)
In [20]:
# List all files in an environment that are not hard-linked (and should be).
# Long running and disk intensive. Note that *.pyc files are filtered out.
not_linked = {x: tuple(filter_pyc(y)) for x, y in eu.check_hardlinked_env(envs[0]).items()}
# To see all missing hardlinks, including *.pyc files, remove the filter_pyc call:
# not_linked = {x: y for x, y in eu.check_hardlinked_env(envs[0]).items()}
not_linked
Out[20]:
In [21]:
# We can leverage the information in the environment's history to get packages
# that were explicitly installed by the user.
eu.explicitly_installed(e)
Out[21]:
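The same helper can be applied across all environments at once to summarize what was deliberately installed where; a short sketch reusing the pool from above:

{e.path: eu.explicitly_installed(e) for e in envs}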