Get results using the exponential mechanism.
In [ ]:
import sys, os, time
from collections import deque
import argparse
import numpy as np
import imp
In [ ]:
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
Utility functions.
In [ ]:
def import_anywhere(module_name, paths):
"""import methods from any folder"""
try:
f, filename, desc = imp.find_module(module_name, paths)
return imp.load_module(module_name, f, filename, desc)
finally:
# Since we may exit via an exception, close fp explicitly.
if f:
f.close()
In [ ]:
utility_functions = import_anywhere('utility_functions', [SCRIPT_DIR])
from utility_functions import get_chisq_sensitivity
Parse command line arguments.
In [ ]:
parser = argparse.ArgumentParser(description="Get results based on the JS method.")
parser.add_argument("k", metavar="NUM_SNP", help="number of SNPs to output", type=int)
parser.add_argument("e", metavar="EPSILON", help="privacy budget epsilon", type=float)
parser.add_argument("n_case", help="number of cases", type=int)
parser.add_argument("n_control", help="number of controls", type=int)
parser.add_argument("infile", help="input file of chisquare statistics")
parser.add_argument("-s", help="sensitivity of the scoring function", default=1, type=int)
args = parser.parse_args()
if not os.path.isfile(args.infile):
sys.exit("The follwoing file does not exist: {}".format(args.infile))
Setup data.
In [ ]:
name_score_tuples = []
with open(args.infile, 'r') as infile:
# skip header line
garbage = infile.readline()
for line in infile:
name, score = line.strip().split()
name_score_tuples.append((name, float(score)))
indexed_snp_name_dict = dict(enumerate([name for name, ss in name_score_tuples]))
snp_scores = np.array([ss for name, ss in name_score_tuples])
Perform Laplace mechanism.
In [ ]:
sensitivity = get_chisq_sensitivity(args.n_case, args.n_control)
scale = sensitivity * 2.0 * args.k / args.e
scores_perturbed = snp_scores + np.random.laplace(scale=scale,
size=len(snp_scores))
results_indices = np.argsort(scores_perturbed)[::-1][:args.k]
results_names = map(indexed_snp_name_dict.get, results_indices)
In [ ]:
for nn in results_names:
print nn