RV precision comparison

This notebook compares the RV precisions calculated by eniric in 2017, after a masking bug in condition #2 was fixed and the normalization implementation was changed, to the published results in Figueira et al. 2016.


In [1]:
from os.path import join

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from eniric import config

Load in the precision results


In [2]:
# Read the published precision table (tab-separated) from the configured
# results directory; `path` is reused when loading the 2017 results.
path = config.paths["precision_results"]
old_precision_file = join(path, "precision_figueira_2016.dat")
df_pub = pd.read_csv(old_precision_file, sep="\t")

df_pub.head()


Out[2]:
Simulation RV_Cond_1[m/s] RV_Cond_2[m/s] RV_Cond_3[m/s]
0 M0-Z-1.0-60k 8.9 26.1 9.3
1 M0-Z-1.0-80k 6.0 17.1 6.2
2 M0-Z-1.0-100k 4.5 12.8 4.6
3 M0-Z-5.0-60k 13.6 38.9 14.0
4 M0-Z-5.0-80k 10.6 30.5 10.9

In [3]:
# Read the 2017 precision table and name its id column "Simulation", the
# same name the published table uses.
new_snrnorm_file = join(path, "precision_results_2017.dat")
df_new = pd.read_csv(new_snrnorm_file, sep="\t").rename(
    columns={"# id": "Simulation"}
)

# Round every precision column to 1 decimal place.
for precision_column in ("prec_1", "prec_2", "prec_3"):
    df_new[precision_column] = df_new[precision_column].round(1)

df_new.head()


Out[3]:
Simulation prec_1 prec_2 prec_3
0 M0-Z-1.0-60k 9.0 14.8 9.3
1 M0-Z-1.0-80k 6.0 9.9 6.2
2 M0-Z-1.0-100k 4.5 7.5 4.7
3 M0-Z-5.0-60k 13.7 22.4 14.2
4 M0-Z-5.0-80k 10.7 17.4 11.0

In [ ]:
# The comparisons below pair the two tables row by row, so every row must
# describe the same simulation in both tables.
simulation_ids_match = (df_pub["Simulation"] == df_new["Simulation"]).all()
assert simulation_ids_match

In [ ]:
# Show the column names of the published table, then of the 2017 table.
for table in (df_pub, df_new):
    print(table.columns)


Index(['Simulation', 'RV_Cond_1[m/s]', 'RV_Cond_2[m/s]', 'RV_Cond_3[m/s]'], dtype='object')
Index(['Simulation', 'prec_1', 'prec_2', 'prec_3'], dtype='object')

In [ ]:
# Compare the 2017 precisions (df_new) with the published ones (df_pub), i.e.
# the combined effect of fixing the clumping in condition 2 and changing the
# normalization. Conditions 1 and 3 test the normalization change.

# Each condition maps to its (2017 column, published column) pair.
cond_columns = {
    1: ("prec_1", "RV_Cond_1[m/s]"),
    2: ("prec_2", "RV_Cond_2[m/s]"),
    3: ("prec_3", "RV_Cond_3[m/s]"),
}
cond_order = (1, 2, 3)

n_worsened = {}
n_improved = {}
percent_diff = {}
for cond in cond_order:
    new_col, pub_col = cond_columns[cond]
    new_prec = df_new[new_col]
    pub_prec = df_pub[pub_col]
    # A larger RV precision value counts as a worse precision.
    n_worsened[cond] = (new_prec > pub_prec).sum()
    n_improved[cond] = (new_prec < pub_prec).sum()
    # Percentage change relative to the published value:
    # 100 * (new - old) / old
    percent_diff[cond] = 100 * (new_prec - pub_prec) / pub_prec

# Keep the per-condition variable names; all_cond2_percent_diff is needed by
# the cell that selects the large condition 2 changes.
cond_1_up, cond_2_up, cond_3_up = (n_worsened[c] for c in cond_order)
cond_1_down, cond_2_down, cond_3_down = (n_improved[c] for c in cond_order)
all_cond1_percent_diff, all_cond2_percent_diff, all_cond3_percent_diff = (
    percent_diff[c] for c in cond_order
)

# Report the counts, stating which condition each line refers to.
for cond in cond_order:
    print(
        "Number of Simulations that improve precision in condition {} "
        "from all fixes = {}".format(cond, n_improved[cond])
    )
    print(
        "Number of Simulations that worsen precision in condition {} "
        "from all fixes = {}".format(cond, n_worsened[cond])
    )

# One histogram of the percentage change per condition.
for cond in cond_order:
    percent_diff[cond].hist(bins=20, label="cond{}".format(cond))
    plt.xlabel("Percentage of RV Change.")
    plt.legend()
    plt.title("RV precision change due to all fixes.")
    plt.show()


print("The majority of the changes comes from the bug in condition_2.")


Number of Simulations that improve precision from all fixes = 0
Number of Simulations that worsen precision from all fixes = 110
Number of Simulations that improve precision from all fixes = 69
Number of Simulations that worsen precision from all fixes = 111
Number of Simulations that improve precision from all fixes = 0
Number of Simulations that worsen precision from all fixes = 104

In [ ]:
# Find the simulations with extreme changes in the condition 2 precision:
# those whose percentage change exceeds `percentage_lim`.
percentage_lim = 200
mask = all_cond2_percent_diff > percentage_lim

# For the selected rows, collect the simulation id followed by the published
# and new precision of each condition. Insertion order is the column order.
dict_for_df = {"Simulation": df_new["Simulation"][mask]}
for cond in (1, 2, 3):
    dict_for_df["pub_cond_{}".format(cond)] = df_pub[
        "RV_Cond_{}[m/s]".format(cond)
    ][mask]
    dict_for_df["new_cond_{}".format(cond)] = df_new["prec_{}".format(cond)][mask]

new_cols = list(dict_for_df)
df_large = pd.DataFrame(dict_for_df)[new_cols]  # Enforce the column order
print("Simulations that have a large change in precision for condition 2.")
df_large.head()

# Most large changes in precision come from the K band.

In [ ]:
# Column names of the large-change table as currently ordered (`cols`), and
# the id column followed by the published/new pair of each condition
# (`new_cols`).
cols = df_large.columns.tolist()
new_cols = ["Simulation"] + [
    "{0}_cond_{1}".format(source, cond)
    for cond in (1, 2, 3)
    for source in ("pub", "new")
]

Plot out RV values with published values.

Including published precision 2 values.


In [ ]:
# Plot styling: colour encodes the resolution, marker encodes the condition,
# and the x position encodes the band.
res_colour = {"60k": "blue", "80k": "green", "100k": "red"}
cond_marker = {1: ".", 2: "o", 3: "^"}
band_loc = {"Z": 1, "Y": 2, "J": 3, "H": 4, "K": 5}
# Column (wrapped in a list) holding each condition's precision in df_new
# and in df_pub respectively.
conditions = {1: ["prec_1"], 2: ["prec_2"], 3: ["prec_3"]}
pub_conds = {1: ["RV_Cond_1[m/s]"], 2: ["RV_Cond_2[m/s]"], 3: ["RV_Cond_3[m/s]"]}
vel = 1.0

# Band order along the x axis; identical for every line drawn.
bands = list("ZYJHK")
x_vals = [band_loc[b] for b in bands]

# Published values are drawn dashed for every condition, not only condition 2.
print("Dashed lines indicate the published values, solid lines the 2017 values.")

# One figure per star, with one dashed and one solid line for every
# (resolution, condition) combination.
for star in ["M0", "M3", "M6", "M9"]:
    for res in ["60k", "80k", "100k"]:
        # The simulation ids and their row masks do not depend on the
        # condition, so build them once per (star, resolution).
        ids = ["{0:s}-{1:s}-{2:.1f}-{3}".format(star, b, vel, res) for b in bands]
        # Masks come from df_new and are also applied to df_pub, which relies
        # on both tables listing the simulations in the same row order.
        df_masks = [df_new.Simulation == sim_id for sim_id in ids]

        for cond in range(1, 4):
            # .iat[0, 0] gives the first (only) element of the one-column
            # frame selected by the mask.
            pub_rv_vals = [
                df_pub.loc[row_mask, pub_conds[cond]].iat[0, 0]
                for row_mask in df_masks
            ]
            plt.plot(
                x_vals,
                pub_rv_vals,
                ls="--",
                marker=cond_marker[cond],
                color=res_colour[res],
            )

            rv_vals = [
                df_new.loc[row_mask, conditions[cond]].iat[0, 0]
                for row_mask in df_masks
            ]
            plt.plot(x_vals, rv_vals, marker=cond_marker[cond], color=res_colour[res])

    plt.xticks(x_vals, bands)
    plt.ylabel("Precision [m/s]")
    plt.title("{0} with R_vel = {1} m/s".format(star, vel))
    plt.show()

In [ ]: