In [1]:
import sys, os
import pandas as pd
from sklearn.model_selection import train_test_split
from lcc.db_tier.connectors import FileManager, OgleII, OgleII
from lcc.data_manager.package_reader import PackageReader
from lcc.stars_processing.systematic_search.stars_searcher import StarsSearcher, StarsSearcherRedis
from lcc.stars_processing.tools.visualization import plotProbabSpace
from lcc.stars_processing.tools.params_estim import ParamsEstimator
from lcc.utils.output_process_modules import saveIntoFile, loadFromFile
from lcc.utils.stars import saveStars, plotStarsPicture
from lcc.utils.helpers import get_combinations
from lcc.api.input_parse import parse_tun_query
%matplotlib inline
from matplotlib import pylab
# Widen matplotlib's default figure size (width, height) for every plot drawn
# later in this notebook.
pylab.rcParams['figure.figsize'] = (24.0, 8.0)
In [2]:
# Every light curve is described by two features -- its Abbe value and its
# variogram slope -- and a Gradient Boosting decider acts as the "brain" of the filter.
descr_name1, descr_name2 = "AbbeValueDescr", "VariogramSlopeDescr"
decid_name = "GradBoostDec"

# Parameter tuned for the first descriptor, with its (from, to, step) range.
# The bounds are later handed to range(), so the "to" value itself is excluded.
tun_param1 = "bins"
bin_from, bin_to, bin_step = 10, 150, 50

# Parameter tuned for the second descriptor, with its (from, to, step) range.
tun_param2 = "days_per_bin"
dpb_from, dpb_to, dpb_step = 30, 110, 10
# Locations of the example stars that ship with the package.
# NOTE(review): obt_method is not referenced by any other cell visible here.
obt_method = "FileManager"
quasars_path, be_stars_path = (
    PackageReader.getSamplePath(sample_name) for sample_name in ("qso", "be_stars")
)
In [3]:
# Ask the package for the descriptor and decider classes it exposes, keyed by name.
all_descriptors = PackageReader().getClassesDict("descriptors")
all_deciders = PackageReader().getClassesDict("deciders")

# List the available names (iterating a dictionary yields its keys).
print("Descriptors: {}\n".format(", ".join(all_descriptors)))
print("Deciders: {}".format(", ".join(all_deciders)))
Note: `all_descriptors` and `all_deciders` are dictionaries that map each name to its descriptor or decider class, so the classes can be used directly. For example:
my_descriptor = all_descriptors["AbbeValueDescr"](bins=100)
is the same as:
from lcc.stars_processing.descriptors import AbbeValueDescr
my_descriptor = AbbeValueDescr(bins=100)
In [4]:
# Resolve the configured names to classes. Plain indexing is used instead of
# .get() so that a misspelled or unavailable name raises KeyError right here,
# rather than silently yielding None that only fails later inside ParamsEstimator.
abbe_descr = all_descriptors[descr_name1]
vario_slope_descr = all_descriptors[descr_name2]
decider = all_deciders[decid_name]
In [5]:
# Each tuned parameter is addressed as "<descriptor name>:<parameter name>";
# the two ranges supply the candidate values (upper bounds are exclusive).
combinations = get_combinations(
    ["{}:{}".format(descr_name1, tun_param1), "{}:{}".format(descr_name2, tun_param2)],
    range(bin_from, bin_to, bin_step),
    range(dpb_from, dpb_to, dpb_step),
)
# Convert the combinations into the form passed to ParamsEstimator as tuned_params.
tun_params = parse_tun_query(combinations)
In [6]:
# Peek at the first three parsed parameter combinations.
tun_params[:3]
Out[6]:
In [7]:
# Read both sample sets through the FileManager connector (quasars first, then Be stars).
quasars, be_stars = (
    FileManager({"path": sample_path}).getStars()
    for sample_path in (quasars_path, be_stars_path)
)
In [8]:
# Keep only stars that have a light curve with at least `bin_to` magnitude points.
quasars = list(filter(lambda st: st.lightCurve and len(st.lightCurve.mag) >= bin_to, quasars))
be_stars = list(filter(lambda st: st.lightCurve and len(st.lightCurve.mag) >= bin_to, be_stars))
In [9]:
# Hold out 20 % of each class for evaluating the trained filter.
# A fixed random_state makes the split -- and therefore every result below --
# reproducible under "Restart Kernel -> Run All".
quasars_train, quasars_test = train_test_split(quasars, train_size=0.8, random_state=42)
be_stars_train, be_stars_test = train_test_split(be_stars, train_size=0.8, random_state=42)
In [10]:
import random

# Shuffle the quasar list in place so the examples shown below are a random pick.
# A dedicated, seeded generator keeps the selection identical on every run and
# leaves the global `random` state untouched.
random.Random(42).shuffle(quasars)

# Print the first three stars of the shuffled list, separated by a ruler line.
for star in quasars[:3]:
    print("*"*20)
    print(star)
    print()
In [11]:
# Plot the same three quasars that were printed in the previous cell.
plotStarsPicture(quasars[:3])
In [12]:
# Metadata attached to the light curve of the first quasar in the (shuffled) list
quasars[0].lightCurve.meta
Out[12]:
In [13]:
# Set up the estimator: quasars are the searched class, Be stars the contrast
# sample, and every entry of tun_params is one candidate parameter combination.
es = ParamsEstimator(
    tuned_params=tun_params,
    descriptors=[abbe_descr, vario_slope_descr],
    deciders=[decider],
    searched=quasars_train,
    others=be_stars_train,
)

# Evaluate the combinations; fit() hands back the winning filter together with
# its statistics and its parameters.
star_filter, best_stats, best_params = es.fit()
In [14]:
# Report the combination that fit() returned as the best one. The previous
# version printed es.tuned_params[0], i.e. simply the first candidate in the
# list, while best_params was never used.
# NOTE(review): assumes the third value returned by fit() is the winning
# parameter set, as its name in the estimation cell suggests -- confirm against lcc.
print("Optimal parameters: {}".format(best_params))

# Statistics collected by the estimator, shown as a table.
pd.DataFrame(es.stats)
Out[14]:
In [15]:
# Coordinates that the trained filter assigns to the first five quasars
# (presumably one value per descriptor -- TODO confirm against lcc).
star_filter.getSpaceCoordinates(quasars[:5])
Out[15]:
In [16]:
# Run the trained filter over both held-out samples (quasars first, then Be stars).
prediction_quasars, prediction_be_stars = map(
    star_filter.evaluateStars, (quasars_test, be_stars_test)
)
In [17]:
# Preview the first rows of the predictions for the held-out quasars.
prediction_quasars.head()
Out[17]:
In [18]:
# Visualise the trained filter with the package's probability-space plot helper.
plotProbabSpace(star_filter)
In [19]:
# Check the OgleII connector with a single query: star id 1, field 2, target "lmc".
# This should return a star from OGLE-II.
# NOTE(review): presumably this contacts the remote OGLE-II archive and needs
# network access -- confirm.
ogle_star = OgleII({"starid": 1, "field_num": 2, "target": "lmc"}).getStars()
In [20]:
# Connector to search and the target to query.
db_name, target = "OgleII", "lmc"

# Star-id and field-number bounds; both pairs are fed to range() when the
# queries are built, so each upper bound is exclusive.
starid_from, starid_to = 1, 10
field_num_from, field_num_to = 1, 2
In [21]:
# Build one query per (starid, field_num, target) combination.
# NOTE(review): range() excludes its upper bound, so with the current settings
# only star ids 1-9 in field 1 are queried -- confirm that is intended.
queries = get_combinations(
    ["starid", "field_num", "target"],
    range(starid_from, starid_to),
    range(field_num_from, field_num_to),
    [target],
)

# Sequential systematic search: query the connector for every combination and
# run the trained filter on the results; the status file is written under /tmp.
searcher = StarsSearcher(
    [star_filter],
    save_coords=True,
    stat_file_path="/tmp/lcc_status.csv",
    db_connector=db_name,
)
searcher.queryStars(queries)

# Stars accepted by the filter.
passed_stars = searcher.getPassedStars()
In [22]:
# Status reported by the sequential searcher for the queries run above.
searcher.getStatus()
Out[22]:
In [23]:
# Save the first five quasars into /tmp.
# NOTE(review): the hard-coded POSIX path will not exist on every platform.
saveStars(quasars[:5], "/tmp")
Out[23]:
In [24]:
# Repeat the same search with the Redis-backed searcher.
# NOTE(review): presumably requires a reachable Redis server -- confirm.
redis_searcher = StarsSearcherRedis(
    [star_filter],
    save_coords=True,
    db_connector=db_name,
)
redis_searcher.queryStars(queries)

# This overwrites the passed_stars collected by the sequential searcher.
passed_stars = redis_searcher.getPassedStars()
In [26]:
# redis_searcher.getStatus()
In [ ]: