Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DDG-DA paper code #743

Merged
merged 47 commits into from
Jan 10, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
48f8694
Merge data selection to main
wendili-cs Jul 1, 2021
5bb06cd
Update trainer for reweighter
wendili-cs Jul 1, 2021
4f442f5
Typos fixed.
wendili-cs Jul 8, 2021
da013fd
Merge branch 'main' into ds
you-n-g Jul 30, 2021
81b4383
update data selection interface
you-n-g Aug 9, 2021
aa2699f
successfully run exp after refactor some interface
you-n-g Aug 13, 2021
d17aaac
data selection share handler & trainer
you-n-g Aug 20, 2021
82b4115
fix meta model time series bug
you-n-g Aug 22, 2021
5b118c4
fix online workflow set_uri bug
you-n-g Sep 13, 2021
3b073f7
fix set_uri bug
you-n-g Sep 26, 2021
384b670
Merge remote-tracking branch 'origin/main' into ds
you-n-g Sep 26, 2021
b0850b0
update ds docs and delay trainer bug
you-n-g Sep 27, 2021
051b261
Merge remote-tracking branch 'wd_ds/ds' into ds
you-n-g Oct 9, 2021
f10d726
Merge branch 'main' into ds
you-n-g Nov 14, 2021
cdcfe30
Merge remote-tracking branch 'origin/main' into ds
you-n-g Nov 14, 2021
6d61ad0
Merge remote-tracking branch 'origin/main' into ds
you-n-g Nov 16, 2021
f32a7ad
docs
you-n-g Nov 16, 2021
8fb37b6
resume reweighter
you-n-g Nov 16, 2021
21baead
add reweighting result
you-n-g Nov 16, 2021
12afe61
fix qlib model import
you-n-g Nov 17, 2021
1d9732b
make recorder more friendly
you-n-g Nov 17, 2021
20a8fe5
fix experiment workflow bug
you-n-g Nov 18, 2021
faf3e03
commit for merging master in case of conflicts
you-n-g Dec 9, 2021
76d1bd9
Merge remote-tracking branch 'origin/main' into ds
you-n-g Dec 9, 2021
3bc4030
Successful run DDG-DA with a single command
you-n-g Dec 11, 2021
49c4074
remove unused code
you-n-g Dec 11, 2021
ce66d9a
add more docs
you-n-g Dec 13, 2021
cea134d
Update README.md
you-n-g Dec 13, 2021
a4a2b32
Update & fix some bugs.
demon143 Jan 8, 2022
8241832
Update configuration & remove debug functions
wendili-cs Jan 8, 2022
e1b079d
Update README.md
wendili-cs Jan 9, 2022
6a3f471
Modify horizon from code rather than yaml
wendili-cs Jan 9, 2022
c3364cd
Update performance in README.md
wendili-cs Jan 9, 2022
b3d1081
Merge remote-tracking branch 'origin/main' into ds
you-n-g Jan 9, 2022
fa2d047
fix part comments
you-n-g Jan 9, 2022
efab5cb
Remove unfinished TCTS.
wendili-cs Jan 10, 2022
5a184eb
Fix some details.
wendili-cs Jan 10, 2022
8fee1b4
Update meta docs
wendili-cs Jan 10, 2022
a31a4d5
Update README.md of the benchmarks_dynamic
wendili-cs Jan 10, 2022
ca3fe76
Merge branch 'main' into ds
you-n-g Jan 10, 2022
97f61d5
Update README.md files
wendili-cs Jan 10, 2022
2726560
Merge branch 'ds' of wd_git:you-n-g/qlib into ds
wendili-cs Jan 10, 2022
da68103
Add README.md to the rolling_benchmark baseline.
wendili-cs Jan 10, 2022
7e1183b
Refine the docs and link
you-n-g Jan 10, 2022
b0857c2
Rename README.md in benchmarks_dynamic.
wendili-cs Jan 10, 2022
38b83dd
Remove comments.
wendili-cs Jan 10, 2022
34f5bd2
auto download data
you-n-g Jan 10, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ data_handler_config: &data_handler_config
- class: CSRankNorm
kwargs:
fields_group: label
label: ["Ref($close, -2) / Ref($close, -1) - 1"]
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
Expand Down
29 changes: 18 additions & 11 deletions examples/benchmarks_dynamic/DDG-DA/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
DIRNAME = Path(__file__).absolute().resolve().parent
sys.path.append(str(DIRNAME.parent / "baseline"))
from rolling_benchmark import RollingBenchmark # NOTE: sys.path is changed for import RollingBenchmark
import torch

torch.manual_seed(43)


class DDGDA:
Expand All @@ -32,15 +35,17 @@ class DDGDA:
- `rm -r mlruns`
"""

def __init__(self) -> None:
def __init__(self, model_types=["linear", "linear"]) -> None:
self.step = 20
# NOTE:
# the horizon must match the meaning in the base task template
self.horizon = 1
self.horizon = 20
self.meta_exp_name = "DDG-DA"
self.model_types = model_types # first for calculate IC, second for forecasting models' type

def get_feature_importance(self):
rb = RollingBenchmark()
# this must be lightGBM, because it needs to get the feature importance
rb = RollingBenchmark(model_type="gbdt")
task = rb.basic_task()

model = init_instance_by_config(task["model"])
Expand All @@ -67,7 +72,7 @@ def dump_data_for_proxy_model(self):
fi = self.get_feature_importance()
col_selected = fi.nlargest(topk)

rb = RollingBenchmark()
rb = RollingBenchmark(model_type=self.model_types[0])
task = rb.basic_task()
dataset = init_instance_by_config(task["dataset"])
prep_ds = dataset.prepare(slice(None), col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
Expand Down Expand Up @@ -105,10 +110,12 @@ def dump_meta_ipt(self):
Dump data for training meta model.
This function will dump the input data for meta model
"""
rb = RollingBenchmark()
# According to the experiments, the choice of the model type is very important for achieving good results
rb = RollingBenchmark(model_type=self.model_types[0])
sim_task = rb.basic_task()

sim_task["model"].setdefault("kwargs", {}).update({"early_stopping_rounds": None, "num_boost_round": 350})
if self.model_types[0] == "gbdt":
sim_task["model"].setdefault("kwargs", {}).update({"early_stopping_rounds": None, "num_boost_round": 150})

exp_name_sim = f"data_sim_s{self.step}"

Expand All @@ -125,7 +132,7 @@ def train_meta_model(self):

# 1) leverage the simplified proxy forecasting model to train meta model.
# - Only the dataset part is important, in current version of meta model will integrate the
rb = RollingBenchmark()
rb = RollingBenchmark(model_type=self.model_types[0])
sim_task = rb.basic_task()
proxy_forecast_model_task = {
# "model": "qlib.contrib.model.linear.LinearModel",
Expand All @@ -146,7 +153,7 @@ def train_meta_model(self):
kwargs = dict(
task_tpl=proxy_forecast_model_task,
step=self.step,
segments=0.5,
segments=0.62,
you-n-g marked this conversation as resolved.
Show resolved Hide resolved
trunc_days=1 + self.horizon,
hist_step_n=30,
fill_method="max",
Expand All @@ -163,7 +170,7 @@ def train_meta_model(self):
# 3) train and logging meta model
with R.start(experiment_name=self.meta_exp_name):
R.log_params(**kwargs)
mm = MetaModelDS(step=self.step, hist_step_n=kwargs["hist_step_n"], lr=0.0001)
mm = MetaModelDS(step=self.step, hist_step_n=kwargs["hist_step_n"], lr=0.001, max_epoch=200)
mm.fit(md)
R.save_objects(model=mm)

Expand Down Expand Up @@ -196,7 +203,7 @@ def meta_inference(self):
hist_step_n = int(param["hist_step_n"])
fill_method = param.get("fill_method", "max")

rb = RollingBenchmark()
rb = RollingBenchmark(model_type=self.model_types[1])
task_l = rb.create_rolling_tasks()

# 2.2) create meta dataset for final dataset
Expand Down Expand Up @@ -226,7 +233,7 @@ def train_and_eval_tasks(self):
"""
with self._task_path.open("rb") as f:
tasks = pickle.load(f)
rb = RollingBenchmark(rolling_exp="rolling_ds")
rb = RollingBenchmark(rolling_exp="rolling_ds", model_type=self.model_types[1])
rb.train_rolling_tasks(tasks)
rb.ens_rolling()
rb.update_rolling_rec()
Expand Down
6 changes: 4 additions & 2 deletions examples/benchmarks_dynamic/REAMDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,7 @@ Modeling the dynamic of market is a very important problem in Quant research.

| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
|------------------|---------|----|------|---------|-----------|-------------------|-------------------|--------------|
| RR[LightGBM] | | | | | | | | |
| DDG-DA[LightGBM] | | | | | | | | |
| RR[Linear] |Alpha158 |0.088|0.570|0.102 |0.622 |0.077 |1.175 |-0.086 |
| DDG-DA[Linear] |Alpha158 |0.093|0.622|0.106 |0.670 |0.085 |1.213 |-0.093 |
| RR[LightGBM] |Alpha158 |0.079|0.566|0.088 |0.592 |0.075 |1.226 |-0.096 |
| DDG-DA[LightGBM] |Alpha158 |0.084|0.639|0.093 |0.664 |0.099 |1.442 |-0.071 |
32 changes: 22 additions & 10 deletions examples/benchmarks_dynamic/baseline/rolling_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,25 +24,37 @@ class RollingBenchmark:

"""

def __init__(self, rolling_exp="rolling_models") -> None:
def __init__(self, rolling_exp="rolling_models", model_type="linear") -> None:
self.step = 20
self.horizon = 1
self.horizon = 20
self.rolling_exp = rolling_exp
self.model_type = model_type

def basic_task(self):
"""For fast training rolling"""
conf_path = DIRNAME.parent.parent / "benchmarks" / "LightGBM" / "workflow_config_lightgbm_Alpha158.yaml"
if self.model_type == "gbdt":
conf_path = DIRNAME.parent.parent / "benchmarks" / "LightGBM" / "workflow_config_lightgbm_Alpha158.yaml"
# dump the processed data on to disk for later loading to speed up the processing
h_path = DIRNAME / "lightgbm_alpha158_handler.pkl"
elif self.model_type == "linear":
conf_path = DIRNAME.parent.parent / "benchmarks" / "Linear" / "workflow_config_linear_Alpha158.yaml"
h_path = DIRNAME / "linear_alpha158_handler.pkl"
you-n-g marked this conversation as resolved.
Show resolved Hide resolved
else:
raise AssertionError("Model type is not supported!")
with conf_path.open("r") as f:
conf = yaml.safe_load(f)
task = conf["task"]

# dump the processed data on to disk for later loading to speed up the processing
h_path = DIRNAME / "lightgbm_alpha158_handler.pkl"
# modify dataset horizon
conf["task"]["dataset"]["kwargs"]["handler"]["kwargs"]["label"] = [
"Ref($close, -{}) / Ref($close, -1) - 1".format(self.horizon + 1)
]

task = conf["task"]

if not h_path.exists():
h_conf = task["dataset"]["kwargs"]["handler"]
h = init_instance_by_config(h_conf)
h.to_pickle(h_path, dump_all=True)
# if not h_path.exists():
you-n-g marked this conversation as resolved.
Show resolved Hide resolved
h_conf = task["dataset"]["kwargs"]["handler"]
h = init_instance_by_config(h_conf)
h.to_pickle(h_path, dump_all=True)

task["dataset"]["kwargs"]["handler"] = f"file://{h_path}"
task["record"] = ["qlib.workflow.record_temp.SignalRecord"]
Expand Down
6 changes: 6 additions & 0 deletions qlib/contrib/meta/data_selection/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,12 @@ def __init__(self, task: dict, meta_info: pd.DataFrame, mode: str = MetaTask.PRO
test_idx=d_test["label"].index,
)
)

# debug: record the test period of the current meta-task instance
self.test_period = (
you-n-g marked this conversation as resolved.
Show resolved Hide resolved
d_test["feature"].index.get_level_values("datetime")[0],
d_test["feature"].index.get_level_values("datetime")[-1],
)
# TODO: set device: I think this is not necessary to converting data format.
self.processed_meta_input = data_to_tensor(self.processed_meta_input)

Expand Down
9 changes: 7 additions & 2 deletions qlib/contrib/meta/data_selection/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def __init__(
clip_weight=2.0,
criterion="ic_loss",
lr=0.0001,
max_epoch=150,
max_epoch=100,
):
self.step = step
self.hist_step_n = hist_step_n
Expand All @@ -65,7 +65,7 @@ def __init__(
self.fitted = False

def run_epoch(self, phase, task_list, epoch, opt, loss_l, ignore_weight=False):
if phase == "train": # phase 0 for training, 1 for inference
if phase == "train":
self.tn.train()
torch.set_grad_enabled(True)
else:
Expand Down Expand Up @@ -141,6 +141,11 @@ def fit(self, meta_dataset: MetaDatasetDS):
phases = ["train", "test"]
meta_tasks_l = meta_dataset.prepare_tasks(phases)

if len(meta_tasks_l[1]):
R.log_params(
**dict(proxy_test_begin=meta_tasks_l[1][0].test_period)
) # debug: record when the test phase starts

self.tn = PredNet(
step=self.step, hist_step_n=self.hist_step_n, clip_weight=self.clip_weight, clip_method=self.clip_method
)
Expand Down