From 9174dfe11be6d971319cbefe18cb359985a33b22 Mon Sep 17 00:00:00 2001 From: Yuchen Fang Date: Mon, 26 Sep 2022 13:44:00 +0800 Subject: [PATCH 1/7] feat(data): :sparkles: add a general highfreq data handler for open source Add HighFreqOpenHandler and HighFreqOpenBacktestHandler for data pipeline without paused_num information. --- qlib/contrib/data/highfreq_handler.py | 127 ++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/qlib/contrib/data/highfreq_handler.py b/qlib/contrib/data/highfreq_handler.py index 4898725da9..8e07b23e56 100644 --- a/qlib/contrib/data/highfreq_handler.py +++ b/qlib/contrib/data/highfreq_handler.py @@ -110,6 +110,91 @@ def get_normalized_price_feature(price_field, shift=0): return fields, names +class HighFreqOpenHandler(HighFreqHandler): + def __init__( + self, + instruments="csi300", + start_time=None, + end_time=None, + infer_processors=[], + learn_processors=[], + fit_start_time=None, + fit_end_time=None, + drop_raw=True, + day_length=240, + ): + super().__init__( + instruments=instruments, + start_time=start_time, + end_time=end_time, + infer_processors=infer_processors, + learn_processors=learn_processors, + fit_start_time=fit_start_time, + fit_end_time=fit_end_time, + drop_raw=drop_raw, + ) + self.day_length = day_length + + def get_feature_config(self): + fields = [] + names = [] + + template_if = "If(IsNull({1}), {0}, {1})" + template_paused = f"Cut({{0}}, {self.day_length * 2}, None)" + + def get_normalized_price_feature(price_field, shift=0): + # norm with the close price of 237th minute of yesterday. + if shift == 0: + template_norm = f"{{0}}/DayLast(Ref({{1}}, {self.day_length * 2}))" + else: + template_norm = f"Ref({{0}}, " + str(shift) + ")/DayLast(Ref({{1}}, {self.day_length}))" + + template_fillnan = "FFillNan({0})" + # calculate -> ffill -> remove paused + feature_ops = template_paused.format( + template_fillnan.format( + template_norm.format(template_if.format("$close", price_field), template_fillnan.format("$close")) + ) + ) + return feature_ops + + fields += [get_normalized_price_feature("$open", 0)] + fields += [get_normalized_price_feature("$high", 0)] + fields += [get_normalized_price_feature("$low", 0)] + fields += [get_normalized_price_feature("$close", 0)] + fields += [get_normalized_price_feature("$vwap", 0)] + names += ["$open", "$high", "$low", "$close", "$vwap"] + + fields += [get_normalized_price_feature("$open", self.day_length)] + fields += [get_normalized_price_feature("$high", self.day_length)] + fields += [get_normalized_price_feature("$low", self.day_length)] + fields += [get_normalized_price_feature("$close", self.day_length)] + names += ["$open_1", "$high_1", "$low_1", "$close_1", "$vwap_1"] + + # calculate and fill nan with 0 + fields += [ + template_paused.format( + "If(IsNull({0}), 0, {0})".format( + f"{{0}}/Ref(DayLast(Mean({{0}}, {self.day_length * 30})), {self.day_length})".format("$volume") + ) + ) + ] + names += ["$volume"] + + fields += [ + template_paused.format( + "If(IsNull({0}), 0, {0})".format( + f"Ref({{0}}, {self.day_length})/Ref(DayLast(Mean({{0}}, {self.day_length * 30})), {self.day_length})".format( + "$volume" + ) + ) + ) + ] + names += ["$volume_1"] + + return fields, names + + class HighFreqBacktestHandler(DataHandler): def __init__( self, @@ -162,6 +247,48 @@ def get_feature_config(self): return fields, names +class HighFreqOpenBacktestHandler(HighFreqBacktestHandler): + def __init__( + self, + instruments="csi300", + start_time=None, + end_time=None, + day_length=240, + ): + super().__init__( + instruments=instruments, + start_time=start_time, + end_time=end_time, + ) + self.day_length = day_length + + def get_feature_config(self): + fields = [] + names = [] + + template_paused = f"Cut({{0}}, {self.day_length * 2}, None)" + # template_paused = "{0}" + template_fillnan = "FFillNan({0})" + template_if = "If(IsNull({1}), {0}, {1})" + fields += [ + template_paused.format(template_fillnan.format("$close")), + ] + names += ["$close0"] + + fields += [ + template_paused.format(template_if.format(template_fillnan.format("$close"), "$vwap")), + ] + names += ["$vwap0"] + + fields += [ + template_paused.format( + "If(IsNull({0}), 0, {0})".format('$volume') + ) + ] + names += ["$volume0"] + + return fields, names + class HighFreqOrderHandler(DataHandlerLP): def __init__( From 6072eeabb74d5f22d971460011385ca503d3f6a4 Mon Sep 17 00:00:00 2001 From: mingzhehan Date: Mon, 26 Sep 2022 06:01:29 +0000 Subject: [PATCH 2/7] fix: position of parameter init --- qlib/contrib/data/highfreq_handler.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/qlib/contrib/data/highfreq_handler.py b/qlib/contrib/data/highfreq_handler.py index 8e07b23e56..6589f04799 100644 --- a/qlib/contrib/data/highfreq_handler.py +++ b/qlib/contrib/data/highfreq_handler.py @@ -123,6 +123,7 @@ def __init__( drop_raw=True, day_length=240, ): + self.day_length = day_length super().__init__( instruments=instruments, start_time=start_time, @@ -133,7 +134,6 @@ def __init__( fit_end_time=fit_end_time, drop_raw=drop_raw, ) - self.day_length = day_length def get_feature_config(self): fields = [] @@ -147,7 +147,7 @@ def get_normalized_price_feature(price_field, shift=0): if shift == 0: template_norm = f"{{0}}/DayLast(Ref({{1}}, {self.day_length * 2}))" else: - template_norm = f"Ref({{0}}, " + str(shift) + ")/DayLast(Ref({{1}}, {self.day_length}))" + template_norm = f"Ref({{0}}, " + str(shift) + f")/DayLast(Ref({{1}}, {self.day_length}))" template_fillnan = "FFillNan({0})" # calculate -> ffill -> remove paused @@ -169,6 +169,7 @@ def get_normalized_price_feature(price_field, shift=0): fields += [get_normalized_price_feature("$high", self.day_length)] fields += [get_normalized_price_feature("$low", self.day_length)] fields += [get_normalized_price_feature("$close", self.day_length)] + fields += [get_normalized_price_feature("$vwap", self.day_length)] names += ["$open_1", "$high_1", "$low_1", "$close_1", "$vwap_1"] # calculate and fill nan with 0 @@ -255,12 +256,12 @@ def __init__( end_time=None, day_length=240, ): + self.day_length = day_length super().__init__( instruments=instruments, start_time=start_time, end_time=end_time, ) - self.day_length = day_length def get_feature_config(self): fields = [] From 49f66dd860c9e420b769c534ca7d3c32add1afeb Mon Sep 17 00:00:00 2001 From: Yuchen Fang Date: Mon, 26 Sep 2022 14:08:59 +0800 Subject: [PATCH 3/7] style(data): :lipstick: rename open to general --- qlib/contrib/data/highfreq_handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qlib/contrib/data/highfreq_handler.py b/qlib/contrib/data/highfreq_handler.py index 6589f04799..797d6ee718 100644 --- a/qlib/contrib/data/highfreq_handler.py +++ b/qlib/contrib/data/highfreq_handler.py @@ -110,7 +110,7 @@ def get_normalized_price_feature(price_field, shift=0): return fields, names -class HighFreqOpenHandler(HighFreqHandler): +class HighFreqGeneralHandler(HighFreqHandler): def __init__( self, instruments="csi300", @@ -248,7 +248,7 @@ def get_feature_config(self): return fields, names -class HighFreqOpenBacktestHandler(HighFreqBacktestHandler): +class HighFreqGeneralBacktestHandler(HighFreqBacktestHandler): def __init__( self, instruments="csi300", From ba483a7fd09104c909a2a1b9888f166101651789 Mon Sep 17 00:00:00 2001 From: Yuchen Fang Date: Mon, 26 Sep 2022 14:12:23 +0800 Subject: [PATCH 4/7] style(data): :lipstick: lint --- qlib/contrib/data/highfreq_handler.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/qlib/contrib/data/highfreq_handler.py b/qlib/contrib/data/highfreq_handler.py index 797d6ee718..1895830c2d 100644 --- a/qlib/contrib/data/highfreq_handler.py +++ b/qlib/contrib/data/highfreq_handler.py @@ -248,6 +248,7 @@ def get_feature_config(self): return fields, names + class HighFreqGeneralBacktestHandler(HighFreqBacktestHandler): def __init__( self, @@ -281,11 +282,7 @@ def get_feature_config(self): ] names += ["$vwap0"] - fields += [ - template_paused.format( - "If(IsNull({0}), 0, {0})".format('$volume') - ) - ] + fields += [template_paused.format("If(IsNull({0}), 0, {0})".format("$volume"))] names += ["$volume0"] return fields, names From 4b3cb9b096cffd32a0b79d768ade1b8e3a2a0cba Mon Sep 17 00:00:00 2001 From: Yuchen Fang Date: Tue, 11 Oct 2022 11:13:11 +0800 Subject: [PATCH 5/7] style: :lipstick: delete useless comment & fix inheritance relation --- qlib/contrib/data/highfreq_handler.py | 41 +++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/qlib/contrib/data/highfreq_handler.py b/qlib/contrib/data/highfreq_handler.py index 1895830c2d..10ec297591 100644 --- a/qlib/contrib/data/highfreq_handler.py +++ b/qlib/contrib/data/highfreq_handler.py @@ -110,7 +110,7 @@ def get_normalized_price_feature(price_field, shift=0): return fields, names -class HighFreqGeneralHandler(HighFreqHandler): +class HighFreqGeneralHandler(DataHandlerLP): def __init__( self, instruments="csi300", @@ -124,14 +124,36 @@ def __init__( day_length=240, ): self.day_length = day_length + def check_transform_proc(proc_l): + new_l = [] + for p in proc_l: + p["kwargs"].update( + { + "fit_start_time": fit_start_time, + "fit_end_time": fit_end_time, + } + ) + new_l.append(p) + return new_l + + infer_processors = check_transform_proc(infer_processors) + learn_processors = check_transform_proc(learn_processors) + + data_loader = { + "class": "QlibDataLoader", + "kwargs": { + "config": self.get_feature_config(), + "swap_level": False, + "freq": "1min", + }, + } super().__init__( instruments=instruments, start_time=start_time, end_time=end_time, + data_loader=data_loader, infer_processors=infer_processors, learn_processors=learn_processors, - fit_start_time=fit_start_time, - fit_end_time=fit_end_time, drop_raw=drop_raw, ) @@ -249,7 +271,7 @@ def get_feature_config(self): return fields, names -class HighFreqGeneralBacktestHandler(HighFreqBacktestHandler): +class HighFreqGeneralBacktestHandler(DataHandler): def __init__( self, instruments="csi300", @@ -258,10 +280,19 @@ def __init__( day_length=240, ): self.day_length = day_length + data_loader = { + "class": "QlibDataLoader", + "kwargs": { + "config": self.get_feature_config(), + "swap_level": False, + "freq": "1min", + }, + } super().__init__( instruments=instruments, start_time=start_time, end_time=end_time, + data_loader=data_loader, ) def get_feature_config(self): @@ -269,7 +300,6 @@ def get_feature_config(self): names = [] template_paused = f"Cut({{0}}, {self.day_length * 2}, None)" - # template_paused = "{0}" template_fillnan = "FFillNan({0})" template_if = "If(IsNull({1}), {0}, {1})" fields += [ @@ -481,7 +511,6 @@ def get_feature_config(self): template_if = "If(IsNull({1}), {0}, {1})" template_paused = "Select(Gt($hx_paused_num, 1.001), {0})" - # template_paused = "{0}" template_fillnan = "FFillNan({0})" fields += [ template_fillnan.format(template_paused.format("$close")), From e995f58c93e93d05271cec773eda7769b1a763c7 Mon Sep 17 00:00:00 2001 From: Yuchen Fang Date: Tue, 11 Oct 2022 11:22:10 +0800 Subject: [PATCH 6/7] style: :lipstick: lint --- qlib/contrib/data/highfreq_handler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/qlib/contrib/data/highfreq_handler.py b/qlib/contrib/data/highfreq_handler.py index 10ec297591..e60a59d15f 100644 --- a/qlib/contrib/data/highfreq_handler.py +++ b/qlib/contrib/data/highfreq_handler.py @@ -124,6 +124,7 @@ def __init__( day_length=240, ): self.day_length = day_length + def check_transform_proc(proc_l): new_l = [] for p in proc_l: From fef972d0f1baad80ce107bd691811c3590dc373d Mon Sep 17 00:00:00 2001 From: Yuchen Fang Date: Wed, 12 Oct 2022 14:41:16 +0800 Subject: [PATCH 7/7] style: :lipstick: remove duplicated function --- qlib/contrib/data/highfreq_handler.py | 48 +++++---------------------- 1 file changed, 8 insertions(+), 40 deletions(-) diff --git a/qlib/contrib/data/highfreq_handler.py b/qlib/contrib/data/highfreq_handler.py index e60a59d15f..373b8e669d 100644 --- a/qlib/contrib/data/highfreq_handler.py +++ b/qlib/contrib/data/highfreq_handler.py @@ -1,5 +1,7 @@ from qlib.data.dataset.handler import DataHandler, DataHandlerLP +from .handler import check_transform_proc + EPSILON = 1e-4 @@ -15,20 +17,9 @@ def __init__( fit_end_time=None, drop_raw=True, ): - def check_transform_proc(proc_l): - new_l = [] - for p in proc_l: - p["kwargs"].update( - { - "fit_start_time": fit_start_time, - "fit_end_time": fit_end_time, - } - ) - new_l.append(p) - return new_l - infer_processors = check_transform_proc(infer_processors) - learn_processors = check_transform_proc(learn_processors) + infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time) + learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time) data_loader = { "class": "QlibDataLoader", @@ -125,20 +116,8 @@ def __init__( ): self.day_length = day_length - def check_transform_proc(proc_l): - new_l = [] - for p in proc_l: - p["kwargs"].update( - { - "fit_start_time": fit_start_time, - "fit_end_time": fit_end_time, - } - ) - new_l.append(p) - return new_l - - infer_processors = check_transform_proc(infer_processors) - learn_processors = check_transform_proc(learn_processors) + infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time) + learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time) data_loader = { "class": "QlibDataLoader", @@ -331,20 +310,9 @@ def __init__( fit_end_time=None, drop_raw=True, ): - def check_transform_proc(proc_l): - new_l = [] - for p in proc_l: - p["kwargs"].update( - { - "fit_start_time": fit_start_time, - "fit_end_time": fit_end_time, - } - ) - new_l.append(p) - return new_l - infer_processors = check_transform_proc(infer_processors) - learn_processors = check_transform_proc(learn_processors) + infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time) + learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time) data_loader = { "class": "QlibDataLoader",