online serving V8

microsoft · you-n-g · May 17, 2021 · Feb 16, 2021 · Feb 17, 2021 · Feb 26, 2021
commit 319396c815c4ac1e5a07d8d2a64623ff0a14a1ba
diff --git a/examples/model_rolling/task_manager_rolling.py b/examples/model_rolling/task_manager_rolling.py
@@ -15,6 +15,11 @@
 from qlib.model.ens.group import RollingGroup
 from qlib.model.trainer import TrainerRM
 
+"""
+This example shows how a Trainer works based on TaskManager with rolling tasks.
+After training, task_collecting shows how to collect the rolling results.
+"""
+
 data_handler_config = {
     "start_time": "2008-01-01",
     "end_time": "2020-08-01",
@@ -71,81 +76,83 @@
     "record": record_config,
 }
 
-# Reset all things to the first status, be careful to save important data
-def reset(task_pool, exp_name):
-    print("========== reset ==========")
-    TaskManager(task_pool=task_pool).remove()
-
-    exp = R.get_exp(experiment_name=exp_name)
-
-    for rid in exp.list_recorders():
-        exp.delete_recorder(rid)
-
-
-# This part corresponds to "Task Generating" in the document
-def task_generating():
-
-    print("========== task_generating ==========")
-
-    tasks = task_generator(
-        tasks=[task_xgboost_config, task_lgb_config],
-        generators=RollingGen(step=550, rtype=RollingGen.ROLL_SD),  # generate different date segment
-    )
-
-    pprint(tasks)
-
-    return tasks
-
-
-def task_training(tasks, task_pool, exp_name):
-    trainer = TrainerRM(exp_name, task_pool)
-    trainer.train(tasks)
-
-
-# This part corresponds to "Task Collecting" in the document
-def task_collecting(exp_name):
-    print("========== task_collecting ==========")
-
-    def rec_key(recorder):
-        task_config = recorder.load_object("task")
-        model_key = task_config["model"]["class"]
-        rolling_key = task_config["dataset"]["kwargs"]["segments"]["test"]
-        return model_key, rolling_key
-
-    def my_filter(recorder):
-        # only choose the results of "LGBModel"
-        model_key, rolling_key = rec_key(recorder)
-        if model_key == "LGBModel":
-            return True
-        return False
-
-    artifact = ens_workflow(
-        RecorderCollector(exp_name=exp_name, rec_key_func=rec_key, rec_filter_func=my_filter),
-        RollingGroup(),
-    )
-    print(artifact)
-
-
-def main(
-    provider_uri="~/.qlib/qlib_data/cn_data",
-    task_url="mongodb://10.0.0.4:27017/",
-    task_db_name="rolling_db",
-    experiment_name="rolling_exp",
-    task_pool="rolling_task",
-):
-    mongo_conf = {
-        "task_url": task_url,
-        "task_db_name": task_db_name,
-    }
-    qlib.init(provider_uri=provider_uri, region=REG_CN, mongo=mongo_conf)
 
-    reset(task_pool, experiment_name)
-    tasks = task_generating()
-    task_training(tasks, task_pool, experiment_name)
-    task_collecting(experiment_name)
+class RollingTaskExample:
+    def __init__(
+        self,
+        provider_uri="~/.qlib/qlib_data/cn_data",
+        region=REG_CN,
+        task_url="mongodb://10.0.0.4:27017/",
+        task_db_name="rolling_db",
+        experiment_name="rolling_exp",
+        task_pool="rolling_task",
+        task_config=[task_xgboost_config, task_lgb_config],
+        rolling_step=550,
+        rolling_type=RollingGen.ROLL_SD,
+    ):
+        # TaskManager config
+        mongo_conf = {
+            "task_url": task_url,
+            "task_db_name": task_db_name,
+        }
+        qlib.init(provider_uri=provider_uri, region=region, mongo=mongo_conf)
+        self.experiment_name = experiment_name
+        self.task_pool = task_pool
+        self.task_config = task_config
+        self.rolling_gen = RollingGen(step=rolling_step, rtype=rolling_type)
+
+    # Reset all things to the first status, be careful to save important data
+    def reset(self):
+        print("========== reset ==========")
+        TaskManager(task_pool=self.task_pool).remove()
+        exp = R.get_exp(experiment_name=self.experiment_name)
+        for rid in exp.list_recorders():
+            exp.delete_recorder(rid)
+
+    def task_generating(self):
+        print("========== task_generating ==========")
+        tasks = task_generator(
+            tasks=self.task_config,
+            generators=self.rolling_gen,  # generate different date segments
+        )
+        pprint(tasks)
+        return tasks
+
+    def task_training(self, tasks):
+        print("========== task_training ==========")
+        trainer = TrainerRM(self.experiment_name, self.task_pool)
+        trainer.train(tasks)
+
+    def task_collecting(self):
+        print("========== task_collecting ==========")
+
+        def rec_key(recorder):
+            task_config = recorder.load_object("task")
+            model_key = task_config["model"]["class"]
+            rolling_key = task_config["dataset"]["kwargs"]["segments"]["test"]
+            return model_key, rolling_key
+
+        def my_filter(recorder):
+            # only choose the results of "LGBModel"
+            model_key, rolling_key = rec_key(recorder)
+            if model_key == "LGBModel":
+                return True
+            return False
+
+        artifact = ens_workflow(
+            RecorderCollector(exp_name=self.experiment_name, rec_key_func=rec_key, rec_filter_func=my_filter),
+            RollingGroup(),
+        )
+        print(artifact)
+
+    def main(self):
+        self.reset()
+        tasks = self.task_generating()
+        self.task_training(tasks)
+        self.task_collecting()
 
 
 if __name__ == "__main__":
     ## to see the whole process with your own parameters, use the command below
-    # python update_online_pred.py main --experiment_name="your_exp_name"
-    fire.Fire()
+    # python task_manager_rolling.py main --experiment_name="your_exp_name"
+    fire.Fire(RollingTaskExample)
diff --git a/examples/online_srv/online_management_simulate.py b/examples/online_srv/online_management_simulate.py
@@ -11,7 +11,7 @@
 from qlib.workflow.task.manage import TaskManager
 
 """
-This examples is about the OnlineManager and OnlineSimulator based on Rolling tasks. 
+This example is about the OnlineManager and OnlineSimulator based on rolling tasks.
 The OnlineManager will focus on the updating of your online models.
 The OnlineSimulator will focus on the simulating real updating routine of your online models.
 """

diff --git a/examples/online_srv/task_manager_rolling_with_updating.py b/examples/online_srv/rolling_online_management.py
@@ -1,18 +1,21 @@
-from pprint import pprint
-
+import os
+from pathlib import Path
+import pickle
 import fire
 import qlib
-from qlib.config import REG_CN
-from qlib.model.trainer import task_train
 from qlib.workflow import R
-from qlib.workflow.task.collect import RecorderCollector
-from qlib.model.ens.ensemble import RollingEnsemble, ens_workflow
-from qlib.workflow.task.gen import RollingGen, task_generator
-from qlib.workflow.task.manage import TaskManager, run_task
+from qlib.workflow.task.gen import RollingGen
+from qlib.workflow.task.manage import TaskManager
 from qlib.workflow.online.manager import RollingOnlineManager
 from qlib.workflow.task.utils import list_recorders
 from qlib.model.trainer import TrainerRM
-from qlib.model.ens.group import RollingGroup
+
+"""
+This example shows how RollingOnlineManager works with rolling tasks.
+There are two parts: first train and routine.
+First, the RollingOnlineManager will finish the first training and set the trained models as `online` models.
+Next, the RollingOnlineManager will run a routine process: update online prediction -> prepare signals -> prepare tasks -> prepare new models -> reset online models
+"""
 
 data_handler_config = {
     "start_time": "2013-01-01",
@@ -89,92 +92,38 @@ def __init__(
             "task_db_name": task_db_name,  # database name
         }
         qlib.init(provider_uri=provider_uri, region=region, mongo=mongo_conf)
-
-        self.rolling_gen = RollingGen(step=rolling_step, rtype=RollingGen.ROLL_SD)
-        self.trainer = TrainerRM(self.exp_name, self.task_pool)
-        self.task_manager = TaskManager(self.task_pool)
         self.rolling_online_manager = RollingOnlineManager(
-            experiment_name=exp_name, rolling_gen=self.rolling_gen, trainer=self.trainer
-        )
-
-    def print_online_model(self):
-        print("========== print_online_model ==========")
-        print("Current 'online' model:")
-
-        for rec in self.rolling_online_manager.online_models():
-            print(rec.info["id"])
-        print("Current 'next online' model:")
-        for rid, rec in list_recorders(self.exp_name).items():
-            if self.rolling_online_manager.get_online_tag(rec) == self.rolling_online_manager.NEXT_ONLINE_TAG:
-                print(rid)
-
-    # This part corresponds to "Task Generating" in the document
-    def task_generating(self):
-
-        print("========== task_generating ==========")
-
-        tasks = task_generator(
-            tasks=[task_xgboost_config, task_lgb_config],
-            generators=self.rolling_gen,  # generate different date segment
+            experiment_name=exp_name,
+            rolling_gen=RollingGen(step=rolling_step, rtype=RollingGen.ROLL_SD),
+            trainer=TrainerRM(self.exp_name, self.task_pool),
         )
 
-        pprint(tasks)
-
-        return tasks
-
-    def task_training(self, tasks):
-        # self.trainer.train(tasks)
-        self.rolling_online_manager.prepare_new_models(tasks, tag=RollingOnlineManager.ONLINE_TAG)
-
-    # This part corresponds to "Task Collecting" in the document
-    def task_collecting(self):
-        print("========== task_collecting ==========")
-
-        def rec_key(recorder):
-            task_config = recorder.load_object("task")
-            model_key = task_config["model"]["class"]
-            rolling_key = task_config["dataset"]["kwargs"]["segments"]["test"]
-            return model_key, rolling_key
-
-        def my_filter(recorder):
-            # only choose the results of "LGBModel"
-            model_key, rolling_key = rec_key(recorder)
-            if model_key == "LGBModel":
-                return True
-            return False
-
-        artifact = ens_workflow(
-            RecorderCollector(exp_name=self.exp_name, rec_key_func=rec_key, rec_filter_func=my_filter), RollingGroup()
-        )
-        print(artifact)
+    _ROLLING_MANAGER_PATH = ".rolling_manager"  # the RollingOnlineManager will dump to this file, for it will be loaded when calling routine.
 
     # Reset all things to the first status, be careful to save important data
     def reset(self):
         print("========== reset ==========")
-        self.task_manager.remove()
+        TaskManager(self.task_pool).remove()
         exp = R.get_exp(experiment_name=self.exp_name)
         for rid in exp.list_recorders():
             exp.delete_recorder(rid)
 
-    # Run this firstly to see the workflow in Task Management
+        if os.path.exists(self._ROLLING_MANAGER_PATH):
+            os.remove(self._ROLLING_MANAGER_PATH)
+
     def first_run(self):
         print("========== first_run ==========")
         self.reset()
-
-        tasks = self.task_generating()
-        pprint(tasks)
-        self.task_training(tasks)
-        self.task_collecting()
-
-        # latest_rec, _ = self.rolling_online_manager.list_latest_recorders()
-        # self.rolling_online_manager.reset_online_tag(list(latest_rec.values()))
+        self.rolling_online_manager.first_train([task_xgboost_config, task_lgb_config])
+        self.rolling_online_manager.to_pickle(self._ROLLING_MANAGER_PATH)
+        print(self.rolling_online_manager.collect_artifact())
 
     def routine(self):
         print("========== routine ==========")
-        self.print_online_model()
+        with Path(self._ROLLING_MANAGER_PATH).open("rb") as f:
+            self.rolling_online_manager = pickle.load(f)
         self.rolling_online_manager.routine()
-        self.print_online_model()
-        self.task_collecting()
+        print(self.rolling_online_manager.collect_artifact())
 
     def main(self):
         self.first_run()

diff --git a/examples/online_srv/update_online_pred.py b/examples/online_srv/update_online_pred.py
@@ -5,6 +5,13 @@
 from qlib.workflow.online.manager import OnlineManagerR
 from qlib.workflow.task.utils import list_recorders
 
+"""
+This example shows how OnlineManagerR works when we need to update predictions.
+There are two parts: first_train and update_online_pred.
+First, a model is trained with task_train and OnlineManagerR sets it as the `online` model.
+Next, OnlineManagerR updates the online prediction.
+"""
+
 data_handler_config = {
     "start_time": "2008-01-01",
     "end_time": "2020-08-01",
@@ -52,31 +59,25 @@
 }
 
 
-def first_train(experiment_name="online_srv"):
-
-    rec = task_train(task_config=task, experiment_name=experiment_name)
-
-    online_manager = OnlineManagerR(experiment_name)
-    online_manager.reset_online_tag(rec)
-
-
-def update_online_pred(experiment_name="online_srv"):
-
-    online_manager = OnlineManagerR(experiment_name)
-
-    print("Here are the online models waiting for update:")
-    for rid, rec in list_recorders(experiment_name).items():
-        if online_manager.get_online_tag(rec) == OnlineManagerR.ONLINE_TAG:
-            print(rid)
+class UpdatePredExample:
+    def __init__(
+        self, provider_uri="~/.qlib/qlib_data/cn_data", region=REG_CN, experiment_name="online_srv", task_config=task
+    ):
+        qlib.init(provider_uri=provider_uri, region=region)
+        self.experiment_name = experiment_name
+        self.online_manager = OnlineManagerR(self.experiment_name)
+        self.task_config = task_config
 
-    online_manager.update_online_pred()
+    def first_train(self):
+        rec = task_train(self.task_config, experiment_name=self.experiment_name)
+        self.online_manager.reset_online_tag(rec)  # set to online model
 
+    def update_online_pred(self):
+        self.online_manager.update_online_pred()
 
-def main(provider_uri="~/.qlib/qlib_data/cn_data", region=REG_CN, experiment_name="online_srv"):
-    provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
-    qlib.init(provider_uri=provider_uri, region=region)
-    first_train(experiment_name)
-    update_online_pred(experiment_name)
+    def main(self):
+        self.first_train()
+        self.update_online_pred()
 
 
 if __name__ == "__main__":
@@ -86,4 +87,4 @@ def main(provider_uri="~/.qlib/qlib_data/cn_data", region=REG_CN, experiment_nam
     # python update_online_pred.py update_online_pred
     ## to see the whole process with your own parameters, use the command below
     # python update_online_pred.py main --experiment_name="your_exp_name"
-    fire.Fire()
+    fire.Fire(UpdatePredExample)
diff --git a/qlib/model/trainer.py b/qlib/model/trainer.py
@@ -135,3 +135,12 @@ def train(self, tasks: list, train_func=None, *args, **kwargs):
         for _id in _id_list:
             recs.append(tm.re_query(_id)["res"])
         return recs
+
+
+class DelayTrainer(Trainer):
+    def fake_train(self):
+        self.fake_trained = []
+
+    def train(self):
+        for rec in self.fake_trained:
+            pass