Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update manage.py #586

Merged
merged 8 commits into from
Sep 9, 2021
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 17 additions & 7 deletions qlib/workflow/task/manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,7 @@ def __init__(self, task_pool: str):
task_pool: str
the name of Collection in MongoDB
"""
#self.task_pool is mongodb's connection
self.task_pool = getattr(get_mongodb(), task_pool)
self.task_pool:pymongo.collection.Collection = getattr(get_mongodb(), task_pool)
self.logger = get_module_logger(self.__class__.__name__)

@staticmethod
Expand All @@ -110,6 +109,19 @@ def _encode_task(self, task):
return task

def _decode_task(self, task):
"""
_decode_task is a serialization tool.

Parameters
----------
task : dict
task information

Returns
-------
bson.objectid.ObjectId
demon143 marked this conversation as resolved.
Show resolved Hide resolved
Convert dict to bson
"""
for prefix in self.ENCODE_FIELDS_PREFIX:
for k in list(task.keys()):
if k.startswith(prefix):
Expand Down Expand Up @@ -216,7 +228,6 @@ def create_task(self, task_def_l, dry_run=False, print_nt=False) -> List[str]:
new_tasks = []
_id_list = []
for t in task_def_l:
#self.task_pool: XXX = getattr(...)
try:
r = self.task_pool.find_one({"filter": t})
except InvalidDocument:
Expand All @@ -230,7 +241,6 @@ def create_task(self, task_def_l, dry_run=False, print_nt=False) -> List[str]:
else:
_id_list.append(None)
else:
#_decode_task is Serialization tool
_id_list.append(self._decode_task(r)["_id"])

self.logger.info(f"Total Tasks: {len(task_def_l)}, New Tasks: {len(new_tasks)}")
Expand Down Expand Up @@ -473,11 +483,11 @@ def run_task(

After running this method, here are 4 situations (before_status -> after_status):

STATUS_WAITING -> STATUS_DONE: use task["def"] as `task_func` param
STATUS_WAITING -> STATUS_DONE: use task["def"] as `task_func` param; it means that the task has not been started

STATUS_WAITING -> STATUS_PART_DONE: use task["def"] as `task_func` param

STATUS_PART_DONE -> STATUS_PART_DONE: use task["res"] as `task_func` param
STATUS_PART_DONE -> STATUS_PART_DONE: use task["res"] as `task_func` param; it means that the task has been started but not completed

STATUS_PART_DONE -> STATUS_DONE: use task["res"] as `task_func` param

Expand Down Expand Up @@ -508,7 +518,7 @@ def (task_def, **kwargs) -> <res which will be committed>
if task is None:
break
get_module_logger("run_task").info(task["def"])
# when fetching `WAITING` task, use task["def"] to train. "def" means that the task has not been defined
# when fetching `WAITING` task, use task["def"] to train.
if before_status == TaskManager.STATUS_WAITING:
param = task["def"]
# when fetching `PART_DONE` task, use task["res"] to train because the middle result has been saved to task["res"]
Expand Down