From 5b73b80293d224c36a60c02e2026cd7e606e4fe8 Mon Sep 17 00:00:00 2001
From: Maxim Smolskiy
Date: Mon, 5 Dec 2022 04:29:03 +0300
Subject: [PATCH] Fix the Errors/Warnings when building Qlib's documentation
 (#1381)

* Fix the Errors/Warnings when building Qlib's documentation
* Fix
* Fix
* Empty
* Test CI
* Add doc compiling checking to CI
* Fix
* Tries to be consistent with Makefile

Co-authored-by: you-n-g
---
 .github/workflows/test_qlib_from_source.yml        |  2 +-
 docs/Makefile                                      |  1 +
 docs/reference/api.rst                             |  4 +--
 docs/requirements.txt                              |  1 +
 qlib/contrib/evaluate.py                           |  4 ++-
 .../analysis_model_performance.py                  |  4 +--
 .../analysis_position/cumulative_return.py         |  4 ++-
 .../report/analysis_position/rank_label.py         |  3 ++-
 qlib/contrib/strategy/cost_control.py              | 27 ++++++++++++-------
 qlib/contrib/strategy/signal_strategy.py           | 21 ++++++++++-----
 qlib/data/cache.py                                 | 12 +++++----
 qlib/data/data.py                                  | 16 ++++++++---
 qlib/data/dataset/handler.py                       |  3 ++-
 qlib/data/dataset/loader.py                        |  2 +-
 qlib/data/filter.py                                |  8 +++---
 qlib/data/ops.py                                   | 13 ++++++---
 qlib/model/trainer.py                              | 18 ++++++-------
 qlib/rl/utils/log.py                               |  2 +-
 qlib/workflow/__init__.py                          |  7 ++---
 qlib/workflow/expm.py                              |  9 ++++---
 qlib/workflow/online/manager.py                    |  2 +-
 qlib/workflow/online/strategy.py                   |  4 +--
 qlib/workflow/record_temp.py                       |  3 ++-
 qlib/workflow/task/collect.py                      |  6 ++---
 qlib/workflow/task/manage.py                       |  7 ++---
 qlib/workflow/task/utils.py                        | 22 ++++++++-------
 26 files changed, 127 insertions(+), 78 deletions(-)

diff --git a/.github/workflows/test_qlib_from_source.yml b/.github/workflows/test_qlib_from_source.yml
index edaee5576f..aa8bf63e2d 100644
--- a/.github/workflows/test_qlib_from_source.yml
+++ b/.github/workflows/test_qlib_from_source.yml
@@ -60,7 +60,7 @@ jobs:
       - name: Make html with sphinx
         run: |
           cd docs
-          sphinx-build -b html . build
+          sphinx-build -W --keep-going -b html . _build
           cd ..

       # Check Qlib with pylint
diff --git a/docs/Makefile b/docs/Makefile
index 11ee1e7986..ad7b944dd2 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -17,4 +17,5 @@ help:
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
 %: Makefile
+	pip install -r requirements.txt
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/reference/api.rst b/docs/reference/api.rst
index 4e6a7a8543..2d2ad628f7 100644
--- a/docs/reference/api.rst
+++ b/docs/reference/api.rst
@@ -117,7 +117,7 @@ Model
 Strategy
 --------

-.. automodule:: qlib.contrib.strategy.strategy
+.. automodule:: qlib.contrib.strategy
     :members:

 Evaluate
@@ -255,7 +255,7 @@ Utils
 Serializable
 ------------

-.. automodule:: qlib.utils.serial.Serializable
+.. automodule:: qlib.utils.serial
     :members:

 RL
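The two `automodule` fixes above work because Sphinx's `automodule` directive must point at an importable module, not at a class path. A minimal sketch of that requirement, assuming `qlib` is installed (`can_document` is a hypothetical helper for illustration, not part of Sphinx or Qlib):

.. code-block:: python

    # Sphinx resolves an automodule target by importing it as a module, so a
    # class path like "qlib.utils.serial.Serializable" fails while the module
    # path "qlib.utils.serial" succeeds.
    import importlib

    def can_document(target: str) -> bool:
        """Return True if `target` is importable as a module (automodule's requirement)."""
        try:
            importlib.import_module(target)
            return True
        except ImportError:
            return False

    print(can_document("qlib.utils.serial"))               # True: a module
    print(can_document("qlib.utils.serial.Serializable"))  # False: a class, not a module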
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 745d7e0fe3..c10a86d4ee 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -4,3 +4,4 @@ numpy
 scipy
 scikit-learn
 pandas
+tianshou
diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py
index 8e5cfd4fb5..a9308578a0 100644
--- a/qlib/contrib/evaluate.py
+++ b/qlib/contrib/evaluate.py
@@ -96,9 +96,11 @@ def indicator_analysis(df, method="mean"):
         index: Index(datetime)
     method : str, optional
         statistics method of pa/ffr, by default "mean"
+
         - if method is 'mean', count the mean statistical value of each trade indicator
         - if method is 'amount_weighted', count the deal_amount weighted mean statistical value of each trade indicator
         - if method is 'value_weighted', count the value weighted mean statistical value of each trade indicator
+
         Note: statistics method of pos is always "mean"

     Returns
@@ -154,6 +156,7 @@ def backtest_daily(
         E.g.

         .. code-block:: python
+
             # dict
             strategy = {
                 "class": "TopkDropoutStrategy",
@@ -180,7 +183,6 @@ def backtest_daily(
             # 3) specify module path with class name
             # - "a.b.c.ClassName" getattr(<module>, "ClassName")() will be used.
-
     executor : Union[str, dict, BaseExecutor]
         for initializing the outermost executor.
     benchmark: str
diff --git a/qlib/contrib/report/analysis_model/analysis_model_performance.py b/qlib/contrib/report/analysis_model/analysis_model_performance.py
index 3bd3eb65eb..df189dd7ba 100644
--- a/qlib/contrib/report/analysis_model/analysis_model_performance.py
+++ b/qlib/contrib/report/analysis_model/analysis_model_performance.py
@@ -276,8 +276,8 @@ def model_performance_graph(
 ) -> [list, tuple]:
     """Model performance

-    :param pred_label: index is **pd.MultiIndex**, index name is **[instrument, datetime]**; columns names is **[score,
-        label]**. It is usually same as the label of model training(e.g. "Ref($close, -2)/Ref($close, -1) - 1").
+    :param pred_label: index is **pd.MultiIndex**, index name is **[instrument, datetime]**; columns names is **[score, label]**.
+        It is usually same as the label of model training(e.g. "Ref($close, -2)/Ref($close, -1) - 1").

         .. code-block:: python
diff --git a/qlib/contrib/report/analysis_position/cumulative_return.py b/qlib/contrib/report/analysis_position/cumulative_return.py
index 00985a17c5..b15ea15c05 100644
--- a/qlib/contrib/report/analysis_position/cumulative_return.py
+++ b/qlib/contrib/report/analysis_position/cumulative_return.py
@@ -218,6 +218,7 @@ def cumulative_return_graph(

     Graph desc:
+
     - Axis X: Trading day.
     - Axis Y:
     - Above axis Y: `(((Ref($close, -1)/$close - 1) * weight).sum() / weight.sum()).cumsum()`.
@@ -242,7 +243,8 @@ def cumulative_return_graph(
     :param label_data: `D.features` result; index is `pd.MultiIndex`, index name is [`instrument`, `datetime`];
         columns names is [`label`].
-        **The label T is the change from T to T+1**, it is recommended to use ``close``, example: `D.features(D.instruments('csi500'), ['Ref($close, -1)/$close-1'])`
+
+        **The label T is the change from T to T+1**, it is recommended to use ``close``, example: `D.features(D.instruments('csi500'), ['Ref($close, -1)/$close-1'])`

         .. code-block:: python
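A hedged sketch of calling `indicator_analysis` as documented above. The pa/pos/ffr column layout is an assumption based on the docstring, and `method="mean"` is used since the weighted variants need deal_amount/value columns:

.. code-block:: python

    import pandas as pd

    from qlib.contrib.evaluate import indicator_analysis

    # Hypothetical trade-indicator frame following the documented layout:
    # a datetime index with pa / pos / ffr indicator columns.
    df = pd.DataFrame(
        {
            "pa": [0.010, -0.020, 0.005],
            "pos": [1.0, 0.0, 1.0],
            "ffr": [0.90, 0.80, 1.00],
        },
        index=pd.to_datetime(["2020-01-02", "2020-01-03", "2020-01-06"]),
    )

    # "mean" averages pa/ffr directly; 'amount_weighted' and 'value_weighted'
    # weight them by deal_amount / value instead (pos is always averaged).
    print(indicator_analysis(df, method="mean"))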
diff --git a/qlib/contrib/report/analysis_position/rank_label.py b/qlib/contrib/report/analysis_position/rank_label.py
index 2927f12a29..fb2fcc6d8b 100644
--- a/qlib/contrib/report/analysis_position/rank_label.py
+++ b/qlib/contrib/report/analysis_position/rank_label.py
@@ -99,7 +99,8 @@ def rank_label_graph(
     :param position: position data; **qlib.backtest.backtest** result.
     :param label_data: **D.features** result; index is **pd.MultiIndex**, index name is **[instrument, datetime]**;
         columns names is **[label]**.
-        **The label T is the change from T to T+1**, it is recommended to use ``close``, example: `D.features(D.instruments('csi500'), ['Ref($close, -1)/$close-1'])`.
+
+        **The label T is the change from T to T+1**, it is recommended to use ``close``, example: `D.features(D.instruments('csi500'), ['Ref($close, -1)/$close-1'])`.

         .. code-block:: python
diff --git a/qlib/contrib/strategy/cost_control.py b/qlib/contrib/strategy/cost_control.py
index 2209375e56..ff51f484f5 100644
--- a/qlib/contrib/strategy/cost_control.py
+++ b/qlib/contrib/strategy/cost_control.py
@@ -25,12 +25,14 @@ def __init__(
         common_infra=None,
         **kwargs,
     ):
-        """Parameter
+        """
+        Parameters
+        ----------
         topk : int
             top-N stocks to buy
         risk_degree : float
-            position percentage of total value
-        buy_method :
+            position percentage of total value
+        buy_method:
+
             rank_fill: assign the weight to stocks that rank high first (1/topk max)
             average_fill: assign the weight evenly to the high-ranked stocks.
         """
@@ -51,12 +53,19 @@ def get_risk_degree(self, trade_step=None):
         return self.risk_degree

     def generate_target_weight_position(self, score, current, trade_start_time, trade_end_time):
-        """Parameter:
-        score : pred score for this trade date, pd.Series, index is stock_id, contain 'score' column
-        current : current position, use Position() class
-        trade_date : trade date
-        generate target position from score for this date and the current position
-        The cache is not considered in the position
+        """
+        Parameters
+        ----------
+        score:
+            pred score for this trade date, pd.Series, index is stock_id, contains the 'score' column
+        current:
+            current position, use Position() class
+        trade_date:
+            trade date
+
+        generate target position from score for this date and the current position
+
+        The cache is not considered in the position
         """
         # TODO:
         # If the current stock list is more than topk(eg. The weights are modified
diff --git a/qlib/contrib/strategy/signal_strategy.py b/qlib/contrib/strategy/signal_strategy.py
index 00b34989e1..b026bf7a97 100644
--- a/qlib/contrib/strategy/signal_strategy.py
+++ b/qlib/contrib/strategy/signal_strategy.py
@@ -103,9 +103,13 @@ def __init__(
             before selling a stock, will check current.get_stock_count(order.stock_id) >= self.hold_thresh.
         only_tradable : bool
             will the strategy only consider the tradable stock when buying and selling.
+
             if only_tradable:
+
                 strategy will make decisions based on the tradable state of the stock and avoid buying or selling untradable stocks.
+
             else:
+
                 strategy will make buy/sell decisions without checking the tradable state of the stock.
         """
         super().__init__(**kwargs)
@@ -287,9 +291,11 @@ def __init__(
             the decision of the strategy will be based on the given signal
         trade_exchange : Exchange
             exchange that provides market info, used to deal order and generate report
+
             - If `trade_exchange` is None, self.trade_exchange will be set with common_infra
             - It allows different trade_exchanges to be used in different executions.
             - For example:
+
                 - In daily execution, both daily exchange and minutely are usable, but the daily exchange is recommended because it runs faster.
                 - In minutely execution, the daily exchange is not usable, only the minutely exchange is recommended.
         """
@@ -303,6 +309,7 @@ def __init__(
     def generate_target_weight_position(self, score, current, trade_start_time, trade_end_time):
         """
         Generate target position from score for this date and the current position. The cash is not considered in the position.
+
         Parameters
         -----------
         score : pd.Series
@@ -355,12 +362,14 @@ class EnhancedIndexingStrategy(WeightStrategyBase):

     Users need to prepare their risk model data like below:

-    ├── /path/to/riskmodel
-    ├──── 20210101
-    ├────── factor_exp.{csv|pkl|h5}
-    ├────── factor_cov.{csv|pkl|h5}
-    ├────── specific_risk.{csv|pkl|h5}
-    ├────── blacklist.{csv|pkl|h5}  # optional
+    .. code-block:: text
+
+        ├── /path/to/riskmodel
+        ├──── 20210101
+        ├────── factor_exp.{csv|pkl|h5}
+        ├────── factor_cov.{csv|pkl|h5}
+        ├────── specific_risk.{csv|pkl|h5}
+        ├────── blacklist.{csv|pkl|h5}  # optional

     The risk model data can be obtained from risk data provider. You can also use
     `qlib.model.riskmodel.structured.StructuredCovEstimator` to prepare these data.
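A hedged sketch of the `TopkDropoutStrategy` config documented above, wired into `backtest_daily`. Here `pred_score` is an assumed pd.Series of model scores indexed by datetime/instrument, and the parameter values are illustrative:

.. code-block:: python

    from qlib.contrib.evaluate import backtest_daily

    # `pred_score` is assumed to exist: a pd.Series of model scores indexed
    # by (datetime, instrument), e.g. the predictions of a trained model.
    strategy = {
        "class": "TopkDropoutStrategy",
        "module_path": "qlib.contrib.strategy.signal_strategy",
        "kwargs": {
            "signal": pred_score,
            "topk": 50,             # hold the 50 highest-scored stocks
            "n_drop": 5,            # rotate out the 5 worst holdings per day
            "only_tradable": True,  # skip stocks that cannot be traded today
        },
    }
    report, positions = backtest_daily(
        start_time="2017-01-01", end_time="2020-08-01", strategy=strategy
    )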
diff --git a/qlib/data/cache.py b/qlib/data/cache.py
index e7336e8bed..addd28871d 100644
--- a/qlib/data/cache.py
+++ b/qlib/data/cache.py
@@ -141,8 +141,10 @@ def __init__(self, mem_cache_size_limit=None, limit_type="length"):

         Parameters
         ----------
-        mem_cache_size_limit: cache max size.
-        limit_type: length or sizeof; length(call fun: len), size(call fun: sys.getsizeof).
+        mem_cache_size_limit:
+            cache max size.
+        limit_type:
+            length or sizeof; length(call fun: len), size(call fun: sys.getsizeof).
         """
         size_limit = C.mem_cache_size_limit if mem_cache_size_limit is None else mem_cache_size_limit
@@ -858,7 +860,7 @@ def gen_dataset_cache(self, cache_path: Union[str, Path], instruments, fields, f
         """gen_dataset_cache

         .. note:: This function does not consider the cache read write lock. Please
-            Acquire the lock outside this function
+            acquire the lock outside this function

         The format of the cache contains 3 parts (followed by typical filename).

         - index : cache/d41366901e25de3ec47297f12e2ba11d.index
@@ -874,10 +876,10 @@ def gen_dataset_cache(self, cache_path: Union[str, Path], instruments, fields, f
                 1999-11-12 00:00:00  2  3
                 ...

         .. note:: The start is closed. The end is open!!!!!

-        - Each line contains two element with a timestamp as its index.
-        - It indicates the `start_index`(included) and `end_index`(excluded) of the data for `timestamp`
+            - Each line contains two elements with a timestamp as its index.
+            - It indicates the `start_index` (included) and `end_index` (excluded) of the data for `timestamp`

         - meta data: cache/d41366901e25de3ec47297f12e2ba11d.meta
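A minimal sketch of the `MemCache` parameters documented above (the limit values are illustrative):

.. code-block:: python

    from qlib.data.cache import MemCache

    # Limit by entry count: limit_type="length" sizes the cache with len().
    cache_by_len = MemCache(mem_cache_size_limit=500, limit_type="length")

    # Limit by approximate memory footprint: limit_type="sizeof" sizes the
    # cache with sys.getsizeof() instead.
    cache_by_bytes = MemCache(mem_cache_size_limit=50 * 2**20, limit_type="sizeof")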
diff --git a/qlib/data/data.py b/qlib/data/data.py
index a6b1ce19a8..73edf9f010 100644
--- a/qlib/data/data.py
+++ b/qlib/data/data.py
@@ -220,7 +220,8 @@ def instruments(market: Union[List, str] = "all", filter_pipe: Union[List, None]
         ----------
         dict: if isinstance(market, str)
             dict of stockpool config.
-            {`market`=>base market name, `filter_pipe`=>list of filters}
+
+            {`market` => base market name, `filter_pipe` => list of filters}

             example :
@@ -432,9 +433,12 @@ def expression(self, instrument, field, start_time=None, end_time=None, freq="da
             data of a certain expression

             The data has two types of format
+
             1) expression with datetime index
+
             2) expression with integer index
-                - because the datetime is not as good as
+
+                - because the datetime is not as good as
         """
         raise NotImplementedError("Subclass of ExpressionProvider must implement `Expression` method")
@@ -890,6 +894,7 @@ def __init__(self, align_time: bool = True):
             Will we align the time to calendar
             the frequency is flexible in some dataset and can't be aligned.
             For the data with fixed frequency with a shared calendar, aligning the data to the calendar provides the following benefits
+
             - Align queries to the same parameters, so the cache can be shared.
         """
         super().__init__()
@@ -1167,11 +1172,12 @@ def features(
         inst_processors=[],
     ):
         """
-        Parameters:
-        -----------
+        Parameters
+        ----------
         disk_cache : int
             whether to skip(0)/use(1)/replace(2) disk_cache
+
         This function will try to use cache method which has a keyword `disk_cache`,
         and will use provider method if a type error is raised because the DatasetD instance
         is a provider class.
@@ -1221,10 +1227,12 @@ class ClientProvider(BaseProvider):
     """Client Provider

     Requesting data from server as a client. Can propose requests:
+
         - Calendar : Directly respond a list of calendars
         - Instruments (without filter): Directly respond a list/dict of instruments
         - Instruments (with filters):  Respond a list/dict of instruments
         - Features : Respond a cache uri
+
     The general workflow is described as follows:
     When the user uses the client provider to propose a request, the client provider will connect the server and send the request. The client will start to wait for the response. The response will be made instantly indicating whether the cache is available. The waiting procedure will terminate only when the client gets the response saying `feature_available` is true.
     `BUG` : Every time we make a request for certain data we need to connect to the server, wait for the response and disconnect from it. We can't make a sequence of requests within one connection. You can refer to https://python-socketio.readthedocs.io/en/latest/client.html for documentation of python-socketIO client.
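A minimal sketch of the stock-pool config and the `disk_cache` flag documented above (assumes Qlib has been initialized with local data under the usual `provider_uri`):

.. code-block:: python

    import qlib
    from qlib.data import D

    qlib.init(provider_uri="~/.qlib/qlib_data/cn_data")  # assumed local data

    # `market` plus `filter_pipe` form the stock-pool config described above.
    instruments = D.instruments(market="csi300")

    # disk_cache: 0 = skip, 1 = use, 2 = replace the dataset cache.
    df = D.features(
        instruments,
        fields=["$close", "Ref($close, 1)", "Mean($close, 3)"],
        start_time="2020-01-01",
        end_time="2020-06-30",
        disk_cache=1,
    )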
diff --git a/qlib/data/dataset/handler.py b/qlib/data/dataset/handler.py
index 7815445c1e..2fe8f8a632 100644
--- a/qlib/data/dataset/handler.py
+++ b/qlib/data/dataset/handler.py
@@ -35,7 +35,7 @@ class DataHandler(Serializable):
     Example of the data:
     The multi-index of the columns is optional.

-    .. code-block:: python
+    .. code-block:: text

                                 feature                                                   label
                                 $close    $volume    Ref($close, 1)    Mean($close, 3)    $high-$low    LABEL0
@@ -671,6 +671,7 @@ def get_cols(self, col_set=DataHandler.CS_ALL, data_key: str = DK_I) -> list:
     def cast(cls, handler: "DataHandlerLP") -> "DataHandlerLP":
         """
         Motivation
+
         - A user creates a datahandler in his customized package. Then he wants to share the processed handler to
           other users without introducing the package dependency and complicated data processing logic.
         - This class makes it possible by casting the class to DataHandlerLP and only keeping the processed data
diff --git a/qlib/data/dataset/loader.py b/qlib/data/dataset/loader.py
index 074cfa6084..cc9ecd7c41 100644
--- a/qlib/data/dataset/loader.py
+++ b/qlib/data/dataset/loader.py
@@ -27,7 +27,7 @@ def load(self, instruments, start_time=None, end_time=None) -> pd.DataFrame:

         Example of the data (The multi-index of the columns is optional.):

-        .. code-block:: python
+        .. code-block:: text

                                     feature                                                   label
                                     $close    $volume    Ref($close, 1)    Mean($close, 3)    $high-$low    LABEL0
diff --git a/qlib/data/filter.py b/qlib/data/filter.py
index c8c36c099f..9e924f728a 100644
--- a/qlib/data/filter.py
+++ b/qlib/data/filter.py
@@ -272,8 +272,8 @@ class NameDFilter(SeriesDFilter):
     def __init__(self, name_rule_re, fstart_time=None, fend_time=None):
         """Init function for name filter class

-        params:
-        ------
+        Parameters
+        ----------
         name_rule_re: str
             regular expression for the name rule.
         """
@@ -325,8 +325,8 @@ class ExpressionDFilter(SeriesDFilter):
     def __init__(self, rule_expression, fstart_time=None, fend_time=None, keep=False):
         """Init function for expression filter class

-        params:
-        ------
+        Parameters
+        ----------
         fstart_time: str
             filter the feature starting from this time.
         fend_time: str
diff --git a/qlib/data/ops.py b/qlib/data/ops.py
index fe2ebc9f6d..d9a2ffbb3e 100644
--- a/qlib/data/ops.py
+++ b/qlib/data/ops.py
@@ -1530,6 +1530,7 @@ def __init__(self, feature, freq, func):
         """
         Resampling the data to target frequency.
         The resample function of pandas is used.
+
         - the timestamp will be at the start of the time span after resample.

         Parameters
@@ -1632,10 +1633,14 @@ def register(self, ops_list: List[Union[Type[ExpressionOps], dict]]):
         ops_list : List[Union[Type[ExpressionOps], dict]]
             - if type(ops_list) is List[Type[ExpressionOps]], each element of ops_list represents the operator class, which should be the subclass of `ExpressionOps`.
             - if type(ops_list) is List[dict], each element of ops_list represents the config of operator, which has the following format:
-                {
-                    "class": class_name,
-                    "module_path": path,
-                }
+
+                .. code-block:: text
+
+                    {
+                        "class": class_name,
+                        "module_path": path,
+                    }
+
                 Note: `class` should be the class name of operator, `module_path` should be a python module or path of file.
         """
         for _operator in ops_list:
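A minimal sketch combining the two filters documented above into a stock-pool config; this mirrors the usage pattern in Qlib's docs, and the regex and expression values are illustrative:

.. code-block:: python

    from qlib.data import D
    from qlib.data.filter import ExpressionDFilter, NameDFilter

    # Keep instruments whose code matches a regex, then require a price floor.
    name_filter = NameDFilter(name_rule_re="SH[0-9]{4}55")
    expression_filter = ExpressionDFilter(rule_expression="$close > 2")

    pool = D.instruments(market="csi300", filter_pipe=[name_filter, expression_filter])
    print(D.list_instruments(instruments=pool, as_list=True))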
diff --git a/qlib/model/trainer.py b/qlib/model/trainer.py
index 84ae2a6c65..65842d81fa 100644
--- a/qlib/model/trainer.py
+++ b/qlib/model/trainer.py
@@ -3,7 +3,7 @@
 """
 The Trainer will train a list of tasks and return a list of model recorders.
-There are two steps in each Trainer including ``train``(make model recorder) and ``end_train``(modify model recorder).
+There are two steps in each Trainer including ``train`` (make model recorder) and ``end_train`` (modify model recorder).
 This is a concept called ``DelayTrainer``, which can be used in online simulating for parallel training.
 In ``DelayTrainer``, the first step is only to save some necessary info to model recorders, and the second step
 which will be finished in the end can do some concurrent and time-consuming operations such as model fitting.
@@ -242,7 +242,7 @@ def __init__(

     def train(self, tasks: list, train_func: Callable = None, experiment_name: str = None, **kwargs) -> List[Recorder]:
         """
-        Given a list of `task`s and return a list of trained Recorder. The order can be guaranteed.
+        Given a list of `tasks` and return a list of trained Recorder. The order can be guaranteed.

         Args:
             tasks (list): a list of definitions based on `task` dict
@@ -315,7 +315,7 @@ def end_train(self, models, end_train_func=None, experiment_name: str = None, **

         Args:
             models (list): a list of Recorder, the tasks have been saved to them
-            end_train_func (Callable, optional): the end_train method which needs at least `recorder`s and `experiment_name`. Defaults to None for using self.end_train_func.
+            end_train_func (Callable, optional): the end_train method which needs at least `recorders` and `experiment_name`. Defaults to None for using self.end_train_func.
             experiment_name (str): the experiment name, None for use default name.
             kwargs: the params for end_train_func.
@@ -390,14 +390,14 @@ def train(
         **kwargs,
     ) -> List[Recorder]:
         """
-        Given a list of `task`s and return a list of trained Recorder. The order can be guaranteed.
+        Given a list of `tasks` and return a list of trained Recorder. The order can be guaranteed.

         This method defaults to a single process, but TaskManager offered a great way to parallel training.
         Users can customize their train_func to realize multiple processes or even multiple machines.

         Args:
             tasks (list): a list of definitions based on `task` dict
-            train_func (Callable): the training method which needs at least `task`s and `experiment_name`. None for the default training method.
+            train_func (Callable): the training method which needs at least `tasks` and `experiment_name`. None for the default training method.
             experiment_name (str): the experiment name, None for use default name.
             before_status (str): the tasks in before_status will be fetched and trained. Can be STATUS_WAITING, STATUS_PART_DONE.
             after_status (str): the tasks after trained will become after_status. Can be STATUS_WAITING, STATUS_PART_DONE.
@@ -470,7 +470,7 @@ def worker(
         The multiprocessing method for `train`. It can share the same task_pool with `train` and can run in other processes or on other machines.

         Args:
-            train_func (Callable): the training method which needs at least `task`s and `experiment_name`. None for the default training method.
+            train_func (Callable): the training method which needs at least `tasks` and `experiment_name`. None for the default training method.
             experiment_name (str): the experiment name, None for use default name.
         """
         if train_func is None:
@@ -525,7 +525,7 @@ def train(self, tasks: list, train_func=None, experiment_name: str = None, **kwa

         Args:
             tasks (list): a list of definitions based on `task` dict
-            train_func (Callable): the train method which needs at least `task`s and `experiment_name`. Defaults to None for using self.train_func.
+            train_func (Callable): the train method which needs at least `tasks` and `experiment_name`. Defaults to None for using self.train_func.
             experiment_name (str): the experiment name, None for use default name.

         Returns:
@@ -554,7 +554,7 @@ def end_train(self, recs, end_train_func=None, experiment_name: str = None, **kw

         Args:
             recs (list): a list of Recorder, the tasks have been saved to them.
-            end_train_func (Callable, optional): the end_train method which needs at least `recorder`s and `experiment_name`. Defaults to None for using self.end_train_func.
+            end_train_func (Callable, optional): the end_train method which needs at least `recorders` and `experiment_name`. Defaults to None for using self.end_train_func.
             experiment_name (str): the experiment name, None for use default name.
             kwargs: the params for end_train_func.
@@ -596,7 +596,7 @@ def worker(self, end_train_func=None, experiment_name: str = None):
         The multiprocessing method for `end_train`. It can share the same task_pool with `end_train` and can run in other processes or on other machines.

         Args:
-            end_train_func (Callable, optional): the end_train method which needs at least `recorder`s and `experiment_name`. Defaults to None for using self.end_train_func.
+            end_train_func (Callable, optional): the end_train method which needs at least `recorders` and `experiment_name`. Defaults to None for using self.end_train_func.
             experiment_name (str): the experiment name, None for use default name.
         """
         if end_train_func is None:
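A hedged sketch of the two-step `train`/`end_train` flow documented above (`task` is an assumed standard Qlib task dict defined elsewhere, and "my_exp" is a hypothetical experiment name):

.. code-block:: python

    from qlib.model.trainer import TrainerR

    # `task` is assumed to be a standard Qlib task dict (model/dataset/record
    # sections), defined elsewhere.
    trainer = TrainerR(experiment_name="my_exp")

    recorders = trainer.train([task])         # step 1: fit, one Recorder per task
    recorders = trainer.end_train(recorders)  # step 2: finalize the recorders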
diff --git a/qlib/rl/utils/log.py b/qlib/rl/utils/log.py
index 2a113e47cd..4b0e68c68e 100644
--- a/qlib/rl/utils/log.py
+++ b/qlib/rl/utils/log.py
@@ -321,7 +321,7 @@ class LogBuffer(LogWriter):

         - on_episode: Whether it's called at the end of an episode
         - on_collect: Whether it's called at the end of a collect
-        - log_buffer: the :class:`LogBbuffer`object
+        - log_buffer: the :class:`LogBuffer` object

         No return value is expected.
diff --git a/qlib/workflow/__init__.py b/qlib/workflow/__init__.py
index d14782c60d..815d3e1240 100644
--- a/qlib/workflow/__init__.py
+++ b/qlib/workflow/__init__.py
@@ -47,6 +47,7 @@ def start(
             with R.start(experiment_name='test', recorder_name='recorder_1', resume=True):
                 # if users want to resume recorder, they have to specify the exact same name for experiment and recorder.
                 ...  # further operations
+
         Parameters
         ----------
         experiment_id : str
@@ -204,7 +205,7 @@ def list_recorders(self, experiment_id=None, experiment_name=None):
         If user doesn't provide the id or name of the experiment, this method will try to retrieve the default experiment and
         list all the recorders of the default experiment. If the default experiment doesn't exist, the method will first
         create the default experiment, and then create a new recorder under it. (More information about the default experiment
-        can be found `here <../component/recorder.html#qlib.workflow.exp.Experiment>`_).
+        can be found `here <../component/recorder.html#qlib.workflow.exp.Experiment>`__).

         Here is the example code:
@@ -249,7 +250,7 @@ def get_exp(

         - Else If '`create`' is False:

-            - If ``active experiment` exists:
+            - If `active experiment` exists:

                 - no id or name specified, return the active experiment.
@@ -294,7 +295,7 @@ def get_exp(
             according to user's specification if the experiment hasn't been created before.
         start : bool
             when start is True,
-            if the experiment has not started(not activated), it will start
+            if the experiment has not started (not activated), it will start.
             It is designed for R.log_params to auto start experiments

         Returns
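A minimal sketch of the `R.start` usage shown in the docstring above (assumes a default `qlib.init()`; the experiment and recorder names are illustrative):

.. code-block:: python

    import qlib
    from qlib.workflow import R

    qlib.init()  # assumed default setup

    # Mirrors the docstring above: resume a named recorder in an experiment.
    with R.start(experiment_name="test", recorder_name="recorder_1", resume=True):
        R.log_params(learning_rate=0.01)  # further operations

    print(R.list_recorders(experiment_name="test"))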
diff --git a/qlib/workflow/expm.py b/qlib/workflow/expm.py
index be6b494e05..3059eecd18 100644
--- a/qlib/workflow/expm.py
+++ b/qlib/workflow/expm.py
@@ -21,13 +21,14 @@ class ExpManager:
     """
-    This is the `ExpManager` class for managing experiments. The API is designed similar to mlflow.
-    (The link: https://mlflow.org/docs/latest/python_api/mlflow.html)
+    This is the `ExpManager` class for managing experiments. The API is designed similar to mlflow.
+    (The link: https://mlflow.org/docs/latest/python_api/mlflow.html)
+
+    The `ExpManager` is expected to be a singleton (btw, we can have multiple `Experiment`s with different uri. Users can get different experiments from different uri, and then compare records of them). Global Config (i.e. `C`) is also a singleton.

-    The `ExpManager` is expected to be a singleton (btw, we can have multiple `Experiment`s with different uri. user can get different experiments from different uri, and then compare records of them). Global Config (i.e. `C`) is also a singleton.
     So we try to align them together. They share the same variable, which is called **default uri**. Please refer to `ExpManager.default_uri` for details of variable sharing.

-    When the user starts an experiment, the user may want to set the uri to a specific uri (it will override **default uri** during this period), and then unset the **specific uri** and fallback to the **default uri**. `ExpManager._active_exp_uri` is that **specific uri**.
+    When the user starts an experiment, the user may want to set the uri to a specific uri (it will override **default uri** during this period), and then unset the **specific uri** and fall back to the **default uri**. `ExpManager._active_exp_uri` is that **specific uri**.
     """

     active_experiment: Optional[Experiment]
diff --git a/qlib/workflow/online/manager.py b/qlib/workflow/online/manager.py
index 35e73821c8..09e96d444f 100644
--- a/qlib/workflow/online/manager.py
+++ b/qlib/workflow/online/manager.py
@@ -121,7 +121,7 @@ def __init__(
         Args:
             strategies (Union[OnlineStrategy, List[OnlineStrategy]]): an instance of OnlineStrategy or a list of OnlineStrategy
             begin_time (Union[str,pd.Timestamp], optional): the OnlineManager will begin at this time. Defaults to None for using the latest date.
-            trainer (Trainer): the trainer to train task. None for using TrainerR.
+            trainer (qlib.model.trainer.Trainer): the trainer to train tasks. None for using TrainerR.
             freq (str, optional): data frequency. Defaults to "day".
         """
         self.logger = get_module_logger(self.__class__.__name__)
diff --git a/qlib/workflow/online/strategy.py b/qlib/workflow/online/strategy.py
index bda068dbf6..f2988d843f 100644
--- a/qlib/workflow/online/strategy.py
+++ b/qlib/workflow/online/strategy.py
@@ -24,11 +24,11 @@ class OnlineStrategy:
     def __init__(self, name_id: str):
         """
         Init OnlineStrategy.
-        This module **MUST** use `Trainer <../reference/api.html#Trainer>`_ to finishing model training.
+        This module **MUST** use `Trainer <../reference/api.html#qlib.model.trainer.Trainer>`_ to finish model training.

         Args:
             name_id (str): a unique name or id.
-            trainer (Trainer, optional): a instance of Trainer. Defaults to None.
+            trainer (qlib.model.trainer.Trainer, optional): an instance of Trainer. Defaults to None.
         """
         self.name_id = name_id
         self.logger = get_module_logger(self.__class__.__name__)
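A hedged sketch of constructing the `OnlineManager` documented above (`my_strategy` is an assumed `OnlineStrategy` instance, e.g. a RollingStrategy configured elsewhere):

.. code-block:: python

    from qlib.model.trainer import TrainerR
    from qlib.workflow.online.manager import OnlineManager

    # `my_strategy` is assumed to be a configured OnlineStrategy instance
    # built elsewhere.
    manager = OnlineManager(
        strategies=my_strategy,
        begin_time="2020-01-01",
        trainer=TrainerR(),  # the documented default when omitted
        freq="day",
    )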
""" if include_self: diff --git a/qlib/workflow/task/collect.py b/qlib/workflow/task/collect.py index 16c5505fcd..bedbd96d20 100644 --- a/qlib/workflow/task/collect.py +++ b/qlib/workflow/task/collect.py @@ -45,7 +45,7 @@ def collect(self) -> dict: {"IC": {"Xgboost": pd.Series, "LSTM": pd.Series}} - ...... + ... """ raise NotImplementedError(f"Please implement the `collect` method.") @@ -58,7 +58,7 @@ def process_collect(collected_dict, process_list=[], *args, **kwargs) -> dict: Args: collected_dict (dict): the dict return by `collect` process_list (list or Callable): the list of processors or the instance of a processor to process dict. - The processor order is the same as the list order. + The processor order is the same as the list order. For example: [Group1(..., Ensemble1()), Group2(..., Ensemble2())] Returns: @@ -97,7 +97,7 @@ class MergeCollector(Collector): A can collect {"prediction": pd.Series} and B can collect {"IC": {"Xgboost": pd.Series, "LSTM": pd.Series}}. Then after this class's collect, we can collect {"A_prediction": pd.Series, "B_IC": {"Xgboost": pd.Series, "LSTM": pd.Series}} - ...... + ... """ diff --git a/qlib/workflow/task/manage.py b/qlib/workflow/task/manage.py index 9ac9a7a630..7fe9f58d66 100644 --- a/qlib/workflow/task/manage.py +++ b/qlib/workflow/task/manage.py @@ -489,7 +489,7 @@ def run_task( after_status: str = TaskManager.STATUS_DONE, **kwargs, ): - """ + r""" While the task pool is not empty (has WAITING tasks), use task_func to fetch and run tasks in task_pool After running this method, here are 4 situations (before_status -> after_status): @@ -505,8 +505,9 @@ def run_task( Parameters ---------- task_func : Callable - def (task_def, **kwargs) -> - the function to run the task + def (task_def, \**kwargs) -> + + the function to run the task task_pool : str the name of the task pool (Collection in MongoDB) query: dict diff --git a/qlib/workflow/task/utils.py b/qlib/workflow/task/utils.py index 77fd9fa2e7..a914ea54fe 100644 --- a/qlib/workflow/task/utils.py +++ b/qlib/workflow/task/utils.py @@ -25,18 +25,22 @@ def get_mongodb() -> Database: Using qlib.init(): - mongo_conf = { - "task_url": task_url, # your MongoDB url - "task_db_name": task_db_name, # database name - } - qlib.init(..., mongo=mongo_conf) + .. code-block:: python + + mongo_conf = { + "task_url": task_url, # your MongoDB url + "task_db_name": task_db_name, # database name + } + qlib.init(..., mongo=mongo_conf) After qlib.init(): - C["mongo"] = { - "task_url" : "mongodb://localhost:27017/", - "task_db_name" : "rolling_db" - } + .. code-block:: python + + C["mongo"] = { + "task_url" : "mongodb://localhost:27017/", + "task_db_name" : "rolling_db" + } Returns: Database: the Database instance