diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 00000000..e69de29b diff --git a/404.html b/404.html new file mode 100644 index 00000000..a5eda92d --- /dev/null +++ b/404.html @@ -0,0 +1,3939 @@ + + + + + + + + + + + + + + + + + + + CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ +

404 - Not found

+ +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/CNAME b/CNAME new file mode 100644 index 00000000..03ff64a2 --- /dev/null +++ b/CNAME @@ -0,0 +1 @@ +docs.rllte.dev \ No newline at end of file diff --git a/README-zh-Hans/index.html b/README-zh-Hans/index.html new file mode 100644 index 00000000..c476c2a2 --- /dev/null +++ b/README-zh-Hans/index.html @@ -0,0 +1,4390 @@ + + + + + + + + + + + + + + + + + + + + + README zh Hans - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +
+
+ +
+RLLTE: Long-Term Evolution Project of Reinforcement Learning + +

Paper | + Documentation | + Examples | + Forum | + Benchmarks

+ + + +| [English](README.md) | [中文](docs/README-zh-Hans.md) | + +
+ +

Contents

+ +

Overview

+

Inspired by the Long-Term Evolution (LTE) standard project in telecommunications, RLLTE aims to provide development components and engineering standards for advancing RL research and applications. Beyond first-class algorithm implementations, RLLTE also serves as a toolkit for developing new algorithms.

+
+ + + +
+An introduction to RLLTE. +
+ +

Highlights of RLLTE: +- 🧬 Long-term evolution to deliver the latest RL algorithms and tricks; +- 🏞️ A complete project ecosystem for task design, model training, model evaluation, and model deployment (TensorRT, CANN, ...); +- 🧱 Highly modular design for full decoupling of RL algorithms; +- 🚀 Optimized workflows for hardware acceleration; +- ⚙️ Support for custom environments and modules; +- 🖥️ Support for multiple computing devices, including GPUs and NPUs; +- 💾 Abundant reusable baseline data (rllte-hub); +- 👨‍✈️ A Copilot powered by large language models.

+

The project structure is as follows:

+
+ +
+ +

For detailed descriptions of these modules, see the API documentation.

+

Quick Start

+

Installation

+
    +
  • Prerequisites
  • +
+

Currently, we recommend Python>=3.8. You can create a virtual environment with: +

conda create -n rllte python=3.8
+

+
    +
  • With pip
  • +
+

Open a terminal and install rllte with pip: +

pip install rllte-core # install the basic modules
+pip install rllte-core[envs] # install the pre-defined task environments
+

+
    +
  • With git
  • +
+

Open a terminal and clone the repository from [GitHub](https://github.com/RLE-Foundation/rllte): +

git clone https://github.com/RLE-Foundation/rllte.git
+
+After that, run the following commands to install the required packages: +
pip install -e . # install the basic modules
+pip install -e .[envs] # install the pre-defined task environments
+

+

For more detailed installation instructions, see the Getting Started guide.

+

Fast Training with Built-in Algorithms

+

RLLTE provides high-quality implementations of widely recognized RL algorithms, along with a simple and friendly interface for building applications.

+

On NVIDIA GPU

+

Suppose we want to solve a DeepMind Control Suite task with the DrQ-v2 algorithm; we only need to write a train.py file like the following:

+

# import `env` and `agent` module
+from rllte.env import make_dmc_env 
+from rllte.agent import DrQv2
+
+if __name__ == "__main__":
+    device = "cuda:0"
+    # create the env; `eval_env` is optional
+    env = make_dmc_env(env_id="cartpole_balance", device=device)
+    eval_env = make_dmc_env(env_id="cartpole_balance", device=device)
+    # create the agent
+    agent = DrQv2(env=env, eval_env=eval_env, device=device, tag="drqv2_dmc_pixel")
+    # start training
+    agent.train(num_train_steps=500000, log_interval=1000)
+
+Run the train.py file and you will get output like the following:

+
+ +
+ +

On HUAWEI NPU

+

Similar to the example above, to train the agent on a HUAWEI NPU, simply replace cuda with npu: +

device = "cuda:0" -> device = "npu:0"
+

+

Three Steps to Create Your RL Agent

+

With RLLTE, developers can implement an RL algorithm in just three steps. The following example shows how to implement the Advantage Actor-Critic (A2C) algorithm to solve Atari games: +- First, select the algorithm prototype: +

from rllte.common.prototype import OnPolicyAgent
+
+- Next, import the necessary modules: +
from rllte.xploit.encoder import MnihCnnEncoder
+from rllte.xploit.policy import OnPolicySharedActorCritic
+from rllte.xploit.storage import VanillaRolloutStorage
+from rllte.xplore.distribution import Categorical
+
+- Run the .describe function of the selected policy, which produces the following output: +
OnPolicySharedActorCritic.describe()
+# Output:
+# ================================================================================
+# Name       : OnPolicySharedActorCritic
+# Structure  : self.encoder (shared by actor and critic), self.actor, self.critic
+# Forward    : obs -> self.encoder -> self.actor -> actions
+#            : obs -> self.encoder -> self.critic -> values
+#            : actions -> log_probs
+# Optimizers : self.optimizers['opt'] -> (self.encoder, self.actor, self.critic)
+# ================================================================================
+
+This shows the data structure of the current policy. Finally, put all the modules together and write the .update function: +
from torch import nn
+import torch as th
+
+class A2C(OnPolicyAgent):
+    def __init__(self, env, tag, seed, device, num_steps) -> None:
+        super().__init__(env=env, tag=tag, seed=seed, device=device, num_steps=num_steps)
+        # create the modules
+        encoder = MnihCnnEncoder(observation_space=env.observation_space, feature_dim=512)
+        policy = OnPolicySharedActorCritic(observation_space=env.observation_space,
+                                           action_space=env.action_space,
+                                           feature_dim=512,
+                                           opt_class=th.optim.Adam,
+                                           opt_kwargs=dict(lr=2.5e-4, eps=1e-5),
+                                           init_fn="xavier_uniform"
+                                           )
+        storage = VanillaRolloutStorage(observation_space=env.observation_space,
+                                        action_space=env.action_space,
+                                        device=device,
+                                        storage_size=self.num_steps,
+                                        num_envs=self.num_envs,
+                                        batch_size=256
+                                        )
+        # set all the modules
+        self.set(encoder=encoder, policy=policy, storage=storage, distribution=Categorical)
+
+    def update(self):
+        for _ in range(4):
+            for batch in self.storage.sample():
+                # evaluate the sampled actions
+                new_values, new_log_probs, entropy = self.policy.evaluate_actions(obs=batch.observations, actions=batch.actions)
+                # policy loss
+                policy_loss = - (batch.adv_targ * new_log_probs).mean()
+                # value loss
+                value_loss = 0.5 * (new_values.flatten() - batch.returns).pow(2).mean()
+                # update
+                self.policy.optimizers['opt'].zero_grad(set_to_none=True)
+                (value_loss * 0.5 + policy_loss - entropy * 0.01).backward()
+                nn.utils.clip_grad_norm_(self.policy.parameters(), 0.5)
+                self.policy.optimizers['opt'].step()
+
+Then, train the agent with the following code: +
from rllte.env import make_atari_env
+if __name__ == "__main__":
+    device = "cuda"
+    env = make_atari_env("PongNoFrameskip-v4", num_envs=8, seed=0, device=device)
+    agent = A2C(env=env, tag="a2c_atari", seed=0, device=device, num_steps=128)
+    agent.train(num_train_steps=10000000)
+
+As this example shows, an RL agent can be obtained with only a few lines of code using RLLTE.

+

Algorithm Decoupling and Module Replacement

+

RLLTE allows developers to replace preset modules in order to compare and optimize algorithm performance. Preset modules can be swapped for other built-in modules or for custom modules. For example, to compare the effect of different encoders, simply call the .set function: +

from rllte.xploit.encoder import EspeholtResidualEncoder
+encoder = EspeholtResidualEncoder(...)
+agent.set(encoder=encoder)
+
+The RLLTE framework is extremely concise and gives developers the greatest degree of freedom. For more details, see the tutorials.

+

List of Functions (Part)

+

RL Agents

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
TypeAlgorithmContinuousDiscreteMultiBinaryMultiDiscreteMulti-processingNPU💰🔭
On-PolicyA2C✔️✔️✔️✔️✔️✔️✔️
On-PolicyPPO✔️✔️✔️✔️✔️✔️✔️
On-PolicyDrAC✔️✔️✔️✔️✔️✔️✔️✔️
On-PolicyDAAC✔️✔️✔️✔️✔️✔️✔️
On-PolicyDrDAAC✔️✔️✔️✔️✔️✔️✔️✔️
On-PolicyPPG✔️✔️✔️✔️✔️✔️
Off-PolicyDQN✔️✔️✔️✔️
Off-PolicyDDPG✔️✔️✔️✔️
Off-PolicySAC✔️✔️✔️✔️
Off-PolicyTD3✔️✔️✔️✔️
Off-PolicyDrQ-v2✔️✔️✔️✔️
DistributedIMPALA✔️✔️✔️
+
+
    +
  • 🐌: under development;
  • +
  • 💰: supports intrinsic reward shaping;
  • +
  • 🔭: supports observation augmentation.
  • +
+
+

Intrinsic Reward Modules

+ + + + + + + + + + + + + + + + + + + + + + + + + +
TypeModules
Count-basedPseudoCounts, RND
Curiosity-drivenICM, GIRM, RIDE
Memory-basedNGU
Information theory-basedRE3, RISE, REVD
+

For detailed examples, see Tutorials: Use Intrinsic Reward and Observation Augmentation.

+

RLLTE Ecosystem

+

Explore the RLLTE ecosystem to accelerate your research:

+
    +
  • Hub: fast training APIs and reusable benchmarks;
  • +
  • Evaluation: reliable metrics for model evaluation;
  • +
  • Env: well-packaged environments;
  • +
  • Deployment: convenient APIs for model deployment;
  • +
  • Pre-training: multiple methods of pre-training in RL;
  • +
  • Copilot: a large language model-empowered copilot.
  • +
+

API Documentation

+

See our well-designed API documentation: https://docs.rllte.dev/

+
+ +
+ +

How to Contribute

+

Contributions to this project are welcome! Before you start coding, please read CONTRIBUTING.md.

+

Cite the Project

+

To cite RLLTE in your research, please use the following format: +

@software{rllte,
+  author = {Mingqi Yuan, Zequn Zhang, Yang Xu, Shihao Luo, Bo Li, Xin Jin, and Wenjun Zeng},
+  title = {RLLTE: Long-Term Evolution Project of Reinforcement Learning},
+  url = {https://github.com/RLE-Foundation/rllte},
+  year = {2023},
+}
+

+

Acknowledgments

+

This project is supported by The Hong Kong Polytechnic University, Eastern Institute for Advanced Study, and the FLW-Foundation. The Eastern Institute HPC Center provides the GPU computing platform, and HUAWEI Ascend provides the NPU computing platform. Some code of this project is borrowed from other excellent open-source projects; see ACKNOWLEDGMENT.md.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api/index.html b/api/index.html new file mode 100644 index 00000000..f83deae5 --- /dev/null +++ b/api/index.html @@ -0,0 +1,4833 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Overview - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + + + +

Architecture

+

Agent: Implemented RL algorithms using RLLTE modules.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
TypeAlgo.BoxDis.M.B.M.D.M.P.NPU💰🔭
On-PolicyA2C✔️✔️✔️✔️✔️✔️✔️
On-PolicyPPO✔️✔️✔️✔️✔️✔️✔️
On-PolicyDrAC✔️✔️✔️✔️✔️✔️✔️✔️
On-PolicyDAAC✔️✔️✔️✔️✔️✔️✔️
On-PolicyDrDAAC✔️✔️✔️✔️✔️✔️✔️✔️
On-PolicyPPG✔️✔️✔️✔️✔️✔️
Off-PolicyDQN✔️✔️✔️✔️
Off-PolicyDDPG✔️✔️✔️✔️
Off-PolicySAC✔️✔️✔️✔️
Off-PolicySAC-Discrete✔️✔️✔️✔️
Off-PolicyTD3✔️✔️✔️✔️
Off-PolicyDrQ-v2✔️✔️✔️✔️
DistributedIMPALA✔️✔️✔️
+
+
    +
  • Dis., M.B., M.D.: Discrete, MultiBinary, and MultiDiscrete action space;
  • +
  • M.P.: Multi-processing;
  • +
  • 🐌: Developing;
  • +
  • 💰: Support intrinsic reward shaping;
  • +
  • 🔭: Support observation augmentation.
  • +
+
+
+

Xploit: Modules that focus on exploitation in RL.

+
+

Policy: Policies for interaction and learning.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModuleTypeRemark
OnPolicySharedActorCriticOn-policyActor-Critic networks with a shared encoder.
OnPolicyDecoupledActorCriticOn-policyActor-Critic networks with two separate encoders.
OffPolicyDoubleQNetworkOff-policyDouble Q-network.
OffPolicyDoubleActorDoubleCriticOff-policyDouble deterministic actor network and double-critic network.
OffPolicyDetActorDoubleCriticOff-policyDeterministic actor network and double-critic network.
OffPolicyStochActorDoubleCriticOff-policyStochastic actor network and double-critic network.
DistributedActorLearnerDistributedMemory-shared actor and learner networks.
+
+
+

Encoder: Neural network-based encoders for processing observations.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModuleInputReferenceTarget Task
EspeholtResidualEncoderImagesPaperAtari or Procgen games
MnihCnnEncoderImagesPaperAtari games
TassaCnnEncoderImagesPaperDeepMind Control Suite: pixel
PathakCnnEncoderImagesPaperAtari or MiniGrid games
IdentityEncoderStatesN/ADeepMind Control Suite: state
VanillaMlpEncoderStatesN/ADeepMind Control Suite: state
RaffinCombinedEncoderDictPaperHighway
+
+
    +
  • Naming Rule: Surname of the first author + Backbone + Encoder
  • +
  • Target Task: The testing tasks in their paper or potential tasks.
  • +
+
+
+
+

Storage: Experience storage and sampling.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModuleTypeRemark
VanillaRolloutStorageOn-policy
DictRolloutStorageOn-policy
VanillaReplayStorageOff-policy
DictReplayStorageOff-policy
NStepReplayStorageOff-policy
PrioritizedReplayStorageOff-policy
HerReplayStorageOff-policy
VanillaDistributedStorageDistributed
+
+
+

Xplore: Modules that focus on exploration in RL.

+
+

Augmentation: PyTorch.nn-like modules for observation augmentation.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModuleInputReference
GaussianNoiseStatesPaper
RandomAmplitudeScalingStatesPaper
GrayScaleImagesPaper
RandomColorJitterImagesPaper
RandomConvolutionImagesPaper
RandomCropImagesPaper
RandomCutoutImagesPaper
RandomCutoutColorImagesPaper
RandomFlipImagesPaper
RandomRotateImagesPaper
RandomShiftImagesPaper
RandomTranslateImagesPaper
+
+
+

Distribution: Distributions for sampling actions.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModuleTypeReference
NormalNoiseNoisePaper
OrnsteinUhlenbeckNoiseNoisePaper
TruncatedNormalNoiseNoisePaper
BernoulliDistributionPaper
CategoricalDistributionPaper
MultiCategoricalDistributionPaper
DiagonalGaussianDistributionPaper
SquashedNormalDistributionPaper
+
+
    +
  • In RLLTE, action noise is implemented as a Distribution to provide a unified interface.
  • +
+
+
+
+

Reward: Intrinsic reward modules for enhancing exploration.

+ + + + + + + + + + + + + + + + + + + + + + + + + +
TypeModules
Count-basedPseudoCounts, RND
Curiosity-drivenICM, GIRM, RIDE
Memory-basedNGU
Information theory-basedRE3, RISE, REVD
+

See Tutorials: Use Intrinsic Reward and Observation Augmentation for usage examples.

+
+
+

Env: Packaged environments (e.g., Atari games) for fast invocation.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FunctionNameRemarkReference
make_atari_envAtari GamesDiscrete controlPaper
make_bullet_envPyBullet Robotics EnvironmentsContinuous controlPaper
make_dmc_envDeepMind Control SuiteContinuous controlPaper
make_minigrid_envMiniGrid GamesDiscrete controlPaper
make_procgen_envProcgen GamesDiscrete controlPaper
make_robosuite_envRobosuite Robotics EnvironmentsContinuous controlPaper
+
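For instance, a set of vectorized environments can be created with a single call. The sketch below follows the README quick-start and uses only the arguments shown there (any other keyword arguments are assumptions):
from rllte.env import make_atari_env

# vectorized Atari environments placed on the chosen device
envs = make_atari_env("PongNoFrameskip-v4", num_envs=8, seed=0, device="cuda")
print(envs.observation_space, envs.action_space)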
+

Copilot: Large language model-empowered copilot.

+

See Copilot.

+
+

Hub: Fast training APIs and reusable benchmarks.

+

See Benchmarks.

+
+

Evaluation: Reasonable and reliable metrics for algorithm evaluation.

+

See Tutorials: Model Evaluation.

+
+

Pre-training: Methods of pre-training in RL.

+

See Tutorials: Pre-training.

+
+

Deployment: Convenient APIs for model deployment.

+

See Tutorials: Model Deployment.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/agent/daac/index.html b/api_docs/agent/daac/index.html new file mode 100644 index 00000000..dadb1af8 --- /dev/null +++ b/api_docs/agent/daac/index.html @@ -0,0 +1,4186 @@ + + + + + + + + + + + + + + + + + + + + + + + + + DAAC - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

DAAC

+

source +

DAAC(
+   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,
+   device: str = 'cpu', pretraining: bool = False, num_steps: int = 128,
+   feature_dim: int = 512, batch_size: int = 256, lr: float = 0.00025, eps: float = 1e-05,
+   hidden_dim: int = 256, clip_range: float = 0.2, clip_range_vf: float = 0.2,
+   policy_epochs: int = 1, value_freq: int = 1, value_epochs: int = 9, vf_coef: float = 0.5,
+   ent_coef: float = 0.01, adv_coef: float = 0.25, max_grad_norm: float = 0.5,
+   discount: float = 0.999, init_fn: str = 'xavier_uniform'
+)
+

+
+

Decoupled Advantage Actor-Critic (DAAC) agent. +Based on: https://github.com/rraileanu/idaac

+

Args

+
    +
  • env (VecEnv) : Vectorized environments for training.
  • +
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • +
  • tag (str) : An experiment tag.
  • +
  • seed (int) : Random seed for reproduction.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • pretraining (bool) : Turn on the pre-training mode.
  • +
  • num_steps (int) : The sample length per rollout.
  • +
  • feature_dim (int) : Number of features extracted by the encoder.
  • +
  • batch_size (int) : Number of samples per batch to load.
  • +
  • lr (float) : The learning rate.
  • +
  • eps (float) : Term added to the denominator to improve numerical stability.
  • +
  • hidden_dim (int) : The size of the hidden layers.
  • +
  • clip_range (float) : Clipping parameter.
  • +
  • clip_range_vf (float) : Clipping parameter for the value function.
  • +
  • policy_epochs (int) : Times of updating the policy network.
  • +
  • value_freq (int) : Update frequency of the value network.
  • +
  • value_epochs (int) : Times of updating the value network.
  • +
  • vf_coef (float) : Weighting coefficient of value loss.
  • +
  • ent_coef (float) : Weighting coefficient of entropy bonus.
  • +
  • adv_coef (float) : Weighting coefficient of advantage loss.
  • +
  • max_grad_norm (float) : Maximum norm of gradients.
  • +
  • discount (float) : Discount factor.
  • +
  • init_fn (str) : Parameters initialization method.
  • +
+

Returns

+

DAAC agent instance.
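For example, a minimal training sketch might look like the following. The Procgen environment id, the number of environments, and the step count are illustrative assumptions; make_procgen_env and agent.train follow the usage shown in the project README.
from rllte.env import make_procgen_env
from rllte.agent import DAAC

if __name__ == "__main__":
    device = "cuda:0"
    # create vectorized Procgen environments (env id and num_envs are illustrative)
    env = make_procgen_env(env_id="bigfish", num_envs=64, device=device)
    # create the DAAC agent with default hyper-parameters
    agent = DAAC(env=env, device=device, tag="daac_procgen")
    # start training (step count is illustrative)
    agent.train(num_train_steps=10000000)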

+

Methods:

+

.update

+

source +

.update()
+

+
+

Update function that returns training metrics such as policy loss, value loss, etc.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/agent/drac/index.html b/api_docs/agent/drac/index.html new file mode 100644 index 00000000..bb341916 --- /dev/null +++ b/api_docs/agent/drac/index.html @@ -0,0 +1,4183 @@ + + + + + + + + + + + + + + + + + + + + + + + + + DrAC - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

DrAC

+

source +

DrAC(
+   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,
+   device: str = 'cpu', pretraining: bool = False, num_steps: int = 128,
+   feature_dim: int = 512, batch_size: int = 256, lr: float = 0.00025, eps: float = 1e-05,
+   hidden_dim: int = 512, clip_range: float = 0.1, clip_range_vf: float = 0.1,
+   n_epochs: int = 4, vf_coef: float = 0.5, ent_coef: float = 0.01, aug_coef: float = 0.1,
+   max_grad_norm: float = 0.5, discount: float = 0.999, init_fn: str = 'orthogonal'
+)
+

+
+

Data Regularized Actor-Critic (DrAC) agent. +Based on: https://github.com/rraileanu/auto-drac

+

Args

+
    +
  • env (VecEnv) : Vectorized environments for training.
  • +
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • +
  • tag (str) : An experiment tag.
  • +
  • seed (int) : Random seed for reproduction.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • pretraining (bool) : Turn on the pre-training mode.
  • +
  • num_steps (int) : The sample length per rollout.
  • +
  • feature_dim (int) : Number of features extracted by the encoder.
  • +
  • batch_size (int) : Number of samples per batch to load.
  • +
  • lr (float) : The learning rate.
  • +
  • eps (float) : Term added to the denominator to improve numerical stability.
  • +
  • hidden_dim (int) : The size of the hidden layers.
  • +
  • clip_range (float) : Clipping parameter.
  • +
  • clip_range_vf (float) : Clipping parameter for the value function.
  • +
  • n_epochs (int) : Times of updating the policy.
  • +
  • vf_coef (float) : Weighting coefficient of value loss.
  • +
  • ent_coef (float) : Weighting coefficient of entropy bonus.
  • +
  • aug_coef (float) : Weighting coefficient of augmentation loss.
  • +
  • max_grad_norm (float) : Maximum norm of gradients.
  • +
  • discount (float) : Discount factor.
  • +
  • init_fn (str) : Parameters initialization method.
  • +
+

Returns

+

DrAC agent instance.

+

Methods:

+

.update

+

source +

.update()
+

+
+

Update function that returns training metrics such as policy loss, value loss, etc.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/agent/drdaac/index.html b/api_docs/agent/drdaac/index.html new file mode 100644 index 00000000..cbe04bfd --- /dev/null +++ b/api_docs/agent/drdaac/index.html @@ -0,0 +1,4187 @@ + + + + + + + + + + + + + + + + + + + + + + + + + DrDAAC - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

DrDAAC

+

source +

DrDAAC(
+   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,
+   device: str = 'cpu', pretraining: bool = False, num_steps: int = 128,
+   feature_dim: int = 512, batch_size: int = 256, lr: float = 0.00025, eps: float = 1e-05,
+   hidden_dim: int = 256, clip_range: float = 0.2, clip_range_vf: float = 0.2,
+   policy_epochs: int = 1, value_freq: int = 1, value_epochs: int = 9, vf_coef: float = 0.5,
+   ent_coef: float = 0.01, aug_coef: float = 0.1, adv_coef: float = 0.25,
+   max_grad_norm: float = 0.5, discount: float = 0.999, init_fn: str = 'xavier_uniform'
+)
+

+
+

Data-Regularized extension of Decoupled Advantage Actor-Critic (DAAC) agent. +Based on: https://github.com/rraileanu/idaac

+

Args

+
    +
  • env (VecEnv) : Vectorized environments for training.
  • +
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • +
  • tag (str) : An experiment tag.
  • +
  • seed (int) : Random seed for reproduction.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • pretraining (bool) : Turn on the pre-training mode.
  • +
  • num_steps (int) : The sample length per rollout.
  • +
  • feature_dim (int) : Number of features extracted by the encoder.
  • +
  • batch_size (int) : Number of samples per batch to load.
  • +
  • lr (float) : The learning rate.
  • +
  • eps (float) : Term added to the denominator to improve numerical stability.
  • +
  • hidden_dim (int) : The size of the hidden layers.
  • +
  • clip_range (float) : Clipping parameter.
  • +
  • clip_range_vf (float) : Clipping parameter for the value function.
  • +
  • policy_epochs (int) : Times of updating the policy network.
  • +
  • value_freq (int) : Update frequency of the value network.
  • +
  • value_epochs (int) : Times of updating the value network.
  • +
  • vf_coef (float) : Weighting coefficient of value loss.
  • +
  • ent_coef (float) : Weighting coefficient of entropy bonus.
  • +
  • aug_coef (float) : Weighting coefficient of augmentation loss.
  • +
  • adv_coef (float) : Weighting coefficient of advantage loss.
  • +
  • max_grad_norm (float) : Maximum norm of gradients.
  • +
  • discount (float) : Discount factor.
  • +
  • init_fn (str) : Parameters initialization method.
  • +
+

Returns

+

DrDAAC agent instance.

+

Methods:

+

.update

+

source +

.update()
+

+
+

Update function that returns training metrics such as policy loss, value loss, etc.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/agent/drqv2/index.html b/api_docs/agent/drqv2/index.html new file mode 100644 index 00000000..f82e0ca5 --- /dev/null +++ b/api_docs/agent/drqv2/index.html @@ -0,0 +1,4248 @@ + + + + + + + + + + + + + + + + + + + + + + + + + DrQv2 - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

DrQv2

+

source +

DrQv2(
+   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,
+   device: str = 'cpu', pretraining: bool = False, num_init_steps: int = 2000,
+   storage_size: int = 1000000, feature_dim: int = 50, batch_size: int = 256,
+   lr: float = 0.0001, eps: float = 1e-08, hidden_dim: int = 1024,
+   critic_target_tau: float = 0.01, update_every_steps: int = 2,
+   stddev_clip: float = 0.3, init_fn: str = 'orthogonal'
+)
+

+
+

Data Regularized Q-v2 (DrQv2) agent. +Based on: https://github.com/facebookresearch/drqv2

+

Args

+
    +
  • env (VecEnv) : Vectorized environments for training.
  • +
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • +
  • tag (str) : An experiment tag.
  • +
  • seed (int) : Random seed for reproduction.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • pretraining (bool) : Turn on the pre-training mode.
  • +
  • num_init_steps (int) : Number of initial exploration steps.
  • +
  • storage_size (int) : The capacity of the storage.
  • +
  • feature_dim (int) : Number of features extracted by the encoder.
  • +
  • batch_size (int) : Number of samples per batch to load.
  • +
  • lr (float) : The learning rate.
  • +
  • eps (float) : Term added to the denominator to improve numerical stability.
  • +
  • hidden_dim (int) : The size of the hidden layers.
  • +
  • critic_target_tau (float) : The critic Q-function soft-update rate.
  • +
  • update_every_steps (int) : The agent update frequency.
  • +
  • stddev_clip (float) : The exploration std clip range.
  • +
  • init_fn (str) : Parameters initialization method.
  • +
+

Returns

+

DrQv2 agent instance.
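A minimal usage example, mirroring the quick-start script in the project README, is:
from rllte.env import make_dmc_env
from rllte.agent import DrQv2

if __name__ == "__main__":
    device = "cuda:0"
    # create the training and evaluation environments
    env = make_dmc_env(env_id="cartpole_balance", device=device)
    eval_env = make_dmc_env(env_id="cartpole_balance", device=device)
    # create the agent and start training
    agent = DrQv2(env=env, eval_env=eval_env, device=device, tag="drqv2_dmc_pixel")
    agent.train(num_train_steps=500000, log_interval=1000)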

+

Methods:

+

.update

+

source +

.update()
+

+
+

Update the agent and return training metrics such as actor loss, critic loss, etc.

+

.update_critic

+

source +

.update_critic(
+   obs: th.Tensor, actions: th.Tensor, rewards: th.Tensor, discount: th.Tensor,
+   next_obs: th.Tensor
+)
+

+
+

Update the critic network.

+

Args

+
    +
  • obs (th.Tensor) : Observations.
  • +
  • actions (th.Tensor) : Actions.
  • +
  • rewards (th.Tensor) : Rewards.
  • +
  • discount (th.Tensor) : Discount factors.
  • +
  • next_obs (th.Tensor) : Next observations.
  • +
+

Returns

+

None.

+

.update_actor

+

source +

.update_actor(
+   obs: th.Tensor
+)
+

+
+

Update the actor network.

+

Args

+
    +
  • obs (th.Tensor) : Observations.
  • +
+

Returns

+

None.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/agent/impala/index.html b/api_docs/agent/impala/index.html new file mode 100644 index 00000000..39c60896 --- /dev/null +++ b/api_docs/agent/impala/index.html @@ -0,0 +1,4191 @@ + + + + + + + + + + + + + + + + + + + + + + + + + IMPALA - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

IMPALA

+

source +

IMPALA(
+   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,
+   device: str = 'cpu', num_steps: int = 80, num_actors: int = 45, num_learners: int = 4,
+   num_storages: int = 60, feature_dim: int = 512, batch_size: int = 4, lr: float = 0.0004,
+   eps: float = 0.01, hidden_dim: int = 512, use_lstm: bool = False, ent_coef: float = 0.01,
+   baseline_coef: float = 0.5, max_grad_norm: float = 40, discount: float = 0.99,
+   init_fn: str = 'identity'
+)
+

+
+

Importance Weighted Actor-Learner Architecture (IMPALA). +Based on: https://github.com/facebookresearch/torchbeast/blob/main/torchbeast/monobeast.py

+

Args

+
    +
  • env (VecEnv) : Vectorized environments for training.
  • +
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • +
  • tag (str) : An experiment tag.
  • +
  • seed (int) : Random seed for reproduction.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • num_steps (int) : The sample length per rollout.
  • +
  • num_actors (int) : Number of actors.
  • +
  • num_learners (int) : Number of learners.
  • +
  • num_storages (int) : Number of storages.
  • +
  • feature_dim (int) : Number of features extracted by the encoder.
  • +
  • batch_size (int) : Number of samples per batch to load.
  • +
  • lr (float) : The learning rate.
  • +
  • eps (float) : Term added to the denominator to improve numerical stability.
  • +
  • hidden_dim (int) : The size of the hidden layers.
  • +
  • use_lstm (bool) : Use LSTM in the policy network or not.
  • +
  • ent_coef (float) : Weighting coefficient of entropy bonus.
  • +
  • baseline_coef (float) : Weighting coefficient of baseline value loss.
  • +
  • max_grad_norm (float) : Maximum norm of gradients.
  • +
  • discount (float) : Discount factor.
  • +
  • init_fn (str) : Parameters initialization method.
  • +
+

Returns

+

IMPALA agent instance.
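A minimal sketch of distributed training with IMPALA is shown below. The environment id, step count, and the choice of matching num_envs to num_actors are illustrative assumptions; the constructor arguments are those documented above.
from rllte.env import make_atari_env
from rllte.agent import IMPALA

if __name__ == "__main__":
    device = "cuda:0"
    # the number of parallel environments typically matches the number of actors (assumption)
    env = make_atari_env("PongNoFrameskip-v4", num_envs=45, seed=0, device=device)
    agent = IMPALA(env=env, device=device, tag="impala_atari", num_actors=45, num_learners=4)
    agent.train(num_train_steps=30000000)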

+

Methods:

+

.update

+

source +

.update(
+   batch: Dict, lock = threading.Lock()
+)
+

+
+

Update the learner model.

+

Args

+
    +
  • batch (Batch) : Batch samples.
  • +
  • lock (Lock) : Thread lock.
  • +
+

Returns

+

Training metrics.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/agent/legacy/a2c/index.html b/api_docs/agent/legacy/a2c/index.html new file mode 100644 index 00000000..9134f5f4 --- /dev/null +++ b/api_docs/agent/legacy/a2c/index.html @@ -0,0 +1,4179 @@ + + + + + + + + + + + + + + + + + + + + + + + + + A2C - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

A2C

+

source +

A2C(
+   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,
+   device: str = 'cpu', pretraining: bool = False, num_steps: int = 128,
+   feature_dim: int = 512, batch_size: int = 256, lr: float = 0.00025, eps: float = 1e-05,
+   hidden_dim: int = 512, n_epochs: int = 4, vf_coef: float = 0.5, ent_coef: float = 0.01,
+   max_grad_norm: float = 0.5, discount: float = 0.99, init_fn: str = 'orthogonal'
+)
+

+
+

Advantage Actor-Critic (A2C) agent. +Based on: https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail

+

Args

+
    +
  • env (VecEnv) : Vectorized environments for training.
  • +
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • +
  • tag (str) : An experiment tag.
  • +
  • seed (int) : Random seed for reproduction.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • pretraining (bool) : Turn on the pre-training mode.
  • +
  • num_steps (int) : The sample length per rollout.
  • +
  • feature_dim (int) : Number of features extracted by the encoder.
  • +
  • batch_size (int) : Number of samples per batch to load.
  • +
  • lr (float) : The learning rate.
  • +
  • eps (float) : Term added to the denominator to improve numerical stability.
  • +
  • hidden_dim (int) : The size of the hidden layers.
  • +
  • n_epochs (int) : Times of updating the policy.
  • +
  • vf_coef (float) : Weighting coefficient of value loss.
  • +
  • ent_coef (float) : Weighting coefficient of entropy bonus.
  • +
  • max_grad_norm (float) : Maximum norm of gradients.
  • +
  • discount (float) : Discount factor.
  • +
  • init_fn (str) : Parameters initialization method.
  • +
+

Returns

+

A2C agent instance.
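For instance, a short training sketch for the built-in A2C agent (following the Atari example in the README; the step count is illustrative):
from rllte.env import make_atari_env
from rllte.agent import A2C

if __name__ == "__main__":
    device = "cuda"
    # eight parallel Atari environments
    env = make_atari_env("PongNoFrameskip-v4", num_envs=8, seed=0, device=device)
    agent = A2C(env=env, tag="a2c_atari", seed=0, device=device, num_steps=128)
    agent.train(num_train_steps=10000000)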

+

Methods:

+

.update

+

source +

.update()
+

+
+

Update function that returns training metrics such as policy loss, value loss, etc.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/agent/legacy/ddpg/index.html b/api_docs/agent/legacy/ddpg/index.html new file mode 100644 index 00000000..af1858fe --- /dev/null +++ b/api_docs/agent/legacy/ddpg/index.html @@ -0,0 +1,4249 @@ + + + + + + + + + + + + + + + + + + + + + + + + + DDPG - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

DDPG

+

source +

DDPG(
+   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,
+   device: str = 'cpu', pretraining: bool = False, num_init_steps: int = 2000,
+   storage_size: int = 1000000, feature_dim: int = 50, batch_size: int = 256,
+   lr: float = 0.0001, eps: float = 1e-08, hidden_dim: int = 1024,
+   critic_target_tau: float = 0.01, update_every_steps: int = 2, discount: float = 0.99,
+   stddev_clip: float = 0.3, init_fn: str = 'orthogonal'
+)
+

+
+

Deep Deterministic Policy Gradient (DDPG) agent.

+

Args

+
    +
  • env (VecEnv) : Vectorized environments for training.
  • +
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • +
  • tag (str) : An experiment tag.
  • +
  • seed (int) : Random seed for reproduction.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • pretraining (bool) : Turn on the pre-training mode.
  • +
  • num_init_steps (int) : Number of initial exploration steps.
  • +
  • storage_size (int) : The capacity of the storage.
  • +
  • feature_dim (int) : Number of features extracted by the encoder.
  • +
  • batch_size (int) : Number of samples per batch to load.
  • +
  • lr (float) : The learning rate.
  • +
  • eps (float) : Term added to the denominator to improve numerical stability.
  • +
  • hidden_dim (int) : The size of the hidden layers.
  • +
  • critic_target_tau (float) : The critic Q-function soft-update rate.
  • +
  • update_every_steps (int) : The agent update frequency.
  • +
  • discount (float) : Discount factor.
  • +
  • stddev_clip (float) : The exploration std clip range.
  • +
  • init_fn (str) : Parameters initialization method.
  • +
+

Returns

+

DDPG agent instance.

+

Methods:

+

.update

+

source +

.update()
+

+
+

Update the agent and return training metrics such as actor loss, critic loss, etc.

+

.update_critic

+

source +

.update_critic(
+   obs: th.Tensor, actions: th.Tensor, rewards: th.Tensor, terminateds: th.Tensor,
+   truncateds: th.Tensor, next_obs: th.Tensor
+)
+

+
+

Update the critic network.

+

Args

+
    +
  • obs (th.Tensor) : Observations.
  • +
  • actions (th.Tensor) : Actions.
  • +
  • rewards (th.Tensor) : Rewards.
  • +
  • terminateds (th.Tensor) : Terminateds.
  • +
  • truncateds (th.Tensor) : Truncateds.
  • +
  • next_obs (th.Tensor) : Next observations.
  • +
+

Returns

+

None.

+

.update_actor

+

source +

.update_actor(
+   obs: th.Tensor
+)
+

+
+

Update the actor network.

+

Args

+
    +
  • obs (th.Tensor) : Observations.
  • +
+

Returns

+

None.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/agent/legacy/dqn/index.html b/api_docs/agent/legacy/dqn/index.html new file mode 100644 index 00000000..6016bd87 --- /dev/null +++ b/api_docs/agent/legacy/dqn/index.html @@ -0,0 +1,4179 @@ + + + + + + + + + + + + + + + + + + + + + + + + + DQN - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

DQN

+

source +

DQN(
+   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,
+   device: str = 'cpu', pretraining: bool = False, num_init_steps: int = 2000,
+   storage_size: int = 10000, feature_dim: int = 50, batch_size: int = 32,
+   lr: float = 0.001, eps: float = 1e-08, hidden_dim: int = 1024, tau: float = 1.0,
+   update_every_steps: int = 4, target_update_freq: int = 1000, discount: float = 0.99,
+   init_fn: str = 'orthogonal'
+)
+

+
+

Deep Q-Network (DQN) agent.

+

Args

+
    +
  • env (VecEnv) : Vectorized environments for training.
  • +
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • +
  • tag (str) : An experiment tag.
  • +
  • seed (int) : Random seed for reproduction.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • pretraining (bool) : Turn on the pre-training mode.
  • +
  • num_init_steps (int) : Number of initial exploration steps.
  • +
  • storage_size (int) : The capacity of the storage.
  • +
  • feature_dim (int) : Number of features extracted by the encoder.
  • +
  • batch_size (int) : Number of samples per batch to load.
  • +
  • lr (float) : The learning rate.
  • +
  • eps (float) : Term added to the denominator to improve numerical stability.
  • +
  • hidden_dim (int) : The size of the hidden layers.
  • +
  • tau (float) : The Q-function soft-update rate.
  • +
  • update_every_steps (int) : The update frequency of the policy.
  • +
  • target_update_freq (int) : The frequency of target Q-network update.
  • +
  • discount (float) : Discount factor.
  • +
  • init_fn (str) : Parameters initialization method.
  • +
+

Returns

+

DQN agent instance.
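A minimal usage sketch (the environment id and step count are illustrative assumptions; the constructor arguments are those documented above):
from rllte.env import make_atari_env
from rllte.agent import DQN

if __name__ == "__main__":
    device = "cuda:0"
    # a single Atari environment for off-policy training
    env = make_atari_env("PongNoFrameskip-v4", num_envs=1, seed=0, device=device)
    agent = DQN(env=env, device=device, tag="dqn_atari", batch_size=32)
    agent.train(num_train_steps=1000000)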

+

Methods:

+

.update

+

source +

.update()
+

+
+

Update the agent and return training metrics such as actor loss, critic loss, etc.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/agent/legacy/ppo/index.html b/api_docs/agent/legacy/ppo/index.html new file mode 100644 index 00000000..9a877a61 --- /dev/null +++ b/api_docs/agent/legacy/ppo/index.html @@ -0,0 +1,4182 @@ + + + + + + + + + + + + + + + + + + + + + + + + + PPO - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

PPO

+

source +

PPO(
+   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,
+   device: str = 'cpu', pretraining: bool = False, num_steps: int = 128,
+   feature_dim: int = 512, batch_size: int = 256, lr: float = 0.00025, eps: float = 1e-05,
+   hidden_dim: int = 512, clip_range: float = 0.1, clip_range_vf: Optional[float] = 0.1,
+   n_epochs: int = 4, vf_coef: float = 0.5, ent_coef: float = 0.01,
+   max_grad_norm: float = 0.5, discount: float = 0.999, init_fn: str = 'orthogonal'
+)
+

+
+

Proximal Policy Optimization (PPO) agent. +Based on: https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail

+

Args

+
    +
  • env (VecEnv) : Vectorized environments for training.
  • +
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • +
  • tag (str) : An experiment tag.
  • +
  • seed (int) : Random seed for reproduction.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • pretraining (bool) : Turn on the pre-training mode.
  • +
  • num_steps (int) : The sample length per rollout.
  • +
  • feature_dim (int) : Number of features extracted by the encoder.
  • +
  • batch_size (int) : Number of samples per batch to load.
  • +
  • lr (float) : The learning rate.
  • +
  • eps (float) : Term added to the denominator to improve numerical stability.
  • +
  • hidden_dim (int) : The size of the hidden layers.
  • +
  • clip_range (float) : Clipping parameter.
  • +
  • clip_range_vf (Optional[float]) : Clipping parameter for the value function.
  • +
  • n_epochs (int) : Times of updating the policy.
  • +
  • vf_coef (float) : Weighting coefficient of value loss.
  • +
  • ent_coef (float) : Weighting coefficient of entropy bonus.
  • +
  • max_grad_norm (float) : Maximum norm of gradients.
  • +
  • discount (float) : Discount factor.
  • +
  • init_fn (str) : Parameters initialization method.
  • +
+

Returns

+

PPO agent instance.
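For example, a minimal training sketch (the environment id and step count are illustrative assumptions; make_atari_env and agent.train follow the README usage):
from rllte.env import make_atari_env
from rllte.agent import PPO

if __name__ == "__main__":
    device = "cuda:0"
    # training and evaluation environments
    env = make_atari_env("PongNoFrameskip-v4", num_envs=8, seed=0, device=device)
    eval_env = make_atari_env("PongNoFrameskip-v4", num_envs=1, seed=0, device=device)
    agent = PPO(env=env, eval_env=eval_env, device=device, tag="ppo_atari", num_steps=128)
    agent.train(num_train_steps=10000000, log_interval=1000)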

+

Methods:

+

.update

+

source +

.update()
+

+
+

Update function that returns training metrics such as policy loss, value loss, etc.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/agent/legacy/sac/index.html b/api_docs/agent/legacy/sac/index.html new file mode 100644 index 00000000..5210822b --- /dev/null +++ b/api_docs/agent/legacy/sac/index.html @@ -0,0 +1,4280 @@ + + + + + + + + + + + + + + + + + + + + + + + + + SAC - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

SAC

+

source +

SAC(
+   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,
+   device: str = 'cpu', pretraining: bool = False, num_init_steps: int = 5000,
+   storage_size: int = 10000000, feature_dim: int = 50, batch_size: int = 1024,
+   lr: float = 0.0001, eps: float = 1e-08, hidden_dim: int = 1024,
+   actor_update_freq: int = 1, critic_target_tau: float = 0.005,
+   critic_target_update_freq: int = 2, log_std_range: Tuple[float, ...] = (-5.0, 2),
+   betas: Tuple[float, float] = (0.9, 0.999), temperature: float = 0.1,
+   fixed_temperature: bool = False, discount: float = 0.99, init_fn: str = 'orthogonal'
+)
+

+
+

Soft Actor-Critic (SAC) agent. +Based on: https://github.com/denisyarats/pytorch_sac

+

Args

+
    +
  • env (VecEnv) : Vectorized environments for training.
  • +
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • +
  • tag (str) : An experiment tag.
  • +
  • seed (int) : Random seed for reproduction.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • pretraining (bool) : Turn on the pre-training mode.
  • +
  • num_init_steps (int) : Number of initial exploration steps.
  • +
  • storage_size (int) : The capacity of the storage.
  • +
  • feature_dim (int) : Number of features extracted by the encoder.
  • +
  • batch_size (int) : Number of samples per batch to load.
  • +
  • lr (float) : The learning rate.
  • +
  • eps (float) : Term added to the denominator to improve numerical stability.
  • +
  • hidden_dim (int) : The size of the hidden layers.
  • +
  • actor_update_freq (int) : The actor update frequency (in steps).
  • +
  • critic_target_tau (float) : The critic Q-function soft-update rate.
  • +
  • critic_target_update_freq (int) : The critic Q-function soft-update frequency (in steps).
  • +
  • log_std_range (Tuple[float]) : Range of std for sampling actions.
  • +
  • betas (Tuple[float]) : Coefficients used for computing running averages of gradient and its square.
  • +
  • temperature (float) : Initial temperature coefficient.
  • +
  • fixed_temperature (bool) : Fixed temperature or not.
  • +
  • discount (float) : Discount factor.
  • +
  • init_fn (str) : Parameters initialization method.
  • +
+

Returns

+

SAC agent instance.

+

Methods:

+

.alpha

+

source +

.alpha()
+

+
+

Get the temperature coefficient.

+

.update

+

source +

.update()
+

+
+

Update the agent and return training metrics such as actor loss, critic loss, etc.

+

.update_critic

+

source +

.update_critic(
+   obs: th.Tensor, actions: th.Tensor, rewards: th.Tensor, terminateds: th.Tensor,
+   truncateds: th.Tensor, next_obs: th.Tensor
+)
+

+
+

Update the critic network.

+

Args

+
    +
  • obs (th.Tensor) : Observations.
  • +
  • actions (th.Tensor) : Actions.
  • +
  • rewards (th.Tensor) : Rewards.
  • +
  • terminateds (th.Tensor) : Terminateds.
  • +
  • truncateds (th.Tensor) : Truncateds.
  • +
  • next_obs (th.Tensor) : Next observations.
  • +
+

Returns

+

None.

+

.update_actor_and_alpha

+

source +

.update_actor_and_alpha(
+   obs: th.Tensor
+)
+

+
+

Update the actor network and temperature.

+

Args

+
    +
  • obs (th.Tensor) : Observations.
  • +
+

Returns

+

None.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/agent/legacy/sacd/index.html b/api_docs/agent/legacy/sacd/index.html new file mode 100644 index 00000000..8538175f --- /dev/null +++ b/api_docs/agent/legacy/sacd/index.html @@ -0,0 +1,4312 @@ + + + + + + + + + + + + + + + + + + + + + + + + + SACDiscrete - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

SACDiscrete

+

source +

SACDiscrete(
+   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,
+   device: str = 'cpu', pretraining: bool = False, num_init_steps: int = 10000,
+   storage_size: int = 100000, feature_dim: int = 50, batch_size: int = 256,
+   lr: float = 0.0005, eps: float = 1e-08, hidden_dim: int = 256,
+   actor_update_freq: int = 1, critic_target_tau: float = 0.01,
+   critic_target_update_freq: int = 4, betas: Tuple[float, float] = (0.9, 0.999),
+   temperature: float = 0.0, fixed_temperature: bool = False,
+   target_entropy_ratio: float = 0.98, discount: float = 0.99,
+   init_fn: str = 'orthogonal'
+)
+

+
+

Soft Actor-Critic Discrete (SAC-Discrete) agent.

+

Args

+
    +
  • env (VecEnv) : Vectorized environments for training.
  • +
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • +
  • tag (str) : An experiment tag.
  • +
  • seed (int) : Random seed for reproduction.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • pretraining (bool) : Turn on the pre-training mode.
  • +
  • num_init_steps (int) : Number of initial exploration steps.
  • +
  • storage_size (int) : The capacity of the storage.
  • +
  • feature_dim (int) : Number of features extracted by the encoder.
  • +
  • batch_size (int) : Number of samples per batch to load.
  • +
  • lr (float) : The learning rate.
  • +
  • eps (float) : Term added to the denominator to improve numerical stability.
  • +
  • hidden_dim (int) : The size of the hidden layers.
  • +
  • actor_update_freq (int) : The actor update frequency (in steps).
  • +
  • critic_target_tau (float) : The critic Q-function soft-update rate.
  • +
  • critic_target_update_freq (int) : The critic Q-function soft-update frequency (in steps).
  • +
  • betas (Tuple[float]) : Coefficients used for computing running averages of gradient and its square.
  • +
  • temperature (float) : Initial temperature coefficient.
  • +
  • fixed_temperature (bool) : Fixed temperature or not.
  • +
  • target_entropy_ratio (float) : Target entropy ratio.
  • +
  • discount (float) : Discount factor.
  • +
  • init_fn (str) : Parameters initialization method.
  • +
+

Returns

+

SACDiscrete agent instance.

+

Methods:

+

.alpha

+

source +

.alpha()
+

+
+

Get the temperature coefficient.

+

.update

+

source +

.update()
+

+
+

Update the agent and return training metrics such as actor loss, critic loss, etc.

+

.deal_with_zero_probs

+

source +

.deal_with_zero_probs(
+   action_probs: th.Tensor
+)
+

+
+

Handle the situation of 0.0 action probabilities.

+

Args

+
    +
  • action_probs (th.Tensor) : Action probabilities.
  • +
+

Returns

+

Action probabilities and their log values.

+

.update_critic

+

source +

.update_critic(
+   obs: th.Tensor, actions: th.Tensor, rewards: th.Tensor, terminateds: th.Tensor,
+   truncateds: th.Tensor, next_obs: th.Tensor
+)
+

+
+

Update the critic network.

+

Args

+
    +
  • obs (th.Tensor) : Observations.
  • +
  • actions (th.Tensor) : Actions.
  • +
  • rewards (th.Tensor) : Rewards.
  • +
  • terminateds (th.Tensor) : Terminateds.
  • +
  • truncateds (th.Tensor) : Truncateds.
  • +
  • next_obs (th.Tensor) : Next observations.
  • +
+

Returns

+

None.

+

.update_actor_and_alpha

+

source +

.update_actor_and_alpha(
+   obs: th.Tensor
+)
+

+
+

Update the actor network and temperature.

+

Args

+
    +
  • obs (th.Tensor) : Observations.
  • +
+

Returns

+

None.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/agent/ppg/index.html b/api_docs/agent/ppg/index.html new file mode 100644 index 00000000..944d7ad6 --- /dev/null +++ b/api_docs/agent/ppg/index.html @@ -0,0 +1,4188 @@ + + + + + + + + + + + + + + + + + + + + + + + + + PPG - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

PPG

+

source +

PPG(
+   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,
+   device: str = 'cpu', pretraining: bool = False, num_steps: int = 128,
+   feature_dim: int = 512, batch_size: int = 256, lr: float = 0.00025, eps: float = 1e-05,
+   hidden_dim: int = 512, clip_range: float = 0.2, clip_range_vf: float = 0.2,
+   vf_coef: float = 0.5, ent_coef: float = 0.01, max_grad_norm: float = 0.5,
+   policy_epochs: int = 32, aux_epochs: int = 6, kl_coef: float = 1.0,
+   num_aux_mini_batch: int = 4, num_aux_grad_accum: int = 1, discount: float = 0.999,
+   init_fn: str = 'xavier_uniform'
+)
+

+
+

Phasic Policy Gradient (PPG). +Based on: https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ppg_procgen.py

+

Args

+
    +
  • env (VecEnv) : Vectorized environments for training.
  • +
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • +
  • tag (str) : An experiment tag.
  • +
  • seed (int) : Random seed for reproduction.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • pretraining (bool) : Turn on the pre-training mode.
  • +
  • num_steps (int) : The sample length per rollout.
  • +
  • feature_dim (int) : Number of features extracted by the encoder.
  • +
  • batch_size (int) : Number of samples per batch to load.
  • +
  • lr (float) : The learning rate.
  • +
  • eps (float) : Term added to the denominator to improve numerical stability.
  • +
  • hidden_dim (int) : The size of the hidden layers.
  • +
  • clip_range (float) : Clipping parameter.
  • +
  • clip_range_vf (float) : Clipping parameter for the value function.
  • +
  • vf_coef (float) : Weighting coefficient of value loss.
  • +
  • ent_coef (float) : Weighting coefficient of entropy bonus.
  • +
  • max_grad_norm (float) : Maximum norm of gradients.
  • +
  • policy_epochs (int) : Number of iterations in the policy phase.
  • +
  • aux_epochs (int) : Number of iterations in the auxiliary phase.
  • +
  • kl_coef (float) : Weighting coefficient of divergence loss.
  • +
  • num_aux_grad_accum (int) : Number of gradient accumulation for auxiliary phase update.
  • +
  • discount (float) : Discount factor.
  • +
  • init_fn (str) : Parameters initialization method.
  • +
+

num_aux_mini_batch (int) : Number of mini-batches in the auxiliary phase.

+

Returns

+

PPG agent instance.

+

Methods:

+

.update

+

source +

.update()
+

+
+

Update function that returns training metrics such as policy loss, value loss, etc.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/common/initialization/index.html b/api_docs/common/initialization/index.html new file mode 100644 index 00000000..71ef0e7a --- /dev/null +++ b/api_docs/common/initialization/index.html @@ -0,0 +1,4228 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Initialization - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

get_init_fn

+

source +

.get_init_fn(
+   method: str = 'orthogonal'
+)
+

+
+

Returns a network initialization function.

+

Args

+
    +
  • method (str) : Initialization method name.
  • +
+

Returns

+

Initialization function.
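A small sketch of how the returned function is typically used with a PyTorch module. The module path rllte.common.initialization is an assumption based on this page's title; the rest uses standard PyTorch APIs.
from torch import nn
from rllte.common.initialization import get_init_fn  # module path assumed from this page

# build a simple network and apply the chosen initializer to every submodule
net = nn.Sequential(nn.Linear(8, 64), nn.ReLU(), nn.Linear(64, 2))
init_fn = get_init_fn(method="orthogonal")
net.apply(init_fn)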

+
+

_xavier_normal

+

source +

._xavier_normal(
+   m
+)
+

+
+

Xavier normal initialization.

+
+

_xavier_uniform

+

source +

._xavier_uniform(
+   m
+)
+

+
+

Xavier uniform initialization.

+
+

_orthogonal

+

source +

._orthogonal(
+   m
+)
+

+
+

Orthogonal initialization.

+
+

_identity

+

source +

._identity(
+   m
+)
+

+
+

Identity initialization.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/common/logger/index.html b/api_docs/common/logger/index.html new file mode 100644 index 00000000..dd9ca356 --- /dev/null +++ b/api_docs/common/logger/index.html @@ -0,0 +1,4414 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Logger - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

Logger

+

source +

Logger(
+   log_dir: Path
+)
+

+
+

The logger class.

+

Args

+
    +
  • log_dir : The logging location.
  • +
+

Returns

+

Logger instance.

+

Methods:

+

.record

+

source +

.record(
+   key: str, value: Any
+)
+

+
+

Record the metric.

+

Args

+
    +
  • key (str) : The key of the metric.
  • +
  • value (Any) : The value of the metric.
  • +
+

Returns

+

None.

+

.parse_train_msg

+

source +

.parse_train_msg(
+   msg: Dict
+)
+

+
+

Parse the training message.

+

Args

+
    +
  • msg (Dict) : The training message.
  • +
+

Returns

+

The formatted string.

+

.parse_eval_msg

+

source +

.parse_eval_msg(
+   msg: Dict
+)
+

+
+

Parse the evaluation message.

+

Args

+
    +
  • msg (Dict) : The evaluation message.
  • +
+

Returns

+

The formatted string.

+

.time_stamp

+

source +

.time_stamp()
+

+
+

Return the current time stamp.

+

.info

+

source +

.info(
+   msg: str
+)
+

+
+

Output msg with 'info' level.

+

Args

+
    +
  • msg (str) : Message to be printed.
  • +
+

Returns

+

None.

+

.debug

+

source +

.debug(
+   msg: str
+)
+

+
+

Output msg with 'debug' level.

+

Args

+
    +
  • msg (str) : Message to be printed.
  • +
+

Returns

+

None.

+

.error

+

source +

.error(
+   msg: str
+)
+

+
+

Output msg with 'error' level.

+

Args

+
    +
  • msg (str) : Message to be printed.
  • +
+

Returns

+

None.

+

.train

+

source +

.train(
+   msg: Dict
+)
+

+
+

Output msg with 'train' level.

+

Args

+
    +
  • msg (Dict) : Message to be printed.
  • +
+

Returns

+

None.

+

.eval

+

source +

.eval(
+   msg: Dict
+)
+

+
+

Output msg with 'eval' level.

+

Args

+
    +
  • msg (Dict) : Message to be printed.
  • +
+

Returns

+

None.
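Putting these methods together, a minimal usage sketch looks like the following. The module path and the exact keys expected by the .train/.eval messages are assumptions; only the constructor and methods documented above are used.
from pathlib import Path
from rllte.common.logger import Logger  # module path assumed from this page

logger = Logger(log_dir=Path("./logs"))
logger.info("Training started")
# record a single metric
logger.record(key="episode_reward", value=123.4)
# print a formatted training message (the keys here are illustrative)
logger.train(msg={"step": 1000, "episode_reward": 123.4})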

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/common/preprocessing/index.html b/api_docs/common/preprocessing/index.html new file mode 100644 index 00000000..725aa8b8 --- /dev/null +++ b/api_docs/common/preprocessing/index.html @@ -0,0 +1,4302 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Preprocessing - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

+

process_observation_space

+

source +

.process_observation_space(
+   observation_space: gym.Space
+)
+

+
+

Process the observation space.

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
+

Returns

+

Information of the observation space.

+
+

process_action_space

+

source +

.process_action_space(
+   action_space: gym.Space
+)
+

+
+

Get the dimension of the action space.

+

Args

+
    +
  • action_space (gym.Space) : Action space.
  • +
+

Returns

+

Information of the action space.

+
+

get_flattened_obs_dim

+

source +

.get_flattened_obs_dim(
+   observation_space: spaces.Space
+)
+

+
+

Get the dimension of the observation space when flattened. It does not apply to image observation spaces. +Borrowed from: https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/preprocessing.py#L169

+

Args

+
    +
  • observation_space (spaces.Space) : Observation space.
  • +
+

Returns

+

The dimension of the observation space when flattened.

+
+

is_image_space_channels_first

+

source +

.is_image_space_channels_first(
+   observation_space: spaces.Box
+)
+

+
+

Check if an image observation space (see is_image_space) +is channels-first (CxHxW, True) or channels-last (HxWxC, False). +Use a heuristic that channel dimension is the smallest of the three. +If second dimension is smallest, raise an exception (no support).

+

Borrowed from: https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/preprocessing.py#L10

+

Args

+
    +
  • observation_space (spaces.Box) : Observation space.
  • +
+

Returns

+

True if observation space is channels-first image, False if channels-last.

+
+

is_image_space

+

source +

.is_image_space(
+   observation_space: gym.Space, check_channels: bool = False,
+   normalized_image: bool = False
+)
+

+
+

Check if an observation space has the shape, limits and dtype of a valid image. +The check is conservative, so that it returns False if there is a doubt. +Valid images: RGB, RGBD, GrayScale with values in [0, 255]

+

Borrowed from: https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/preprocessing.py#L27

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • check_channels (bool) : Whether to do or not the check for the number of channels. + e.g., with frame-stacking, the observation space may have more channels than expected.
  • +
  • normalized_image (bool) : Whether to assume that the image is already normalized + or not (this disables dtype and bounds checks): when True, it only checks that + the space is a Box and has 3 dimensions. + Otherwise, it checks that it has expected dtype (uint8) and bounds (values in [0, 255]).
  • +
+

Returns

+

True if the observation space is a valid image space, False otherwise.

+
+

preprocess_obs

+

source +

.preprocess_obs(
+   obs: th.Tensor, observation_space: gym.Space
+)
+

+
+

Observations preprocessing function. +Borrowed from: https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/preprocessing.py#L92

+

Args

+
    +
  • obs (th.Tensor) : Observation.
  • +
  • observation_space (gym.Space) : Observation space.
  • +
+

Returns

+

The preprocessed observations.
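A short sketch of how these helpers fit together, assuming the module path rllte.common.preprocessing implied by this page's location:

import gymnasium as gym
import torch as th
from rllte.common import preprocessing  # import path assumed from this page's location

# A stacked-frame image observation space (channels-first: 4 x 84 x 84).
obs_space = gym.spaces.Box(low=0, high=255, shape=(4, 84, 84), dtype="uint8")

print(preprocessing.is_image_space(obs_space))                 # True: uint8 Box with values in [0, 255]
print(preprocessing.is_image_space_channels_first(obs_space))  # True: the channel dim (4) is the smallest and comes first

obs = th.randint(0, 256, size=(1, 4, 84, 84), dtype=th.uint8)
# Exact scaling is assumed to mirror the SB3 helper this function borrows from (e.g. image obs mapped to [0, 1]).
processed = preprocessing.preprocess_obs(obs, obs_space)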

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/common/prototype/base_agent/index.html b/api_docs/common/prototype/base_agent/index.html new file mode 100644 index 00000000..4814424f --- /dev/null +++ b/api_docs/common/prototype/base_agent/index.html @@ -0,0 +1,4384 @@ + + + + + + + + + + + + + + + + + + + + + + + + + BaseAgent - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

BaseAgent

+

source +

BaseAgent(
+   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,
+   device: str = 'auto', pretraining: bool = False
+)
+

+
+

Base class of the agent.

+

Args

+
    +
  • env (VecEnv) : Vectorized environments for training.
  • +
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • +
  • tag (str) : An experiment tag.
  • +
  • seed (int) : Random seed for reproduction.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • pretraining (bool) : Turn on pre-training model or not.
  • +
+

Returns

+

Base agent instance.

+

Methods:

+

.freeze

+

source +

.freeze(
+   **kwargs
+)
+

+
+

Freeze the agent and get ready for training.

+

.check

+

source +

.check()
+

+
+

Check the compatibility of selected modules.

+

.set

+

source +

.set(
+   encoder: Optional[Encoder] = None, policy: Optional[Policy] = None,
+   storage: Optional[Storage] = None, distribution: Optional[Distribution] = None,
+   augmentation: Optional[Augmentation] = None,
+   reward: Optional[IntrinsicRewardModule] = None
+)
+

+
+

Set a module for the agent.

+

Args

+
    +
  • encoder (Optional[Encoder]) : An encoder of rllte.xploit.encoder or a custom encoder.
  • +
  • policy (Optional[Policy]) : A policy of rllte.xploit.policy or a custom policy.
  • +
  • storage (Optional[Storage]) : A storage of rllte.xploit.storage or a custom storage.
  • +
  • distribution (Optional[Distribution]) : A distribution of rllte.xplore.distribution + or a custom distribution.
  • +
  • augmentation (Optional[Augmentation]) : An augmentation of rllte.xplore.augmentation + or a custom augmentation.
  • +
  • reward (Optional[IntrinsicRewardModule]) : A reward of rllte.xplore.reward or a custom reward.
  • +
+

Returns

+

None.
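For example, the sketch below swaps the default encoder of a built-in agent before training. It assumes that PPO is exposed in rllte.agent (it appears in rllte-hub), that the vectorized environment exposes observation_space, and that MnihCnnEncoder accepts the BaseEncoder arguments (observation_space, feature_dim):

from rllte.agent import PPO                      # assumed to be exposed in rllte.agent
from rllte.env import make_atari_env
from rllte.xploit.encoder import MnihCnnEncoder

if __name__ == "__main__":
    device = "cuda:0"
    env = make_atari_env(env_id="Alien-v5", num_envs=8, device=device)
    agent = PPO(env=env, device=device, tag="ppo_atari")
    # Swap in a different encoder before training; the constructor arguments
    # (observation_space, feature_dim) mirror BaseEncoder and are assumed here.
    encoder = MnihCnnEncoder(observation_space=env.observation_space, feature_dim=512)
    agent.set(encoder=encoder)
    agent.train(num_train_steps=1_000_000, log_interval=100)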

+

.mode

+

source +

.mode(
+   training: bool = True
+)
+

+
+

Set the training mode.

+

Args

+
    +
  • training (bool) : True (training) or False (evaluation).
  • +
+

Returns

+

None.

+

.save

+

source +

.save()
+

+
+

Save the agent.

+

.update

+

source +

.update(
+   *args, **kwargs
+)
+

+
+

Update function of the agent.

+

.train

+

source +

.train(
+   num_train_steps: int, init_model_path: Optional[str], log_interval: int,
+   eval_interval: int, save_interval: int, num_eval_episodes: int, th_compile: bool
+)
+

+
+

Training function.

+

Args

+
    +
  • num_train_steps (int) : The number of training steps.
  • +
  • init_model_path (Optional[str]) : The path of the initial model.
  • +
  • log_interval (int) : The interval of logging.
  • +
  • eval_interval (int) : The interval of evaluation.
  • +
  • save_interval (int) : The interval of saving model.
  • +
  • num_eval_episodes (int) : The number of evaluation episodes.
  • +
  • th_compile (bool) : Whether to use th.compile or not.
  • +
+

Returns

+

None.

+

.eval

+

source +

.eval(
+   num_eval_episodes: int
+)
+

+
+

Evaluation function.

+

Args

+
    +
  • num_eval_episodes (int) : The number of evaluation episodes.
  • +
+

Returns

+

The evaluation results.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/common/prototype/base_augmentation/index.html b/api_docs/common/prototype/base_augmentation/index.html new file mode 100644 index 00000000..280365e4 --- /dev/null +++ b/api_docs/common/prototype/base_augmentation/index.html @@ -0,0 +1,4112 @@ + + + + + + + + + + + + + + + + + + + + + + + + + BaseAugmentation - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

BaseAugmentation

+

source +


+

+
+

Base class of augmentation.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/common/prototype/base_distribution/index.html b/api_docs/common/prototype/base_distribution/index.html new file mode 100644 index 00000000..c03ac574 --- /dev/null +++ b/api_docs/common/prototype/base_distribution/index.html @@ -0,0 +1,4154 @@ + + + + + + + + + + + + + + + + + + + + + + + + + BaseDistribution - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

BaseDistribution

+

source +

BaseDistribution(
+   *args, **kwargs
+)
+

+
+

Abstract base class of distributions. +In rllte, the action noise is implemented as a distribution.

+

Methods:

+

.sample

+

source +

.sample(
+   *args, **kwargs
+)
+

+
+

Generate samples.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/common/prototype/base_encoder/index.html b/api_docs/common/prototype/base_encoder/index.html new file mode 100644 index 00000000..6d6dcd73 --- /dev/null +++ b/api_docs/common/prototype/base_encoder/index.html @@ -0,0 +1,4121 @@ + + + + + + + + + + + + + + + + + + + + + + + + + BaseEncoder - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

BaseEncoder

+

source +

BaseEncoder(
+   observation_space: gym.Space, feature_dim: int = 0
+)
+

+
+

Base class that represents a feature extractor.

+

Args

+
    +
  • observation_space (gym.Space) : The observation space.
  • +
  • feature_dim (int) : Number of features extracted.
  • +
+

Returns

+

The base encoder instance.
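A custom feature extractor only needs to subclass this base class and implement a forward pass. The sketch below assumes BaseEncoder is exported from rllte.common.prototype and behaves as an ordinary torch.nn.Module:

import gymnasium as gym
import torch as th
from torch import nn
from rllte.common.prototype import BaseEncoder  # export assumed from this page's location

class MlpEncoder(BaseEncoder):
    """A tiny MLP feature extractor for 1-D (state-based) observations."""

    def __init__(self, observation_space: gym.Space, feature_dim: int = 64) -> None:
        super().__init__(observation_space, feature_dim)
        obs_dim = observation_space.shape[0]
        self.trunk = nn.Sequential(
            nn.Linear(obs_dim, 256), nn.ReLU(),
            nn.Linear(256, feature_dim), nn.ReLU(),
        )

    def forward(self, obs: th.Tensor) -> th.Tensor:
        # Map raw observations to a feature_dim-sized vector.
        return self.trunk(obs)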

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/common/prototype/base_policy/index.html b/api_docs/common/prototype/base_policy/index.html new file mode 100644 index 00000000..be8a7407 --- /dev/null +++ b/api_docs/common/prototype/base_policy/index.html @@ -0,0 +1,4331 @@ + + + + + + + + + + + + + + + + + + + + + + + + + BasePolicy - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

BasePolicy

+

source +

BasePolicy(
+   observation_space: gym.Space, action_space: gym.Space, feature_dim: int,
+   hidden_dim: int, opt_class: Type[th.optim.Optimizer] = th.optim.Adam,
+   opt_kwargs: Optional[Dict[str, Any]] = None, init_fn: str = 'orthogonal'
+)
+

+
+

Base class for all policies.

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • action_space (gym.Space) : Action space.
  • +
  • feature_dim (int) : Number of features accepted.
  • +
  • hidden_dim (int) : Number of units per hidden layer.
  • +
  • opt_class (Type[th.optim.Optimizer]) : Optimizer class.
  • +
  • opt_kwargs (Dict[str, Any]) : Optimizer keyword arguments.
  • +
  • init_fn (str) : Parameters initialization method.
  • +
+

Returns

+

Base policy instance.

+

Methods:

+

.optimizers

+

source +

.optimizers()
+

+
+

Get optimizers.

+

.describe

+

source +

.describe()
+

+
+

Describe the policy.

+

.forward

+

source +

.forward(
+   obs: th.Tensor, training: bool = True
+)
+

+
+

Forward method.

+

Args

+
    +
  • obs (th.Tensor) : Observation from the environment.
  • +
  • training (bool) : Whether the agent is being trained or not.
  • +
+

Returns

+

Sampled actions, estimated values, ..., depends on specific algorithms.

+

.freeze

+

source +

.freeze(
+   *args, **kwargs
+)
+

+
+

Freeze the policy and start training.

+

.save

+

source +

.save(
+   *args, **kwargs
+)
+

+
+

Save models.

+

.load

+

source +

.load(
+   path: str, device: th.device
+)
+

+
+

Load initial parameters.

+

Args

+
    +
  • path (str) : Import path.
  • +
  • device (th.device) : Device to use.
  • +
+

Returns

+

None.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/common/prototype/base_reward/index.html b/api_docs/common/prototype/base_reward/index.html new file mode 100644 index 00000000..c32cd541 --- /dev/null +++ b/api_docs/common/prototype/base_reward/index.html @@ -0,0 +1,4248 @@ + + + + + + + + + + + + + + + + + + + + + + + + + BaseIntrinsicRewardModule - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

BaseIntrinsicRewardModule

+

source +

BaseIntrinsicRewardModule(
+   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',
+   beta: float = 0.05, kappa: float = 2.5e-05
+)
+

+
+

Base class of intrinsic reward module.

+

Args

+
    +
  • observation_space (gym.Space) : The observation space of environment.
  • +
  • action_space (gym.Space) : The action space of environment.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • beta (float) : The initial weighting coefficient of the intrinsic rewards.
  • +
  • kappa (float) : The decay rate.
  • +
+

Returns

+

Instance of the base intrinsic reward module.

+

Methods:

+

.compute_irs

+

source +

.compute_irs(
+   samples: Dict, step: int = 0
+)
+

+
+

Compute the intrinsic rewards for current samples.

+

Args

+
    +
  • samples (Dict) : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
  • step (int) : The global training step.
  • +
+

Returns

+

The intrinsic rewards.

+

.update

+

source +

.update(
+   samples: Dict
+)
+

+
+

Update the intrinsic reward module if necessary.

+

Args

+
    +
  • samples : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
+

Returns

+

None

+

.add

+

source +

.add(
+   samples: Dict
+)
+

+
+

Add the samples to the intrinsic reward module if necessary. +Used for modules like RE3 that have a storage component.

+

Args

+
    +
  • samples : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
+

Returns

+

None

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/common/prototype/base_storage/index.html b/api_docs/common/prototype/base_storage/index.html new file mode 100644 index 00000000..683faa69 --- /dev/null +++ b/api_docs/common/prototype/base_storage/index.html @@ -0,0 +1,4273 @@ + + + + + + + + + + + + + + + + + + + + + + + + + BaseStorage - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

BaseStorage

+

source +

BaseStorage(
+   observation_space: gym.Space, action_space: gym.Space, device: str,
+   storage_size: int, batch_size: int, num_envs: int
+)
+

+
+

Base class of the storage module.

+

Args

+
    +
  • observation_space (gym.Space) : The observation space of environment.
  • +
  • action_space (gym.Space) : The action space of environment.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • storage_size (int) : The size of the storage.
  • +
  • batch_size (int) : Batch size of samples.
  • +
  • num_envs (int) : The number of parallel environments.
  • +
+

Returns

+

Instance of the base storage.

+

Methods:

+

.to_torch

+

source +

.to_torch(
+   x: np.ndarray
+)
+

+
+

Convert numpy array to torch tensor.

+

Args

+
    +
  • x (np.ndarray) : Numpy array.
  • +
+

Returns

+

Torch tensor.

+

.reset

+

source +

.reset()
+

+
+

Reset the storage.

+

.add

+

source +

.add(
+   *args, **kwargs
+)
+

+
+

Add samples to the storage.

+

.sample

+

source +

.sample(
+   *args, **kwargs
+)
+

+
+

Sample from the storage.

+

.update

+

source +

.update(
+   *args, **kwargs
+)
+

+
+

Update the storage if necessary.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/common/prototype/distributed_agent/index.html b/api_docs/common/prototype/distributed_agent/index.html new file mode 100644 index 00000000..03a8be9c --- /dev/null +++ b/api_docs/common/prototype/distributed_agent/index.html @@ -0,0 +1,4276 @@ + + + + + + + + + + + + + + + + + + + + + + + + + DistributedAgent - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

DistributedAgent

+

source +

DistributedAgent(
+   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,
+   device: str = 'cpu', num_steps: int = 80, num_actors: int = 45, num_learners: int = 4,
+   num_storages: int = 60, **kwargs
+)
+

+
+

Trainer for distributed algorithms.

+

Args

+
    +
  • env (VecEnv) : Vectorized environments for training.
  • +
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • +
  • tag (str) : An experiment tag.
  • +
  • seed (int) : Random seed for reproduction.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • pretraining (bool) : Turn on pre-training model or not.
  • +
  • num_steps (int) : The sample length of per rollout.
  • +
  • num_actors (int) : Number of actors.
  • +
  • num_learners (int) : Number of learners.
  • +
  • num_storages (int) : Number of storages.
  • +
  • kwargs : Arbitrary arguments such as batch_size and hidden_dim.
  • +
+

Returns

+

Distributed agent instance.

+

Methods:

+

.run

+

source +

.run(
+   env: DistributedWrapper, actor_idx: int
+)
+

+
+

Sample function of each actor. Implemented by individual algorithms.

+

Args

+
    +
  • env (DistributedWrapper) : A Gym-like environment wrapped by DistributedWrapper.
  • +
  • actor_idx (int) : The index of actor.
  • +
+

Returns

+

None.

+

.update

+

source +

.update(
+   *args, **kwargs
+)
+

+
+

Update the agent. Implemented by individual algorithms.

+

.train

+

source +

.train(
+   num_train_steps: int, init_model_path: Optional[str] = None, log_interval: int = 1,
+   eval_interval: int = 5000, save_interval: int = 5000, num_eval_episodes: int = 10,
+   th_compile: bool = False
+)
+

+
+

Training function.

+

Args

+
    +
  • num_train_steps (int) : The number of training steps.
  • +
  • init_model_path (Optional[str]) : The path of the initial model.
  • +
  • log_interval (int) : The interval of logging.
  • +
  • eval_interval (int) : The interval of evaluation.
  • +
  • save_interval (int) : The interval of saving model.
  • +
  • num_eval_episodes (int) : The number of evaluation episodes.
  • +
  • th_compile (bool) : Whether to use th.compile or not.
  • +
+

Returns

+

None.

+

.eval

+

source +

.eval(
+   num_eval_episodes: int
+)
+

+
+

Evaluation function.

+

Args

+
    +
  • num_eval_episodes (int) : The number of evaluation episodes.
  • +
+

Returns

+

The evaluation results.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/common/prototype/off_policy_agent/index.html b/api_docs/common/prototype/off_policy_agent/index.html new file mode 100644 index 00000000..f51b1040 --- /dev/null +++ b/api_docs/common/prototype/off_policy_agent/index.html @@ -0,0 +1,4238 @@ + + + + + + + + + + + + + + + + + + + + + + + + + OffPolicyAgent - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

OffPolicyAgent

+

source +

OffPolicyAgent(
+   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,
+   device: str = 'cpu', pretraining: bool = False, num_init_steps: int = 2000, **kwargs
+)
+

+
+

Trainer for off-policy algorithms.

+

Args

+
    +
  • env (VecEnv) : Vectorized environments for training.
  • +
  • eval_env (Optional[VecEnv]) : Vectorized environments for evaluation.
  • +
  • tag (str) : An experiment tag.
  • +
  • seed (int) : Random seed for reproduction.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • pretraining (bool) : Turn on pre-training model or not.
  • +
  • num_init_steps (int) : Number of initial exploration steps.
  • +
  • kwargs : Arbitrary arguments such as batch_size and hidden_dim.
  • +
+

Returns

+

Off-policy agent instance.

+

Methods:

+

.update

+

source +

.update()
+

+
+

Update the agent. Implemented by individual algorithms.

+

.train

+

source +

.train(
+   num_train_steps: int, init_model_path: Optional[str] = None, log_interval: int = 1,
+   eval_interval: int = 5000, save_interval: int = 5000, num_eval_episodes: int = 10,
+   th_compile: bool = False, anneal_lr: bool = False
+)
+

+
+

Training function.

+

Args

+
    +
  • num_train_steps (int) : The number of training steps.
  • +
  • init_model_path (Optional[str]) : The path of the initial model.
  • +
  • log_interval (int) : The interval of logging.
  • +
  • eval_interval (int) : The interval of evaluation.
  • +
  • save_interval (int) : The interval of saving model.
  • +
  • num_eval_episodes (int) : The number of evaluation episodes.
  • +
  • th_compile (bool) : Whether to use th.compile or not.
  • +
  • anneal_lr (bool) : Whether to anneal the learning rate or not.
  • +
+

Returns

+

None.

+

.eval

+

source +

.eval(
+   num_eval_episodes: int
+)
+

+
+

Evaluation function.

+

Args

+
    +
  • num_eval_episodes (int) : The number of evaluation episodes.
  • +
+

Returns

+

The evaluation results.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/common/prototype/on_policy_agent/index.html b/api_docs/common/prototype/on_policy_agent/index.html new file mode 100644 index 00000000..bb4ebd44 --- /dev/null +++ b/api_docs/common/prototype/on_policy_agent/index.html @@ -0,0 +1,4237 @@ + + + + + + + + + + + + + + + + + + + + + + + + + OnPolicyAgent - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

OnPolicyAgent

+

source +

OnPolicyAgent(
+   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,
+   device: str = 'cpu', pretraining: bool = False, num_steps: int = 128
+)
+

+
+

Trainer for on-policy algorithms.

+

Args

+
    +
  • env (VecEnv) : Vectorized environments for training.
  • +
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • +
  • tag (str) : An experiment tag.
  • +
  • seed (int) : Random seed for reproduction.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • pretraining (bool) : Turn on pre-training model or not.
  • +
  • num_steps (int) : The sample length of per rollout.
  • +
+

Returns

+

On-policy agent instance.

+

Methods:

+

.update

+

source +

.update()
+

+
+

Update the agent. Implemented by individual algorithms.

+

.train

+

source +

.train(
+   num_train_steps: int, init_model_path: Optional[str] = None, log_interval: int = 1,
+   eval_interval: int = 100, save_interval: int = 100, num_eval_episodes: int = 10,
+   th_compile: bool = True, anneal_lr: bool = False
+)
+

+
+

Training function.

+

Args

+
    +
  • num_train_steps (int) : The number of training steps.
  • +
  • init_model_path (Optional[str]) : The path of the initial model.
  • +
  • log_interval (int) : The interval of logging.
  • +
  • eval_interval (int) : The interval of evaluation.
  • +
  • save_interval (int) : The interval of saving model.
  • +
  • num_eval_episodes (int) : The number of evaluation episodes.
  • +
  • th_compile (bool) : Whether to use th.compile or not.
  • +
  • anneal_lr (bool) : Whether to anneal the learning rate or not.
  • +
+

Returns

+

None.

+

.eval

+

source +

.eval(
+   num_eval_episodes: int
+)
+

+
+

Evaluation function.

+

Args

+
    +
  • num_eval_episodes (int) : The number of evaluation episodes.
  • +
+

Returns

+

The evaluation results.
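Concrete on-policy agents are trained through this interface. A minimal sketch, assuming A2C is exposed in rllte.agent (it appears in rllte-hub) and accepts the BaseAgent arguments shown above:

from rllte.agent import A2C            # assumed to be exposed in rllte.agent
from rllte.env import make_minigrid_env

if __name__ == "__main__":
    device = "cpu"
    env = make_minigrid_env(env_id="MiniGrid-DoorKey-5x5-v0", num_envs=8, device=device)
    eval_env = make_minigrid_env(env_id="MiniGrid-DoorKey-5x5-v0", num_envs=1, device=device)
    agent = A2C(env=env, eval_env=eval_env, device=device, tag="a2c_minigrid")
    agent.train(num_train_steps=100_000, log_interval=10, eval_interval=100)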

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/common/timer/index.html b/api_docs/common/timer/index.html new file mode 100644 index 00000000..7ba41f1e --- /dev/null +++ b/api_docs/common/timer/index.html @@ -0,0 +1,4173 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Timer - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

Timer

+

source +


+

+
+

The timer (calculagraph) class.

+

Methods:

+

.reset

+

source +

.reset()
+

+
+

Reset the timer.

+

.total_time

+

source +

.total_time()
+

+
+

Get the total time.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/env/atari/__init__/index.html b/api_docs/env/atari/__init__/index.html new file mode 100644 index 00000000..6b909d75 --- /dev/null +++ b/api_docs/env/atari/__init__/index.html @@ -0,0 +1,4224 @@ + + + + + + + + + + + + + + + + + + + + + + + + + make_atari_env - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

make_atari_env

+

source +

.make_atari_env(
+   env_id: str = 'Alien-v5', num_envs: int = 8, device: str = 'cpu', seed: int = 1,
+   frame_stack: int = 4, asynchronous: bool = True
+)
+

+
+

Create Atari environments.

+

Args

+
    +
  • env_id (str) : Name of environment.
  • +
  • num_envs (int) : Number of environments.
  • +
  • device (str) : Device to convert the data.
  • +
  • seed (int) : Random seed.
  • +
  • frame_stack (int) : Number of stacked frames.
  • +
  • asynchronous (bool) : True for creating asynchronous environments, + and False for creating synchronous environments.
  • +
+

Returns

+

The vectorized environments.

+
+

make_envpool_atari_env

+

source +

.make_envpool_atari_env(
+   env_id: str = 'Alien-v5', num_envs: int = 8, device: str = 'cpu', seed: int = 1,
+   asynchronous: bool = True
+)
+

+
+

Create Atari environments with envpool.

+

Args

+
    +
  • env_id (str) : Name of environment.
  • +
  • num_envs (int) : Number of environments.
  • +
  • device (str) : Device to convert the data.
  • +
  • seed (int) : Random seed.
  • +
  • asynchronous (bool) : True for creating asynchronous environments, + and False for creating synchronous environments.
  • +
+

Returns

+

The vectorized environments.
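For example (both factories are assumed to be exported from rllte.env; the EnvPool variant needs the optional envpool dependency):

from rllte.env import make_atari_env, make_envpool_atari_env  # exports assumed from rllte.env

if __name__ == "__main__":
    # Standard asynchronous vectorization.
    envs = make_atari_env(env_id="Alien-v5", num_envs=8, device="cpu", seed=1, frame_stack=4)
    # EnvPool-based vectorization (faster C++ backend, requires envpool).
    pool_envs = make_envpool_atari_env(env_id="Alien-v5", num_envs=8, device="cpu", seed=1)
    print(envs.observation_space, envs.action_space)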

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/env/bullet/__init__/index.html b/api_docs/env/bullet/__init__/index.html new file mode 100644 index 00000000..94c31479 --- /dev/null +++ b/api_docs/env/bullet/__init__/index.html @@ -0,0 +1,4126 @@ + + + + + + + + + + + + + + + + + + + + + + + + + make_bullet_env - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

make_bullet_env

+

source +

.make_bullet_env(
+   env_id: str = 'AntBulletEnv-v0', num_envs: int = 1, device: str = 'cpu', seed: int = 0,
+   parallel: bool = True
+)
+

+
+

Create PyBullet robotics environments.

+

Args

+
    +
  • env_id (str) : Name of environment.
  • +
  • num_envs (int) : Number of environments.
  • +
  • device (str) : Device to convert the data.
  • +
  • seed (int) : Random seed.
  • +
  • parallel (bool) : True for creating asynchronous environments, and False + for creating synchronous environments.
  • +
+

Returns

+

The vectorized environments.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/env/dmc/__init__/index.html b/api_docs/env/dmc/__init__/index.html new file mode 100644 index 00000000..3c1d485a --- /dev/null +++ b/api_docs/env/dmc/__init__/index.html @@ -0,0 +1,4133 @@ + + + + + + + + + + + + + + + + + + + + + + + + + make_dmc_env - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

make_dmc_env

+

source +

.make_dmc_env(
+   env_id: str = 'humanoid_run', num_envs: int = 1, device: str = 'cpu', seed: int = 1,
+   visualize_reward: bool = True, from_pixels: bool = False, height: int = 84,
+   width: int = 84, frame_stack: int = 3, action_repeat: int = 1, asynchronous: bool = True
+)
+

+
+

Create DeepMind Control Suite environments.

+

Args

+
    +
  • env_id (str) : Name of environment.
  • +
  • num_envs (int) : Number of environments.
  • +
  • device (str) : Device to convert the data.
  • +
  • seed (int) : Random seed.
  • +
  • visualize_reward (bool) : Whether to visualize the reward in the rendered scene; typically the opposite of from_pixels.
  • +
  • from_pixels (bool) : Provide image-based observations or not.
  • +
  • height (int) : Image observation height.
  • +
  • width (int) : Image observation width.
  • +
  • frame_stack (int) : Number of stacked frames.
  • +
  • action_repeat (int) : Number of action repeats.
  • +
  • asynchronous (bool) : True for creating asynchronous environments, + and False for creating synchronous environments.
  • +
+

Returns

+

The vectorized environments.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/env/minigrid/__init__/index.html b/api_docs/env/minigrid/__init__/index.html new file mode 100644 index 00000000..a0ccc620 --- /dev/null +++ b/api_docs/env/minigrid/__init__/index.html @@ -0,0 +1,4009 @@ + + + + + + + + + + + + + + + + + + + + + init - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

make_minigrid_env

+

source +

.make_minigrid_env(
+   env_id: str = 'MiniGrid-DoorKey-5x5-v0', num_envs: int = 8,
+   fully_observable: bool = True, fully_numerical: bool = False, seed: int = 0,
+   frame_stack: int = 1, device: str = 'cpu', asynchronous: bool = True
+)
+

+
+

Create MiniGrid environments.

+

Args

+
    +
  • env_id (str) : Name of environment.
  • +
  • num_envs (int) : Number of environments.
  • +
  • fully_observable (bool) : Fully observable gridworld using a compact grid encoding instead of the agent view.
  • +
  • fully_numerical (bool) : Transforms the observation space (that has a textual component) to a fully numerical + observation space, where the textual instructions are replaced by arrays representing the indices of each + word in a fixed vocabulary.
  • +
  • seed (int) : Random seed.
  • +
  • frame_stack (int) : Number of stacked frames.
  • +
  • device (str) : Device to convert the data.
  • +
  • asynchronous (bool) : True for creating asynchronous environments, + and False for creating synchronous environments.
  • +
+

Returns

+

The vectorized environments.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/env/procgen/__init__/index.html b/api_docs/env/procgen/__init__/index.html new file mode 100644 index 00000000..b112a385 --- /dev/null +++ b/api_docs/env/procgen/__init__/index.html @@ -0,0 +1,4237 @@ + + + + + + + + + + + + + + + + + + + + + + + + + make_procgen_env - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

make_procgen_env

+

source +

.make_procgen_env(
+   env_id: str = 'bigfish', num_envs: int = 64, device: str = 'cpu', seed: int = 1,
+   gamma: float = 0.99, num_levels: int = 200, start_level: int = 0,
+   distribution_mode: str = 'easy'
+)
+

+
+

Create Procgen environments.

+

Args

+
    +
  • env_id (str) : Name of environment.
  • +
  • num_envs (int) : Number of environments.
  • +
  • device (str) : Device to convert the data.
  • +
  • seed (int) : Random seed.
  • +
  • gamma (float) : A discount factor.
  • +
  • num_levels (int) : The number of unique levels that can be generated. + Set to 0 to use unlimited levels.
  • +
  • start_level (int) : The lowest seed that will be used to generate levels. + 'start_level' and 'num_levels' fully specify the set of possible levels.
  • +
  • distribution_mode (str) : What variant of the levels to use, the options are "easy", + "hard", "extreme", "memory", "exploration".
  • +
+

Returns

+

The vectorized environment.

+
+

make_envpool_procgen_env

+

source +

.make_envpool_procgen_env(
+   env_id: str = 'bigfish', num_envs: int = 64, device: str = 'cpu', seed: int = 1,
+   gamma: float = 0.99, num_levels: int = 200, start_level: int = 0,
+   distribution_mode: str = 'easy', asynchronous: bool = True
+)
+

+
+

Create Procgen environments with envpool.

+

Args

+
    +
  • env_id (str) : Name of environment.
  • +
  • num_envs (int) : Number of environments.
  • +
  • device (str) : Device to convert the data.
  • +
  • seed (int) : Random seed.
  • +
  • gamma (float) : A discount factor.
  • +
  • num_levels (int) : The number of unique levels that can be generated. + Set to 0 to use unlimited levels.
  • +
  • start_level (int) : The lowest seed that will be used to generate levels. + 'start_level' and 'num_levels' fully specify the set of possible levels.
  • +
  • distribution_mode (str) : What variant of the levels to use, the options are "easy", + "hard", "extreme", "memory", "exploration".
  • +
  • asynchronous (bool) : True for creating asynchronous environments, + and False for creating synchronous environments.
  • +
+

Returns

+

The vectorized environments.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/env/utils/index.html b/api_docs/env/utils/index.html new file mode 100644 index 00000000..f84964da --- /dev/null +++ b/api_docs/env/utils/index.html @@ -0,0 +1,4125 @@ + + + + + + + + + + + + + + + + + + + + + + + + + make_rllte_env - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

make_rllte_env

+

source +

.make_rllte_env(
+   env_id: Union[str, Callable[..., gym.Env]], num_envs: int = 1, seed: int = 1,
+   device: str = 'cpu', asynchronous: bool = True, env_kwargs: Optional[Dict[str,
+   Any]] = None
+)
+

+
+

Create environments that adapt to the rllte engine.

+

Args

+
    +
  • env_id (Union[str, Callable[..., gym.Env]]) : either the env ID, the env class or a callable returning an env
  • +
  • num_envs (int) : Number of environments.
  • +
  • seed (int) : Random seed.
  • +
  • device (str) : Device to convert data.
  • +
  • asynchronous (bool) : True for AsyncVectorEnv and False for SyncVectorEnv.
  • +
  • env_kwargs : Optional keyword argument to pass to the env constructor.
  • +
+

Returns

+

Environment wrapped by TorchVecEnvWrapper.
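This is the factory to use for custom or third-party environments. A sketch, assuming make_rllte_env is exported from rllte.env like the other factories; the CartPole-v1 task is only an example:

import gymnasium as gym
from rllte.env import make_rllte_env  # export assumed from rllte.env

if __name__ == "__main__":
    # From a registered Gymnasium ID...
    envs = make_rllte_env(env_id="CartPole-v1", num_envs=4, seed=1, device="cpu")
    # ...or from a callable that builds the environment.
    envs = make_rllte_env(env_id=lambda: gym.make("CartPole-v1"), num_envs=4, seed=1, device="cpu")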

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/evaluation/comparison/index.html b/api_docs/evaluation/comparison/index.html new file mode 100644 index 00000000..e95d2470 --- /dev/null +++ b/api_docs/evaluation/comparison/index.html @@ -0,0 +1,4203 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Comparison - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

Comparison

+

source +

Comparison(
+   scores_x: np.ndarray, scores_y: np.ndarray, get_ci: bool = False,
+   method: str = 'percentile', reps: int = 2000, confidence_interval_size: float = 0.95,
+   random_state: Optional[random.RandomState] = None
+)
+

+
+

Compare the performance between algorithms. Based on: +https://github.com/google-research/rliable/blob/master/rliable/metrics.py

+

Args

+
    +
  • scores_x (np.ndarray) : A matrix of size (num_runs_x x num_tasks) where scores[n][m] + represent the score on run n of task m for algorithm X.
  • +
  • scores_y (np.ndarray) : A matrix of size (num_runs_y x num_tasks) where scores[n][m] + represent the score on run n of task m for algorithm Y.
  • +
  • get_ci (bool) : Compute CIs or not.
  • +
  • method (str) : One of basic, percentile, bc (identical to debiased, + bias-corrected), or bca.
  • +
  • reps (int) : Number of bootstrap replications.
  • +
  • confidence_interval_size (float) : Coverage of confidence interval.
  • +
  • random_state (int) : If specified, ensures reproducibility in uncertainty estimates.
  • +
+

Returns

+

Comparison instance.

+

Methods:

+

.compute_poi

+

source +

.compute_poi()
+

+
+

Compute the overall probability of improvement of algorithm X over Y.

+

.get_interval_estimates

+

source +

.get_interval_estimates(
+   scores_x: np.ndarray, scores_y: np.ndarray, metric: Callable
+)
+

+
+

Computes interval estimation of the above performance evaluators.

+

Args

+
    +
  • scores_x (np.ndarray) : A matrix of size (num_runs_x x num_tasks) where scores[n][m] + represent the score on run n of task m for algorithm X.
  • +
  • scores_y (np.ndarray) : A matrix of size (num_runs_y x num_tasks) where scores[n][m] + represent the score on run n of task m for algorithm Y.
  • +
  • metric (Callable) : One of the above performance evaluators used for estimation.
  • +
+

Returns

+

Confidence intervals.
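A minimal sketch with synthetic score matrices, assuming Comparison is exported from rllte.evaluation; shapes follow the (num_runs x num_tasks) convention described above:

import numpy as np
from rllte.evaluation import Comparison  # export assumed from rllte.evaluation

# Synthetic scores: 10 runs x 5 tasks per algorithm.
scores_x = np.random.rand(10, 5)
scores_y = np.random.rand(10, 5)

comp = Comparison(scores_x=scores_x, scores_y=scores_y)
poi = comp.compute_poi()  # overall probability of improvement of X over Y
print(poi)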

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/evaluation/performance/index.html b/api_docs/evaluation/performance/index.html new file mode 100644 index 00000000..77c13297 --- /dev/null +++ b/api_docs/evaluation/performance/index.html @@ -0,0 +1,4318 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Performance - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

Performance

+

source +

Performance(
+   scores: np.ndarray, get_ci: bool = False, method: str = 'percentile',
+   task_bootstrap: bool = False, reps: int = 50000,
+   confidence_interval_size: float = 0.95,
+   random_state: Optional[random.RandomState] = None
+)
+

+
+

Evaluate the performance of an algorithm. Based on: +https://github.com/google-research/rliable/blob/master/rliable/metrics.py

+

Args

+
    +
  • scores (np.ndarray) : A matrix of size (num_runs x num_tasks) where scores[n][m] + represent the score on run n of task m.
  • +
  • get_ci (bool) : Compute CIs or not.
  • +
  • method (str) : One of basic, percentile, bc (identical to debiased, + bias-corrected), or bca.
  • +
  • task_bootstrap (bool) : Whether to perform bootstrapping over tasks in addition to + runs. Defaults to False. See StratifiedBootstrap for more details.
  • +
  • reps (int) : Number of bootstrap replications.
  • +
  • confidence_interval_size (float) : Coverage of confidence interval.
  • +
  • random_state (int) : If specified, ensures reproducibility in uncertainty estimates.
  • +
+

Returns

+

Performance evaluator.

+

Methods:

+

.aggregate_mean

+

source +

.aggregate_mean()
+

+
+

Computes mean of sample mean scores per task.

+

.aggregate_median

+

source +

.aggregate_median()
+

+
+

Computes median of sample mean scores per task.

+

.aggregate_og

+

source +

.aggregate_og(
+   gamma: float = 1.0
+)
+

+
+

Computes optimality gap across all runs and tasks.

+

Args

+
    +
  • gamma (float) : Threshold for optimality gap. All scores above gamma are clipped +to gamma.
  • +
+

Returns

+

Optimality gap at threshold gamma.

+

.aggregate_iqm

+

source +

.aggregate_iqm()
+

+
+

Computes the interquartile mean across runs and tasks.

+

.get_interval_estimates

+

source +

.get_interval_estimates(
+   scores: np.ndarray, metric: Callable
+)
+

+
+

Computes interval estimation of the above performance evaluators.

+

Args

+
    +
  • scores (np.ndarray) : A matrix of size (num_runs x num_tasks) where scores[n][m] + represent the score on run n of task m.
  • +
  • metric (Callable) : One of the above performance evaluators used for estimation.
  • +
+

Returns

+

Confidence intervals.

+

.create_performance_profile

+

source +

.create_performance_profile(
+   tau_list: Union[List[float], np.ndarray], use_score_distribution: bool = True
+)
+

+
+

Method for calculating performance profiles.

+

Args

+
    +
  • tau_list (Union[List[float], np.ndarray]) : List of 1D numpy array of threshold + values on which the profile is evaluated.
  • +
  • use_score_distribution (bool) : Whether to report score distributions or average + score distributions.
  • +
+

Returns

+

Point and interval estimates of profiles evaluated at all thresholds in 'tau_list'.
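For example, with a synthetic (num_runs x num_tasks) score matrix (Performance is assumed to be exported from rllte.evaluation):

import numpy as np
from rllte.evaluation import Performance  # export assumed from rllte.evaluation

scores = np.random.rand(10, 5)  # 10 runs x 5 tasks, synthetic
perf = Performance(scores=scores)

print(perf.aggregate_mean())         # mean of per-task mean scores
print(perf.aggregate_median())       # median of per-task mean scores
print(perf.aggregate_iqm())          # interquartile mean over all runs and tasks
print(perf.aggregate_og(gamma=1.0))  # optimality gap at threshold 1.0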

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/evaluation/utils/index.html b/api_docs/evaluation/utils/index.html new file mode 100644 index 00000000..3812bbc1 --- /dev/null +++ b/api_docs/evaluation/utils/index.html @@ -0,0 +1,3991 @@ + + + + + + + + + + + + + + + + + + + + + Utils - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

min_max_normalize

+

source +

.min_max_normalize(
+   value: np.ndarray, min_scores: np.ndarray, max_scores: np.ndarray
+)
+

+
+

Perform min-max normalization.
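For instance, to normalize raw task scores against per-task reference minima and maxima before feeding them to the evaluators above (the import path and the numbers are illustrative):

import numpy as np
from rllte.evaluation import min_max_normalize  # export assumed from rllte.evaluation

raw = np.array([[120.0, 3500.0],
                [150.0, 4200.0]])        # runs x tasks, illustrative values
mins = np.array([100.0, 3000.0])         # e.g. random-policy scores per task
maxs = np.array([200.0, 5000.0])         # e.g. reference (human) scores per task

print(min_max_normalize(raw, mins, maxs))  # standard (value - min) / (max - min) scaling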

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/evaluation/visualization/index.html b/api_docs/evaluation/visualization/index.html new file mode 100644 index 00000000..13893316 --- /dev/null +++ b/api_docs/evaluation/visualization/index.html @@ -0,0 +1,4300 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Visualization - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+ +
+
+ + + +
+
+ + + + + + + +

+

plot_interval_estimates

+

source +

.plot_interval_estimates(
+   metrics_dict: Dict[str, Dict], metric_names: List[str], algorithms: List[str],
+   colors: Optional[List[str]] = None, color_palette: str = 'colorblind',
+   max_ticks: float = 4, subfigure_width: float = 3.4, row_height: float = 0.37,
+   interval_height: float = 0.6, xlabel_y_coordinate: float = -0.16,
+   xlabel: str = 'Normalized Score', **kwargs
+)
+

+
+

Plots various metrics of algorithms with stratified confidence intervals. +Based on: https://github.com/google-research/rliable/blob/master/rliable/plot_utils.py +See https://docs.rllte.dev/tutorials/evaluation/ for usage tutorials.

+

Args

+
    +
  • metrics_dict (Dict[str, Dict]) : The dictionary of various metrics of algorithms.
  • +
  • metric_names (List[str]) : Names of the metrics corresponding to metrics_dict.
  • +
  • algorithms (List[str]) : List of methods used for plotting.
  • +
  • colors (Optional[List[str]]) : Maps each method to a color. + If None, then this mapping is created based on color_palette.
  • +
  • color_palette (str) : seaborn.color_palette object for mapping each method to a color.
  • +
  • max_ticks (float) : Find nice tick locations with no more than max_ticks. Passed to plt.MaxNLocator.
  • +
  • subfigure_width (float) : Width of each subfigure.
  • +
  • row_height (float) : Height of each row in a subfigure.
  • +
  • interval_height (float) : Height of confidence intervals.
  • +
  • xlabel_y_coordinate (float) : y-coordinate of the x-axis label.
  • +
  • xlabel (str) : Label for the x-axis.
  • +
  • kwargs : Arbitrary keyword arguments.
  • +
+

Returns

+

A matplotlib figure and an array of Axes.

+
+

plot_performance_profile

+

source +

.plot_performance_profile(
+   profile_dict: Dict[str, List], tau_list: np.ndarray,
+   use_non_linear_scaling: bool = False, figsize: Tuple[float, float] = (10.0, 5.0),
+   colors: Optional[List[str]] = None, color_palette: str = 'colorblind',
+   alpha: float = 0.15, xticks: Optional[Iterable] = None,
+   yticks: Optional[Iterable] = None,
+   xlabel: Optional[str] = 'Normalized Score ($\\tau$)',
+   ylabel: Optional[str] = 'Fraction of runs with score $>\\tau$',
+   linestyles: Optional[str] = None, **kwargs
+)
+

+
+

Plots performance profiles with stratified confidence intervals. +Based on: https://github.com/google-research/rliable/blob/master/rliable/plot_utils.py +See https://docs.rllte.dev/tutorials/evaluation/ for usage tutorials.

+

Args

+
    +
  • profile_dict (Dict[str, List]) : A dictionary mapping a method to its performance.
  • +
  • tau_list (np.ndarray) : 1D numpy array of threshold values on which the profile is evaluated.
  • +
  • use_non_linear_scaling (bool) : Whether to scale the x-axis in proportion to the + number of runs within any specified range.
  • +
  • figsize (Tuple[float]) : Size of the figure passed to matplotlib.subplots.
  • +
  • colors (Optional[List[str]]) : Maps each method to a color. If None, then + this mapping is created based on color_palette.
  • +
  • color_palette (str) : seaborn.color_palette object for mapping each method to a color.
  • +
  • alpha (float) : Changes the transparency of the shaded regions corresponding to the confidence intervals.
  • +
  • xticks (Optional[Iterable]) : The list of x-axis tick locations. Passing an empty list removes all xticks.
  • +
  • yticks (Optional[Iterable]) : The list of y-axis tick locations between 0 and 1. + If None, defaults to [0, 0.25, 0.5, 0.75, 1.0].
  • +
  • xlabel (str) : Label for the x-axis.
  • +
  • ylabel (str) : Label for the y-axis.
  • +
  • linestyles (str) : Maps each method to a linestyle. If None, then the 'solid' linestyle is used for all methods.
  • +
  • kwargs : Arbitrary keyword arguments for annotating and decorating the + figure. For valid arguments, refer to _annotate_and_decorate_axis.
  • +
+

Returns

+

A matplotlib figure and axes.Axes which contains the plot for performance profiles.

+
+

plot_probability_improvement

+

source +

.plot_probability_improvement(
+   poi_dict: Dict[str, List], pair_separator: str = '_', figsize: Tuple[float,
+   float] = (3.7, 2.1), colors: Optional[List[str]] = None,
+   color_palette: str = 'colorblind', alpha: float = 0.75, interval_height: float = 0.6,
+   xticks: Optional[Iterable] = [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
+   xlabel: str = 'P(X > Y)', left_ylabel: str = 'Algorithm X',
+   right_ylabel: str = 'Algorithm Y', **kwargs
+)
+

+
+

Plots probability of improvement with stratified confidence intervals. +Based on: https://github.com/google-research/rliable/blob/master/rliable/plot_utils.py +See https://docs.rllte.dev/tutorials/evaluation/ for usage tutorials.

+

Args

+
    +
  • poi_dict (Dict[str, List]) : The dictionary of probability of improvements of different algorithms pairs.
  • +
  • pair_separator (str) : Each algorithm pair name in dictionaries above is joined by a string separator. + For example, if the pairs are specified as 'X;Y', then the separator corresponds to ';'. Defaults to '_'.
  • +
  • figsize (Tuple[float]) : Size of the figure passed to matplotlib.subplots.
  • +
  • colors (Optional[List[str]]) : Maps each method to a color. If None, then this mapping + is created based on color_palette.
  • +
  • color_palette (str) : seaborn.color_palette object for mapping each method to a color.
  • +
  • interval_height (float) : Height of confidence intervals.
  • +
  • alpha (float) : Changes the transparency of the shaded regions corresponding to the confidence intervals.
  • +
  • xticks (Optional[Iterable]) : The list of x-axis tick locations. Passing an empty list removes all xticks.
  • +
  • xlabel (str) : Label for the x-axis.
  • +
  • left_ylabel (str) : Label for the left y-axis. Defaults to 'Algorithm X'.
  • +
  • right_ylabel (str) : Label for the left y-axis. Defaults to 'Algorithm Y'.
  • +
  • kwargs : Arbitrary keyword arguments for annotating and decorating the + figure. For valid arguments, refer to _annotate_and_decorate_axis.
  • +
+

Returns

+

A matplotlib figure and axes.Axes which contains the plot for probability of improvement.

+
+

plot_sample_efficiency_curve

+

source +

.plot_sample_efficiency_curve(
+   sampling_dict: Dict[str, Dict], frames: np.ndarray, algorithms: List[str],
+   colors: Optional[List[str]] = None, color_palette: str = 'colorblind',
+   figsize: Tuple[float, float] = (3.7, 2.1),
+   xlabel: Optional[str] = 'Number of Frames (in millions)',
+   ylabel: Optional[str] = 'Aggregate Human Normalized Score',
+   labelsize: str = 'xx-large', ticklabelsize: str = 'xx-large', **kwargs
+)
+

+
+

Plots an aggregate metric with CIs as a function of environment frames. +Based on: https://github.com/google-research/rliable/blob/master/rliable/plot_utils.py +See https://docs.rllte.dev/tutorials/evaluation/ for usage tutorials.

+

Args

+
    +
  • sampling_dict (Dict[str, Dict]) : A dictionary of values with stratified confidence intervals in different frames.
  • +
  • frames (np.ndarray) : Array containing environment frames to mark on the x-axis.
  • +
  • algorithms (List[str]) : List of methods used for plotting.
  • +
  • colors (Optional[List[str]]) : Maps each method to a color. If None, then this mapping + is created based on color_palette.
  • +
  • color_palette (str) : seaborn.color_palette object for mapping each method to a color.
  • +
  • figsize (Tuple[float]) : Size of the figure passed to matplotlib.subplots.
  • +
  • xlabel (str) : Label for the x-axis.
  • +
  • ylabel (str) : Label for the y-axis.
  • +
  • labelsize (str) : Font size of the axis labels.
  • +
  • ticklabelsize (str) : Font size of the tick labels.
  • +
  • kwargs : Arbitrary keyword arguments.
  • +
+

Returns

+

A matplotlib figure and an array of Axes.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/hub/atari/index.html b/api_docs/hub/atari/index.html new file mode 100644 index 00000000..eafcf3de --- /dev/null +++ b/api_docs/hub/atari/index.html @@ -0,0 +1,4269 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Atari - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

Atari

+

source +


+

+
+

Scores and learning curves of various RL algorithms on the full Atari benchmark. +Environment link: https://github.com/Farama-Foundation/Arcade-Learning-Environment +Number of environments: 57 +Number of training steps: 10,000,000 +Number of seeds: 10 +Added algorithms: [PPO]

+

Methods:

+

.load_scores

+

source +

.load_scores(
+   env_id: str, agent: str
+)
+

+
+

Returns final performance.

+

Args

+
    +
  • env_id (str) : Environment ID.
  • +
  • agent (str) : Agent name.
  • +
+

Returns

+

Test scores data array with shape (N_SEEDS, N_POINTS).

+

.load_curves

+

source +

.load_curves(
+   env_id: str, agent: str
+)
+

+
+

Returns learning curves using a Dict of NumPy arrays.

+

Args

+
    +
  • env_id (str) : Environment ID.
  • +
  • agent_id (str) : Agent name.
  • +
+

Returns

+
    +
  • Learning curves data, a Dict (curves) with entries:
  • +
  • train : np.ndarray(shape=(N_SEEDS, N_POINTS))
  • +
  • eval : np.ndarray(shape=(N_SEEDS, N_POINTS))
  • +
+

.load_models

+

source +

.load_models(
+   env_id: str, agent: str, seed: int, device: str = 'cpu'
+)
+

+
+

Load the model from the hub.

+

Args

+
    +
  • env_id (str) : Environment ID.
  • +
  • agent (str) : Agent name.
  • +
  • seed (int) : The seed to load.
  • +
  • device (str) : The device to load the model on.
  • +
+

Returns

+

The loaded model.

+

.load_apis

+

source +

.load_apis(
+   env_id: str, agent: str, seed: int, device: str = 'cpu'
+)
+

+
+

Load a training API.

+

Args

+
    +
  • env_id (str) : Environment ID.
  • +
  • agent (str) : Agent name.
  • +
  • seed (int) : The seed to load.
  • +
  • device (str) : The device to load the model on.
  • +
+

Returns

+

The loaded API.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/hub/dmc/index.html b/api_docs/hub/dmc/index.html new file mode 100644 index 00000000..ecca9f26 --- /dev/null +++ b/api_docs/hub/dmc/index.html @@ -0,0 +1,4304 @@ + + + + + + + + + + + + + + + + + + + + + + + + + DMControl - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

DMControl

+

source +


+

+
+

Scores and learning curves of various RL algorithms on the full +DeepMind Control Suite benchmark.

+
+

Environment link: https://github.com/google-deepmind/dm_control +Number of environments: 27 +Number of training steps: 10,000,000 for humanoid, 2,000,000 for others +Number of seeds: 10 +Added algorithms: [SAC, DrQ-v2]

+

Methods:

+

.get_obs_type

+

source +

.get_obs_type(
+   agent: str
+)
+

+
+

Returns the observation type of the agent.

+

Args

+
    +
  • agent (str) : Agent name.
  • +
+

Returns

+

Observation type.

+

.load_scores

+

source +

.load_scores(
+   env_id: str, agent: str
+)
+

+
+

Returns final performance.

+

Args

+
    +
  • env_id (str) : Environment ID.
  • +
  • agent (str) : Agent name.
  • +
+

Returns

+

Test scores data array with shape (N_SEEDS, N_POINTS).

+

.load_curves

+

source +

.load_curves(
+   env_id: str, agent: str
+)
+

+
+

Returns learning curves using a Dict of NumPy arrays.

+

Args

+
    +
  • env_id (str) : Environment ID.
  • +
  • agent (str) : Agent name.
  • +
  • obs_type (str) : A type from ['state', 'pixel'].
  • +
+

Returns

+
    +
  • train : np.ndarray(shape=(N_SEEDS, N_POINTS))
  • +
  • eval : np.ndarray(shape=(N_SEEDS, N_POINTS)). The curves are returned as a Dict containing the 'train' and 'eval' entries listed above.
  • +
+

.load_models

+

source +

.load_models(
+   env_id: str, agent: str, seed: int, device: str = 'cpu'
+)
+

+
+

Load the model from the hub.

+

Args

+
    +
  • env_id (str) : Environment ID.
  • +
  • agent (str) : Agent name.
  • +
  • seed (int) : The seed to load.
  • +
  • device (str) : The device to load the model on.
  • +
+

Returns

+

The loaded model.

+

.load_apis

+

source +

.load_apis(
+   env_id: str, agent: str, seed: int, device: str = 'cpu'
+)
+

+
+

Load a training API.

+

Args

+
    +
  • env_id (str) : Environment ID.
  • +
  • agent (str) : Agent name.
  • +
  • seed (int) : The seed to load.
  • +
  • device (str) : The device to load the model on.
  • +
+

Returns

+

The loaded API.
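A short sketch combining .get_obs_type and .load_models, assuming the class is importable as rllte.hub.DMControl; the agent key 'DrQ-v2' and the env_id value are illustrative and may differ from the exact strings used by the hub:

```python
from rllte.hub import DMControl  # assumed import path

hub = DMControl()
# Check whether the stored DrQ-v2 runs used state- or pixel-based observations.
obs_type = hub.get_obs_type(agent="DrQ-v2")
print(obs_type)

# Load the trained model of a single seed onto the CPU (env_id is illustrative).
model = hub.load_models(env_id="walker_walk", agent="DrQ-v2", seed=1, device="cpu")
print(type(model))
```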

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/hub/minigrid/index.html b/api_docs/hub/minigrid/index.html new file mode 100644 index 00000000..e4c5812c --- /dev/null +++ b/api_docs/hub/minigrid/index.html @@ -0,0 +1,4269 @@ + + + + + + + + + + + + + + + + + + + + + + + + + MiniGrid - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

MiniGrid

+

source +


+

+
+

Scores and learning curves of various RL algorithms on the MiniGrid benchmark. +Environment link: https://github.com/Farama-Foundation/Minigrid +Number of environments: 16 +Number of training steps: 1,000,000 +Number of seeds: 10 +Added algorithms: [A2C]

+

Methods:

+

.load_scores

+

source +

.load_scores(
+   env_id: str, agent: str
+)
+

+
+

Returns final performance.

+

Args

+
    +
  • env_id (str) : Environment ID.
  • +
  • agent (str) : Agent name.
  • +
+

Returns

+

Test scores data array with shape (N_SEEDS, N_POINTS).

+

.load_curves

+

source +

.load_curves(
+   env_id: str, agent: str
+)
+

+
+

Returns learning curves using a Dict of NumPy arrays.

+

Args

+
    +
  • env_id (str) : Environment ID.
  • +
  • agent (str) : Agent name.
  • +
+

Returns

+
    +
  • train : np.ndarray(shape=(N_SEEDS, N_POINTS))
  • +
  • eval : np.ndarray(shape=(N_SEEDS, N_POINTS)). The curves are returned as a Dict containing the 'train' and 'eval' entries listed above.
  • +
+

.load_models

+

source +

.load_models(
+   env_id: str, agent: str, seed: int, device: str = 'cpu'
+)
+

+
+

Load the model from the hub.

+

Args

+
    +
  • env_id (str) : Environment ID.
  • +
  • agent (str) : Agent name.
  • +
  • seed (int) : The seed to load.
  • +
  • device (str) : The device to load the model on.
  • +
+

Returns

+

The loaded model.

+

.load_apis

+

source +

.load_apis(
+   env_id: str, agent: str, seed: int, device: str = 'cpu'
+)
+

+
+

Load a training API.

+

Args

+
    +
  • env_id (str) : Environment ID.
  • +
  • agent (str) : Agent name.
  • +
  • seed (int) : The seed to load.
  • +
  • device (str) : The device to load the model on.
  • +
+

Returns

+

The loaded API.
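As a hedged sketch, the curves can be aggregated across seeds with plain NumPy; the import path, the env_id, and the 'train' key of the returned Dict are assumptions for illustration:

```python
from rllte.hub import MiniGrid  # assumed import path

hub = MiniGrid()
# Learning curves of A2C on one task (env_id is illustrative).
curves = hub.load_curves(env_id="MiniGrid-Empty-5x5-v0", agent="A2C")
train = curves["train"]                  # assumed key, shape (N_SEEDS, N_POINTS)
mean, std = train.mean(axis=0), train.std(axis=0)
print(mean[-1], std[-1])                 # final mean return and its spread across seeds
```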

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/hub/procgen/index.html b/api_docs/hub/procgen/index.html new file mode 100644 index 00000000..0163b3f2 --- /dev/null +++ b/api_docs/hub/procgen/index.html @@ -0,0 +1,4269 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Procgen - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

Procgen

+

source +


+

+
+

Scores and learning curves of various RL algorithms on the full Procgen benchmark. +Environment link: https://github.com/openai/procgen +Number of environments: 16 +Number of training steps: 25,000,000 +Number of seeds: 10 +Added algorithms: [PPO]

+

Methods:

+

.load_scores

+

source +

.load_scores(
+   env_id: str, agent: str
+)
+

+
+

Returns final performance.

+

Args

+
    +
  • env_id (str) : Environment ID.
  • +
  • agent (str) : Agent name.
  • +
+

Returns

+

Test scores data array with shape (N_SEEDS, N_POINTS).

+

.load_curves

+

source +

.load_curves(
+   env_id: str, agent: str
+)
+

+
+

Returns learning curves using a Dict of NumPy arrays.

+

Args

+
    +
  • env_id (str) : Environment ID.
  • +
  • agent (str) : Agent name.
  • +
+

Returns

+
    +
  • train : np.ndarray(shape=(N_SEEDS, N_POINTS))
  • +
  • eval : np.ndarray(shape=(N_SEEDS, N_POINTS)). The curves are returned as a Dict containing the 'train' and 'eval' entries listed above.
  • +
+

.load_models

+

source +

.load_models(
+   env_id: str, agent: str, seed: int, device: str = 'cpu'
+)
+

+
+

Load the model from the hub.

+

Args

+
    +
  • env_id (str) : Environment ID.
  • +
  • agent (str) : Agent name.
  • +
  • seed (int) : The seed to load.
  • +
  • device (str) : The device to load the model on.
  • +
+

Returns

+

The loaded model.

+

.load_apis

+

source +

.load_apis(
+   env_id: str, agent: str, seed: int, device: str = 'cpu'
+)
+

+
+

Load a training API.

+

Args

+
    +
  • env_id (str) : Environment ID.
  • +
  • agent (str) : Agent name.
  • +
  • seed (int) : The seed to load.
  • +
  • device (str) : The device to load the model on.
  • +
+

Returns

+

The loaded API.
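A brief sketch of .load_apis, assuming the class is importable as rllte.hub.Procgen; the env_id and device strings are illustrative, and the exact interface of the returned training API is defined by the corresponding agent:

```python
from rllte.hub import Procgen  # assumed import path

hub = Procgen()
# Obtain a ready-to-use training API for PPO on one game (env_id is illustrative).
api = hub.load_apis(env_id="bigfish", agent="PPO", seed=1, device="cuda:0")
print(type(api))
```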

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/encoder/espeholt_residual_encoder/index.html b/api_docs/xploit/encoder/espeholt_residual_encoder/index.html new file mode 100644 index 00000000..c76cd835 --- /dev/null +++ b/api_docs/xploit/encoder/espeholt_residual_encoder/index.html @@ -0,0 +1,4173 @@ + + + + + + + + + + + + + + + + + + + + + + + + + EspeholtResidualEncoder - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

EspeholtResidualEncoder

+

source +

EspeholtResidualEncoder(
+   observation_space: gym.Space, feature_dim: int = 0, net_arch: List[int] = [16, 32,
+   32]
+)
+

+
+

ResNet-like encoder for processing image-based observations. +Proposed by Espeholt L, Soyer H, Munos R, et al. Impala: Scalable distributed deep-rl with importance +weighted actor-learner architectures[C]//International conference on machine learning. PMLR, 2018: 1407-1416. +Target task: Atari games and Procgen games.

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • feature_dim (int) : Number of features extracted.
  • +
  • net_arch (List) : Architecture of the network. + It represents the out channels of each residual layer. + The length of this list is the number of residual layers.
  • +
+

Returns

+

ResNet-like encoder instance.

+

Methods:

+

.forward

+

source +

.forward(
+   obs: th.Tensor
+)
+

+
+

Forward method implementation.

+

Args

+
    +
  • obs (th.Tensor) : Observation tensor.
  • +
+

Returns

+

Encoded observation tensor.
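A minimal forward-pass sketch, assuming Gymnasium spaces and a Procgen-style 3x64x64 pixel observation; the batch size, input value range, and exact output shape depend on the encoder's internals and are treated as assumptions here:

```python
import gymnasium as gym
import numpy as np
import torch as th
from rllte.xploit.encoder import EspeholtResidualEncoder

# Assumed pixel observation space (Procgen-style frames).
obs_space = gym.spaces.Box(low=0, high=255, shape=(3, 64, 64), dtype=np.uint8)
encoder = EspeholtResidualEncoder(observation_space=obs_space,
                                  feature_dim=256,
                                  net_arch=[16, 32, 32])

obs = th.rand(8, 3, 64, 64)   # a batch of 8 dummy observations
features = encoder(obs)
print(features.shape)         # expected to be (8, feature_dim)
```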

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/encoder/identity_encoder/index.html b/api_docs/xploit/encoder/identity_encoder/index.html new file mode 100644 index 00000000..cd62f3e7 --- /dev/null +++ b/api_docs/xploit/encoder/identity_encoder/index.html @@ -0,0 +1,4166 @@ + + + + + + + + + + + + + + + + + + + + + + + + + IdentityEncoder - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

IdentityEncoder

+

source +

IdentityEncoder(
+   observation_space: gym.Space, feature_dim: int = 64
+)
+

+
+

Identity encoder for state-based observations.

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • feature_dim (int) : Number of features extracted.
  • +
+

Returns

+

Identity encoder instance.

+

Methods:

+

.forward

+

source +

.forward(
+   obs: th.Tensor
+)
+

+
+

Forward method implementation.

+

Args

+
    +
  • obs (th.Tensor) : Observation tensor.
  • +
+

Returns

+

Encoded observation tensor.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/encoder/mnih_cnn_encoder/index.html b/api_docs/xploit/encoder/mnih_cnn_encoder/index.html new file mode 100644 index 00000000..d7c3e9ec --- /dev/null +++ b/api_docs/xploit/encoder/mnih_cnn_encoder/index.html @@ -0,0 +1,4169 @@ + + + + + + + + + + + + + + + + + + + + + + + + + MnihCnnEncoder - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

MnihCnnEncoder

+

source +

MnihCnnEncoder(
+   observation_space: gym.Space, feature_dim: int = 0
+)
+

+
+

Convolutional neural network (CNN)-based encoder for processing image-based observations. +Proposed by Mnih V, Kavukcuoglu K, Silver D, et al. Playing atari with +deep reinforcement learning[J]. arXiv preprint arXiv:1312.5602, 2013. +Target task: Atari games.

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • feature_dim (int) : Number of features extracted.
  • +
+

Returns

+

CNN-based encoder instance.

+

Methods:

+

.forward

+

source +

.forward(
+   obs: th.Tensor
+)
+

+
+

Forward method implementation.

+

Args

+
    +
  • obs (th.Tensor) : Observation tensor.
  • +
+

Returns

+

Encoded observation tensor.
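As a hedged sketch of plugging this encoder into an agent, the snippet below assumes that rllte.env provides a make_atari_env helper, that rllte.agent.PPO follows the usual env/device/tag constructor pattern, that agents expose a .set() method for module replacement, and that feature_dim=512 matches what the agent's policy expects; all of these are assumptions rather than confirmed API details:

```python
from rllte.agent import PPO                      # agent choice is illustrative
from rllte.env import make_atari_env             # assumed helper for Atari tasks
from rllte.xploit.encoder import MnihCnnEncoder

if __name__ == "__main__":
    device = "cuda:0"
    env = make_atari_env(env_id="Alien-v5", device=device)   # env_id illustrative
    agent = PPO(env=env, device=device, tag="ppo_atari")
    # Swap in the Nature-DQN CNN encoder; `.set()` is assumed to be the
    # module-replacement interface of RLLTE agents.
    encoder = MnihCnnEncoder(observation_space=env.observation_space, feature_dim=512)
    agent.set(encoder=encoder)
    agent.train(num_train_steps=1_000_000)
```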

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/encoder/pathak_cnn_encoder/index.html b/api_docs/xploit/encoder/pathak_cnn_encoder/index.html new file mode 100644 index 00000000..00ca2342 --- /dev/null +++ b/api_docs/xploit/encoder/pathak_cnn_encoder/index.html @@ -0,0 +1,4169 @@ + + + + + + + + + + + + + + + + + + + + + + + + + PathakCnnEncoder - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

PathakCnnEncoder

+

source +

PathakCnnEncoder(
+   observation_space: gym.Space, feature_dim: int = 0
+)
+

+
+

Convolutional neural network (CNN)-based encoder for processing image-based observations. +Proposed by Pathak D, Agrawal P, Efros A A, et al. Curiosity-driven exploration by self-supervised prediction[C]// +International conference on machine learning. PMLR, 2017: 2778-2787. +Target task: Atari and MiniGrid games.

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • feature_dim (int) : Number of features extracted.
  • +
+

Returns

+

CNN-based encoder instance.

+

Methods:

+

.forward

+

source +

.forward(
+   obs: th.Tensor
+)
+

+
+

Forward method implementation.

+

Args

+
    +
  • obs (th.Tensor) : Observation tensor.
  • +
+

Returns

+

Encoded observation tensor.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/encoder/raffin_combined_encoder/index.html b/api_docs/xploit/encoder/raffin_combined_encoder/index.html new file mode 100644 index 00000000..f907b2b7 --- /dev/null +++ b/api_docs/xploit/encoder/raffin_combined_encoder/index.html @@ -0,0 +1,4168 @@ + + + + + + + + + + + + + + + + + + + + + + + + + RaffinCombinedEncoder - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

RaffinCombinedEncoder

+

source +

RaffinCombinedEncoder(
+   observation_space: gym.Space, feature_dim: int = 256, cnn_output_dim: int = 256
+)
+

+
+

Combined features extractor for Dict observation spaces. +Based on: https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/torch_layers.py#L231

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • feature_dim (int) : Number of features extracted.
  • +
  • cnn_output_dim (int) : Number of features extracted by the CNN.
  • +
+

Returns

+

Combined encoder instance.

+

Methods:

+

.forward

+

source +

.forward(
+   obs: Dict[str, th.Tensor]
+)
+

+
+

Forward method implementation.

+

Args

+
    +
  • obs (Dict[str, th.Tensor]) : Observation tensor.
  • +
+

Returns

+

Encoded observation tensor.
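A small sketch with a Dict observation space mixing an image and a state vector; the key names, shapes, and the exact output dimensionality are assumptions for illustration:

```python
import gymnasium as gym
import numpy as np
import torch as th
from rllte.xploit.encoder import RaffinCombinedEncoder

# Assumed Dict observation space: an image sub-space plus a low-dimensional state.
obs_space = gym.spaces.Dict({
    "image": gym.spaces.Box(low=0, high=255, shape=(3, 64, 64), dtype=np.uint8),
    "state": gym.spaces.Box(low=-1.0, high=1.0, shape=(7,), dtype=np.float32),
})
encoder = RaffinCombinedEncoder(observation_space=obs_space,
                                feature_dim=256, cnn_output_dim=256)

obs = {"image": th.rand(4, 3, 64, 64), "state": th.rand(4, 7)}
features = encoder(obs)       # flat feature tensor combining both sub-spaces
print(features.shape)
```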

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/encoder/tassa_cnn_encoder/index.html b/api_docs/xploit/encoder/tassa_cnn_encoder/index.html new file mode 100644 index 00000000..75cce144 --- /dev/null +++ b/api_docs/xploit/encoder/tassa_cnn_encoder/index.html @@ -0,0 +1,4169 @@ + + + + + + + + + + + + + + + + + + + + + + + + + TassaCnnEncoder - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

TassaCnnEncoder

+

source +

TassaCnnEncoder(
+   observation_space: gym.Space, feature_dim: int = 50
+)
+

+
+

Convolutional neural network (CNN)-based encoder for processing image-based observations. +Proposed by Tassa Y, Doron Y, Muldal A, et al. Deepmind control suite[J]. +arXiv preprint arXiv:1801.00690, 2018. +Target task: DeepMind Control Suite.

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • feature_dim (int) : Number of features extracted by the encoder.
  • +
+

Returns

+

CNN-based encoder instance.

+

Methods:

+

.forward

+

source +

.forward(
+   obs: th.Tensor
+)
+

+
+

Forward method implementation.

+

Args

+
    +
  • obs (th.Tensor) : Observation tensor.
  • +
+

Returns

+

Encoded observation tensor.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/encoder/vanilla_mlp_encoder/index.html b/api_docs/xploit/encoder/vanilla_mlp_encoder/index.html new file mode 100644 index 00000000..9de20130 --- /dev/null +++ b/api_docs/xploit/encoder/vanilla_mlp_encoder/index.html @@ -0,0 +1,4167 @@ + + + + + + + + + + + + + + + + + + + + + + + + + VanillaMlpEncoder - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

VanillaMlpEncoder

+

source +

VanillaMlpEncoder(
+   observation_space: gym.Space, feature_dim: int = 64, hidden_dim: int = 64
+)
+

+
+

Multi-layer perceptron (MLP) for processing state-based inputs.

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • feature_dim (int) : Number of features extracted.
  • +
  • hidden_dim (int) : Number of hidden units in the hidden layer.
  • +
+

Returns

+

MLP-based encoder instance.

+

Methods:

+

.forward

+

source +

.forward(
+   obs: th.Tensor
+)
+

+
+

Forward method implementation.

+

Args

+
    +
  • obs (th.Tensor) : Observation tensor.
  • +
+

Returns

+

Encoded observation tensor.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/policy/distributed_actor_learner/index.html b/api_docs/xploit/policy/distributed_actor_learner/index.html new file mode 100644 index 00000000..4cc0b5de --- /dev/null +++ b/api_docs/xploit/policy/distributed_actor_learner/index.html @@ -0,0 +1,4357 @@ + + + + + + + + + + + + + + + + + + + + + + + + + DistributedActorLearner - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

DistributedActorLearner

+

source +

DistributedActorLearner(
+   observation_space: gym.Space, action_space: gym.Space, feature_dim: int,
+   hidden_dim: int = 512, opt_class: Type[th.optim.Optimizer] = th.optim.Adam,
+   opt_kwargs: Optional[Dict[str, Any]] = None, init_fn: str = 'orthogonal',
+   use_lstm: bool = False
+)
+

+
+

Actor-Learner network for IMPALA.

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • action_space (gym.Space) : Action space.
  • +
  • feature_dim (int) : Number of features accepted.
  • +
  • hidden_dim (int) : Number of units per hidden layer.
  • +
  • opt_class (Type[th.optim.Optimizer]) : Optimizer class.
  • +
  • opt_kwargs (Dict[str, Any]) : Optimizer keyword arguments.
  • +
  • init_fn (str) : Parameters initialization method.
  • +
  • use_lstm (bool) : Whether to use LSTM module.
  • +
+

Returns

+

Actor-Critic network.

+

Methods:

+

.describe

+

source +

.describe()
+

+
+

Describe the policy.

+

.explore

+

source +

.explore(
+   obs: th.Tensor
+)
+

+
+

Explore the environment and randomly generate actions.

+

Args

+
    +
  • obs (th.Tensor) : Observation from the environment.
  • +
+

Returns

+

Sampled actions.

+

.freeze

+

source +

.freeze(
+   encoder: nn.Module, dist: Distribution
+)
+

+
+

Freeze all the elements like encoder and dist.

+

Args

+
    +
  • encoder (nn.Module) : Encoder network.
  • +
  • dist (Distribution) : Distribution.
  • +
+

Returns

+

None.

+

.forward

+

source +

.forward(
+   *args
+)
+

+
+

Only for inference.

+

.to

+

source +

.to(
+   device: th.device
+)
+

+
+

Only move the learner to the device and keep the actor on the CPU.

+

Args

+
    +
  • device (th.device) : Device to use.
  • +
+

Returns

+

None.

+

.save

+

source +

.save(
+   path: Path, pretraining: bool, global_step: int
+)
+

+
+

Save models.

+

Args

+
    +
  • path (Path) : Save path.
  • +
  • pretraining (bool) : Pre-training mode.
  • +
  • global_step (int) : Global training step.
  • +
+

Returns

+

None.

+

.load

+

source +

.load(
+   path: str, device: th.device
+)
+

+
+

Load initial parameters.

+

Args

+
    +
  • path (str) : Import path.
  • +
  • device (th.device) : Device to use.
  • +
+

Returns

+

None.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/policy/off_policy_det_actor_double_critic/index.html b/api_docs/xploit/policy/off_policy_det_actor_double_critic/index.html new file mode 100644 index 00000000..5dbb27a6 --- /dev/null +++ b/api_docs/xploit/policy/off_policy_det_actor_double_critic/index.html @@ -0,0 +1,4297 @@ + + + + + + + + + + + + + + + + + + + + + + + + + OffPolicyDetActorDoubleCritic - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

OffPolicyDetActorDoubleCritic

+

source +

OffPolicyDetActorDoubleCritic(
+   observation_space: gym.Space, action_space: gym.Space, feature_dim: int = 64,
+   hidden_dim: int = 1024, opt_class: Type[th.optim.Optimizer] = th.optim.Adam,
+   opt_kwargs: Optional[Dict[str, Any]] = None, init_fn: str = 'orthogonal'
+)
+

+
+

Deterministic actor network and double critic network for off-policy algorithms like DrQv2, DDPG. +Here the 'self.dist' refers to an action noise.

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • action_space (gym.Space) : Action space.
  • +
  • feature_dim (int) : Number of features accepted.
  • +
  • hidden_dim (int) : Number of units per hidden layer.
  • +
  • opt_class (Type[th.optim.Optimizer]) : Optimizer class.
  • +
  • opt_kwargs (Dict[str, Any]) : Optimizer keyword arguments.
  • +
  • init_fn (str) : Parameters initialization method.
  • +
+

Returns

+

Actor-Critic network.

+

Methods:

+

.describe

+

source +

.describe()
+

+
+

Describe the policy.

+

.freeze

+

source +

.freeze(
+   encoder: nn.Module, dist: Distribution
+)
+

+
+

Freeze all the elements like encoder and dist.

+

Args

+
    +
  • encoder (nn.Module) : Encoder network.
  • +
  • dist (Distribution) : Distribution.
  • +
+

Returns

+

None.

+

.forward

+

source +

.forward(
+   obs: th.Tensor, training: bool = True
+)
+

+
+

Sample actions based on observations.

+

Args

+
    +
  • obs (th.Tensor) : Observations.
  • +
  • training (bool) : Training mode, True or False.
  • +
+

Returns

+

Sampled actions.

+

.get_dist

+

source +

.get_dist(
+   obs: th.Tensor
+)
+

+
+

Get sample distribution.

+

Args

+
    +
  • obs (th.Tensor) : Observations.
  • +
+

Returns

+

RLLTE distribution.

+

.save

+

source +

.save(
+   path: Path, pretraining: bool = False
+)
+

+
+

Save models.

+

Args

+
    +
  • path (Path) : Save path.
  • +
  • pretraining (bool) : Pre-training mode.
  • +
+

Returns

+

None.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/policy/off_policy_double_actor_double_critic/index.html b/api_docs/xploit/policy/off_policy_double_actor_double_critic/index.html new file mode 100644 index 00000000..76075aea --- /dev/null +++ b/api_docs/xploit/policy/off_policy_double_actor_double_critic/index.html @@ -0,0 +1,4298 @@ + + + + + + + + + + + + + + + + + + + + + + + + + OffPolicyDoubleActorDoubleCritic - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

OffPolicyDoubleActorDoubleCritic

+

source +

OffPolicyDoubleActorDoubleCritic(
+   observation_space: gym.Space, action_space: gym.Space, feature_dim: int = 64,
+   hidden_dim: int = 1024, opt_class: Type[th.optim.Optimizer] = th.optim.Adam,
+   opt_kwargs: Optional[Dict[str, Any]] = None, init_fn: str = 'orthogonal'
+)
+

+
+

Double deterministic actor network and double critic network for off-policy algorithms like DDPG, TD3. +Here the 'self.dist' refers to an action noise.

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • action_space (gym.Space) : Action space.
  • +
  • feature_dim (int) : Number of features accepted.
  • +
  • hidden_dim (int) : Number of units per hidden layer.
  • +
  • opt_class (Type[th.optim.Optimizer]) : Optimizer class.
  • +
  • opt_kwargs (Dict[str, Any]) : Optimizer keyword arguments.
  • +
  • init_fn (str) : Parameters initialization method.
  • +
+

Returns

+

Actor-Critic network.

+

Methods:

+

.describe

+

source +

.describe()
+

+
+

Describe the policy.

+

.freeze

+

source +

.freeze(
+   encoder: nn.Module, dist: Distribution
+)
+

+
+

Freeze all the elements like encoder and dist.

+

Args

+
    +
  • encoder (nn.Module) : Encoder network.
  • +
  • dist (Distribution) : Distribution.
  • +
+

Returns

+

None.

+

.forward

+

source +

.forward(
+   obs: th.Tensor, training: bool = True
+)
+

+
+

Sample actions based on observations.

+

Args

+
    +
  • obs (th.Tensor) : Observations.
  • +
  • training (bool) : Training mode, True or False.
  • +
+

Returns

+

Sampled actions.

+

.get_dist

+

source +

.get_dist(
+   obs: th.Tensor
+)
+

+
+

Get sample distribution.

+

Args

+
    +
  • obs (th.Tensor) : Observations.
  • +
+

Returns

+

RLLTE distribution.

+

.save

+

source +

.save(
+   path: Path, pretraining: bool, global_step: int
+)
+

+
+

Save models.

+

Args

+
    +
  • path (Path) : Save path.
  • +
  • pretraining (bool) : Pre-training mode.
  • +
  • global_step (int) : Global training step.
  • +
+

Returns

+

None.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/policy/off_policy_double_qnetwork/index.html b/api_docs/xploit/policy/off_policy_double_qnetwork/index.html new file mode 100644 index 00000000..d39cb323 --- /dev/null +++ b/api_docs/xploit/policy/off_policy_double_qnetwork/index.html @@ -0,0 +1,4267 @@ + + + + + + + + + + + + + + + + + + + + + + + + + OffPolicyDoubleQNetwork - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

OffPolicyDoubleQNetwork

+

source +

OffPolicyDoubleQNetwork(
+   observation_space: gym.Space, action_space: gym.Space, feature_dim: int = 64,
+   hidden_dim: int = 1024, opt_class: Type[th.optim.Optimizer] = th.optim.Adam,
+   opt_kwargs: Optional[Dict[str, Any]] = None, init_fn: str = 'orthogonal'
+)
+

+
+

Q-network for off-policy algorithms like DQN.

+

Structure: self.encoder (shared by actor and critic), self.qnet, self.qnet_target +Optimizers: self.opt -> (self.qnet, self.qnet_target)

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • action_space (gym.Space) : Action space.
  • +
  • feature_dim (int) : Number of features accepted.
  • +
  • hidden_dim (int) : Number of units per hidden layer.
  • +
  • opt_class (Type[th.optim.Optimizer]) : Optimizer class.
  • +
  • opt_kwargs (Dict[str, Any]) : Optimizer keyword arguments.
  • +
  • init_fn (str) : Parameters initialization method.
  • +
+

Returns

+

Q-network instance.

+

Methods:

+

.describe

+

source +

.describe()
+

+
+

Describe the policy.

+

.freeze

+

source +

.freeze(
+   encoder: nn.Module, dist: Distribution
+)
+

+
+

Freeze all the elements like encoder and dist.

+

Args

+
    +
  • encoder (nn.Module) : Encoder network.
  • +
  • dist (Distribution) : Distribution class.
  • +
+

Returns

+

None.

+

.forward

+

source +

.forward(
+   obs: th.Tensor, training: bool = True
+)
+

+
+

Sample actions based on observations.

+

Args

+
    +
  • obs (th.Tensor) : Observations.
  • +
  • training (bool) : Training mode, True or False.
  • +
+

Returns

+

Sampled actions.

+

.save

+

source +

.save(
+   path: Path, pretraining: bool, global_step: int
+)
+

+
+

Save models.

+

Args

+
    +
  • path (Path) : Save path.
  • +
  • pretraining (bool) : Pre-training mode.
  • +
  • global_step (int) : Global training step.
  • +
+

Returns

+

None.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/policy/off_policy_stoch_actor_double_critic/index.html b/api_docs/xploit/policy/off_policy_stoch_actor_double_critic/index.html new file mode 100644 index 00000000..341c90d4 --- /dev/null +++ b/api_docs/xploit/policy/off_policy_stoch_actor_double_critic/index.html @@ -0,0 +1,4301 @@ + + + + + + + + + + + + + + + + + + + + + + + + + OffPolicyStochActorDoubleCritic - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

OffPolicyStochActorDoubleCritic

+

source +

OffPolicyStochActorDoubleCritic(
+   observation_space: gym.Space, action_space: gym.Space, feature_dim: int = 64,
+   hidden_dim: int = 1024, opt_class: Type[th.optim.Optimizer] = th.optim.Adam,
+   opt_kwargs: Optional[Dict[str, Any]] = None, log_std_range: Tuple = (-5, 2),
+   init_fn: str = 'orthogonal'
+)
+

+
+

Stochastic actor network and double critic network for off-policy algorithms like SAC. +Here the 'self.dist' refers to a sampling distribution instance.

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • action_space (gym.Space) : Action space.
  • +
  • feature_dim (int) : Number of features accepted.
  • +
  • hidden_dim (int) : Number of units per hidden layer.
  • +
  • opt_class (Type[th.optim.Optimizer]) : Optimizer class.
  • +
  • opt_kwargs (Dict[str, Any]) : Optimizer keyword arguments.
  • +
  • log_std_range (Tuple) : Range of log standard deviation.
  • +
  • init_fn (str) : Parameters initialization method.
  • +
+

Returns

+

Actor-Critic network.

+

Methods:

+

.describe

+

source +

.describe()
+

+
+

Describe the policy.

+

.freeze

+

source +

.freeze(
+   encoder: nn.Module, dist: Distribution
+)
+

+
+

Freeze all the elements like encoder and dist.

+

Args

+
    +
  • encoder (nn.Module) : Encoder network.
  • +
  • dist (Distribution) : Distribution class.
  • +
+

Returns

+

None.

+

.forward

+

source +

.forward(
+   obs: th.Tensor, training: bool = True
+)
+

+
+

Sample actions based on observations.

+

Args

+
    +
  • obs (th.Tensor) : Observations.
  • +
  • training (bool) : Training mode, True or False.
  • +
+

Returns

+

Sampled actions.

+

.get_dist

+

source +

.get_dist(
+   obs: th.Tensor
+)
+

+
+

Get sample distribution.

+

Args

+
    +
  • obs (th.Tensor) : Observations.
  • +
  • step (int) : Global training step.
  • +
+

Returns

+

Action distribution.

+

.save

+

source +

.save(
+   path: Path, pretraining: bool, global_step: int
+)
+

+
+

Save models.

+

Args

+
    +
  • path (Path) : Save path.
  • +
  • pretraining (bool) : Pre-training mode.
  • +
  • global_step (int) : Global training step.
  • +
+

Returns

+

None.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/policy/on_policy_decoupled_actor_critic/index.html b/api_docs/xploit/policy/on_policy_decoupled_actor_critic/index.html new file mode 100644 index 00000000..3e06b47c --- /dev/null +++ b/api_docs/xploit/policy/on_policy_decoupled_actor_critic/index.html @@ -0,0 +1,4331 @@ + + + + + + + + + + + + + + + + + + + + + + + + + OnPolicyDecoupledActorCritic - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

OnPolicyDecoupledActorCritic

+

source +

OnPolicyDecoupledActorCritic(
+   observation_space: gym.Space, action_space: gym.Space, feature_dim: int,
+   hidden_dim: int = 512, opt_class: Type[th.optim.Optimizer] = th.optim.Adam,
+   opt_kwargs: Optional[Dict[str, Any]] = None, init_fn: str = 'orthogonal'
+)
+

+
+

Actor-Critic network for on-policy algorithms like DAAC.

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • action_space (gym.Space) : Action space.
  • +
  • feature_dim (int) : Number of features accepted.
  • +
  • hidden_dim (int) : Number of units per hidden layer.
  • +
  • opt_class (Type[th.optim.Optimizer]) : Optimizer class.
  • +
  • opt_kwargs (Dict[str, Any]) : Optimizer keyword arguments.
  • +
  • init_fn (str) : Parameters initialization method.
  • +
+

Returns

+

Actor-Critic network instance.

+

Methods:

+

.describe

+

source +

.describe()
+

+
+

Describe the policy.

+

.freeze

+

source +

.freeze(
+   encoder: nn.Module, dist: Distribution
+)
+

+
+

Freeze all the elements like encoder and dist.

+

Args

+
    +
  • encoder (nn.Module) : Encoder network.
  • +
  • dist (Distribution) : Distribution class.
  • +
+

Returns

+

None.

+

.forward

+

source +

.forward(
+   obs: th.Tensor, training: bool = True
+)
+

+
+

Get actions and estimated values for observations.

+

Args

+
    +
  • obs (th.Tensor) : Observations.
  • +
  • training (bool) : training mode, True or False.
  • +
+

Returns

+

Sampled actions, estimated values, and log of probabilities for observations when training is True, +else only deterministic actions.

+

.get_value

+

source +

.get_value(
+   obs: th.Tensor
+)
+

+
+

Get estimated values for observations.

+

Args

+
    +
  • obs (th.Tensor) : Observations.
  • +
+

Returns

+

Estimated values.

+

.evaluate_actions

+

source +

.evaluate_actions(
+   obs: th.Tensor, actions: th.Tensor
+)
+

+
+

Evaluate actions according to the current policy given the observations.

+

Args

+
    +
  • obs (th.Tensor) : Sampled observations.
  • +
  • actions (th.Tensor) : Sampled actions.
  • +
+

Returns

+

Estimated values, log of the probability evaluated at actions, entropy of distribution.

+

.save

+

source +

.save(
+   path: Path, pretraining: bool, global_step: int
+)
+

+
+

Save models.

+

Args

+
    +
  • path (Path) : Save path.
  • +
  • pretraining (bool) : Pre-training mode.
  • +
  • global_step (int) : Global training step.
  • +
+

Returns

+

None.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/policy/on_policy_shared_actor_critic/index.html b/api_docs/xploit/policy/on_policy_shared_actor_critic/index.html new file mode 100644 index 00000000..99aa6b25 --- /dev/null +++ b/api_docs/xploit/policy/on_policy_shared_actor_critic/index.html @@ -0,0 +1,4397 @@ + + + + + + + + + + + + + + + + + + + + + + + + + OnPolicySharedActorCritic - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

OnPolicySharedActorCritic

+

source +

OnPolicySharedActorCritic(
+   observation_space: gym.Space, action_space: gym.Space, feature_dim: int,
+   hidden_dim: int = 512, opt_class: Type[th.optim.Optimizer] = th.optim.Adam,
+   opt_kwargs: Optional[Dict[str, Any]] = None, aux_critic: bool = False,
+   init_fn: str = 'orthogonal'
+)
+

+
+

Actor-Critic network for on-policy algorithms like PPO and A2C.

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • action_space (gym.Space) : Action space.
  • +
  • feature_dim (int) : Number of features accepted.
  • +
  • hidden_dim (int) : Number of units per hidden layer.
  • +
  • opt_class (Type[th.optim.Optimizer]) : Optimizer class.
  • +
  • opt_kwargs (Dict[str, Any]) : Optimizer keyword arguments.
  • +
  • aux_critic (bool) : Use auxiliary critic or not, for PPG agent.
  • +
  • init_fn (str) : Parameters initialization method.
  • +
+

Returns

+

Actor-Critic network instance.

+

Methods:

+

.describe

+

source +

.describe()
+

+
+

Describe the policy.

+

.freeze

+

source +

.freeze(
+   encoder: nn.Module, dist: Distribution
+)
+

+
+

Freeze all the elements like encoder and dist.

+

Args

+
    +
  • encoder (nn.Module) : Encoder network.
  • +
  • dist (Distribution) : Distribution class.
  • +
+

Returns

+

None.

+

.forward

+

source +

.forward(
+   obs: th.Tensor, training: bool = True
+)
+

+
+

Get actions and estimated values for observations.

+

Args

+
    +
  • obs (th.Tensor) : Observations.
  • +
  • training (bool) : training mode, True or False.
  • +
+

Returns

+

Sampled actions, estimated values, and log of probabilities for observations when training is True, +else only deterministic actions.

+

.get_value

+

source +

.get_value(
+   obs: th.Tensor
+)
+

+
+

Get estimated values for observations.

+

Args

+
    +
  • obs (th.Tensor) : Observations.
  • +
+

Returns

+

Estimated values.

+

.evaluate_actions

+

source +

.evaluate_actions(
+   obs: th.Tensor, actions: th.Tensor
+)
+

+
+

Evaluate actions according to the current policy given the observations.

+

Args

+
    +
  • obs (th.Tensor) : Sampled observations.
  • +
  • actions (th.Tensor) : Sampled actions.
  • +
+

Returns

+

Estimated values, log of the probability evaluated at actions, entropy of distribution.

+

.get_policy_outputs

+

source +

.get_policy_outputs(
+   obs: th.Tensor
+)
+

+
+

Get policy outputs for training.

+

Args

+
    +
  • obs (Tensor) : Observations.
  • +
+

Returns

+

Policy outputs like unnormalized probabilities for Discrete tasks.

+

.get_dist_and_aux_value

+

source +

.get_dist_and_aux_value(
+   obs: th.Tensor
+)
+

+
+

Get probs and auxiliary estimated values for auxiliary phase update.

+

Args

+
    +
  • obs : Sampled observations.
  • +
+

Returns

+

Sample distribution, estimated values, auxiliary estimated values.

+

.save

+

source +

.save(
+   path: Path, pretraining: bool, global_step: int
+)
+

+
+

Save models.

+

Args

+
    +
  • path (Path) : Save path.
  • +
  • pretraining (bool) : Pre-training mode.
  • +
  • global_step (int) : Global training step.
  • +
+

Returns

+

None.
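Since .describe() takes no arguments, a one-line sketch is enough; invoking it directly on the class is assumed to work here, as no instance-specific state should be needed to print the summary:

```python
from rllte.xploit.policy import OnPolicySharedActorCritic

# Print a summary of the policy's structure and optimization setup.
OnPolicySharedActorCritic.describe()
```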

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/storage/dict_replay_storage/index.html b/api_docs/xploit/storage/dict_replay_storage/index.html new file mode 100644 index 00000000..4510db20 --- /dev/null +++ b/api_docs/xploit/storage/dict_replay_storage/index.html @@ -0,0 +1,4253 @@ + + + + + + + + + + + + + + + + + + + + + + + + + DictReplayStorage - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

DictReplayStorage

+

source +

DictReplayStorage(
+   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',
+   storage_size: int = 1000000, batch_size: int = 1024, num_envs: int = 1
+)
+

+
+

Dict replay storage for off-policy algorithms and dictionary observations.

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • action_space (gym.Space) : Action space.
  • +
  • device (str) : Device to convert the data.
  • +
  • storage_size (int) : The capacity of the storage.
  • +
  • batch_size (int) : Batch size of samples.
  • +
  • num_envs (int) : The number of parallel environments.
  • +
+

Returns

+

Dict replay storage.

+

Methods:

+

.reset

+

source +

.reset()
+

+
+

Reset the storage.

+

.add

+

source +

.add(
+   observations: Dict[str, th.Tensor], actions: th.Tensor, rewards: th.Tensor,
+   terminateds: th.Tensor, truncateds: th.Tensor, infos: Dict[str, Any],
+   next_observations: Dict[str, th.Tensor]
+)
+

+
+

Add sampled transitions into storage.

+

Args

+
    +
  • observations (Dict[str, th.Tensor]) : Observations.
  • +
  • actions (th.Tensor) : Actions.
  • +
  • rewards (th.Tensor) : Rewards.
  • +
  • terminateds (th.Tensor) : Termination flag.
  • +
  • truncateds (th.Tensor) : Truncation flag.
  • +
  • infos (Dict[str, Any]) : Additional information.
  • +
  • next_observations (Dict[str, th.Tensor]) : Next observations.
  • +
+

Returns

+

None.

+

.sample

+

source +

.sample()
+

+
+

Sample from the storage.

+

.update

+

source +

.update(
+   *args, **kwargs
+)
+

+
+

Update the storage if necessary.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/storage/dict_rollout_storage/index.html b/api_docs/xploit/storage/dict_rollout_storage/index.html new file mode 100644 index 00000000..b13b4a53 --- /dev/null +++ b/api_docs/xploit/storage/dict_rollout_storage/index.html @@ -0,0 +1,4233 @@ + + + + + + + + + + + + + + + + + + + + + + + + + DictRolloutStorage - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

DictRolloutStorage

+

source +

DictRolloutStorage(
+   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',
+   storage_size: int = 256, batch_size: int = 64, num_envs: int = 8,
+   discount: float = 0.999, gae_lambda: float = 0.95
+)
+

+
+

Dict Rollout storage for on-policy algorithms and dictionary observations.

+

Args

+
    +
  • observation_space (gym.Space) : The observation space of environment.
  • +
  • action_space (gym.Space) : The action space of environment.
  • +
  • device (str) : Device to convert the data.
  • +
  • storage_size (int) : The capacity of the storage. Here it refers to the length of per rollout.
  • +
  • batch_size (int) : Batch size of samples.
  • +
  • num_envs (int) : The number of parallel environments.
  • +
  • discount (float) : The discount factor.
  • +
  • gae_lambda (float) : Weighting coefficient for generalized advantage estimation (GAE).
  • +
+

Returns

+

Dict rollout storage.

+

Methods:

+

.reset

+

source +

.reset()
+

+
+

Reset the storage.

+

.add

+

source +

.add(
+   observations: Dict[str, th.Tensor], actions: th.Tensor, rewards: th.Tensor,
+   terminateds: th.Tensor, truncateds: th.Tensor, infos: Dict,
+   next_observations: Dict[str, th.Tensor], log_probs: th.Tensor,
+   values: th.Tensor
+)
+

+
+

Add sampled transitions into storage.

+

Args

+
    +
  • observations (Dict[str, th.Tensor]) : Observations.
  • +
  • actions (th.Tensor) : Actions.
  • +
  • rewards (th.Tensor) : Rewards.
  • +
  • terminateds (th.Tensor) : Termination signals.
  • +
  • truncateds (th.Tensor) : Truncation signals.
  • +
  • infos (Dict) : Extra information.
  • +
  • next_observations (Dict[str, th.Tensor]) : Next observations.
  • +
  • log_probs (th.Tensor) : Log of the probability evaluated at actions.
  • +
  • values (th.Tensor) : Estimated values.
  • +
+

Returns

+

None.

+

.sample

+

source +

.sample()
+

+
+

Sample data from storage.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/storage/her_replay_storage/index.html b/api_docs/xploit/storage/her_replay_storage/index.html new file mode 100644 index 00000000..7b6ea12b --- /dev/null +++ b/api_docs/xploit/storage/her_replay_storage/index.html @@ -0,0 +1,4237 @@ + + + + + + + + + + + + + + + + + + + + + + + + + HerReplayStorage - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

HerReplayStorage

+

source +

HerReplayStorage(
+   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',
+   storage_size: int = 1000000, num_envs: int = 1, batch_size: int = 1024,
+   goal_selection_strategy: str = 'future', num_goals: int = 4,
+   reward_fn: Callable = lambda x: x, copy_info_dict: bool = False
+)
+

+
+

Hindsight experience replay (HER) storage for off-policy algorithms. +Based on: https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/her/her_replay_buffer.py

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • action_space (gym.Space) : Action space.
  • +
  • device (str) : Device to convert the data.
  • +
  • storage_size (int) : The capacity of the storage.
  • +
  • num_envs (int) : The number of parallel environments.
  • +
  • batch_size (int) : Batch size of samples.
  • +
  • goal_selection_strategy (str) : A goal selection strategy of ["future", "final", "episode"].
  • +
  • num_goals (int) : The number of goals to sample.
  • +
  • reward_fn (Callable) : Function to compute new rewards based on state and goal, with the same definition as https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/envs/bit_flipping_env.py#L190
  • copy_info_dict (bool) : Whether to copy the info dictionary and pass it to the compute_reward() method.
  • +
+

Returns

+

Dict replay storage.

+

Methods:

+

.add

+

source +

.add(
+   observations: Dict[str, th.Tensor], actions: th.Tensor, rewards: th.Tensor,
+   terminateds: th.Tensor, truncateds: th.Tensor, infos: Dict[str, Any],
+   next_observations: Dict[str, th.Tensor]
+)
+

+
+

Add sampled transitions into storage.

+

Args

+
    +
  • observations (Dict[str, th.Tensor]) : Observations.
  • +
  • actions (th.Tensor) : Actions.
  • +
  • rewards (th.Tensor) : Rewards.
  • +
  • terminateds (th.Tensor) : Termination flag.
  • +
  • truncateds (th.Tensor) : Truncation flag.
  • +
  • infos (Dict[str, Any]) : Additional information.
  • +
  • next_observations (Dict[str, th.Tensor]) : Next observations.
  • +
+

Returns

+

None.

+

.sample

+

source +

.sample()
+

+
+

Sample from the storage.

+

.update

+

source +

.update(
+   *args, **kwargs
+)
+

+
+

Update the storage if necessary.
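A construction sketch with a goal-conditioned Dict observation space; the key names, shapes, and the three-argument reward_fn signature (following the SB3 compute_reward convention referenced above) are assumptions rather than confirmed requirements:

```python
import gymnasium as gym
import numpy as np
from rllte.xploit.storage import HerReplayStorage

# Assumed goal-conditioned observation space with the conventional keys.
obs_space = gym.spaces.Dict({
    "observation":   gym.spaces.Box(-1.0, 1.0, shape=(10,), dtype=np.float32),
    "achieved_goal": gym.spaces.Box(-1.0, 1.0, shape=(3,), dtype=np.float32),
    "desired_goal":  gym.spaces.Box(-1.0, 1.0, shape=(3,), dtype=np.float32),
})
action_space = gym.spaces.Box(-1.0, 1.0, shape=(4,), dtype=np.float32)

def reward_fn(achieved_goal, desired_goal, info):
    # Illustrative sparse reward: 0 when close to the goal, -1 otherwise.
    return -(np.linalg.norm(achieved_goal - desired_goal, axis=-1) > 0.05).astype(np.float32)

storage = HerReplayStorage(
    observation_space=obs_space,
    action_space=action_space,
    device="cpu",
    goal_selection_strategy="future",
    num_goals=4,
    reward_fn=reward_fn,
)
```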

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/storage/nstep_replay_storage/index.html b/api_docs/xploit/storage/nstep_replay_storage/index.html new file mode 100644 index 00000000..74d6579a --- /dev/null +++ b/api_docs/xploit/storage/nstep_replay_storage/index.html @@ -0,0 +1,4286 @@ + + + + + + + + + + + + + + + + + + + + + + + + + NStepReplayStorage - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

NStepReplayStorage

+

source +

NStepReplayStorage(
+   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',
+   storage_size: int = 1000000, num_envs: int = 1, batch_size: int = 256,
+   num_workers: int = 4, pin_memory: bool = True, n_step: int = 3, discount: float = 0.99,
+   fetch_every: int = 1000, save_snapshot: bool = False
+)
+

+
+

N-step replay storage. +Implemented based on: https://github.com/facebookresearch/drqv2/blob/main/replay_buffer.py

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • action_space (gym.Space) : Action space.
  • +
  • device (str) : Device to convert replay data.
  • +
  • storage_size (int) : Max number of element in the storage.
  • +
  • num_envs (int) : The number of parallel environments.
  • +
  • batch_size (int) : Batch size of samples.
  • +
  • num_workers (int) : Subprocesses to use for data loading.
  • +
  • pin_memory (bool) : Pin memory or not.
  • +
  • n_step (int) : The number of transitions to consider when computing n-step returns.
  • +
  • discount (float) : The discount factor for future rewards.
  • +
  • fetch_every (int) : Loading interval.
  • +
  • save_snapshot (bool) : Save loaded file or not.
  • +
+

Returns

+

N-step replay storage.

+

Methods:

+

.reset

+

source +

.reset()
+

+
+

Reset the storage.

+

.add

+

source +

.add(
+   observations: th.Tensor, actions: th.Tensor, rewards: th.Tensor,
+   terminateds: th.Tensor, truncateds: th.Tensor, infos: Dict[str, Any],
+   next_observations: th.Tensor
+)
+

+
+

Add sampled transitions into storage.

+

Args

+
    +
  • observations (th.Tensor) : Observations.
  • +
  • actions (th.Tensor) : Actions.
  • +
  • rewards (th.Tensor) : Rewards.
  • +
  • terminateds (th.Tensor) : Termination flag.
  • +
  • truncateds (th.Tensor) : Truncation flag.
  • +
  • infos (Dict[str, Any]) : Additional information.
  • +
  • next_observations (th.Tensor) : Next observations.
  • +
+

Returns

+

None.

+

.replay_iter

+

source +

.replay_iter()
+

+
+

Create iterable dataloader.

+

.sample

+

source +

.sample()
+

+
+

Sample from the storage.

+

.update

+

source +

.update(
+   *args
+)
+

+
+

Update the storage if necessary.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/storage/prioritized_replay_storage/index.html b/api_docs/xploit/storage/prioritized_replay_storage/index.html new file mode 100644 index 00000000..276b1ff8 --- /dev/null +++ b/api_docs/xploit/storage/prioritized_replay_storage/index.html @@ -0,0 +1,4292 @@ + + + + + + + + + + + + + + + + + + + + + + + + + PrioritizedReplayStorage - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

PrioritizedReplayStorage

+

source +

PrioritizedReplayStorage(
+   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',
+   storage_size: int = 1000000, batch_size: int = 1024, num_envs: int = 1,
+   alpha: float = 0.6, beta: float = 0.4
+)
+

+
+

Prioritized replay storage with proportional prioritization for off-policy algorithms. +Since the storage updates the priorities of the samples based on the TD error, users +should include the indices and weights in the returned information of the .update +method of the agent. An example is: + return {"indices": indices, "weights": weights, ..., "Actor Loss": actor_loss, ...}

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • action_space (gym.Space) : Action space.
  • +
  • device (str) : Device to convert the data.
  • +
  • storage_size (int) : The capacity of the storage.
  • +
  • num_envs (int) : The number of parallel environments.
  • +
  • batch_size (int) : Batch size of samples.
  • +
  • alpha (float) : Prioritization value.
  • +
  • beta (float) : Importance sampling value.
  • +
+

Returns

+

Prioritized replay storage.

+

Methods:

+

.reset

+

source +

.reset()
+

+
+

Reset the storage.

+

.annealing_beta

+

source +

.annealing_beta()
+

+
+

Linearly increases beta from the initial value to 1 over global training steps.

+

.add

+

source +

.add(
+   observations: th.Tensor, actions: th.Tensor, rewards: th.Tensor,
+   terminateds: th.Tensor, truncateds: th.Tensor, infos: Dict[str, Any],
+   next_observations: th.Tensor
+)
+

+
+

Add sampled transitions into storage.

+

Args

+
    +
  • observations (th.Tensor) : Observations.
  • +
  • actions (th.Tensor) : Actions.
  • +
  • rewards (th.Tensor) : Rewards.
  • +
  • terminateds (th.Tensor) : Termination flag.
  • +
  • truncateds (th.Tensor) : Truncation flag.
  • +
  • infos (Dict[str, Any]) : Additional information.
  • +
  • next_observations (th.Tensor) : Next observations.
  • +
+

Returns

+

None.

+

.sample

+

source +

.sample()
+

+
+

Sample from the storage.

+

.update

+

source +

.update(
+   metrics: Dict
+)
+

+
+

Update the priorities.

+

Args

+
    +
  • metrics (Dict) : Training metrics from the agent to update the priorities: + indices (np.ndarray): The indices of current batch data. + priorities (np.ndarray): The priorities of current batch data.
  • +
+

Returns

+

None.
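To make the indices/weights/priorities contract above concrete, here is a small, self-contained sketch of the metrics dict an agent's .update method could return; the field names beyond 'indices' and 'priorities' are illustrative, and how the sampled batch exposes its indices and weights depends on the agent implementation:

```python
import numpy as np
import torch as th

def build_update_metrics(indices: np.ndarray, weights: th.Tensor, td_errors: th.Tensor) -> dict:
    """Assemble the dict an agent's `.update` should return so that
    `PrioritizedReplayStorage.update(metrics)` can refresh the priorities."""
    return {
        "indices": indices,                                     # indices of the sampled batch
        "weights": weights,                                     # importance-sampling weights
        "priorities": td_errors.abs().detach().cpu().numpy(),   # new per-sample priorities
        "Critic Loss": float(td_errors.pow(2).mean()),          # extra logging entries as needed
    }

# Illustrative call with dummy values:
metrics = build_update_metrics(
    indices=np.arange(4),
    weights=th.ones(4),
    td_errors=th.tensor([0.1, -0.3, 0.2, 0.05]),
)
```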

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/storage/vanilla_distributed_storage/index.html b/api_docs/xploit/storage/vanilla_distributed_storage/index.html new file mode 100644 index 00000000..d5d972b3 --- /dev/null +++ b/api_docs/xploit/storage/vanilla_distributed_storage/index.html @@ -0,0 +1,4261 @@ + + + + + + + + + + + + + + + + + + + + + + + + + VanillaDistributedStorage - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

VanillaDistributedStorage

+

source +

VanillaDistributedStorage(
+   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',
+   storage_size: int = 100, num_storages: int = 80, num_envs: int = 45,
+   batch_size: int = 32
+)
+

+
+

Vanilla distributed storage for distributed algorithms like IMPALA.

+

Args

+
    +
  • observation_space (gym.Space) : The observation space of environment.
  • +
  • action_space (gym.Space) : The action space of environment.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • storage_size (int) : The capacity of the storage. Here it refers to the length of per rollout.
  • +
  • num_storages (int) : The number of shared-memory storages.
  • +
  • num_envs (int) : The number of parallel environments.
  • +
  • batch_size (int) : The batch size.
  • +
+

Returns

+

Vanilla distributed storage.

+

Methods:

+

.reset

+

source +

.reset()
+

+
+

Reset the storage.

+

.add

+

source +

.add(
+   idx: int, timestep: int, actor_output: Dict[str, Any], env_output: Dict[str,
+   Any]
+)
+

+
+

Add sampled transitions into storage.

+

Args

+
    +
  • idx (int) : The index of storage.
  • +
  • timestep (int) : The timestep of rollout.
  • +
  • actor_output (Dict) : Actor output.
  • +
  • env_output (Dict) : Environment output.
  • +
+

Returns

+

None.

+

.sample

+

source +

.sample(
+   free_queue: mp.SimpleQueue, full_queue: mp.SimpleQueue, lock = threading.Lock()
+)
+

+
+

Sample transitions from the storage.

+

Args

+
    +
  • free_queue (Queue) : Free queue for communication.
  • +
  • full_queue (Queue) : Full queue for communication.
  • +
  • lock (Lock) : Thread lock.
  • +
+

Returns

+

Batched samples.

+

.update

+

source +

.update(
+   *args, **kwargs
+)
+

+
+

Update the storage if necessary.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/storage/vanilla_replay_storage/index.html b/api_docs/xploit/storage/vanilla_replay_storage/index.html new file mode 100644 index 00000000..33aebc51 --- /dev/null +++ b/api_docs/xploit/storage/vanilla_replay_storage/index.html @@ -0,0 +1,4253 @@ + + + + + + + + + + + + + + + + + + + + + + + + + VanillaReplayStorage - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

VanillaReplayStorage

+

source +

VanillaReplayStorage(
+   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',
+   storage_size: int = 1000000, batch_size: int = 1024, num_envs: int = 1
+)
+

+
+

Vanilla replay storage for off-policy algorithms.

+

Args

+
    +
  • observation_space (gym.Space) : Observation space.
  • +
  • action_space (gym.Space) : Action space.
  • +
  • device (str) : Device to convert the data.
  • +
  • storage_size (int) : The capacity of the storage.
  • +
  • num_envs (int) : The number of parallel environments.
  • +
  • batch_size (int) : Batch size of samples.
  • +
+

Returns

+

Vanilla replay storage.

+

Methods:

+

.reset

+

source +

.reset()
+

+
+

Reset the storage.

+

.add

+

source +

.add(
+   observations: th.Tensor, actions: th.Tensor, rewards: th.Tensor,
+   terminateds: th.Tensor, truncateds: th.Tensor, infos: Dict[str, Any],
+   next_observations: th.Tensor
+)
+

+
+

Add sampled transitions into storage.

+

Args

+
    +
  • observations (th.Tensor) : Observations.
  • +
  • actions (th.Tensor) : Actions.
  • +
  • rewards (th.Tensor) : Rewards.
  • +
  • terminateds (th.Tensor) : Termination flag.
  • +
  • truncateds (th.Tensor) : Truncation flag.
  • +
  • infos (Dict[str, Any]) : Additional information.
  • +
  • next_observations (th.Tensor) : Next observations.
  • +
+

Returns

+

None.

+

.sample

+

source +

.sample()
+

+
+

Sample from the storage.

+

.update

+

source +

.update(
+   *args
+)
+

+
+

Update the storage if necessary.
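A minimal usage sketch of the add-then-sample cycle is shown below. It is illustrative only: the import path follows the quick-start examples, Gymnasium spaces are assumed, and the (num_envs, ...) tensor layout passed to add() is an assumption rather than a documented contract.

```python
# Minimal sketch: fill the replay storage with random transitions, then draw a batch.
# Assumptions: Gymnasium spaces and (num_envs, ...) shaped tensors for `add()`.
import gymnasium as gym
import torch as th
from rllte.xploit.storage import VanillaReplayStorage

num_envs = 2
obs_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(9,))
act_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(2,))

storage = VanillaReplayStorage(
    observation_space=obs_space,
    action_space=act_space,
    device="cpu",
    storage_size=10000,
    batch_size=256,
    num_envs=num_envs,
)

for _ in range(500):                              # push 500 steps of fake experience
    storage.add(
        observations=th.rand(num_envs, 9),
        actions=th.rand(num_envs, 2),
        rewards=th.rand(num_envs, 1),             # assumed reward shape (num_envs, 1)
        terminateds=th.zeros(num_envs, 1),
        truncateds=th.zeros(num_envs, 1),
        infos={},
        next_observations=th.rand(num_envs, 9),
    )

batch = storage.sample()                          # batched samples for an off-policy update
```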

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xploit/storage/vanilla_rollout_storage/index.html b/api_docs/xploit/storage/vanilla_rollout_storage/index.html new file mode 100644 index 00000000..89724225 --- /dev/null +++ b/api_docs/xploit/storage/vanilla_rollout_storage/index.html @@ -0,0 +1,4288 @@ + + + + + + + + + + + + + + + + + + + + + + + + + VanillaRolloutStorage - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

VanillaRolloutStorage

+

source +

VanillaRolloutStorage(
+   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',
+   storage_size: int = 256, batch_size: int = 64, num_envs: int = 8,
+   discount: float = 0.999, gae_lambda: float = 0.95
+)
+

+
+

Vanilla rollout storage for on-policy algorithms.

+

Args

+
    +
  • observation_space (gym.Space) : The observation space of environment.
  • +
  • action_space (gym.Space) : The action space of environment.
  • +
  • device (str) : Device to convert the data.
  • +
  • storage_size (int) : The capacity of the storage. Here it refers to the length of each rollout.
  • +
  • batch_size (int) : Batch size of samples.
  • +
  • num_envs (int) : The number of parallel environments.
  • +
  • discount (float) : The discount factor.
  • +
  • gae_lambda (float) : Weighting coefficient for generalized advantage estimation (GAE).
  • +
+

Returns

+

Vanilla rollout storage.

+

Methods:

+

.reset

+

source +

.reset()
+

+
+

Reset the storage.

+

.add

+

source +

.add(
+   observations: th.Tensor, actions: th.Tensor, rewards: th.Tensor,
+   terminateds: th.Tensor, truncateds: th.Tensor, infos: Dict,
+   next_observations: th.Tensor, log_probs: th.Tensor, values: th.Tensor
+)
+

+
+

Add sampled transitions into storage.

+

Args

+
    +
  • observations (th.Tensor) : Observations.
  • +
  • actions (th.Tensor) : Actions.
  • +
  • rewards (th.Tensor) : Rewards.
  • +
  • terminateds (th.Tensor) : Termination signals.
  • +
  • truncateds (th.Tensor) : Truncation signals.
  • +
  • infos (Dict) : Extra information.
  • +
  • next_observations (th.Tensor) : Next observations.
  • +
  • log_probs (th.Tensor) : Log of the probability evaluated at actions.
  • +
  • values (th.Tensor) : Estimated values.
  • +
+

Returns

+

None.

+

.update

+

source +

.update()
+

+
+

Update the terminal state of each env.

+

.compute_returns_and_advantages

+

source +

.compute_returns_and_advantages(
+   last_values: th.Tensor
+)
+

+
+

Perform generalized advantage estimation (GAE).

+

Args

+
    +
  • last_values (th.Tensor) : Estimated values of the last step.
  • +
+

Returns

+

None.

+

.sample

+

source +

.sample()
+

+
+

Sample data from storage.
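The sketch below illustrates the intended on-policy cycle: add transitions for one rollout, run GAE via compute_returns_and_advantages(), iterate over sampled mini-batches, then update() before the next rollout. Tensor shapes and the iteration over sample() are assumptions; treat the snippet as a sketch rather than the canonical API.

```python
# Minimal on-policy sketch with fake data. Assumptions: Gymnasium spaces,
# (num_envs, ...) shaped tensors, and `sample()` yielding mini-batches.
import gymnasium as gym
import torch as th
from rllte.xploit.storage import VanillaRolloutStorage

n_steps, num_envs = 256, 8
obs_space = gym.spaces.Box(low=0.0, high=1.0, shape=(4, 84, 84))
act_space = gym.spaces.Discrete(6)

storage = VanillaRolloutStorage(
    observation_space=obs_space,
    action_space=act_space,
    device="cpu",
    storage_size=n_steps,
    batch_size=64,
    num_envs=num_envs,
    discount=0.999,
    gae_lambda=0.95,
)

for _ in range(n_steps):                          # one rollout of fake transitions
    storage.add(
        observations=th.rand(num_envs, 4, 84, 84),
        actions=th.randint(0, 6, (num_envs, 1)),
        rewards=th.rand(num_envs, 1),
        terminateds=th.zeros(num_envs, 1),
        truncateds=th.zeros(num_envs, 1),
        infos={},
        next_observations=th.rand(num_envs, 4, 84, 84),
        log_probs=th.zeros(num_envs, 1),
        values=th.rand(num_envs, 1),
    )

storage.compute_returns_and_advantages(last_values=th.rand(num_envs, 1))
for batch in storage.sample():                    # assumed to yield mini-batches
    pass                                          # run the policy/value update here
storage.update()                                  # prepare for the next rollout
```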

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/augmentation/gaussian_noise/index.html b/api_docs/xplore/augmentation/gaussian_noise/index.html new file mode 100644 index 00000000..4f994bed --- /dev/null +++ b/api_docs/xplore/augmentation/gaussian_noise/index.html @@ -0,0 +1,4158 @@ + + + + + + + + + + + + + + + + + + + + + + + + + GaussianNoise - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

GaussianNoise

+

source +

GaussianNoise(
+   mu: float = 0, sigma: float = 1.0
+)
+

+
+

Gaussian noise operation for processing state-based observations.

+

Args

+
    +
  • mu (float or th.Tensor) : Mean of the distribution.
  • +
  • sigma (float or th.Tensor) : Standard deviation of the distribution.
  • +
+

Returns

+

Augmented states.

+

Methods:

+

.forward

+

source +

.forward(
+   x: th.Tensor
+)
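A minimal sketch of applying the augmentation to a batch of state vectors is shown below; the import path rllte.xplore.augmentation is inferred from this page's location, and the module is assumed to be a torch nn.Module so that calling it dispatches to .forward().

```python
# Minimal sketch: add zero-mean Gaussian noise to a batch of state observations.
# Assumptions: import path `rllte.xplore.augmentation`; the augmentation is an nn.Module.
import torch as th
from rllte.xplore.augmentation import GaussianNoise

aug = GaussianNoise(mu=0.0, sigma=0.1)
states = th.rand(32, 17)            # (batch_size, state_dim)
noisy_states = aug(states)          # dispatches to .forward(x)
assert noisy_states.shape == states.shape
```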
+

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/augmentation/grayscale/index.html b/api_docs/xplore/augmentation/grayscale/index.html new file mode 100644 index 00000000..bf2dc4c3 --- /dev/null +++ b/api_docs/xplore/augmentation/grayscale/index.html @@ -0,0 +1,4149 @@ + + + + + + + + + + + + + + + + + + + + + + + + + GrayScale - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

GrayScale

+

source +


+

+
+

Grayscale operation for image augmentation.

+

Methods:

+

.forward

+

source +

.forward(
+   x: th.Tensor
+)
+

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/augmentation/identity/index.html b/api_docs/xplore/augmentation/identity/index.html new file mode 100644 index 00000000..7d856a93 --- /dev/null +++ b/api_docs/xplore/augmentation/identity/index.html @@ -0,0 +1,4149 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Identity - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

Identity

+

source +


+

+
+

Identity augmentation.

+

Methods:

+

.forward

+

source +

.forward(
+   x: th.Tensor
+)
+

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/augmentation/random_amplitude_scaling/index.html b/api_docs/xplore/augmentation/random_amplitude_scaling/index.html new file mode 100644 index 00000000..1ce69bb1 --- /dev/null +++ b/api_docs/xplore/augmentation/random_amplitude_scaling/index.html @@ -0,0 +1,4158 @@ + + + + + + + + + + + + + + + + + + + + + + + + + RandomAmplitudeScaling - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

RandomAmplitudeScaling

+

source +

RandomAmplitudeScaling(
+   low: float = 0.6, high: float = 1.2
+)
+

+
+

Random amplitude scaling operation for processing state-based observations.

+

Args

+
    +
  • low (float) : lower range (inclusive).
  • +
  • high (float) : upper range (exclusive).
  • +
+

Returns

+

Augmented states.

+

Methods:

+

.forward

+

source +

.forward(
+   x: th.Tensor
+)
+

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/augmentation/random_colorjitter/index.html b/api_docs/xplore/augmentation/random_colorjitter/index.html new file mode 100644 index 00000000..a6807feb --- /dev/null +++ b/api_docs/xplore/augmentation/random_colorjitter/index.html @@ -0,0 +1,4161 @@ + + + + + + + + + + + + + + + + + + + + + + + + + RandomColorJitter - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

RandomColorJitter

+

source +

RandomColorJitter(
+   brightness: float = 0.4, contrast: float = 0.4, saturation: float = 0.4,
+   hue: float = 0.5
+)
+

+
+

Random ColorJitter operation for image augmentation.

+

Args

+
    +
  • brightness (float) : How much to jitter brightness. Should be a non-negative number.
  • +
  • contrast (float) : How much to jitter contrast. Should be a non-negative number.
  • +
  • saturation (float) : How much to jitter saturation. Should be a non-negative number.
  • +
  • hue (float) : How much to jitter hue. Should satisfy 0 <= hue <= 0.5 or -0.5 <= min <= max <= 0.5.
  • +
+

Returns

+

Augmented images.

+

Methods:

+

.forward

+

source +

.forward(
+   x: th.Tensor
+)
+

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/augmentation/random_convolution/index.html b/api_docs/xplore/augmentation/random_convolution/index.html new file mode 100644 index 00000000..a96d7c44 --- /dev/null +++ b/api_docs/xplore/augmentation/random_convolution/index.html @@ -0,0 +1,4149 @@ + + + + + + + + + + + + + + + + + + + + + + + + + RandomConvolution - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

RandomConvolution

+

source +


+

+
+

Random convolution operation for image augmentation. Note that the input images should be normalized torch tensors.

+

Methods:

+

.forward

+

source +

.forward(
+   x: th.Tensor
+)
+

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/augmentation/random_crop/index.html b/api_docs/xplore/augmentation/random_crop/index.html new file mode 100644 index 00000000..4fd2f84a --- /dev/null +++ b/api_docs/xplore/augmentation/random_crop/index.html @@ -0,0 +1,4158 @@ + + + + + + + + + + + + + + + + + + + + + + + + + RandomCrop - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

RandomCrop

+

source +

RandomCrop(
+   pad: int = 4, out: int = 84
+)
+

+
+

Random crop operation for processing image-based observations.

+

Args

+
    +
  • pad (int) : Padding size.
  • +
  • out (int) : Desired output size.
  • +
+

Returns

+

Augmented images.

+

Methods:

+

.forward

+

source +

.forward(
+   x: th.Tensor
+)
+

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/augmentation/random_cutout/index.html b/api_docs/xplore/augmentation/random_cutout/index.html new file mode 100644 index 00000000..ff7d5500 --- /dev/null +++ b/api_docs/xplore/augmentation/random_cutout/index.html @@ -0,0 +1,4158 @@ + + + + + + + + + + + + + + + + + + + + + + + + + RandomCutout - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

RandomCutout

+

source +

RandomCutout(
+   min_cut: int = 10, max_cut: int = 30
+)
+

+
+

Random Cutout operation for image augmentation.

+

Args

+
    +
  • min_cut (int) : Min size of the cut shape.
  • +
  • max_cut (int) : Max size of the cut shape.
  • +
+

Returns

+

Augmented images.

+

Methods:

+

.forward

+

source +

.forward(
+   x: th.Tensor
+)
+

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/augmentation/random_cutoutcolor/index.html b/api_docs/xplore/augmentation/random_cutoutcolor/index.html new file mode 100644 index 00000000..bee16d26 --- /dev/null +++ b/api_docs/xplore/augmentation/random_cutoutcolor/index.html @@ -0,0 +1,4158 @@ + + + + + + + + + + + + + + + + + + + + + + + + + RandomCutoutColor - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

RandomCutoutColor

+

source +

RandomCutoutColor(
+   min_cut: int = 10, max_cut: int = 30
+)
+

+
+

Random cutout-color operation for image augmentation.

+

Args

+
    +
  • min_cut (int) : min size of the cut shape.
  • +
  • max_cut (int) : max size of the cut shape.
  • +
+

Returns

+

Augmented images.

+

Methods:

+

.forward

+

source +

.forward(
+   x: th.Tensor
+)
+

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/augmentation/random_flip/index.html b/api_docs/xplore/augmentation/random_flip/index.html new file mode 100644 index 00000000..265b748a --- /dev/null +++ b/api_docs/xplore/augmentation/random_flip/index.html @@ -0,0 +1,4157 @@ + + + + + + + + + + + + + + + + + + + + + + + + + RandomFlip - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

RandomFlip

+

source +

RandomFlip(
+   p: float = 0.2
+)
+

+
+

Random flip operation for image augmentation.

+

Args

+
    +
  • p (float) : The probability of flipping each image in a batch.
  • +
+

Returns

+

Augmented images.

+

Methods:

+

.forward

+

source +

.forward(
+   x: th.Tensor
+)
+

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/augmentation/random_rotate/index.html b/api_docs/xplore/augmentation/random_rotate/index.html new file mode 100644 index 00000000..8225eb3b --- /dev/null +++ b/api_docs/xplore/augmentation/random_rotate/index.html @@ -0,0 +1,4157 @@ + + + + + + + + + + + + + + + + + + + + + + + + + RandomRotate - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

RandomRotate

+

source +

RandomRotate(
+   p: float = 0.2
+)
+

+
+

Random rotate operation for processing image-based observations.

+

Args

+
    +
  • p (float) : The probability of rotating each image in a batch.
  • +
+

Returns

+

Randomly rotated images in a batch.

+

Methods:

+

.forward

+

source +

.forward(
+   x: th.Tensor
+)
+

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/augmentation/random_shift/index.html b/api_docs/xplore/augmentation/random_shift/index.html new file mode 100644 index 00000000..f2461217 --- /dev/null +++ b/api_docs/xplore/augmentation/random_shift/index.html @@ -0,0 +1,4157 @@ + + + + + + + + + + + + + + + + + + + + + + + + + RandomShift - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

RandomShift

+

source +

RandomShift(
+   pad: int = 4
+)
+

+
+

Random shift operation for processing image-based observations.

+

Args

+
    +
  • pad (int) : Padding size.
  • +
+

Returns

+

Augmented images.

+

Methods:

+

.forward

+

source +

.forward(
+   x: th.Tensor
+)
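A minimal sketch is shown below: pad-and-shift is the augmentation used by DrQ-v2-style pixel agents, and it preserves the input shape. The import path and the float image format are assumptions.

```python
# Minimal sketch: random pad-and-shift on a batch of image observations.
# Assumptions: import path `rllte.xplore.augmentation`; images are float tensors.
import torch as th
from rllte.xplore.augmentation import RandomShift

aug = RandomShift(pad=4)
images = th.rand(32, 9, 84, 84)     # (batch, stacked channels, height, width)
shifted = aug(images)               # per-image random shift, same output shape
assert shifted.shape == images.shape
```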
+

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/augmentation/random_translate/index.html b/api_docs/xplore/augmentation/random_translate/index.html new file mode 100644 index 00000000..196f501d --- /dev/null +++ b/api_docs/xplore/augmentation/random_translate/index.html @@ -0,0 +1,4158 @@ + + + + + + + + + + + + + + + + + + + + + + + + + RandomTranslate - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

RandomTranslate

+

source +

RandomTranslate(
+   size: int = 256, scale_factor: float = 0.75
+)
+

+
+

Random translate operation for processing image-based observations.

+

Args

+
    +
  • size (int) : The output size of the translated images.
  • +
  • scale_factor (float) : The scale factor applied within the translated images. Should satisfy 0.0 <= scale_factor <= 1.0.
  • +
+

Returns

+

Augmented images.

+

Methods:

+

.forward

+

source +

.forward(
+   x: th.Tensor
+)
+

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/distribution/bernoulli/index.html b/api_docs/xplore/distribution/bernoulli/index.html new file mode 100644 index 00000000..e9be61b8 --- /dev/null +++ b/api_docs/xplore/distribution/bernoulli/index.html @@ -0,0 +1,4310 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Bernoulli - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

Bernoulli

+

source +


+

+
+

Bernoulli distribution for sampling actions for 'MultiBinary' tasks.

+

Methods:

+

.probs

+

source +

.probs()
+

+
+

Returns the probabilities.

+

.logits

+

source +

.logits()
+

+
+

Returns the unnormalized log probabilities.

+

.sample

+

source +

.sample(
+   sample_shape: th.Size = th.Size()
+)
+

+
+

Generates a sample_shape shaped sample or sample_shape shaped batch of +samples if the distribution parameters are batched.

+

Args

+
    +
  • sample_shape (th.Size) : The size of the sample to be drawn.
  • +
+

Returns

+

A sample_shape shaped sample.

+

.log_prob

+

source +

.log_prob(
+   actions: th.Tensor
+)
+

+
+

Returns the log of the probability density/mass function evaluated at actions.

+

Args

+
    +
  • actions (th.Tensor) : The actions to be evaluated.
  • +
+

Returns

+

The log_prob value.

+

.entropy

+

source +

.entropy()
+

+
+

Returns the Shannon entropy of distribution.

+

.mode

+

source +

.mode()
+

+
+

Returns the mode of the distribution.

+

.mean

+

source +

.mean()
+

+
+

Returns the mean of the distribution.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/distribution/categorical/index.html b/api_docs/xplore/distribution/categorical/index.html new file mode 100644 index 00000000..cd107bc3 --- /dev/null +++ b/api_docs/xplore/distribution/categorical/index.html @@ -0,0 +1,4310 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Categorical - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

Categorical

+

source +


+

+
+

Categorical distribution for sampling actions for 'Discrete' tasks.

+

Methods:

+

.probs

+

source +

.probs()
+

+
+

Returns the probabilities.

+

.logits

+

source +

.logits()
+

+
+

Returns the unnormalized log probabilities.

+

.sample

+

source +

.sample(
+   sample_shape: th.Size = th.Size()
+)
+

+
+

Generates a sample_shape shaped sample or sample_shape shaped batch of +samples if the distribution parameters are batched.

+

Args

+
    +
  • sample_shape (th.Size) : The size of the sample to be drawn.
  • +
+

Returns

+

A sample_shape shaped sample.

+

.log_prob

+

source +

.log_prob(
+   actions: th.Tensor
+)
+

+
+

Returns the log of the probability density/mass function evaluated at actions.

+

Args

+
    +
  • actions (th.Tensor) : The actions to be evaluated.
  • +
+

Returns

+

The log_prob value.

+

.entropy

+

source +

.entropy()
+

+
+

Returns the Shannon entropy of distribution.

+

.mode

+

source +

.mode()
+

+
+

Returns the mode of the distribution.

+

.mean

+

source +

.mean()
+

+
+

Returns the mean of the distribution.
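A minimal sampling sketch is given below. The constructor signature is not listed on this page, so the snippet assumes it accepts unnormalized logits, mirroring torch.distributions.Categorical; consult the linked source for the exact interface.

```python
# Minimal sketch for a 'Discrete' action space. Assumption: the constructor takes
# unnormalized `logits`, as in torch.distributions.Categorical.
import torch as th
from rllte.xplore.distribution import Categorical

logits = th.randn(16, 6)             # (batch_size, num_actions) from a policy head
dist = Categorical(logits=logits)    # assumed keyword; see the source link above
actions = dist.sample()              # sampled action indices
log_probs = dist.log_prob(actions)   # per-action log-probabilities
entropy = dist.entropy()             # Shannon entropy, e.g. for an entropy bonus
greedy_actions = dist.mode()         # argmax actions for evaluation
```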

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/distribution/diagonal_gaussian/index.html b/api_docs/xplore/distribution/diagonal_gaussian/index.html new file mode 100644 index 00000000..10e73e14 --- /dev/null +++ b/api_docs/xplore/distribution/diagonal_gaussian/index.html @@ -0,0 +1,4343 @@ + + + + + + + + + + + + + + + + + + + + + + + + + DiagonalGaussian - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

DiagonalGaussian

+

source +


+

+
+

Diagonal Gaussian distribution for 'Box' tasks.

+

Methods:

+

.sample

+

source +

.sample(
+   sample_shape: th.Size = th.Size()
+)
+

+
+

Generates a sample_shape shaped sample or sample_shape shaped batch of +samples if the distribution parameters are batched.

+

Args

+
    +
  • sample_shape (th.Size) : The size of the sample to be drawn.
  • +
+

Returns

+

A sample_shape shaped sample.

+

.rsample

+

source +

.rsample(
+   sample_shape: th.Size = th.Size()
+)
+

+
+

Generates a sample_shape shaped reparameterized sample or sample_shape shaped batch of +reparameterized samples if the distribution parameters are batched.

+

Args

+
    +
  • sample_shape (th.Size) : The size of the sample to be drawn.
  • +
+

Returns

+

A sample_shape shaped sample.

+

.mean

+

source +

.mean()
+

+
+

Returns the mean of the distribution.

+

.mode

+

source +

.mode()
+

+
+

Returns the mode of the distribution.

+

.stddev

+

source +

.stddev()
+

+
+

Returns the standard deviation of the distribution.

+

.variance

+

source +

.variance()
+

+
+

Returns the variance of the distribution.

+

.log_prob

+

source +

.log_prob(
+   actions: th.Tensor
+)
+

+
+

Returns the log of the probability density/mass function evaluated at actions.

+

Args

+
    +
  • actions (th.Tensor) : The actions to be evaluated.
  • +
+

Returns

+

The log_prob value.

+

.entropy

+

source +

.entropy()
+

+
+

Returns the Shannon entropy of distribution.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/distribution/multi_categorical/index.html b/api_docs/xplore/distribution/multi_categorical/index.html new file mode 100644 index 00000000..690cd70e --- /dev/null +++ b/api_docs/xplore/distribution/multi_categorical/index.html @@ -0,0 +1,4310 @@ + + + + + + + + + + + + + + + + + + + + + + + + + MultiCategorical - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

MultiCategorical

+

source +


+

+
+

Multi-categorical distribution for sampling actions for 'MultiDiscrete' tasks.

+

Methods:

+

.probs

+

source +

.probs()
+

+
+

Returns the probabilities.

+

.logits

+

source +

.logits()
+

+
+

Returns the unnormalized log probabilities.

+

.sample

+

source +

.sample(
+   sample_shape: th.Size = th.Size()
+)
+

+
+

Generates a sample_shape shaped sample or sample_shape shaped batch of +samples if the distribution parameters are batched.

+

Args

+
    +
  • sample_shape (th.Size) : The size of the sample to be drawn.
  • +
+

Returns

+

A sample_shape shaped sample.

+

.log_prob

+

source +

.log_prob(
+   actions: th.Tensor
+)
+

+
+

Returns the log of the probability density/mass function evaluated at actions.

+

Args

+
    +
  • actions (th.Tensor) : The actions to be evaluated.
  • +
+

Returns

+

The log_prob value.

+

.entropy

+

source +

.entropy()
+

+
+

Returns the Shannon entropy of distribution.

+

.mode

+

source +

.mode()
+

+
+

Returns the mode of the distribution.

+

.mean

+

source +

.mean()
+

+
+

Returns the mean of the distribution.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/distribution/normal_noise/index.html b/api_docs/xplore/distribution/normal_noise/index.html new file mode 100644 index 00000000..b4d2dd6d --- /dev/null +++ b/api_docs/xplore/distribution/normal_noise/index.html @@ -0,0 +1,4220 @@ + + + + + + + + + + + + + + + + + + + + + + + + + NormalNoise - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

NormalNoise

+

source +

NormalNoise(
+   mu: Union[float, th.Tensor] = 0.0, sigma: Union[float, th.Tensor] = 1.0,
+   low: float = -1.0, high: float = 1.0, eps: float = 1e-06
+)
+

+
+

Gaussian action noise.

+

Args

+
    +
  • mu (Union[float, th.Tensor]) : Mean of the noise.
  • +
  • sigma (Union[float, th.Tensor]) : Standard deviation of the noise.
  • +
  • low (float) : The lower bound of the noise.
  • +
  • high (float) : The upper bound of the noise.
  • +
  • eps (float) : A small value to avoid numerical instability.
  • +
+

Returns

+

Gaussian action noise instance.

+

Methods:

+

.sample

+

source +

.sample(
+   clip: Optional[float] = None, sample_shape: th.Size = th.Size()
+)
+

+
+

Generates a sample_shape shaped sample or sample_shape shaped batch of +samples if the distribution parameters are batched.

+

Args

+
    +
  • clip (Optional[float]) : The clip range of the sampled noises.
  • +
  • sample_shape (th.Size) : The size of the sample to be drawn.
  • +
+

Returns

+

A sample_shape shaped sample.

+

.mean

+

source +

.mean()
+

+
+

Returns the mean of the distribution.

+

.mode

+

source +

.mode()
+

+
+

Returns the mode of the distribution.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/distribution/ornstein_uhlenbeck_noise/index.html b/api_docs/xplore/distribution/ornstein_uhlenbeck_noise/index.html new file mode 100644 index 00000000..15aed443 --- /dev/null +++ b/api_docs/xplore/distribution/ornstein_uhlenbeck_noise/index.html @@ -0,0 +1,4250 @@ + + + + + + + + + + + + + + + + + + + + + + + + + OrnsteinUhlenbeckNoise - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

OrnsteinUhlenbeckNoise

+

source +

OrnsteinUhlenbeckNoise(
+   mu: Union[float, th.Tensor] = 0.0, sigma: Union[float, th.Tensor] = 1.0,
+   low: float = -1.0, high: float = 1.0, eps: float = 1e-06, theta: float = 0.15,
+   dt: float = 0.01
+)
+

+
+

Ornstein Uhlenbeck action noise. +Based on http://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab

+

Args

+
    +
  • mu (Union[float, th.Tensor]) : Mean of the noise.
  • +
  • sigma (Union[float, th.Tensor]) : Standard deviation of the noise.
  • +
  • low (float) : The lower bound of the noise.
  • +
  • high (float) : The upper bound of the noise.
  • +
  • eps (float) : A small value to avoid numerical instability.
  • +
  • theta (float) : The rate of mean reversion.
  • +
  • dt (float) : Timestep for the noise.
  • +
+

Returns

+

Ornstein-Uhlenbeck noise instance.

+

Methods:

+

.sample

+

source +

.sample(
+   clip: Optional[float] = None, sample_shape: th.Size = th.Size()
+)
+

+
+

Generates a sample_shape shaped sample or sample_shape shaped batch of +samples if the distribution parameters are batched.

+

Args

+
    +
  • clip (Optional[float]) : The clip range of the sampled noises.
  • +
  • sample_shape (th.Size) : The size of the sample to be drawn.
  • +
+

Returns

+

A sample_shape shaped sample.

+

.reset

+

source +

.reset()
+

+
+

Reset the noise.

+

.mean

+

source +

.mean()
+

+
+

Returns the mean of the distribution.

+

.mode

+

source +

.mode()
+

+
+

Returns the mode of the distribution.
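A minimal sketch is shown below. Because the noise is temporally correlated through theta and dt, consecutive sample() calls are not independent and reset() starts a fresh trajectory. The import path and the call pattern (sampling a standalone noise tensor and adding it to a deterministic action) are assumptions.

```python
# Minimal sketch: temporally correlated exploration noise for continuous control.
# Assumptions: import path `rllte.xplore.distribution`; standalone-noise call pattern.
import torch as th
from rllte.xplore.distribution import OrnsteinUhlenbeckNoise

noise = OrnsteinUhlenbeckNoise(mu=0.0, sigma=0.2, theta=0.15, dt=0.01)
noise.reset()                                                # start a new noise trajectory
for _ in range(5):
    eps = noise.sample(clip=0.3, sample_shape=th.Size([6]))  # clipped, correlated sample
    # add `eps` to the deterministic action, then clamp to the action bounds
```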

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/distribution/squashed_normal/index.html b/api_docs/xplore/distribution/squashed_normal/index.html new file mode 100644 index 00000000..2c54c329 --- /dev/null +++ b/api_docs/xplore/distribution/squashed_normal/index.html @@ -0,0 +1,4272 @@ + + + + + + + + + + + + + + + + + + + + + + + + + SquashedNormal - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

SquashedNormal

+

source +


+

+
+

Squashed normal distribution for Box tasks.

+

Methods:

+

.sample

+

source +

.sample(
+   sample_shape: th.Size = th.Size()
+)
+

+
+

Generates a sample_shape shaped sample or sample_shape shaped +batch of samples if the distribution parameters are batched.

+

Args

+
    +
  • sample_shape (th.Size) : The size of the sample to be drawn.
  • +
+

Returns

+

A sample_shape shaped sample.

+

.rsample

+

source +

.rsample(
+   sample_shape: th.Size = th.Size()
+)
+

+
+

Generates a sample_shape shaped reparameterized sample or sample_shape shaped +batch of reparameterized samples if the distribution parameters are batched.

+

Args

+
    +
  • sample_shape (th.Size) : The size of the sample to be drawn.
  • +
+

Returns

+

A sample_shape shaped sample.

+

.mean

+

source +

.mean()
+

+
+

Return the transformed mean.

+

.mode

+

source +

.mode()
+

+
+

Returns the mode of the distribution.

+

.log_prob

+

source +

.log_prob(
+   actions: th.Tensor
+)
+

+
+

Scores the sample by inverting the transform(s) and computing the score using +the score of the base distribution and the log abs det jacobian.

+

Args

+
    +
  • actions (th.Tensor) : The actions to be evaluated.
  • +
+

Returns

+

The log_prob value.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/distribution/truncated_normal_noise/index.html b/api_docs/xplore/distribution/truncated_normal_noise/index.html new file mode 100644 index 00000000..b01fd1a8 --- /dev/null +++ b/api_docs/xplore/distribution/truncated_normal_noise/index.html @@ -0,0 +1,4224 @@ + + + + + + + + + + + + + + + + + + + + + + + + + TruncatedNormalNoise - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

TruncatedNormalNoise

+

source +

TruncatedNormalNoise(
+   mu: Union[float, th.Tensor] = 0.0, sigma: Union[float, th.Tensor] = 1.0,
+   low: float = -1.0, high: float = 1.0, eps: float = 1e-06,
+   stddev_schedule: str = 'linear(1.0, 0.1, 100000)'
+)
+

+
+

Truncated normal action noise. See Section 3.1 of +"Mastering Visual Continuous Control: Improved Data-Augmented Reinforcement Learning".

+

Args

+
    +
  • mu (Union[float, th.Tensor]) : Mean of the noise.
  • +
  • sigma (Union[float, th.Tensor]) : Standard deviation of the noise.
  • +
  • low (float) : The lower bound of the noise.
  • +
  • high (float) : The upper bound of the noise.
  • +
  • eps (float) : A small value to avoid numerical instability.
  • +
  • stddev_schedule (str) : Use the exploration std schedule, available options are: + linear(init, final, duration) and step_linear(init, final1, duration1, final2, duration2).
  • +
+

Returns

+

Truncated normal noise instance.

+

Methods:

+

.sample

+

source +

.sample(
+   clip: Optional[float] = None, sample_shape: th.Size = th.Size()
+)
+

+
+

Generates a sample_shape shaped sample or sample_shape shaped batch of +samples if the distribution parameters are batched.

+

Args

+
    +
  • clip (Optional[float]) : The clip range of the sampled noises.
  • +
  • sample_shape (th.Size) : The size of the sample to be drawn.
  • +
+

Returns

+

A sample_shape shaped sample.

+

.mean

+

source +

.mean()
+

+
+

Returns the mean of the distribution.

+

.mode

+

source +

.mode()
+

+
+

Returns the mode of the distribution.
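A minimal sketch follows, using the linear(init, final, duration) schedule format documented above; the import path and the standalone sampling pattern are assumptions, since in DrQ-v2-style agents the schedule is normally stepped by the training loop.

```python
# Minimal sketch: truncated-normal exploration noise with a decaying std schedule.
# Assumptions: import path `rllte.xplore.distribution`; standalone sampling pattern.
import torch as th
from rllte.xplore.distribution import TruncatedNormalNoise

noise = TruncatedNormalNoise(
    mu=0.0,
    sigma=1.0,
    stddev_schedule="linear(1.0, 0.1, 100000)",  # anneal std from 1.0 to 0.1 over 100k steps
)
eps = noise.sample(clip=0.3, sample_shape=th.Size([4, 6]))  # e.g. 4 envs x 6 action dims
```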

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/reward/girm/index.html b/api_docs/xplore/reward/girm/index.html new file mode 100644 index 00000000..e897f917 --- /dev/null +++ b/api_docs/xplore/reward/girm/index.html @@ -0,0 +1,4282 @@ + + + + + + + + + + + + + + + + + + + + + + + + + GIRM - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

GIRM

+

source +

GIRM(
+   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',
+   beta: float = 0.05, kappa: float = 2.5e-05, latent_dim: int = 128, lr: float = 0.001,
+   batch_size: int = 64, lambd: float = 0.5, lambd_recon: float = 1.0,
+   lambd_action: float = 1.0, kld_loss_beta: float = 1.0
+)
+

+
+

Intrinsic Reward Driven Imitation Learning via Generative Model (GIRM). +See paper: http://proceedings.mlr.press/v119/yu20d/yu20d.pdf

+

Args

+
    +
  • observation_space (Space) : The observation space of environment.
  • +
  • action_space (Space) : The action space of environment.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • beta (float) : The initial weighting coefficient of the intrinsic rewards.
  • +
  • kappa (float) : The decay rate.
  • +
  • latent_dim (int) : The dimension of encoding vectors.
  • +
  • lr (float) : The learning rate.
  • +
  • batch_size (int) : The batch size for update.
  • +
  • lambd (float) : The weighting coefficient for combining actions.
  • +
  • lambd_recon (float) : Weighting coefficient of the reconstruction loss.
  • +
  • lambd_action (float) : Weighting coefficient of the action loss.
  • +
  • kld_loss_beta (float) : Weighting coefficient of the divergence loss.
  • +
+

Returns

+

Instance of GIRM.

+

Methods:

+

.get_vae_loss

+

source +

.get_vae_loss(
+   recon_x: th.Tensor, x: th.Tensor, mean: th.Tensor, logvar: th.Tensor
+)
+

+
+

Compute the VAE loss.

+

Args

+
    +
  • recon_x (th.Tensor) : Reconstructed x.
  • +
  • x (th.Tensor) : Input x.
  • +
  • mean (th.Tensor) : Sample mean.
  • +
  • logvar (th.Tensor) : Log of the sample variance.
  • +
+

Returns

+

Loss values.

+

.compute_irs

+

source +

.compute_irs(
+   samples: Dict, step: int = 0
+)
+

+
+

Compute the intrinsic rewards for current samples.

+

Args

+
    +
  • samples (Dict) : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
  • step (int) : The global training step.
  • +
+

Returns

+

The intrinsic rewards.

+

.add

+

source +

.add(
+   samples: Dict
+)
+

+
+

Add new samples to the intrinsic reward module.

+

.update

+

source +

.update(
+   samples: Dict
+)
+

+
+

Update the intrinsic reward module if necessary.

+

Args

+
    +
  • samples : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
+

Returns

+

None

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/reward/icm/index.html b/api_docs/xplore/reward/icm/index.html new file mode 100644 index 00000000..c610794d --- /dev/null +++ b/api_docs/xplore/reward/icm/index.html @@ -0,0 +1,4242 @@ + + + + + + + + + + + + + + + + + + + + + + + + + ICM - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

ICM

+

source +

ICM(
+   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',
+   beta: float = 0.05, kappa: float = 2.5e-05, latent_dim: int = 128, lr: float = 0.001,
+   batch_size: int = 64
+)
+

+
+

Curiosity-Driven Exploration by Self-Supervised Prediction. +See paper: http://proceedings.mlr.press/v70/pathak17a/pathak17a.pdf

+

Args

+
    +
  • observation_space (Space) : The observation space of environment.
  • +
  • action_space (Space) : The action space of environment.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • beta (float) : The initial weighting coefficient of the intrinsic rewards.
  • +
  • kappa (float) : The decay rate.
  • +
  • latent_dim (int) : The dimension of encoding vectors.
  • +
  • lr (float) : The learning rate.
  • +
  • batch_size (int) : The batch size for update.
  • +
+

Returns

+

Instance of ICM.

+

Methods:

+

.compute_irs

+

source +

.compute_irs(
+   samples: Dict, step: int = 0
+)
+

+
+

Compute the intrinsic rewards for current samples.

+

Args

+
    +
  • samples (Dict) : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
  • step (int) : The global training step.
  • +
+

Returns

+

The intrinsic rewards.

+

.add

+

source +

.add(
+   samples: Dict
+)
+

+
+

Add new samples to the intrinsic reward module.

+

.update

+

source +

.update(
+   samples: Dict
+)
+

+
+

Update the intrinsic reward module if necessary.

+

Args

+
    +
  • samples : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
+

Returns

+

None
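The sketch below shows the expected call pattern: assemble the samples dict described above, query compute_irs() for intrinsic rewards, and let the training loop call update(). The import path rllte.xplore.reward and the exact dict keys (obs, actions, rewards, next_obs) are assumptions inferred from this page.

```python
# Minimal sketch: compute ICM intrinsic rewards for a batch of fake transitions.
# Assumptions: import path `rllte.xplore.reward`; dict keys obs/actions/rewards/next_obs;
# tensors shaped (n_steps, n_envs, *shape) as described above.
import gymnasium as gym
import torch as th
from rllte.xplore.reward import ICM

n_steps, n_envs = 32, 8
obs_space = gym.spaces.Box(low=0.0, high=1.0, shape=(4, 84, 84))
act_space = gym.spaces.Discrete(7)

icm = ICM(observation_space=obs_space, action_space=act_space, device="cpu")

samples = {
    "obs": th.rand(n_steps, n_envs, 4, 84, 84),
    "actions": th.randint(0, 7, (n_steps, n_envs, 1)),
    "rewards": th.rand(n_steps, n_envs),
    "next_obs": th.rand(n_steps, n_envs, 4, 84, 84),
}
intrinsic_rewards = icm.compute_irs(samples, step=0)  # expected shape: (n_steps, n_envs)
icm.update(samples)                                   # fit the forward/inverse models
```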

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/reward/ngu/index.html b/api_docs/xplore/reward/ngu/index.html new file mode 100644 index 00000000..a07eb437 --- /dev/null +++ b/api_docs/xplore/reward/ngu/index.html @@ -0,0 +1,4283 @@ + + + + + + + + + + + + + + + + + + + + + + + + + NGU - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

NGU

+

source +

NGU(
+   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',
+   beta: float = 0.05, kappa: float = 2.5e-05, latent_dim: int = 128, lr: float = 0.001,
+   batch_size: int = 64, capacity: int = 1000, k: int = 10,
+   kernel_cluster_distance: float = 0.008, kernel_epsilon: float = 0.0001,
+   c: float = 0.001, sm: float = 8.0, mrs: float = 5.0
+)
+

+
+

Never Give Up: Learning Directed Exploration Strategies (NGU). +See paper: https://arxiv.org/pdf/2002.06038

+

Args

+
    +
  • observation_space (Space) : The observation space of environment.
  • +
  • action_space (Space) : The action space of environment.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • beta (float) : The initial weighting coefficient of the intrinsic rewards.
  • +
  • kappa (float) : The decay rate.
  • +
  • latent_dim (int) : The dimension of encoding vectors.
  • +
  • lr (float) : The learning rate.
  • +
  • batch_size (int) : The batch size for update.
  • +
  • capacity (int) : The capacity of the episodic memory.
  • +
  • k (int) : Number of neighbors.
  • +
  • kernel_cluster_distance (float) : The kernel cluster distance.
  • +
  • kernel_epsilon (float) : The kernel constant.
  • +
  • c (float) : The pseudo-counts constant.
  • +
  • sm (float) : The kernel maximum similarity.
  • +
  • mrs (float) : The maximum reward scaling.
  • +
+

Returns

+

Instance of NGU.

+

Methods:

+

.pseudo_counts

+

source +

.pseudo_counts(
+   e: th.Tensor
+)
+

+
+

Pseudo counts.

+

Args

+
    +
  • e (th.Tensor) : Encoded observations.
  • +
+

Returns

+

Count values.

+

.compute_irs

+

source +

.compute_irs(
+   samples: Dict, step: int = 0
+)
+

+
+

Compute the intrinsic rewards for current samples.

+

Args

+
    +
  • samples (Dict) : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
  • step (int) : The global training step.
  • +
+

Returns

+

The intrinsic rewards.

+

.add

+

source +

.add(
+   samples: Dict
+)
+

+
+

Add new samples to the intrinsic reward module.

+

.update

+

source +

.update(
+   samples: Dict
+)
+

+
+

Update the intrinsic reward module if necessary.

+

Args

+
    +
  • samples : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
+

Returns

+

None

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/reward/pseudo_counts/index.html b/api_docs/xplore/reward/pseudo_counts/index.html new file mode 100644 index 00000000..54c785b2 --- /dev/null +++ b/api_docs/xplore/reward/pseudo_counts/index.html @@ -0,0 +1,4282 @@ + + + + + + + + + + + + + + + + + + + + + + + + + PseudoCounts - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

PseudoCounts

+

source +

PseudoCounts(
+   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',
+   beta: float = 0.05, kappa: float = 2.5e-05, latent_dim: int = 32, lr: float = 0.001,
+   batch_size: int = 64, capacity: int = 1000, k: int = 10,
+   kernel_cluster_distance: float = 0.008, kernel_epsilon: float = 0.0001,
+   c: float = 0.001, sm: float = 8.0
+)
+

+
+

Pseudo-counts based on "Never Give Up: Learning Directed Exploration Strategies (NGU)". +See paper: https://arxiv.org/pdf/2002.06038

+

Args

+
    +
  • observation_space (Space) : The observation space of environment.
  • +
  • action_space (Space) : The action space of environment.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • beta (float) : The initial weighting coefficient of the intrinsic rewards.
  • +
  • kappa (float) : The decay rate.
  • +
  • latent_dim (int) : The dimension of encoding vectors.
  • +
  • lr (float) : The learning rate.
  • +
  • batch_size (int) : The batch size for update.
  • +
  • capacity (int) : The capacity of the episodic memory.
  • +
  • k (int) : Number of neighbors.
  • +
  • kernel_cluster_distance (float) : The kernel cluster distance.
  • +
  • kernel_epsilon (float) : The kernel constant.
  • +
  • c (float) : The pseudo-counts constant.
  • +
  • sm (float) : The kernel maximum similarity.
  • +
+

Returns

+

Instance of PseudoCounts.

+

Methods:

+

.pseudo_counts

+

source +

.pseudo_counts(
+   e: th.Tensor
+)
+

+
+

Pseudo counts.

+

Args

+
    +
  • e (th.Tensor) : Encoded observations.
  • +
+

Returns

+

Count values.

+

.compute_irs

+

source +

.compute_irs(
+   samples: Dict, step: int = 0
+)
+

+
+

Compute the intrinsic rewards for current samples.

+

Args

+
    +
  • samples (Dict) : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
  • step (int) : The global training step.
  • +
+

Returns

+

The intrinsic rewards.

+

.add

+

source +

.add(
+   samples: Dict
+)
+

+
+

Add new samples to the intrinsic reward module.

+

.update

+

source +

.update(
+   samples: Dict
+)
+

+
+

Update the intrinsic reward module if necessary.

+

Args

+
    +
  • samples : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
+

Returns

+

None

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/reward/re3/index.html b/api_docs/xplore/reward/re3/index.html new file mode 100644 index 00000000..6de42638 --- /dev/null +++ b/api_docs/xplore/reward/re3/index.html @@ -0,0 +1,4254 @@ + + + + + + + + + + + + + + + + + + + + + + + + + RE3 - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

RE3

+

source +

RE3(
+   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',
+   beta: float = 0.05, kappa: float = 2.5e-05, latent_dim: int = 128,
+   storage_size: int = 10000, num_envs: int = 1, k: int = 5, average_entropy: bool = False
+)
+

+
+

State Entropy Maximization with Random Encoders for Efficient Exploration (RE3). +See paper: http://proceedings.mlr.press/v139/seo21a/seo21a.pdf

+

Args

+
    +
  • observation_space (Space) : The observation space of environment.
  • +
  • action_space (Space) : The action space of environment.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • beta (float) : The initial weighting coefficient of the intrinsic rewards.
  • +
  • kappa (float) : The decay rate.
  • +
  • latent_dim (int) : The dimension of encoding vectors.
  • +
  • storage_size (int) : The size of the storage for random embeddings.
  • +
  • num_envs (int) : The number of parallel environments.
  • +
  • k (int) : Use the k-th neighbors.
  • +
  • average_entropy (bool) : Use the average of entropy estimation.
  • +
+

Returns

+

Instance of RE3.

+

Methods:

+

.compute_irs

+

source +

.compute_irs(
+   samples: Dict, step: int = 0
+)
+

+
+

Compute the intrinsic rewards for current samples.

+

Args

+
    +
  • samples (Dict) : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
  • step (int) : The global training step.
  • +
+

Returns

+

The intrinsic rewards.

+

.update

+

source +

.update(
+   samples: Dict
+)
+

+
+

Update the intrinsic reward module if necessary.

+

Args

+
    +
  • samples : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
+

Returns

+

None

+

.add

+

source +

.add(
+   samples: Dict
+)
+

+
+

Calculate the random embeddings and insert them into the storage.

+

Args

+
    +
  • samples : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
+

Returns

+

None
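The sketch below highlights the RE3-specific step: add() must populate the storage of random embeddings before compute_irs() can estimate state entropy. The import path and dict keys are the same assumptions as in the other intrinsic-reward examples.

```python
# Minimal sketch: k-NN state-entropy intrinsic rewards with a fixed random encoder.
# Assumptions: import path `rllte.xplore.reward`; dict keys obs/actions/rewards/next_obs.
import gymnasium as gym
import torch as th
from rllte.xplore.reward import RE3

n_steps, n_envs = 32, 4
obs_space = gym.spaces.Box(low=0.0, high=1.0, shape=(4, 84, 84))
act_space = gym.spaces.Discrete(7)

re3 = RE3(observation_space=obs_space, action_space=act_space, device="cpu",
          storage_size=10000, num_envs=n_envs, k=5)

samples = {
    "obs": th.rand(n_steps, n_envs, 4, 84, 84),
    "actions": th.randint(0, 7, (n_steps, n_envs, 1)),
    "rewards": th.rand(n_steps, n_envs),
    "next_obs": th.rand(n_steps, n_envs, 4, 84, 84),
}
re3.add(samples)                              # store random embeddings of the observations
intrinsic_rewards = re3.compute_irs(samples)  # k-NN entropy estimate per transition
```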

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/reward/revd/index.html b/api_docs/xplore/reward/revd/index.html new file mode 100644 index 00000000..112bdf12 --- /dev/null +++ b/api_docs/xplore/reward/revd/index.html @@ -0,0 +1,4243 @@ + + + + + + + + + + + + + + + + + + + + + + + + + REVD - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

REVD

+

source +

REVD(
+   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',
+   beta: float = 0.05, kappa: float = 2.5e-05, latent_dim: int = 128, alpha: float = 0.5,
+   k: int = 5, average_divergence: bool = False
+)
+

+
+

Rewarding Episodic Visitation Discrepancy for Exploration in Reinforcement Learning (REVD). +See paper: https://openreview.net/pdf?id=V2pw1VYMrDo

+

Args

+
    +
  • observation_space (Space) : The observation space of environment.
  • +
  • action_space (Space) : The action space of environment.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • beta (float) : The initial weighting coefficient of the intrinsic rewards.
  • +
  • kappa (float) : The decay rate.
  • +
  • latent_dim (int) : The dimension of encoding vectors.
  • +
  • alpha (float) : The order of Rényi divergence.
  • +
  • k (int) : Use the k-th neighbors.
  • +
  • average_divergence (bool) : Use the average of divergence estimation.
  • +
+

Returns

+

Instance of REVD.

+

Methods:

+

.compute_irs

+

source +

.compute_irs(
+   samples: Dict, step: int = 0
+)
+

+
+

Compute the intrinsic rewards for current samples.

+

Args

+
    +
  • samples (Dict) : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
  • step (int) : The global training step.
  • +
+

Returns

+

The intrinsic rewards.

+

.add

+

source +

.add(
+   samples: Dict
+)
+

+
+

Add new samples to the intrinsic reward module.

+

.update

+

source +

.update(
+   samples: Dict
+)
+

+
+

Update the intrinsic reward module if necessary.

+

Args

+
    +
  • samples : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
+

Returns

+

None

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/reward/ride/index.html b/api_docs/xplore/reward/ride/index.html new file mode 100644 index 00000000..7d24bd5d --- /dev/null +++ b/api_docs/xplore/reward/ride/index.html @@ -0,0 +1,4282 @@ + + + + + + + + + + + + + + + + + + + + + + + + + RIDE - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

RIDE

+

source +

RIDE(
+   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',
+   beta: float = 0.05, kappa: float = 2.5e-05, latent_dim: int = 128, lr: float = 0.001,
+   batch_size: int = 64, capacity: int = 1000, k: int = 10,
+   kernel_cluster_distance: float = 0.008, kernel_epsilon: float = 0.0001,
+   c: float = 0.001, sm: float = 8.0
+)
+

+
+

RIDE: Rewarding Impact-Driven Exploration for Procedurally-Generated Environments. +See paper: https://arxiv.org/pdf/2002.12292

+

Args

+
    +
  • observation_space (Space) : The observation space of environment.
  • +
  • action_space (Space) : The action space of environment.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • beta (float) : The initial weighting coefficient of the intrinsic rewards.
  • +
  • kappa (float) : The decay rate.
  • +
  • latent_dim (int) : The dimension of encoding vectors.
  • +
  • lr (float) : The learning rate.
  • +
  • batch_size (int) : The batch size for update.
  • +
  • capacity (int) : The capacity of the episodic memory.
  • +
  • k (int) : Number of neighbors.
  • +
  • kernel_cluster_distance (float) : The kernel cluster distance.
  • +
  • kernel_epsilon (float) : The kernel constant.
  • +
  • c (float) : The pseudo-counts constant.
  • +
  • sm (float) : The kernel maximum similarity.
  • +
+

Returns

+

Instance of RIDE.

+

Methods:

+

.pseudo_counts

+

source +

.pseudo_counts(
+   e: th.Tensor
+)
+

+
+

Pseudo counts.

+

Args

+
    +
  • e (th.Tensor) : Encoded observations.
  • +
+

Returns

+

Count values.

+

.compute_irs

+

source +

.compute_irs(
+   samples: Dict, step: int = 0
+)
+

+
+

Compute the intrinsic rewards for current samples.

+

Args

+
    +
  • samples (Dict) : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
  • step (int) : The global training step.
  • +
+

Returns

+

The intrinsic rewards.

+

.update

+

source +

.update(
+   samples: Dict
+)
+

+
+

Update the intrinsic reward module if necessary.

+

Args

+
    +
  • samples : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
+

Returns

+

None

+

.add

+

source +

.add(
+   samples: Dict
+)
+

+
+

Add new samples to the intrinsic reward module.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/reward/rise/index.html b/api_docs/xplore/reward/rise/index.html new file mode 100644 index 00000000..ff210734 --- /dev/null +++ b/api_docs/xplore/reward/rise/index.html @@ -0,0 +1,4256 @@ + + + + + + + + + + + + + + + + + + + + + + + + + RISE - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

RISE

+

source +

RISE(
+   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',
+   beta: float = 0.05, kappa: float = 2.5e-05, latent_dim: int = 128,
+   storage_size: int = 10000, num_envs: int = 1, alpha: float = 0.5, k: int = 5,
+   average_entropy: bool = False
+)
+

+
+

Rényi State Entropy Maximization for Exploration Acceleration in Reinforcement Learning (RISE). +See paper: https://ieeexplore.ieee.org/abstract/document/9802917/

+

Args

+
    +
  • observation_space (Space) : The observation space of environment.
  • +
  • action_space (Space) : The action space of environment.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • beta (float) : The initial weighting coefficient of the intrinsic rewards.
  • +
  • kappa (float) : The decay rate.
  • +
  • latent_dim (int) : The dimension of encoding vectors.
  • +
  • storage_size (int) : The size of the storage for random embeddings.
  • +
  • num_envs (int) : The number of parallel environments.
  • +
  • alpha (float) : The order of Rényi entropy.
  • +
  • k (int) : Use the k-th neighbors.
  • +
  • average_entropy (bool) : Use the average of entropy estimation.
  • +
+

Returns

+

Instance of RISE.

+

Methods:

+

.compute_irs

+

source +

.compute_irs(
+   samples: Dict, step: int = 0
+)
+

+
+

Compute the intrinsic rewards for the current samples.

+

Args

+
    +
  • samples (Dict) : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
  • step (int) : The global training step.
  • +
+

Returns

+

The intrinsic rewards.

+

.update

+

source +

.update(
+   samples: Dict
+)
+

+
+

Update the intrinsic reward module if necessary.

+

Args

+
    +
  • samples : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
+

Returns

+

None

+

.add

+

source +

.add(
+   samples: Dict
+)
+

+
+

Calculate the random embeddings and insert them into the storage.

+

Args

+
    +
  • samples : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
+

Returns

+

None
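
A brief usage sketch follows. The import path and the Gymnasium spaces are assumptions; the constructor arguments mirror the signature documented above, and .add is called first here to populate the random-embedding storage before .compute_irs estimates the state entropy.

# A minimal sketch, not a verbatim example from the library.
# Assumptions: RISE is importable from rllte.xplore.reward and accepts Gymnasium spaces.
import gymnasium as gym
import torch as th
from rllte.xplore.reward import RISE  # assumed import path

n_steps, n_envs = 16, 4
obs_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(8,))
action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(2,))

rise = RISE(
    observation_space=obs_space,
    action_space=action_space,
    device="cpu",
    storage_size=10000,     # size of the storage for random embeddings
    num_envs=n_envs,
    alpha=0.5,              # order of the Rényi entropy
    k=5,                    # use the k-th nearest neighbor
    average_entropy=False,  # use the average of entropy estimation (see args above)
)

samples = {
    "obs": th.rand(n_steps, n_envs, *obs_space.shape),
    "actions": th.rand(n_steps, n_envs, *action_space.shape),
    "rewards": th.rand(n_steps, n_envs),
    "next_obs": th.rand(n_steps, n_envs, *obs_space.shape),
}

rise.add(samples)                              # store random embeddings of the observations
intrinsic_rewards = rise.compute_irs(samples)  # entropy-based intrinsic rewards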

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_docs/xplore/reward/rnd/index.html b/api_docs/xplore/reward/rnd/index.html new file mode 100644 index 00000000..c8a05b2b --- /dev/null +++ b/api_docs/xplore/reward/rnd/index.html @@ -0,0 +1,4242 @@ + + + + + + + + + + + + + + + + + + + + + + + + + RND - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

+

RND

+

source +

RND(
+   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',
+   beta: float = 0.05, kappa: float = 2.5e-05, latent_dim: int = 128, lr: float = 0.001,
+   batch_size: int = 64
+)
+

+
+

Exploration by Random Network Distillation (RND). +See paper: https://arxiv.org/pdf/1810.12894.pdf

+

Args

+
    +
  • observation_space (Space) : The observation space of environment.
  • +
  • action_space (Space) : The action space of environment.
  • +
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • +
  • beta (float) : The initial weighting coefficient of the intrinsic rewards.
  • +
  • kappa (float) : The decay rate.
  • +
  • latent_dim (int) : The dimension of encoding vectors.
  • +
  • lr (float) : The learning rate.
  • +
  • batch_size (int) : The batch size for update.
  • +
+

Returns

+

Instance of RND.

+

Methods:

+

.compute_irs

+

source +

.compute_irs(
+   samples: Dict, step: int = 0
+)
+

+
+

Compute the intrinsic rewards for the current samples.

+

Args

+
    +
  • samples (Dict) : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
  • step (int) : The global training step.
  • +
+

Returns

+

The intrinsic rewards.

+

.add

+

source +

.add(
+   samples: Dict
+)
+

+
+

Add new samples to the intrinsic reward module.

+

.update

+

source +

.update(
+   samples: Dict
+)
+

+
+

Update the intrinsic reward module if necessary.

+

Args

+
    +
  • samples : The collected samples. A python dict like + {obs (n_steps, n_envs, obs_shape) , + actions (n_steps, n_envs, action_shape) , + rewards (n_steps, n_envs) , + next_obs (n_steps, n_envs, *obs_shape) }.
  • +
+

Returns

+

None
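
The sketch below places RND in a typical reward-shaping loop. The import path, the Gymnasium spaces, and the shape handling of the returned rewards are assumptions; only the constructor arguments and the .compute_irs / .update methods documented above are taken as given.

# A minimal reward-shaping sketch, not a verbatim example from the library.
# Assumptions: RND is importable from rllte.xplore.reward and accepts Gymnasium spaces.
import gymnasium as gym
import torch as th
from rllte.xplore.reward import RND  # assumed import path

n_steps, n_envs = 16, 4
obs_space = gym.spaces.Box(low=0.0, high=1.0, shape=(4, 84, 84))
action_space = gym.spaces.Discrete(7)

rnd = RND(observation_space=obs_space, action_space=action_space, device="cpu",
          beta=0.05, kappa=2.5e-05, latent_dim=128, lr=1e-3, batch_size=64)

for global_step in range(3):  # stand-in for a rollout-collection loop
    samples = {
        "obs": th.rand(n_steps, n_envs, *obs_space.shape),
        "actions": th.randint(0, action_space.n, (n_steps, n_envs, 1)),
        "rewards": th.rand(n_steps, n_envs),
        "next_obs": th.rand(n_steps, n_envs, *obs_space.shape),
    }
    intrinsic = rnd.compute_irs(samples, step=global_step)
    # combine with the extrinsic rewards; the exact returned shape may need a reshape
    shaped_rewards = samples["rewards"] + intrinsic.reshape(n_steps, n_envs)
    rnd.update(samples)  # train the predictor network on the latest observations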

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/api_old/index.html b/api_old/index.html new file mode 100644 index 00000000..cebcc32c --- /dev/null +++ b/api_old/index.html @@ -0,0 +1,4560 @@ + + + + + + + + + + + + + + + + + + + + + Api old - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

Api old

+ +
+ +
+ +

Common: Auxiliary modules like trainer and logger.

+
    +
  • Engine: Engine for building Hsuanwu applications.
  • +
  • Logger: Logger for managing output information.
  • +
+

Xploit: Modules that focus on exploitation in RL.

+
    +
  • Agent: Agent for interacting and learning.
  • +
+ + + + + + + + + + + + + + + + + + + + + +
Type | Algorithm
On-Policy | A2C🖥️⛓️💰, PPO🖥️⛓️💰, DAAC🖥️⛓️💰, DrAC🖥️⛓️💰🔭, DrDAAC🖥️⛓️💰🔭
Off-Policy | DQN🖥️⛓️💰, DDPG🖥️⛓️💰, SAC🖥️⛓️💰, DrQ-v2🖥️⛓️💰🔭
Distributed | IMPALA⛓️
+
+
    +
  • 🖥️: Support Neural-network processing unit.
  • +
  • ⛓️: Multi Processing.
  • +
  • 💰: Support intrinsic reward shaping.
  • +
  • 🔭: Support observation augmentation.
  • +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModuleRecurrentBoxDiscreteMultiBinaryMulti ProcessingNPUPaperCitations
SAC✔️✔️Link5077⭐
DrQ✔️✔️Link433⭐
DDPG✔️✔️Link11819⭐
DrQ-v2✔️✔️Link100⭐
DAAC✔️✔️✔️✔️✔️Link56⭐
PPO✔️✔️✔️✔️✔️Link11155⭐
DrAC✔️✔️✔️✔️✔️Link29⭐
IMPALA✔️✔️✔️✔️✔️Link1219⭐
+
+

Tips of Agent

+
    +
  • 🐌: Developing.
  • +
  • NPU: Support Neural-network processing unit.
  • +
  • Recurrent: Support recurrent neural network.
  • +
  • Box: An N-dimensional box that contains every point in the action space.
  • +
  • Discrete: A list of possible actions, where each timestep only one of the actions can be used.
  • +
  • MultiBinary: A list of possible actions, where each timestep any of the actions can be used in any combination.
  • +
+
+
    +
  • Encoder: Neural network-based encoder for processing observations.
  • +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Module | Input | Reference | Target Task
EspeholtResidualEncoder | Images | IMPALA: Scalable Distributed Deep-RL with Importance Weighted Actor-Learner Architectures | Atari or Procgen games.
IdentityEncoder | States | N/A | DeepMind Control Suite: state
MnihCnnEncoder | Images | Playing Atari with Deep Reinforcement Learning | Atari games.
TassaCnnEncoder | Images | DeepMind Control Suite | DeepMind Control Suite: pixel
PathakCnnEncoder | Images | Curiosity-Driven Exploration by Self-Supervised Prediction | Atari or MiniGrid games
VanillaMlpEncoder | States | N/A | DeepMind Control Suite: state
+
+

Tips of Encoder

+
    +
  • Naming Rule: 'Surname of the first author' + 'Backbone' + 'Encoder'
  • +
  • Input: Input type.
  • +
  • Target Task: The tasks evaluated in the corresponding paper, or other potential tasks.
  • +
+
+
    +
  • Storage: Storage for storing collected experiences.
  • +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Module | Remark
VanillaRolloutStorage | On-Policy RL
VanillaReplayStorage | Off-Policy RL
NStepReplayStorage | Off-Policy RL
PrioritizedReplayStorage | Off-Policy RL
DistributedStorage | Distributed RL
+

Xplore: Modules that focus on exploration in RL.

+
    +
  • Augmentation: PyTorch.nn-like modules for observation augmentation.
  • +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Module | Input | Reference
GaussianNoise | States | Reinforcement Learning with Augmented Data
RandomAmplitudeScaling | States | Reinforcement Learning with Augmented Data
GrayScale | Images | Reinforcement Learning with Augmented Data
RandomColorJitter | Images | Reinforcement Learning with Augmented Data
RandomConvolution | Images | Reinforcement Learning with Augmented Data
RandomCrop | Images | Reinforcement Learning with Augmented Data
RandomCutout | Images | Reinforcement Learning with Augmented Data
RandomCutoutColor | Images | Reinforcement Learning with Augmented Data
RandomFlip | Images | Reinforcement Learning with Augmented Data
RandomRotate | Images | Reinforcement Learning with Augmented Data
RandomShift | Images | Mastering Visual Continuous Control: Improved Data-Augmented Reinforcement Learning
RandomTranslate | Images | Reinforcement Learning with Augmented Data
+
    +
  • Distribution: Distributions for sampling actions.
  • +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Module | Type | Reference
NormalNoise | Noise | torch.distributions
OrnsteinUhlenbeckNoise | Noise | Continuous Control with Deep Reinforcement Learning
TruncatedNormalNoise | Noise | Mastering Visual Continuous Control: Improved Data-Augmented Reinforcement Learning
Bernoulli | Distribution | torch.distributions
Categorical | Distribution | torch.distributions
DiagonalGaussian | Distribution | torch.distributions
SquashedNormal | Distribution | torch.distributions
+
+

Tips of Distribution

+
    +
  • In Hsuanwu, action noise is implemented as a Distribution to provide a unified interface.
  • +
+
+
    +
  • Reward: Intrinsic reward modules for enhancing exploration.
  • +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModuleRemarkRepr.VisualReference
PseudoCountsCount-Based exploration✔️✔️Never Give Up: Learning Directed Exploration Strategies
ICMCuriosity-driven exploration✔️✔️Curiosity-Driven Exploration by Self-Supervised Prediction
RNDCount-based exploration✔️Exploration by Random Network Distillation
GIRMCuriosity-driven exploration✔️✔️Intrinsic Reward Driven Imitation Learning via Generative Model
NGUMemory-based exploration✔️✔️Never Give Up: Learning Directed Exploration Strategies
RIDEProcedurally-generated environment✔️✔️RIDE: Rewarding Impact-Driven Exploration for Procedurally-Generated Environments
RE3Entropy Maximization✔️State Entropy Maximization with Random Encoders for Efficient Exploration
RISEEntropy Maximization✔️Rényi State Entropy Maximization for Exploration Acceleration in Reinforcement Learning
REVDDivergence Maximization✔️Rewarding Episodic Visitation Discrepancy for Exploration in Reinforcement Learning
+
+

Tips of Reward

+
    +
  • 🐌: Developing.
  • +
  • Repr.: The method involves representation learning.
  • +
  • Visual: The method works well in visual RL.
  • +
+
+

See Tutorials: Use intrinsic reward and observation augmentation for usage examples.

+

Evaluation: Reasonable and reliable metrics for algorithm evaluation.

+

See Tutorials: Evaluate your model.

+

Env: Packaged environments (e.g., Atari games) for fast invocation.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Module | Name | Remark | Reference
make_atari_env | Atari Games | Discrete control | The Arcade Learning Environment: An Evaluation Platform for General Agents
make_bullet_env | PyBullet Robotics Environments | Continuous control | Pybullet: A Python Module for Physics Simulation for Games, Robotics and Machine Learning
make_dmc_env | DeepMind Control Suite | Continuous control | DeepMind Control Suite
make_minigrid_env | MiniGrid Games | Discrete control | Minimalistic Gridworld Environment for Gymnasium
make_procgen_env | Procgen Games | Discrete control | Leveraging Procedural Generation to Benchmark Reinforcement Learning
make_robosuite_env | Robosuite Robotics Environments | Continuous control | Robosuite: A Modular Simulation Framework and Benchmark for Robot Learning
+

Pre-training: Methods of pre-training in RL.

+

See Tutorials: Pre-training in Hsuanwu.

+

Deployment: Methods of model deployment in RL.

+

See Tutorials: Deploy your model in inference devices.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/assets/images/ascendworkflow.png b/assets/images/ascendworkflow.png new file mode 100644 index 00000000..1a72235e Binary files /dev/null and b/assets/images/ascendworkflow.png differ diff --git a/assets/images/colab-logo.svg b/assets/images/colab-logo.svg new file mode 100644 index 00000000..9b6569bc --- /dev/null +++ b/assets/images/colab-logo.svg @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/assets/images/curves_example.png b/assets/images/curves_example.png new file mode 100644 index 00000000..68e531cf Binary files /dev/null and b/assets/images/curves_example.png differ diff --git a/assets/images/docs.gif b/assets/images/docs.gif new file mode 100644 index 00000000..0d58d8e6 Binary files /dev/null and b/assets/images/docs.gif differ diff --git a/assets/images/favicon.png b/assets/images/favicon.png new file mode 100644 index 00000000..1cf13b9f Binary files /dev/null and b/assets/images/favicon.png differ diff --git a/assets/images/github-logo.svg b/assets/images/github-logo.svg new file mode 100644 index 00000000..37fa923d --- /dev/null +++ b/assets/images/github-logo.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/assets/images/github_issues.png b/assets/images/github_issues.png new file mode 100644 index 00000000..79127437 Binary files /dev/null and b/assets/images/github_issues.png differ diff --git a/assets/images/icon.svg b/assets/images/icon.svg new file mode 100644 index 00000000..5d02d552 --- /dev/null +++ b/assets/images/icon.svg @@ -0,0 +1,57 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/assets/images/logo_horizontal.svg b/assets/images/logo_horizontal.svg new file mode 100644 index 00000000..2be5b0ad --- /dev/null +++ b/assets/images/logo_horizontal.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/assets/images/module_replacement1.png b/assets/images/module_replacement1.png new file mode 100644 index 00000000..f8ef84ef Binary files /dev/null and b/assets/images/module_replacement1.png differ diff --git a/assets/images/module_replacement2.png b/assets/images/module_replacement2.png new file mode 100644 index 00000000..fd09e3db Binary files /dev/null and b/assets/images/module_replacement2.png differ diff --git a/assets/images/module_replacement3.png b/assets/images/module_replacement3.png new file mode 100644 index 00000000..cac5ed9b Binary files /dev/null and b/assets/images/module_replacement3.png differ diff --git a/assets/images/plot_interval_estimates1.png b/assets/images/plot_interval_estimates1.png new file mode 100644 index 00000000..adcce85b Binary files /dev/null and b/assets/images/plot_interval_estimates1.png differ diff --git a/assets/images/plot_interval_estimates2.png b/assets/images/plot_interval_estimates2.png new file mode 100644 index 00000000..212fcc47 Binary files /dev/null and b/assets/images/plot_interval_estimates2.png differ diff --git a/assets/images/plot_interval_estimates3.png b/assets/images/plot_interval_estimates3.png new file mode 100644 index 00000000..8855cfc7 Binary files /dev/null and b/assets/images/plot_interval_estimates3.png differ diff --git a/assets/images/plot_performance_profile.png b/assets/images/plot_performance_profile.png new file mode 100644 index 00000000..4148abac Binary files /dev/null and b/assets/images/plot_performance_profile.png differ diff --git a/assets/images/plot_probability_improvement.png 
b/assets/images/plot_probability_improvement.png new file mode 100644 index 00000000..0ee410d4 Binary files /dev/null and b/assets/images/plot_probability_improvement.png differ diff --git a/assets/images/plot_sample_efficiency_curve.png b/assets/images/plot_sample_efficiency_curve.png new file mode 100644 index 00000000..e083d4cb Binary files /dev/null and b/assets/images/plot_sample_efficiency_curve.png differ diff --git a/assets/images/pre_training1.png b/assets/images/pre_training1.png new file mode 100644 index 00000000..07413324 Binary files /dev/null and b/assets/images/pre_training1.png differ diff --git a/assets/images/pre_training2.png b/assets/images/pre_training2.png new file mode 100644 index 00000000..2b6078ad Binary files /dev/null and b/assets/images/pre_training2.png differ diff --git a/assets/images/qq.jpg b/assets/images/qq.jpg new file mode 100644 index 00000000..8e9baaf6 Binary files /dev/null and b/assets/images/qq.jpg differ diff --git a/assets/images/rl_training_gpu.gif b/assets/images/rl_training_gpu.gif new file mode 100644 index 00000000..fd0f0bf0 Binary files /dev/null and b/assets/images/rl_training_gpu.gif differ diff --git a/assets/images/rllte-favicon.png b/assets/images/rllte-favicon.png new file mode 100644 index 00000000..385fe51a Binary files /dev/null and b/assets/images/rllte-favicon.png differ diff --git a/assets/images/rllte-logo.png b/assets/images/rllte-logo.png new file mode 100644 index 00000000..8fa4cc53 Binary files /dev/null and b/assets/images/rllte-logo.png differ diff --git a/assets/images/roadmap.svg b/assets/images/roadmap.svg new file mode 100644 index 00000000..549eaa58 --- /dev/null +++ b/assets/images/roadmap.svg @@ -0,0 +1 @@ +Proximal Policy Optimization(PPO)Data-Regularized Actor Critic(DrAC)Phasic Policy Gradient(PPG)Decoupled Advantage Actor-Critic(DAAC)Deep Deterministic Policy Gradient(DDPG)Soft Actor Critic(SAC)Data-Regularized Q(DrQ)Data-Regularized Q v2(DrQ-v2)On-Policy RLImportance Weighted Actor-Learner Architecture(IMPALA)......Off-Policy RLDistributed RL............ 
\ No newline at end of file diff --git a/assets/images/slack.png b/assets/images/slack.png new file mode 100644 index 00000000..a120ef20 Binary files /dev/null and b/assets/images/slack.png differ diff --git a/assets/images/structure.svg b/assets/images/structure.svg new file mode 100644 index 00000000..bc8281e2 --- /dev/null +++ b/assets/images/structure.svg @@ -0,0 +1 @@ +CoreApp.PrototypesCommonAuxiliaries...EncoderXploitPolicyStorageDistributionXploreAugmentationRewardToolEnv WrappersEnvGame APIs...PerformanceEvaluationComparisonVisualizationDatasetsHubModels...DeploymentPre-trainingAgentCopilot \ No newline at end of file diff --git a/assets/images/youtube.png b/assets/images/youtube.png new file mode 100644 index 00000000..1c9b7f3d Binary files /dev/null and b/assets/images/youtube.png differ diff --git a/assets/javascripts/bundle.5cfa9459.min.js b/assets/javascripts/bundle.5cfa9459.min.js new file mode 100644 index 00000000..6444d6a5 --- /dev/null +++ b/assets/javascripts/bundle.5cfa9459.min.js @@ -0,0 +1,29 @@ +"use strict";(()=>{var Fi=Object.create;var gr=Object.defineProperty;var ji=Object.getOwnPropertyDescriptor;var Wi=Object.getOwnPropertyNames,Dt=Object.getOwnPropertySymbols,Ui=Object.getPrototypeOf,xr=Object.prototype.hasOwnProperty,no=Object.prototype.propertyIsEnumerable;var oo=(e,t,r)=>t in e?gr(e,t,{enumerable:!0,configurable:!0,writable:!0,value:r}):e[t]=r,R=(e,t)=>{for(var r in t||(t={}))xr.call(t,r)&&oo(e,r,t[r]);if(Dt)for(var r of Dt(t))no.call(t,r)&&oo(e,r,t[r]);return e};var io=(e,t)=>{var r={};for(var o in e)xr.call(e,o)&&t.indexOf(o)<0&&(r[o]=e[o]);if(e!=null&&Dt)for(var o of Dt(e))t.indexOf(o)<0&&no.call(e,o)&&(r[o]=e[o]);return r};var yr=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports);var Di=(e,t,r,o)=>{if(t&&typeof t=="object"||typeof t=="function")for(let n of Wi(t))!xr.call(e,n)&&n!==r&&gr(e,n,{get:()=>t[n],enumerable:!(o=ji(t,n))||o.enumerable});return e};var Vt=(e,t,r)=>(r=e!=null?Fi(Ui(e)):{},Di(t||!e||!e.__esModule?gr(r,"default",{value:e,enumerable:!0}):r,e));var ao=(e,t,r)=>new Promise((o,n)=>{var i=p=>{try{s(r.next(p))}catch(c){n(c)}},a=p=>{try{s(r.throw(p))}catch(c){n(c)}},s=p=>p.done?o(p.value):Promise.resolve(p.value).then(i,a);s((r=r.apply(e,t)).next())});var co=yr((Er,so)=>{(function(e,t){typeof Er=="object"&&typeof so!="undefined"?t():typeof define=="function"&&define.amd?define(t):t()})(Er,function(){"use strict";function e(r){var o=!0,n=!1,i=null,a={text:!0,search:!0,url:!0,tel:!0,email:!0,password:!0,number:!0,date:!0,month:!0,week:!0,time:!0,datetime:!0,"datetime-local":!0};function s(H){return!!(H&&H!==document&&H.nodeName!=="HTML"&&H.nodeName!=="BODY"&&"classList"in H&&"contains"in H.classList)}function p(H){var mt=H.type,ze=H.tagName;return!!(ze==="INPUT"&&a[mt]&&!H.readOnly||ze==="TEXTAREA"&&!H.readOnly||H.isContentEditable)}function c(H){H.classList.contains("focus-visible")||(H.classList.add("focus-visible"),H.setAttribute("data-focus-visible-added",""))}function l(H){H.hasAttribute("data-focus-visible-added")&&(H.classList.remove("focus-visible"),H.removeAttribute("data-focus-visible-added"))}function f(H){H.metaKey||H.altKey||H.ctrlKey||(s(r.activeElement)&&c(r.activeElement),o=!0)}function u(H){o=!1}function h(H){s(H.target)&&(o||p(H.target))&&c(H.target)}function w(H){s(H.target)&&(H.target.classList.contains("focus-visible")||H.target.hasAttribute("data-focus-visible-added"))&&(n=!0,window.clearTimeout(i),i=window.setTimeout(function(){n=!1},100),l(H.target))}function 
A(H){document.visibilityState==="hidden"&&(n&&(o=!0),te())}function te(){document.addEventListener("mousemove",J),document.addEventListener("mousedown",J),document.addEventListener("mouseup",J),document.addEventListener("pointermove",J),document.addEventListener("pointerdown",J),document.addEventListener("pointerup",J),document.addEventListener("touchmove",J),document.addEventListener("touchstart",J),document.addEventListener("touchend",J)}function ie(){document.removeEventListener("mousemove",J),document.removeEventListener("mousedown",J),document.removeEventListener("mouseup",J),document.removeEventListener("pointermove",J),document.removeEventListener("pointerdown",J),document.removeEventListener("pointerup",J),document.removeEventListener("touchmove",J),document.removeEventListener("touchstart",J),document.removeEventListener("touchend",J)}function J(H){H.target.nodeName&&H.target.nodeName.toLowerCase()==="html"||(o=!1,ie())}document.addEventListener("keydown",f,!0),document.addEventListener("mousedown",u,!0),document.addEventListener("pointerdown",u,!0),document.addEventListener("touchstart",u,!0),document.addEventListener("visibilitychange",A,!0),te(),r.addEventListener("focus",h,!0),r.addEventListener("blur",w,!0),r.nodeType===Node.DOCUMENT_FRAGMENT_NODE&&r.host?r.host.setAttribute("data-js-focus-visible",""):r.nodeType===Node.DOCUMENT_NODE&&(document.documentElement.classList.add("js-focus-visible"),document.documentElement.setAttribute("data-js-focus-visible",""))}if(typeof window!="undefined"&&typeof document!="undefined"){window.applyFocusVisiblePolyfill=e;var t;try{t=new CustomEvent("focus-visible-polyfill-ready")}catch(r){t=document.createEvent("CustomEvent"),t.initCustomEvent("focus-visible-polyfill-ready",!1,!1,{})}window.dispatchEvent(t)}typeof document!="undefined"&&e(document)})});var Yr=yr((Rt,Kr)=>{/*! 
+ * clipboard.js v2.0.11 + * https://clipboardjs.com/ + * + * Licensed MIT © Zeno Rocha + */(function(t,r){typeof Rt=="object"&&typeof Kr=="object"?Kr.exports=r():typeof define=="function"&&define.amd?define([],r):typeof Rt=="object"?Rt.ClipboardJS=r():t.ClipboardJS=r()})(Rt,function(){return function(){var e={686:function(o,n,i){"use strict";i.d(n,{default:function(){return Ii}});var a=i(279),s=i.n(a),p=i(370),c=i.n(p),l=i(817),f=i.n(l);function u(V){try{return document.execCommand(V)}catch(_){return!1}}var h=function(_){var O=f()(_);return u("cut"),O},w=h;function A(V){var _=document.documentElement.getAttribute("dir")==="rtl",O=document.createElement("textarea");O.style.fontSize="12pt",O.style.border="0",O.style.padding="0",O.style.margin="0",O.style.position="absolute",O.style[_?"right":"left"]="-9999px";var j=window.pageYOffset||document.documentElement.scrollTop;return O.style.top="".concat(j,"px"),O.setAttribute("readonly",""),O.value=V,O}var te=function(_,O){var j=A(_);O.container.appendChild(j);var D=f()(j);return u("copy"),j.remove(),D},ie=function(_){var O=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body},j="";return typeof _=="string"?j=te(_,O):_ instanceof HTMLInputElement&&!["text","search","url","tel","password"].includes(_==null?void 0:_.type)?j=te(_.value,O):(j=f()(_),u("copy")),j},J=ie;function H(V){"@babel/helpers - typeof";return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?H=function(O){return typeof O}:H=function(O){return O&&typeof Symbol=="function"&&O.constructor===Symbol&&O!==Symbol.prototype?"symbol":typeof O},H(V)}var mt=function(){var _=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},O=_.action,j=O===void 0?"copy":O,D=_.container,Y=_.target,ke=_.text;if(j!=="copy"&&j!=="cut")throw new Error('Invalid "action" value, use either "copy" or "cut"');if(Y!==void 0)if(Y&&H(Y)==="object"&&Y.nodeType===1){if(j==="copy"&&Y.hasAttribute("disabled"))throw new Error('Invalid "target" attribute. Please use "readonly" instead of "disabled" attribute');if(j==="cut"&&(Y.hasAttribute("readonly")||Y.hasAttribute("disabled")))throw new Error(`Invalid "target" attribute. 
You can't cut text from elements with "readonly" or "disabled" attributes`)}else throw new Error('Invalid "target" value, use a valid Element');if(ke)return J(ke,{container:D});if(Y)return j==="cut"?w(Y):J(Y,{container:D})},ze=mt;function Ie(V){"@babel/helpers - typeof";return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?Ie=function(O){return typeof O}:Ie=function(O){return O&&typeof Symbol=="function"&&O.constructor===Symbol&&O!==Symbol.prototype?"symbol":typeof O},Ie(V)}function _i(V,_){if(!(V instanceof _))throw new TypeError("Cannot call a class as a function")}function ro(V,_){for(var O=0;O<_.length;O++){var j=_[O];j.enumerable=j.enumerable||!1,j.configurable=!0,"value"in j&&(j.writable=!0),Object.defineProperty(V,j.key,j)}}function Ai(V,_,O){return _&&ro(V.prototype,_),O&&ro(V,O),V}function Ci(V,_){if(typeof _!="function"&&_!==null)throw new TypeError("Super expression must either be null or a function");V.prototype=Object.create(_&&_.prototype,{constructor:{value:V,writable:!0,configurable:!0}}),_&&br(V,_)}function br(V,_){return br=Object.setPrototypeOf||function(j,D){return j.__proto__=D,j},br(V,_)}function Hi(V){var _=Pi();return function(){var j=Wt(V),D;if(_){var Y=Wt(this).constructor;D=Reflect.construct(j,arguments,Y)}else D=j.apply(this,arguments);return ki(this,D)}}function ki(V,_){return _&&(Ie(_)==="object"||typeof _=="function")?_:$i(V)}function $i(V){if(V===void 0)throw new ReferenceError("this hasn't been initialised - super() hasn't been called");return V}function Pi(){if(typeof Reflect=="undefined"||!Reflect.construct||Reflect.construct.sham)return!1;if(typeof Proxy=="function")return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(V){return!1}}function Wt(V){return Wt=Object.setPrototypeOf?Object.getPrototypeOf:function(O){return O.__proto__||Object.getPrototypeOf(O)},Wt(V)}function vr(V,_){var O="data-clipboard-".concat(V);if(_.hasAttribute(O))return _.getAttribute(O)}var Ri=function(V){Ci(O,V);var _=Hi(O);function O(j,D){var Y;return _i(this,O),Y=_.call(this),Y.resolveOptions(D),Y.listenClick(j),Y}return Ai(O,[{key:"resolveOptions",value:function(){var D=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{};this.action=typeof D.action=="function"?D.action:this.defaultAction,this.target=typeof D.target=="function"?D.target:this.defaultTarget,this.text=typeof D.text=="function"?D.text:this.defaultText,this.container=Ie(D.container)==="object"?D.container:document.body}},{key:"listenClick",value:function(D){var Y=this;this.listener=c()(D,"click",function(ke){return Y.onClick(ke)})}},{key:"onClick",value:function(D){var Y=D.delegateTarget||D.currentTarget,ke=this.action(Y)||"copy",Ut=ze({action:ke,container:this.container,target:this.target(Y),text:this.text(Y)});this.emit(Ut?"success":"error",{action:ke,text:Ut,trigger:Y,clearSelection:function(){Y&&Y.focus(),window.getSelection().removeAllRanges()}})}},{key:"defaultAction",value:function(D){return vr("action",D)}},{key:"defaultTarget",value:function(D){var Y=vr("target",D);if(Y)return document.querySelector(Y)}},{key:"defaultText",value:function(D){return vr("text",D)}},{key:"destroy",value:function(){this.listener.destroy()}}],[{key:"copy",value:function(D){var Y=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body};return J(D,Y)}},{key:"cut",value:function(D){return w(D)}},{key:"isSupported",value:function(){var D=arguments.length>0&&arguments[0]!==void 0?arguments[0]:["copy","cut"],Y=typeof 
D=="string"?[D]:D,ke=!!document.queryCommandSupported;return Y.forEach(function(Ut){ke=ke&&!!document.queryCommandSupported(Ut)}),ke}}]),O}(s()),Ii=Ri},828:function(o){var n=9;if(typeof Element!="undefined"&&!Element.prototype.matches){var i=Element.prototype;i.matches=i.matchesSelector||i.mozMatchesSelector||i.msMatchesSelector||i.oMatchesSelector||i.webkitMatchesSelector}function a(s,p){for(;s&&s.nodeType!==n;){if(typeof s.matches=="function"&&s.matches(p))return s;s=s.parentNode}}o.exports=a},438:function(o,n,i){var a=i(828);function s(l,f,u,h,w){var A=c.apply(this,arguments);return l.addEventListener(u,A,w),{destroy:function(){l.removeEventListener(u,A,w)}}}function p(l,f,u,h,w){return typeof l.addEventListener=="function"?s.apply(null,arguments):typeof u=="function"?s.bind(null,document).apply(null,arguments):(typeof l=="string"&&(l=document.querySelectorAll(l)),Array.prototype.map.call(l,function(A){return s(A,f,u,h,w)}))}function c(l,f,u,h){return function(w){w.delegateTarget=a(w.target,f),w.delegateTarget&&h.call(l,w)}}o.exports=p},879:function(o,n){n.node=function(i){return i!==void 0&&i instanceof HTMLElement&&i.nodeType===1},n.nodeList=function(i){var a=Object.prototype.toString.call(i);return i!==void 0&&(a==="[object NodeList]"||a==="[object HTMLCollection]")&&"length"in i&&(i.length===0||n.node(i[0]))},n.string=function(i){return typeof i=="string"||i instanceof String},n.fn=function(i){var a=Object.prototype.toString.call(i);return a==="[object Function]"}},370:function(o,n,i){var a=i(879),s=i(438);function p(u,h,w){if(!u&&!h&&!w)throw new Error("Missing required arguments");if(!a.string(h))throw new TypeError("Second argument must be a String");if(!a.fn(w))throw new TypeError("Third argument must be a Function");if(a.node(u))return c(u,h,w);if(a.nodeList(u))return l(u,h,w);if(a.string(u))return f(u,h,w);throw new TypeError("First argument must be a String, HTMLElement, HTMLCollection, or NodeList")}function c(u,h,w){return u.addEventListener(h,w),{destroy:function(){u.removeEventListener(h,w)}}}function l(u,h,w){return Array.prototype.forEach.call(u,function(A){A.addEventListener(h,w)}),{destroy:function(){Array.prototype.forEach.call(u,function(A){A.removeEventListener(h,w)})}}}function f(u,h,w){return s(document.body,u,h,w)}o.exports=p},817:function(o){function n(i){var a;if(i.nodeName==="SELECT")i.focus(),a=i.value;else if(i.nodeName==="INPUT"||i.nodeName==="TEXTAREA"){var s=i.hasAttribute("readonly");s||i.setAttribute("readonly",""),i.select(),i.setSelectionRange(0,i.value.length),s||i.removeAttribute("readonly"),a=i.value}else{i.hasAttribute("contenteditable")&&i.focus();var p=window.getSelection(),c=document.createRange();c.selectNodeContents(i),p.removeAllRanges(),p.addRange(c),a=p.toString()}return a}o.exports=n},279:function(o){function n(){}n.prototype={on:function(i,a,s){var p=this.e||(this.e={});return(p[i]||(p[i]=[])).push({fn:a,ctx:s}),this},once:function(i,a,s){var p=this;function c(){p.off(i,c),a.apply(s,arguments)}return c._=a,this.on(i,c,s)},emit:function(i){var a=[].slice.call(arguments,1),s=((this.e||(this.e={}))[i]||[]).slice(),p=0,c=s.length;for(p;p{"use strict";/*! 
+ * escape-html + * Copyright(c) 2012-2013 TJ Holowaychuk + * Copyright(c) 2015 Andreas Lubbe + * Copyright(c) 2015 Tiancheng "Timothy" Gu + * MIT Licensed + */var ts=/["'&<>]/;ei.exports=rs;function rs(e){var t=""+e,r=ts.exec(t);if(!r)return t;var o,n="",i=0,a=0;for(i=r.index;i0&&i[i.length-1])&&(c[0]===6||c[0]===2)){r=0;continue}if(c[0]===3&&(!i||c[1]>i[0]&&c[1]=e.length&&(e=void 0),{value:e&&e[o++],done:!e}}};throw new TypeError(t?"Object is not iterable.":"Symbol.iterator is not defined.")}function N(e,t){var r=typeof Symbol=="function"&&e[Symbol.iterator];if(!r)return e;var o=r.call(e),n,i=[],a;try{for(;(t===void 0||t-- >0)&&!(n=o.next()).done;)i.push(n.value)}catch(s){a={error:s}}finally{try{n&&!n.done&&(r=o.return)&&r.call(o)}finally{if(a)throw a.error}}return i}function q(e,t,r){if(r||arguments.length===2)for(var o=0,n=t.length,i;o1||s(u,h)})})}function s(u,h){try{p(o[u](h))}catch(w){f(i[0][3],w)}}function p(u){u.value instanceof nt?Promise.resolve(u.value.v).then(c,l):f(i[0][2],u)}function c(u){s("next",u)}function l(u){s("throw",u)}function f(u,h){u(h),i.shift(),i.length&&s(i[0][0],i[0][1])}}function mo(e){if(!Symbol.asyncIterator)throw new TypeError("Symbol.asyncIterator is not defined.");var t=e[Symbol.asyncIterator],r;return t?t.call(e):(e=typeof de=="function"?de(e):e[Symbol.iterator](),r={},o("next"),o("throw"),o("return"),r[Symbol.asyncIterator]=function(){return this},r);function o(i){r[i]=e[i]&&function(a){return new Promise(function(s,p){a=e[i](a),n(s,p,a.done,a.value)})}}function n(i,a,s,p){Promise.resolve(p).then(function(c){i({value:c,done:s})},a)}}function k(e){return typeof e=="function"}function ft(e){var t=function(o){Error.call(o),o.stack=new Error().stack},r=e(t);return r.prototype=Object.create(Error.prototype),r.prototype.constructor=r,r}var zt=ft(function(e){return function(r){e(this),this.message=r?r.length+` errors occurred during unsubscription: +`+r.map(function(o,n){return n+1+") "+o.toString()}).join(` + `):"",this.name="UnsubscriptionError",this.errors=r}});function qe(e,t){if(e){var r=e.indexOf(t);0<=r&&e.splice(r,1)}}var Fe=function(){function e(t){this.initialTeardown=t,this.closed=!1,this._parentage=null,this._finalizers=null}return e.prototype.unsubscribe=function(){var t,r,o,n,i;if(!this.closed){this.closed=!0;var a=this._parentage;if(a)if(this._parentage=null,Array.isArray(a))try{for(var s=de(a),p=s.next();!p.done;p=s.next()){var c=p.value;c.remove(this)}}catch(A){t={error:A}}finally{try{p&&!p.done&&(r=s.return)&&r.call(s)}finally{if(t)throw t.error}}else a.remove(this);var l=this.initialTeardown;if(k(l))try{l()}catch(A){i=A instanceof zt?A.errors:[A]}var f=this._finalizers;if(f){this._finalizers=null;try{for(var u=de(f),h=u.next();!h.done;h=u.next()){var w=h.value;try{fo(w)}catch(A){i=i!=null?i:[],A instanceof zt?i=q(q([],N(i)),N(A.errors)):i.push(A)}}}catch(A){o={error:A}}finally{try{h&&!h.done&&(n=u.return)&&n.call(u)}finally{if(o)throw o.error}}}if(i)throw new zt(i)}},e.prototype.add=function(t){var r;if(t&&t!==this)if(this.closed)fo(t);else{if(t instanceof e){if(t.closed||t._hasParent(this))return;t._addParent(this)}(this._finalizers=(r=this._finalizers)!==null&&r!==void 0?r:[]).push(t)}},e.prototype._hasParent=function(t){var r=this._parentage;return r===t||Array.isArray(r)&&r.includes(t)},e.prototype._addParent=function(t){var r=this._parentage;this._parentage=Array.isArray(r)?(r.push(t),r):r?[r,t]:t},e.prototype._removeParent=function(t){var 
r=this._parentage;r===t?this._parentage=null:Array.isArray(r)&&qe(r,t)},e.prototype.remove=function(t){var r=this._finalizers;r&&qe(r,t),t instanceof e&&t._removeParent(this)},e.EMPTY=function(){var t=new e;return t.closed=!0,t}(),e}();var Tr=Fe.EMPTY;function qt(e){return e instanceof Fe||e&&"closed"in e&&k(e.remove)&&k(e.add)&&k(e.unsubscribe)}function fo(e){k(e)?e():e.unsubscribe()}var $e={onUnhandledError:null,onStoppedNotification:null,Promise:void 0,useDeprecatedSynchronousErrorHandling:!1,useDeprecatedNextContext:!1};var ut={setTimeout:function(e,t){for(var r=[],o=2;o0},enumerable:!1,configurable:!0}),t.prototype._trySubscribe=function(r){return this._throwIfClosed(),e.prototype._trySubscribe.call(this,r)},t.prototype._subscribe=function(r){return this._throwIfClosed(),this._checkFinalizedStatuses(r),this._innerSubscribe(r)},t.prototype._innerSubscribe=function(r){var o=this,n=this,i=n.hasError,a=n.isStopped,s=n.observers;return i||a?Tr:(this.currentObservers=null,s.push(r),new Fe(function(){o.currentObservers=null,qe(s,r)}))},t.prototype._checkFinalizedStatuses=function(r){var o=this,n=o.hasError,i=o.thrownError,a=o.isStopped;n?r.error(i):a&&r.complete()},t.prototype.asObservable=function(){var r=new F;return r.source=this,r},t.create=function(r,o){return new Eo(r,o)},t}(F);var Eo=function(e){re(t,e);function t(r,o){var n=e.call(this)||this;return n.destination=r,n.source=o,n}return t.prototype.next=function(r){var o,n;(n=(o=this.destination)===null||o===void 0?void 0:o.next)===null||n===void 0||n.call(o,r)},t.prototype.error=function(r){var o,n;(n=(o=this.destination)===null||o===void 0?void 0:o.error)===null||n===void 0||n.call(o,r)},t.prototype.complete=function(){var r,o;(o=(r=this.destination)===null||r===void 0?void 0:r.complete)===null||o===void 0||o.call(r)},t.prototype._subscribe=function(r){var o,n;return(n=(o=this.source)===null||o===void 0?void 0:o.subscribe(r))!==null&&n!==void 0?n:Tr},t}(g);var _r=function(e){re(t,e);function t(r){var o=e.call(this)||this;return o._value=r,o}return Object.defineProperty(t.prototype,"value",{get:function(){return this.getValue()},enumerable:!1,configurable:!0}),t.prototype._subscribe=function(r){var o=e.prototype._subscribe.call(this,r);return!o.closed&&r.next(this._value),o},t.prototype.getValue=function(){var r=this,o=r.hasError,n=r.thrownError,i=r._value;if(o)throw n;return this._throwIfClosed(),i},t.prototype.next=function(r){e.prototype.next.call(this,this._value=r)},t}(g);var Lt={now:function(){return(Lt.delegate||Date).now()},delegate:void 0};var _t=function(e){re(t,e);function t(r,o,n){r===void 0&&(r=1/0),o===void 0&&(o=1/0),n===void 0&&(n=Lt);var i=e.call(this)||this;return i._bufferSize=r,i._windowTime=o,i._timestampProvider=n,i._buffer=[],i._infiniteTimeWindow=!0,i._infiniteTimeWindow=o===1/0,i._bufferSize=Math.max(1,r),i._windowTime=Math.max(1,o),i}return t.prototype.next=function(r){var o=this,n=o.isStopped,i=o._buffer,a=o._infiniteTimeWindow,s=o._timestampProvider,p=o._windowTime;n||(i.push(r),!a&&i.push(s.now()+p)),this._trimBuffer(),e.prototype.next.call(this,r)},t.prototype._subscribe=function(r){this._throwIfClosed(),this._trimBuffer();for(var o=this._innerSubscribe(r),n=this,i=n._infiniteTimeWindow,a=n._buffer,s=a.slice(),p=0;p0?e.prototype.schedule.call(this,r,o):(this.delay=o,this.state=r,this.scheduler.flush(this),this)},t.prototype.execute=function(r,o){return o>0||this.closed?e.prototype.execute.call(this,r,o):this._execute(r,o)},t.prototype.requestAsyncId=function(r,o,n){return n===void 
0&&(n=0),n!=null&&n>0||n==null&&this.delay>0?e.prototype.requestAsyncId.call(this,r,o,n):(r.flush(this),0)},t}(vt);var So=function(e){re(t,e);function t(){return e!==null&&e.apply(this,arguments)||this}return t}(gt);var Hr=new So(To);var Oo=function(e){re(t,e);function t(r,o){var n=e.call(this,r,o)||this;return n.scheduler=r,n.work=o,n}return t.prototype.requestAsyncId=function(r,o,n){return n===void 0&&(n=0),n!==null&&n>0?e.prototype.requestAsyncId.call(this,r,o,n):(r.actions.push(this),r._scheduled||(r._scheduled=bt.requestAnimationFrame(function(){return r.flush(void 0)})))},t.prototype.recycleAsyncId=function(r,o,n){var i;if(n===void 0&&(n=0),n!=null?n>0:this.delay>0)return e.prototype.recycleAsyncId.call(this,r,o,n);var a=r.actions;o!=null&&((i=a[a.length-1])===null||i===void 0?void 0:i.id)!==o&&(bt.cancelAnimationFrame(o),r._scheduled=void 0)},t}(vt);var Mo=function(e){re(t,e);function t(){return e!==null&&e.apply(this,arguments)||this}return t.prototype.flush=function(r){this._active=!0;var o=this._scheduled;this._scheduled=void 0;var n=this.actions,i;r=r||n.shift();do if(i=r.execute(r.state,r.delay))break;while((r=n[0])&&r.id===o&&n.shift());if(this._active=!1,i){for(;(r=n[0])&&r.id===o&&n.shift();)r.unsubscribe();throw i}},t}(gt);var me=new Mo(Oo);var M=new F(function(e){return e.complete()});function Yt(e){return e&&k(e.schedule)}function kr(e){return e[e.length-1]}function Xe(e){return k(kr(e))?e.pop():void 0}function He(e){return Yt(kr(e))?e.pop():void 0}function Bt(e,t){return typeof kr(e)=="number"?e.pop():t}var xt=function(e){return e&&typeof e.length=="number"&&typeof e!="function"};function Gt(e){return k(e==null?void 0:e.then)}function Jt(e){return k(e[ht])}function Xt(e){return Symbol.asyncIterator&&k(e==null?void 0:e[Symbol.asyncIterator])}function Zt(e){return new TypeError("You provided "+(e!==null&&typeof e=="object"?"an invalid object":"'"+e+"'")+" where a stream was expected. 
You can provide an Observable, Promise, ReadableStream, Array, AsyncIterable, or Iterable.")}function Gi(){return typeof Symbol!="function"||!Symbol.iterator?"@@iterator":Symbol.iterator}var er=Gi();function tr(e){return k(e==null?void 0:e[er])}function rr(e){return lo(this,arguments,function(){var r,o,n,i;return Nt(this,function(a){switch(a.label){case 0:r=e.getReader(),a.label=1;case 1:a.trys.push([1,,9,10]),a.label=2;case 2:return[4,nt(r.read())];case 3:return o=a.sent(),n=o.value,i=o.done,i?[4,nt(void 0)]:[3,5];case 4:return[2,a.sent()];case 5:return[4,nt(n)];case 6:return[4,a.sent()];case 7:return a.sent(),[3,2];case 8:return[3,10];case 9:return r.releaseLock(),[7];case 10:return[2]}})})}function or(e){return k(e==null?void 0:e.getReader)}function W(e){if(e instanceof F)return e;if(e!=null){if(Jt(e))return Ji(e);if(xt(e))return Xi(e);if(Gt(e))return Zi(e);if(Xt(e))return Lo(e);if(tr(e))return ea(e);if(or(e))return ta(e)}throw Zt(e)}function Ji(e){return new F(function(t){var r=e[ht]();if(k(r.subscribe))return r.subscribe(t);throw new TypeError("Provided object does not correctly implement Symbol.observable")})}function Xi(e){return new F(function(t){for(var r=0;r=2;return function(o){return o.pipe(e?b(function(n,i){return e(n,i,o)}):le,we(1),r?Be(t):zo(function(){return new ir}))}}function Fr(e){return e<=0?function(){return M}:x(function(t,r){var o=[];t.subscribe(T(r,function(n){o.push(n),e=2,!0))}function pe(e){e===void 0&&(e={});var t=e.connector,r=t===void 0?function(){return new g}:t,o=e.resetOnError,n=o===void 0?!0:o,i=e.resetOnComplete,a=i===void 0?!0:i,s=e.resetOnRefCountZero,p=s===void 0?!0:s;return function(c){var l,f,u,h=0,w=!1,A=!1,te=function(){f==null||f.unsubscribe(),f=void 0},ie=function(){te(),l=u=void 0,w=A=!1},J=function(){var H=l;ie(),H==null||H.unsubscribe()};return x(function(H,mt){h++,!A&&!w&&te();var ze=u=u!=null?u:r();mt.add(function(){h--,h===0&&!A&&!w&&(f=Wr(J,p))}),ze.subscribe(mt),!l&&h>0&&(l=new at({next:function(Ie){return ze.next(Ie)},error:function(Ie){A=!0,te(),f=Wr(ie,n,Ie),ze.error(Ie)},complete:function(){w=!0,te(),f=Wr(ie,a),ze.complete()}}),W(H).subscribe(l))})(c)}}function Wr(e,t){for(var r=[],o=2;oe.next(document)),e}function $(e,t=document){return Array.from(t.querySelectorAll(e))}function P(e,t=document){let r=fe(e,t);if(typeof r=="undefined")throw new ReferenceError(`Missing element: expected "${e}" to be present`);return r}function fe(e,t=document){return t.querySelector(e)||void 0}function Re(){var e,t,r,o;return(o=(r=(t=(e=document.activeElement)==null?void 0:e.shadowRoot)==null?void 0:t.activeElement)!=null?r:document.activeElement)!=null?o:void 0}var xa=S(d(document.body,"focusin"),d(document.body,"focusout")).pipe(_e(1),Q(void 0),m(()=>Re()||document.body),B(1));function et(e){return xa.pipe(m(t=>e.contains(t)),K())}function kt(e,t){return C(()=>S(d(e,"mouseenter").pipe(m(()=>!0)),d(e,"mouseleave").pipe(m(()=>!1))).pipe(t?Ht(r=>Me(+!r*t)):le,Q(e.matches(":hover"))))}function Bo(e,t){if(typeof t=="string"||typeof t=="number")e.innerHTML+=t.toString();else if(t instanceof Node)e.appendChild(t);else if(Array.isArray(t))for(let r of t)Bo(e,r)}function E(e,t,...r){let o=document.createElement(e);if(t)for(let n of Object.keys(t))typeof t[n]!="undefined"&&(typeof t[n]!="boolean"?o.setAttribute(n,t[n]):o.setAttribute(n,""));for(let n of r)Bo(o,n);return o}function sr(e){if(e>999){let t=+((e-950)%1e3>99);return`${((e+1e-6)/1e3).toFixed(t)}k`}else return e.toString()}function wt(e){let t=E("script",{src:e});return 
C(()=>(document.head.appendChild(t),S(d(t,"load"),d(t,"error").pipe(v(()=>$r(()=>new ReferenceError(`Invalid script: ${e}`))))).pipe(m(()=>{}),L(()=>document.head.removeChild(t)),we(1))))}var Go=new g,ya=C(()=>typeof ResizeObserver=="undefined"?wt("https://unpkg.com/resize-observer-polyfill"):I(void 0)).pipe(m(()=>new ResizeObserver(e=>e.forEach(t=>Go.next(t)))),v(e=>S(Ke,I(e)).pipe(L(()=>e.disconnect()))),B(1));function ce(e){return{width:e.offsetWidth,height:e.offsetHeight}}function ge(e){let t=e;for(;t.clientWidth===0&&t.parentElement;)t=t.parentElement;return ya.pipe(y(r=>r.observe(t)),v(r=>Go.pipe(b(o=>o.target===t),L(()=>r.unobserve(t)))),m(()=>ce(e)),Q(ce(e)))}function Tt(e){return{width:e.scrollWidth,height:e.scrollHeight}}function cr(e){let t=e.parentElement;for(;t&&(e.scrollWidth<=t.scrollWidth&&e.scrollHeight<=t.scrollHeight);)t=(e=t).parentElement;return t?e:void 0}function Jo(e){let t=[],r=e.parentElement;for(;r;)(e.clientWidth>r.clientWidth||e.clientHeight>r.clientHeight)&&t.push(r),r=(e=r).parentElement;return t.length===0&&t.push(document.documentElement),t}function Ue(e){return{x:e.offsetLeft,y:e.offsetTop}}function Xo(e){let t=e.getBoundingClientRect();return{x:t.x+window.scrollX,y:t.y+window.scrollY}}function Zo(e){return S(d(window,"load"),d(window,"resize")).pipe(Le(0,me),m(()=>Ue(e)),Q(Ue(e)))}function pr(e){return{x:e.scrollLeft,y:e.scrollTop}}function De(e){return S(d(e,"scroll"),d(window,"scroll"),d(window,"resize")).pipe(Le(0,me),m(()=>pr(e)),Q(pr(e)))}var en=new g,Ea=C(()=>I(new IntersectionObserver(e=>{for(let t of e)en.next(t)},{threshold:0}))).pipe(v(e=>S(Ke,I(e)).pipe(L(()=>e.disconnect()))),B(1));function tt(e){return Ea.pipe(y(t=>t.observe(e)),v(t=>en.pipe(b(({target:r})=>r===e),L(()=>t.unobserve(e)),m(({isIntersecting:r})=>r))))}function tn(e,t=16){return De(e).pipe(m(({y:r})=>{let o=ce(e),n=Tt(e);return r>=n.height-o.height-t}),K())}var lr={drawer:P("[data-md-toggle=drawer]"),search:P("[data-md-toggle=search]")};function rn(e){return lr[e].checked}function Je(e,t){lr[e].checked!==t&&lr[e].click()}function Ve(e){let t=lr[e];return d(t,"change").pipe(m(()=>t.checked),Q(t.checked))}function wa(e,t){switch(e.constructor){case HTMLInputElement:return e.type==="radio"?/^Arrow/.test(t):!0;case HTMLSelectElement:case HTMLTextAreaElement:return!0;default:return e.isContentEditable}}function Ta(){return S(d(window,"compositionstart").pipe(m(()=>!0)),d(window,"compositionend").pipe(m(()=>!1))).pipe(Q(!1))}function on(){let e=d(window,"keydown").pipe(b(t=>!(t.metaKey||t.ctrlKey)),m(t=>({mode:rn("search")?"search":"global",type:t.key,claim(){t.preventDefault(),t.stopPropagation()}})),b(({mode:t,type:r})=>{if(t==="global"){let o=Re();if(typeof o!="undefined")return!wa(o,r)}return!0}),pe());return Ta().pipe(v(t=>t?M:e))}function xe(){return new URL(location.href)}function pt(e,t=!1){if(G("navigation.instant")&&!t){let r=E("a",{href:e.href});document.body.appendChild(r),r.click(),r.remove()}else location.href=e.href}function nn(){return new g}function an(){return location.hash.slice(1)}function sn(e){let t=E("a",{href:e});t.addEventListener("click",r=>r.stopPropagation()),t.click()}function Sa(e){return S(d(window,"hashchange"),e).pipe(m(an),Q(an()),b(t=>t.length>0),B(1))}function cn(e){return Sa(e).pipe(m(t=>fe(`[id="${t}"]`)),b(t=>typeof t!="undefined"))}function $t(e){let t=matchMedia(e);return ar(r=>t.addListener(()=>r(t.matches))).pipe(Q(t.matches))}function pn(){let e=matchMedia("print");return 
S(d(window,"beforeprint").pipe(m(()=>!0)),d(window,"afterprint").pipe(m(()=>!1))).pipe(Q(e.matches))}function Nr(e,t){return e.pipe(v(r=>r?t():M))}function zr(e,t){return new F(r=>{let o=new XMLHttpRequest;return o.open("GET",`${e}`),o.responseType="blob",o.addEventListener("load",()=>{o.status>=200&&o.status<300?(r.next(o.response),r.complete()):r.error(new Error(o.statusText))}),o.addEventListener("error",()=>{r.error(new Error("Network error"))}),o.addEventListener("abort",()=>{r.complete()}),typeof(t==null?void 0:t.progress$)!="undefined"&&(o.addEventListener("progress",n=>{var i;if(n.lengthComputable)t.progress$.next(n.loaded/n.total*100);else{let a=(i=o.getResponseHeader("Content-Length"))!=null?i:0;t.progress$.next(n.loaded/+a*100)}}),t.progress$.next(5)),o.send(),()=>o.abort()})}function Ne(e,t){return zr(e,t).pipe(v(r=>r.text()),m(r=>JSON.parse(r)),B(1))}function ln(e,t){let r=new DOMParser;return zr(e,t).pipe(v(o=>o.text()),m(o=>r.parseFromString(o,"text/html")),B(1))}function mn(e,t){let r=new DOMParser;return zr(e,t).pipe(v(o=>o.text()),m(o=>r.parseFromString(o,"text/xml")),B(1))}function fn(){return{x:Math.max(0,scrollX),y:Math.max(0,scrollY)}}function un(){return S(d(window,"scroll",{passive:!0}),d(window,"resize",{passive:!0})).pipe(m(fn),Q(fn()))}function dn(){return{width:innerWidth,height:innerHeight}}function hn(){return d(window,"resize",{passive:!0}).pipe(m(dn),Q(dn()))}function bn(){return z([un(),hn()]).pipe(m(([e,t])=>({offset:e,size:t})),B(1))}function mr(e,{viewport$:t,header$:r}){let o=t.pipe(Z("size")),n=z([o,r]).pipe(m(()=>Ue(e)));return z([r,t,n]).pipe(m(([{height:i},{offset:a,size:s},{x:p,y:c}])=>({offset:{x:a.x-p,y:a.y-c+i},size:s})))}function Oa(e){return d(e,"message",t=>t.data)}function Ma(e){let t=new g;return t.subscribe(r=>e.postMessage(r)),t}function vn(e,t=new Worker(e)){let r=Oa(t),o=Ma(t),n=new g;n.subscribe(o);let i=o.pipe(X(),ne(!0));return n.pipe(X(),Pe(r.pipe(U(i))),pe())}var La=P("#__config"),St=JSON.parse(La.textContent);St.base=`${new URL(St.base,xe())}`;function Te(){return St}function G(e){return St.features.includes(e)}function ye(e,t){return typeof t!="undefined"?St.translations[e].replace("#",t.toString()):St.translations[e]}function Se(e,t=document){return P(`[data-md-component=${e}]`,t)}function ae(e,t=document){return $(`[data-md-component=${e}]`,t)}function _a(e){let t=P(".md-typeset > :first-child",e);return d(t,"click",{once:!0}).pipe(m(()=>P(".md-typeset",e)),m(r=>({hash:__md_hash(r.innerHTML)})))}function gn(e){if(!G("announce.dismiss")||!e.childElementCount)return M;if(!e.hidden){let t=P(".md-typeset",e);__md_hash(t.innerHTML)===__md_get("__announce")&&(e.hidden=!0)}return C(()=>{let t=new g;return t.subscribe(({hash:r})=>{e.hidden=!0,__md_set("__announce",r)}),_a(e).pipe(y(r=>t.next(r)),L(()=>t.complete()),m(r=>R({ref:e},r)))})}function Aa(e,{target$:t}){return t.pipe(m(r=>({hidden:r!==e})))}function xn(e,t){let r=new g;return r.subscribe(({hidden:o})=>{e.hidden=o}),Aa(e,t).pipe(y(o=>r.next(o)),L(()=>r.complete()),m(o=>R({ref:e},o)))}function Pt(e,t){return t==="inline"?E("div",{class:"md-tooltip md-tooltip--inline",id:e,role:"tooltip"},E("div",{class:"md-tooltip__inner md-typeset"})):E("div",{class:"md-tooltip",id:e,role:"tooltip"},E("div",{class:"md-tooltip__inner md-typeset"}))}function yn(...e){return E("div",{class:"md-tooltip2",role:"tooltip"},E("div",{class:"md-tooltip2__inner md-typeset"},e))}function En(e,t){if(t=t?`${t}_annotation_${e}`:void 0,t){let r=t?`#${t}`:void 0;return 
E("aside",{class:"md-annotation",tabIndex:0},Pt(t),E("a",{href:r,class:"md-annotation__index",tabIndex:-1},E("span",{"data-md-annotation-id":e})))}else return E("aside",{class:"md-annotation",tabIndex:0},Pt(t),E("span",{class:"md-annotation__index",tabIndex:-1},E("span",{"data-md-annotation-id":e})))}function wn(e){return E("button",{class:"md-clipboard md-icon",title:ye("clipboard.copy"),"data-clipboard-target":`#${e} > code`})}function qr(e,t){let r=t&2,o=t&1,n=Object.keys(e.terms).filter(p=>!e.terms[p]).reduce((p,c)=>[...p,E("del",null,c)," "],[]).slice(0,-1),i=Te(),a=new URL(e.location,i.base);G("search.highlight")&&a.searchParams.set("h",Object.entries(e.terms).filter(([,p])=>p).reduce((p,[c])=>`${p} ${c}`.trim(),""));let{tags:s}=Te();return E("a",{href:`${a}`,class:"md-search-result__link",tabIndex:-1},E("article",{class:"md-search-result__article md-typeset","data-md-score":e.score.toFixed(2)},r>0&&E("div",{class:"md-search-result__icon md-icon"}),r>0&&E("h1",null,e.title),r<=0&&E("h2",null,e.title),o>0&&e.text.length>0&&e.text,e.tags&&e.tags.map(p=>{let c=s?p in s?`md-tag-icon md-tag--${s[p]}`:"md-tag-icon":"";return E("span",{class:`md-tag ${c}`},p)}),o>0&&n.length>0&&E("p",{class:"md-search-result__terms"},ye("search.result.term.missing"),": ",...n)))}function Tn(e){let t=e[0].score,r=[...e],o=Te(),n=r.findIndex(l=>!`${new URL(l.location,o.base)}`.includes("#")),[i]=r.splice(n,1),a=r.findIndex(l=>l.scoreqr(l,1)),...p.length?[E("details",{class:"md-search-result__more"},E("summary",{tabIndex:-1},E("div",null,p.length>0&&p.length===1?ye("search.result.more.one"):ye("search.result.more.other",p.length))),...p.map(l=>qr(l,1)))]:[]];return E("li",{class:"md-search-result__item"},c)}function Sn(e){return E("ul",{class:"md-source__facts"},Object.entries(e).map(([t,r])=>E("li",{class:`md-source__fact md-source__fact--${t}`},typeof r=="number"?sr(r):r)))}function Qr(e){let t=`tabbed-control tabbed-control--${e}`;return E("div",{class:t,hidden:!0},E("button",{class:"tabbed-button",tabIndex:-1,"aria-hidden":"true"}))}function On(e){return E("div",{class:"md-typeset__scrollwrap"},E("div",{class:"md-typeset__table"},e))}function Ca(e){let t=Te(),r=new URL(`../${e.version}/`,t.base);return E("li",{class:"md-version__item"},E("a",{href:`${r}`,class:"md-version__link"},e.title))}function Mn(e,t){return e=e.filter(r=>{var o;return!((o=r.properties)!=null&&o.hidden)}),E("div",{class:"md-version"},E("button",{class:"md-version__current","aria-label":ye("select.version")},t.title),E("ul",{class:"md-version__list"},e.map(Ca)))}var Ha=0;function ka(e){let t=z([et(e),kt(e)]).pipe(m(([o,n])=>o||n),K()),r=C(()=>Jo(e)).pipe(oe(De),ct(1),m(()=>Xo(e)));return t.pipe(Ae(o=>o),v(()=>z([t,r])),m(([o,n])=>({active:o,offset:n})),pe())}function $a(e,t){let{content$:r,viewport$:o}=t,n=`__tooltip2_${Ha++}`;return C(()=>{let i=new g,a=new _r(!1);i.pipe(X(),ne(!1)).subscribe(a);let s=a.pipe(Ht(c=>Me(+!c*250,Hr)),K(),v(c=>c?r:M),y(c=>c.id=n),pe());z([i.pipe(m(({active:c})=>c)),s.pipe(v(c=>kt(c,250)),Q(!1))]).pipe(m(c=>c.some(l=>l))).subscribe(a);let p=a.pipe(b(c=>c),ee(s,o),m(([c,l,{size:f}])=>{let u=e.getBoundingClientRect(),h=u.width/2;if(l.role==="tooltip")return{x:h,y:8+u.height};if(u.y>=f.height/2){let{height:w}=ce(l);return{x:h,y:-16-w}}else return{x:h,y:16+u.height}}));return 
z([s,i,p]).subscribe(([c,{offset:l},f])=>{c.style.setProperty("--md-tooltip-host-x",`${l.x}px`),c.style.setProperty("--md-tooltip-host-y",`${l.y}px`),c.style.setProperty("--md-tooltip-x",`${f.x}px`),c.style.setProperty("--md-tooltip-y",`${f.y}px`),c.classList.toggle("md-tooltip2--top",f.y<0),c.classList.toggle("md-tooltip2--bottom",f.y>=0)}),a.pipe(b(c=>c),ee(s,(c,l)=>l),b(c=>c.role==="tooltip")).subscribe(c=>{let l=ce(P(":scope > *",c));c.style.setProperty("--md-tooltip-width",`${l.width}px`),c.style.setProperty("--md-tooltip-tail","0px")}),a.pipe(K(),be(me),ee(s)).subscribe(([c,l])=>{l.classList.toggle("md-tooltip2--active",c)}),z([a.pipe(b(c=>c)),s]).subscribe(([c,l])=>{l.role==="dialog"?(e.setAttribute("aria-controls",n),e.setAttribute("aria-haspopup","dialog")):e.setAttribute("aria-describedby",n)}),a.pipe(b(c=>!c)).subscribe(()=>{e.removeAttribute("aria-controls"),e.removeAttribute("aria-describedby"),e.removeAttribute("aria-haspopup")}),ka(e).pipe(y(c=>i.next(c)),L(()=>i.complete()),m(c=>R({ref:e},c)))})}function lt(e,{viewport$:t},r=document.body){return $a(e,{content$:new F(o=>{let n=e.title,i=yn(n);return o.next(i),e.removeAttribute("title"),r.append(i),()=>{i.remove(),e.setAttribute("title",n)}}),viewport$:t})}function Pa(e,t){let r=C(()=>z([Zo(e),De(t)])).pipe(m(([{x:o,y:n},i])=>{let{width:a,height:s}=ce(e);return{x:o-i.x+a/2,y:n-i.y+s/2}}));return et(e).pipe(v(o=>r.pipe(m(n=>({active:o,offset:n})),we(+!o||1/0))))}function Ln(e,t,{target$:r}){let[o,n]=Array.from(e.children);return C(()=>{let i=new g,a=i.pipe(X(),ne(!0));return i.subscribe({next({offset:s}){e.style.setProperty("--md-tooltip-x",`${s.x}px`),e.style.setProperty("--md-tooltip-y",`${s.y}px`)},complete(){e.style.removeProperty("--md-tooltip-x"),e.style.removeProperty("--md-tooltip-y")}}),tt(e).pipe(U(a)).subscribe(s=>{e.toggleAttribute("data-md-visible",s)}),S(i.pipe(b(({active:s})=>s)),i.pipe(_e(250),b(({active:s})=>!s))).subscribe({next({active:s}){s?e.prepend(o):o.remove()},complete(){e.prepend(o)}}),i.pipe(Le(16,me)).subscribe(({active:s})=>{o.classList.toggle("md-tooltip--active",s)}),i.pipe(ct(125,me),b(()=>!!e.offsetParent),m(()=>e.offsetParent.getBoundingClientRect()),m(({x:s})=>s)).subscribe({next(s){s?e.style.setProperty("--md-tooltip-0",`${-s}px`):e.style.removeProperty("--md-tooltip-0")},complete(){e.style.removeProperty("--md-tooltip-0")}}),d(n,"click").pipe(U(a),b(s=>!(s.metaKey||s.ctrlKey))).subscribe(s=>{s.stopPropagation(),s.preventDefault()}),d(n,"mousedown").pipe(U(a),ee(i)).subscribe(([s,{active:p}])=>{var c;if(s.button!==0||s.metaKey||s.ctrlKey)s.preventDefault();else if(p){s.preventDefault();let l=e.parentElement.closest(".md-annotation");l instanceof HTMLElement?l.focus():(c=Re())==null||c.blur()}}),r.pipe(U(a),b(s=>s===o),Ge(125)).subscribe(()=>e.focus()),Pa(e,t).pipe(y(s=>i.next(s)),L(()=>i.complete()),m(s=>R({ref:e},s)))})}function Ra(e){return e.tagName==="CODE"?$(".c, .c1, .cm",e):[e]}function Ia(e){let t=[];for(let r of Ra(e)){let o=[],n=document.createNodeIterator(r,NodeFilter.SHOW_TEXT);for(let i=n.nextNode();i;i=n.nextNode())o.push(i);for(let i of o){let a;for(;a=/(\(\d+\))(!)?/.exec(i.textContent);){let[,s,p]=a;if(typeof p=="undefined"){let c=i.splitText(a.index);i=c.splitText(s.length),t.push(c)}else{i.textContent=s,t.push(i);break}}}}return t}function _n(e,t){t.append(...Array.from(e.childNodes))}function fr(e,t,{target$:r,print$:o}){let n=t.closest("[id]"),i=n==null?void 0:n.id,a=new Map;for(let s of Ia(t)){let[,p]=s.textContent.match(/\((\d+)\)/);fe(`:scope > 
li:nth-child(${p})`,e)&&(a.set(p,En(p,i)),s.replaceWith(a.get(p)))}return a.size===0?M:C(()=>{let s=new g,p=s.pipe(X(),ne(!0)),c=[];for(let[l,f]of a)c.push([P(".md-typeset",f),P(`:scope > li:nth-child(${l})`,e)]);return o.pipe(U(p)).subscribe(l=>{e.hidden=!l,e.classList.toggle("md-annotation-list",l);for(let[f,u]of c)l?_n(f,u):_n(u,f)}),S(...[...a].map(([,l])=>Ln(l,t,{target$:r}))).pipe(L(()=>s.complete()),pe())})}function An(e){if(e.nextElementSibling){let t=e.nextElementSibling;if(t.tagName==="OL")return t;if(t.tagName==="P"&&!t.children.length)return An(t)}}function Cn(e,t){return C(()=>{let r=An(e);return typeof r!="undefined"?fr(r,e,t):M})}var Hn=Vt(Yr());var Fa=0;function kn(e){if(e.nextElementSibling){let t=e.nextElementSibling;if(t.tagName==="OL")return t;if(t.tagName==="P"&&!t.children.length)return kn(t)}}function ja(e){return ge(e).pipe(m(({width:t})=>({scrollable:Tt(e).width>t})),Z("scrollable"))}function $n(e,t){let{matches:r}=matchMedia("(hover)"),o=C(()=>{let n=new g,i=n.pipe(Fr(1));n.subscribe(({scrollable:c})=>{c&&r?e.setAttribute("tabindex","0"):e.removeAttribute("tabindex")});let a=[];if(Hn.default.isSupported()&&(e.closest(".copy")||G("content.code.copy")&&!e.closest(".no-copy"))){let c=e.closest("pre");c.id=`__code_${Fa++}`;let l=wn(c.id);c.insertBefore(l,e),G("content.tooltips")&&a.push(lt(l,{viewport$}))}let s=e.closest(".highlight");if(s instanceof HTMLElement){let c=kn(s);if(typeof c!="undefined"&&(s.classList.contains("annotate")||G("content.code.annotate"))){let l=fr(c,e,t);a.push(ge(s).pipe(U(i),m(({width:f,height:u})=>f&&u),K(),v(f=>f?l:M)))}}return $(":scope > span[id]",e).length&&e.classList.add("md-code__content"),ja(e).pipe(y(c=>n.next(c)),L(()=>n.complete()),m(c=>R({ref:e},c)),Pe(...a))});return G("content.lazy")?tt(e).pipe(b(n=>n),we(1),v(()=>o)):o}function Wa(e,{target$:t,print$:r}){let o=!0;return S(t.pipe(m(n=>n.closest("details:not([open])")),b(n=>e===n),m(()=>({action:"open",reveal:!0}))),r.pipe(b(n=>n||!o),y(()=>o=e.open),m(n=>({action:n?"open":"close"}))))}function Pn(e,t){return C(()=>{let r=new g;return r.subscribe(({action:o,reveal:n})=>{e.toggleAttribute("open",o==="open"),n&&e.scrollIntoView()}),Wa(e,t).pipe(y(o=>r.next(o)),L(()=>r.complete()),m(o=>R({ref:e},o)))})}var Rn=".node circle,.node ellipse,.node path,.node polygon,.node rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}marker{fill:var(--md-mermaid-edge-color)!important}.edgeLabel .label rect{fill:#0000}.label{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.label foreignObject{line-height:normal;overflow:visible}.label div .edgeLabel{color:var(--md-mermaid-label-fg-color)}.edgeLabel,.edgeLabel rect,.label div .edgeLabel{background-color:var(--md-mermaid-label-bg-color)}.edgeLabel,.edgeLabel rect{fill:var(--md-mermaid-label-bg-color);color:var(--md-mermaid-edge-color)}.edgePath .path,.flowchart-link{stroke:var(--md-mermaid-edge-color);stroke-width:.05rem}.edgePath .arrowheadPath{fill:var(--md-mermaid-edge-color);stroke:none}.cluster rect{fill:var(--md-default-fg-color--lightest);stroke:var(--md-default-fg-color--lighter)}.cluster span{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}g #flowchart-circleEnd,g #flowchart-circleStart,g #flowchart-crossEnd,g #flowchart-crossStart,g #flowchart-pointEnd,g #flowchart-pointStart{stroke:none}g.classGroup line,g.classGroup rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}g.classGroup 
text{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.classLabel .box{fill:var(--md-mermaid-label-bg-color);background-color:var(--md-mermaid-label-bg-color);opacity:1}.classLabel .label{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.node .divider{stroke:var(--md-mermaid-node-fg-color)}.relation{stroke:var(--md-mermaid-edge-color)}.cardinality{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.cardinality text{fill:inherit!important}defs #classDiagram-compositionEnd,defs #classDiagram-compositionStart,defs #classDiagram-dependencyEnd,defs #classDiagram-dependencyStart,defs #classDiagram-extensionEnd,defs #classDiagram-extensionStart{fill:var(--md-mermaid-edge-color)!important;stroke:var(--md-mermaid-edge-color)!important}defs #classDiagram-aggregationEnd,defs #classDiagram-aggregationStart{fill:var(--md-mermaid-label-bg-color)!important;stroke:var(--md-mermaid-edge-color)!important}g.stateGroup rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}g.stateGroup .state-title{fill:var(--md-mermaid-label-fg-color)!important;font-family:var(--md-mermaid-font-family)}g.stateGroup .composit{fill:var(--md-mermaid-label-bg-color)}.nodeLabel,.nodeLabel p{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}a .nodeLabel{text-decoration:underline}.node circle.state-end,.node circle.state-start,.start-state{fill:var(--md-mermaid-edge-color);stroke:none}.end-state-inner,.end-state-outer{fill:var(--md-mermaid-edge-color)}.end-state-inner,.node circle.state-end{stroke:var(--md-mermaid-label-bg-color)}.transition{stroke:var(--md-mermaid-edge-color)}[id^=state-fork] rect,[id^=state-join] rect{fill:var(--md-mermaid-edge-color)!important;stroke:none!important}.statediagram-cluster.statediagram-cluster .inner{fill:var(--md-default-bg-color)}.statediagram-cluster rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}.statediagram-state rect.divider{fill:var(--md-default-fg-color--lightest);stroke:var(--md-default-fg-color--lighter)}defs #statediagram-barbEnd{stroke:var(--md-mermaid-edge-color)}.attributeBoxEven,.attributeBoxOdd{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}.entityBox{fill:var(--md-mermaid-label-bg-color);stroke:var(--md-mermaid-node-fg-color)}.entityLabel{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.relationshipLabelBox{fill:var(--md-mermaid-label-bg-color);fill-opacity:1;background-color:var(--md-mermaid-label-bg-color);opacity:1}.relationshipLabel{fill:var(--md-mermaid-label-fg-color)}.relationshipLine{stroke:var(--md-mermaid-edge-color)}defs #ONE_OR_MORE_END *,defs #ONE_OR_MORE_START *,defs #ONLY_ONE_END *,defs #ONLY_ONE_START *,defs #ZERO_OR_MORE_END *,defs #ZERO_OR_MORE_START *,defs #ZERO_OR_ONE_END *,defs #ZERO_OR_ONE_START *{stroke:var(--md-mermaid-edge-color)!important}defs #ZERO_OR_MORE_END circle,defs #ZERO_OR_MORE_START circle{fill:var(--md-mermaid-label-bg-color)}.actor{fill:var(--md-mermaid-sequence-actor-bg-color);stroke:var(--md-mermaid-sequence-actor-border-color)}text.actor>tspan{fill:var(--md-mermaid-sequence-actor-fg-color);font-family:var(--md-mermaid-font-family)}line{stroke:var(--md-mermaid-sequence-actor-line-color)}.actor-man circle,.actor-man 
line{fill:var(--md-mermaid-sequence-actorman-bg-color);stroke:var(--md-mermaid-sequence-actorman-line-color)}.messageLine0,.messageLine1{stroke:var(--md-mermaid-sequence-message-line-color)}.note{fill:var(--md-mermaid-sequence-note-bg-color);stroke:var(--md-mermaid-sequence-note-border-color)}.loopText,.loopText>tspan,.messageText,.noteText>tspan{stroke:none;font-family:var(--md-mermaid-font-family)!important}.messageText{fill:var(--md-mermaid-sequence-message-fg-color)}.loopText,.loopText>tspan{fill:var(--md-mermaid-sequence-loop-fg-color)}.noteText>tspan{fill:var(--md-mermaid-sequence-note-fg-color)}#arrowhead path{fill:var(--md-mermaid-sequence-message-line-color);stroke:none}.loopLine{fill:var(--md-mermaid-sequence-loop-bg-color);stroke:var(--md-mermaid-sequence-loop-border-color)}.labelBox{fill:var(--md-mermaid-sequence-label-bg-color);stroke:none}.labelText,.labelText>span{fill:var(--md-mermaid-sequence-label-fg-color);font-family:var(--md-mermaid-font-family)}.sequenceNumber{fill:var(--md-mermaid-sequence-number-fg-color)}rect.rect{fill:var(--md-mermaid-sequence-box-bg-color);stroke:none}rect.rect+text.text{fill:var(--md-mermaid-sequence-box-fg-color)}defs #sequencenumber{fill:var(--md-mermaid-sequence-number-bg-color)!important}";var Br,Da=0;function Va(){return typeof mermaid=="undefined"||mermaid instanceof Element?wt("https://unpkg.com/mermaid@10/dist/mermaid.min.js"):I(void 0)}function In(e){return e.classList.remove("mermaid"),Br||(Br=Va().pipe(y(()=>mermaid.initialize({startOnLoad:!1,themeCSS:Rn,sequence:{actorFontSize:"16px",messageFontSize:"16px",noteFontSize:"16px"}})),m(()=>{}),B(1))),Br.subscribe(()=>ao(this,null,function*(){e.classList.add("mermaid");let t=`__mermaid_${Da++}`,r=E("div",{class:"mermaid"}),o=e.textContent,{svg:n,fn:i}=yield mermaid.render(t,o),a=r.attachShadow({mode:"closed"});a.innerHTML=n,e.replaceWith(r),i==null||i(a)})),Br.pipe(m(()=>({ref:e})))}var Fn=E("table");function jn(e){return e.replaceWith(Fn),Fn.replaceWith(On(e)),I({ref:e})}function Na(e){let t=e.find(r=>r.checked)||e[0];return S(...e.map(r=>d(r,"change").pipe(m(()=>P(`label[for="${r.id}"]`))))).pipe(Q(P(`label[for="${t.id}"]`)),m(r=>({active:r})))}function Wn(e,{viewport$:t,target$:r}){let o=P(".tabbed-labels",e),n=$(":scope > input",e),i=Qr("prev");e.append(i);let a=Qr("next");return e.append(a),C(()=>{let s=new g,p=s.pipe(X(),ne(!0));z([s,ge(e)]).pipe(U(p),Le(1,me)).subscribe({next([{active:c},l]){let f=Ue(c),{width:u}=ce(c);e.style.setProperty("--md-indicator-x",`${f.x}px`),e.style.setProperty("--md-indicator-width",`${u}px`);let h=pr(o);(f.xh.x+l.width)&&o.scrollTo({left:Math.max(0,f.x-16),behavior:"smooth"})},complete(){e.style.removeProperty("--md-indicator-x"),e.style.removeProperty("--md-indicator-width")}}),z([De(o),ge(o)]).pipe(U(p)).subscribe(([c,l])=>{let f=Tt(o);i.hidden=c.x<16,a.hidden=c.x>f.width-l.width-16}),S(d(i,"click").pipe(m(()=>-1)),d(a,"click").pipe(m(()=>1))).pipe(U(p)).subscribe(c=>{let{width:l}=ce(o);o.scrollBy({left:l*c,behavior:"smooth"})}),r.pipe(U(p),b(c=>n.includes(c))).subscribe(c=>c.click()),o.classList.add("tabbed-labels--linked");for(let c of n){let l=P(`label[for="${c.id}"]`);l.replaceChildren(E("a",{href:`#${l.htmlFor}`,tabIndex:-1},...Array.from(l.childNodes))),d(l.firstElementChild,"click").pipe(U(p),b(f=>!(f.metaKey||f.ctrlKey)),y(f=>{f.preventDefault(),f.stopPropagation()})).subscribe(()=>{history.replaceState({},"",`#${l.htmlFor}`),l.click()})}return G("content.tabs.link")&&s.pipe(Ce(1),ee(t)).subscribe(([{active:c},{offset:l}])=>{let 
f=c.innerText.trim();if(c.hasAttribute("data-md-switching"))c.removeAttribute("data-md-switching");else{let u=e.offsetTop-l.y;for(let w of $("[data-tabs]"))for(let A of $(":scope > input",w)){let te=P(`label[for="${A.id}"]`);if(te!==c&&te.innerText.trim()===f){te.setAttribute("data-md-switching",""),A.click();break}}window.scrollTo({top:e.offsetTop-u});let h=__md_get("__tabs")||[];__md_set("__tabs",[...new Set([f,...h])])}}),s.pipe(U(p)).subscribe(()=>{for(let c of $("audio, video",e))c.pause()}),tt(e).pipe(v(()=>Na(n)),y(c=>s.next(c)),L(()=>s.complete()),m(c=>R({ref:e},c)))}).pipe(Qe(se))}function Un(e,{viewport$:t,target$:r,print$:o}){return S(...$(".annotate:not(.highlight)",e).map(n=>Cn(n,{target$:r,print$:o})),...$("pre:not(.mermaid) > code",e).map(n=>$n(n,{target$:r,print$:o})),...$("pre.mermaid",e).map(n=>In(n)),...$("table:not([class])",e).map(n=>jn(n)),...$("details",e).map(n=>Pn(n,{target$:r,print$:o})),...$("[data-tabs]",e).map(n=>Wn(n,{viewport$:t,target$:r})),...$("[title]",e).filter(()=>G("content.tooltips")).map(n=>lt(n,{viewport$:t})))}function za(e,{alert$:t}){return t.pipe(v(r=>S(I(!0),I(!1).pipe(Ge(2e3))).pipe(m(o=>({message:r,active:o})))))}function Dn(e,t){let r=P(".md-typeset",e);return C(()=>{let o=new g;return o.subscribe(({message:n,active:i})=>{e.classList.toggle("md-dialog--active",i),r.textContent=n}),za(e,t).pipe(y(n=>o.next(n)),L(()=>o.complete()),m(n=>R({ref:e},n)))})}var qa=0;function Qa(e,t){document.body.append(e);let{width:r}=ce(e);e.style.setProperty("--md-tooltip-width",`${r}px`),e.remove();let o=cr(t),n=typeof o!="undefined"?De(o):I({x:0,y:0}),i=S(et(t),kt(t)).pipe(K());return z([i,n]).pipe(m(([a,s])=>{let{x:p,y:c}=Ue(t),l=ce(t),f=t.closest("table");return f&&t.parentElement&&(p+=f.offsetLeft+t.parentElement.offsetLeft,c+=f.offsetTop+t.parentElement.offsetTop),{active:a,offset:{x:p-s.x+l.width/2-r/2,y:c-s.y+l.height+8}}}))}function Vn(e){let t=e.title;if(!t.length)return M;let r=`__tooltip_${qa++}`,o=Pt(r,"inline"),n=P(".md-typeset",o);return n.innerHTML=t,C(()=>{let i=new g;return i.subscribe({next({offset:a}){o.style.setProperty("--md-tooltip-x",`${a.x}px`),o.style.setProperty("--md-tooltip-y",`${a.y}px`)},complete(){o.style.removeProperty("--md-tooltip-x"),o.style.removeProperty("--md-tooltip-y")}}),S(i.pipe(b(({active:a})=>a)),i.pipe(_e(250),b(({active:a})=>!a))).subscribe({next({active:a}){a?(e.insertAdjacentElement("afterend",o),e.setAttribute("aria-describedby",r),e.removeAttribute("title")):(o.remove(),e.removeAttribute("aria-describedby"),e.setAttribute("title",t))},complete(){o.remove(),e.removeAttribute("aria-describedby"),e.setAttribute("title",t)}}),i.pipe(Le(16,me)).subscribe(({active:a})=>{o.classList.toggle("md-tooltip--active",a)}),i.pipe(ct(125,me),b(()=>!!e.offsetParent),m(()=>e.offsetParent.getBoundingClientRect()),m(({x:a})=>a)).subscribe({next(a){a?o.style.setProperty("--md-tooltip-0",`${-a}px`):o.style.removeProperty("--md-tooltip-0")},complete(){o.style.removeProperty("--md-tooltip-0")}}),Qa(o,e).pipe(y(a=>i.next(a)),L(()=>i.complete()),m(a=>R({ref:e},a)))}).pipe(Qe(se))}function Ka({viewport$:e}){if(!G("header.autohide"))return I(!1);let t=e.pipe(m(({offset:{y:n}})=>n),Ye(2,1),m(([n,i])=>[nMath.abs(i-n.y)>100),m(([,[n]])=>n),K()),o=Ve("search");return z([e,o]).pipe(m(([{offset:n},i])=>n.y>400&&!i),K(),v(n=>n?r:I(!1)),Q(!1))}function Nn(e,t){return C(()=>z([ge(e),Ka(t)])).pipe(m(([{height:r},o])=>({height:r,hidden:o})),K((r,o)=>r.height===o.height&&r.hidden===o.hidden),B(1))}function zn(e,{header$:t,main$:r}){return C(()=>{let 
o=new g,n=o.pipe(X(),ne(!0));o.pipe(Z("active"),We(t)).subscribe(([{active:a},{hidden:s}])=>{e.classList.toggle("md-header--shadow",a&&!s),e.hidden=s});let i=ue($("[title]",e)).pipe(b(()=>G("content.tooltips")),oe(a=>Vn(a)));return r.subscribe(o),t.pipe(U(n),m(a=>R({ref:e},a)),Pe(i.pipe(U(n))))})}function Ya(e,{viewport$:t,header$:r}){return mr(e,{viewport$:t,header$:r}).pipe(m(({offset:{y:o}})=>{let{height:n}=ce(e);return{active:o>=n}}),Z("active"))}function qn(e,t){return C(()=>{let r=new g;r.subscribe({next({active:n}){e.classList.toggle("md-header__title--active",n)},complete(){e.classList.remove("md-header__title--active")}});let o=fe(".md-content h1");return typeof o=="undefined"?M:Ya(o,t).pipe(y(n=>r.next(n)),L(()=>r.complete()),m(n=>R({ref:e},n)))})}function Qn(e,{viewport$:t,header$:r}){let o=r.pipe(m(({height:i})=>i),K()),n=o.pipe(v(()=>ge(e).pipe(m(({height:i})=>({top:e.offsetTop,bottom:e.offsetTop+i})),Z("bottom"))));return z([o,n,t]).pipe(m(([i,{top:a,bottom:s},{offset:{y:p},size:{height:c}}])=>(c=Math.max(0,c-Math.max(0,a-p,i)-Math.max(0,c+p-s)),{offset:a-i,height:c,active:a-i<=p})),K((i,a)=>i.offset===a.offset&&i.height===a.height&&i.active===a.active))}function Ba(e){let t=__md_get("__palette")||{index:e.findIndex(o=>matchMedia(o.getAttribute("data-md-color-media")).matches)},r=Math.max(0,Math.min(t.index,e.length-1));return I(...e).pipe(oe(o=>d(o,"change").pipe(m(()=>o))),Q(e[r]),m(o=>({index:e.indexOf(o),color:{media:o.getAttribute("data-md-color-media"),scheme:o.getAttribute("data-md-color-scheme"),primary:o.getAttribute("data-md-color-primary"),accent:o.getAttribute("data-md-color-accent")}})),B(1))}function Kn(e){let t=$("input",e),r=E("meta",{name:"theme-color"});document.head.appendChild(r);let o=E("meta",{name:"color-scheme"});document.head.appendChild(o);let n=$t("(prefers-color-scheme: light)");return C(()=>{let i=new g;return i.subscribe(a=>{if(document.body.setAttribute("data-md-color-switching",""),a.color.media==="(prefers-color-scheme)"){let s=matchMedia("(prefers-color-scheme: light)"),p=document.querySelector(s.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");a.color.scheme=p.getAttribute("data-md-color-scheme"),a.color.primary=p.getAttribute("data-md-color-primary"),a.color.accent=p.getAttribute("data-md-color-accent")}for(let[s,p]of Object.entries(a.color))document.body.setAttribute(`data-md-color-${s}`,p);for(let s=0;sa.key==="Enter"),ee(i,(a,s)=>s)).subscribe(({index:a})=>{a=(a+1)%t.length,t[a].click(),t[a].focus()}),i.pipe(m(()=>{let a=Se("header"),s=window.getComputedStyle(a);return o.content=s.colorScheme,s.backgroundColor.match(/\d+/g).map(p=>(+p).toString(16).padStart(2,"0")).join("")})).subscribe(a=>r.content=`#${a}`),i.pipe(be(se)).subscribe(()=>{document.body.removeAttribute("data-md-color-switching")}),Ba(t).pipe(U(n.pipe(Ce(1))),st(),y(a=>i.next(a)),L(()=>i.complete()),m(a=>R({ref:e},a)))})}function Yn(e,{progress$:t}){return C(()=>{let r=new g;return r.subscribe(({value:o})=>{e.style.setProperty("--md-progress-value",`${o}`)}),t.pipe(y(o=>r.next({value:o})),L(()=>r.complete()),m(o=>({ref:e,value:o})))})}var Gr=Vt(Yr());function Ga(e){e.setAttribute("data-md-copying","");let t=e.closest("[data-copy]"),r=t?t.getAttribute("data-copy"):e.innerText;return e.removeAttribute("data-md-copying"),r.trimEnd()}function Bn({alert$:e}){Gr.default.isSupported()&&new F(t=>{new Gr.default("[data-clipboard-target], 
[data-clipboard-text]",{text:r=>r.getAttribute("data-clipboard-text")||Ga(P(r.getAttribute("data-clipboard-target")))}).on("success",r=>t.next(r))}).pipe(y(t=>{t.trigger.focus()}),m(()=>ye("clipboard.copied"))).subscribe(e)}function Gn(e,t){return e.protocol=t.protocol,e.hostname=t.hostname,e}function Ja(e,t){let r=new Map;for(let o of $("url",e)){let n=P("loc",o),i=[Gn(new URL(n.textContent),t)];r.set(`${i[0]}`,i);for(let a of $("[rel=alternate]",o)){let s=a.getAttribute("href");s!=null&&i.push(Gn(new URL(s),t))}}return r}function ur(e){return mn(new URL("sitemap.xml",e)).pipe(m(t=>Ja(t,new URL(e))),ve(()=>I(new Map)))}function Xa(e,t){if(!(e.target instanceof Element))return M;let r=e.target.closest("a");if(r===null)return M;if(r.target||e.metaKey||e.ctrlKey)return M;let o=new URL(r.href);return o.search=o.hash="",t.has(`${o}`)?(e.preventDefault(),I(new URL(r.href))):M}function Jn(e){let t=new Map;for(let r of $(":scope > *",e.head))t.set(r.outerHTML,r);return t}function Xn(e){for(let t of $("[href], [src]",e))for(let r of["href","src"]){let o=t.getAttribute(r);if(o&&!/^(?:[a-z]+:)?\/\//i.test(o)){t[r]=t[r];break}}return I(e)}function Za(e){for(let o of["[data-md-component=announce]","[data-md-component=container]","[data-md-component=header-topic]","[data-md-component=outdated]","[data-md-component=logo]","[data-md-component=skip]",...G("navigation.tabs.sticky")?["[data-md-component=tabs]"]:[]]){let n=fe(o),i=fe(o,e);typeof n!="undefined"&&typeof i!="undefined"&&n.replaceWith(i)}let t=Jn(document);for(let[o,n]of Jn(e))t.has(o)?t.delete(o):document.head.appendChild(n);for(let o of t.values()){let n=o.getAttribute("name");n!=="theme-color"&&n!=="color-scheme"&&o.remove()}let r=Se("container");return je($("script",r)).pipe(v(o=>{let n=e.createElement("script");if(o.src){for(let i of o.getAttributeNames())n.setAttribute(i,o.getAttribute(i));return o.replaceWith(n),new F(i=>{n.onload=()=>i.complete()})}else return n.textContent=o.textContent,o.replaceWith(n),M}),X(),ne(document))}function Zn({location$:e,viewport$:t,progress$:r}){let o=Te();if(location.protocol==="file:")return M;let n=ur(o.base);I(document).subscribe(Xn);let i=d(document.body,"click").pipe(We(n),v(([p,c])=>Xa(p,c)),pe()),a=d(window,"popstate").pipe(m(xe),pe());i.pipe(ee(t)).subscribe(([p,{offset:c}])=>{history.replaceState(c,""),history.pushState(null,"",p)}),S(i,a).subscribe(e);let s=e.pipe(Z("pathname"),v(p=>ln(p,{progress$:r}).pipe(ve(()=>(pt(p,!0),M)))),v(Xn),v(Za),pe());return S(s.pipe(ee(e,(p,c)=>c)),e.pipe(Z("pathname"),v(()=>e),Z("hash")),e.pipe(K((p,c)=>p.pathname===c.pathname&&p.hash===c.hash),v(()=>i),y(()=>history.back()))).subscribe(p=>{var c,l;history.state!==null||!p.hash?window.scrollTo(0,(l=(c=history.state)==null?void 0:c.y)!=null?l:0):(history.scrollRestoration="auto",sn(p.hash),history.scrollRestoration="manual")}),e.subscribe(()=>{history.scrollRestoration="manual"}),d(window,"beforeunload").subscribe(()=>{history.scrollRestoration="auto"}),t.pipe(Z("offset"),_e(100)).subscribe(({offset:p})=>{history.replaceState(p,"")}),s}var ri=Vt(ti());function oi(e){let t=e.separator.split("|").map(n=>n.replace(/(\(\?[!=<][^)]+\))/g,"").length===0?"\uFFFD":n).join("|"),r=new RegExp(t,"img"),o=(n,i,a)=>`${i}${a}`;return n=>{n=n.replace(/[\s*+\-:~^]+/g," ").trim();let i=new RegExp(`(^|${e.separator}|)(${n.replace(/[|\\{}()[\]^$+*?.-]/g,"\\$&").replace(r,"|")})`,"img");return a=>(0,ri.default)(a).replace(i,o).replace(/<\/mark>(\s+)]*>/img,"$1")}}function It(e){return e.type===1}function dr(e){return e.type===3}function 
ni(e,t){let r=vn(e);return S(I(location.protocol!=="file:"),Ve("search")).pipe(Ae(o=>o),v(()=>t)).subscribe(({config:o,docs:n})=>r.next({type:0,data:{config:o,docs:n,options:{suggest:G("search.suggest")}}})),r}function ii({document$:e}){let t=Te(),r=Ne(new URL("../versions.json",t.base)).pipe(ve(()=>M)),o=r.pipe(m(n=>{let[,i]=t.base.match(/([^/]+)\/?$/);return n.find(({version:a,aliases:s})=>a===i||s.includes(i))||n[0]}));r.pipe(m(n=>new Map(n.map(i=>[`${new URL(`../${i.version}/`,t.base)}`,i]))),v(n=>d(document.body,"click").pipe(b(i=>!i.metaKey&&!i.ctrlKey),ee(o),v(([i,a])=>{if(i.target instanceof Element){let s=i.target.closest("a");if(s&&!s.target&&n.has(s.href)){let p=s.href;return!i.target.closest(".md-version")&&n.get(p)===a?M:(i.preventDefault(),I(p))}}return M}),v(i=>ur(new URL(i)).pipe(m(a=>{let p=xe().href.replace(t.base,i);return a.has(p.split("#")[0])?new URL(p):new URL(i)})))))).subscribe(n=>pt(n,!0)),z([r,o]).subscribe(([n,i])=>{P(".md-header__topic").appendChild(Mn(n,i))}),e.pipe(v(()=>o)).subscribe(n=>{var a;let i=__md_get("__outdated",sessionStorage);if(i===null){i=!0;let s=((a=t.version)==null?void 0:a.default)||"latest";Array.isArray(s)||(s=[s]);e:for(let p of s)for(let c of n.aliases.concat(n.version))if(new RegExp(p,"i").test(c)){i=!1;break e}__md_set("__outdated",i,sessionStorage)}if(i)for(let s of ae("outdated"))s.hidden=!1})}function ns(e,{worker$:t}){let{searchParams:r}=xe();r.has("q")&&(Je("search",!0),e.value=r.get("q"),e.focus(),Ve("search").pipe(Ae(i=>!i)).subscribe(()=>{let i=xe();i.searchParams.delete("q"),history.replaceState({},"",`${i}`)}));let o=et(e),n=S(t.pipe(Ae(It)),d(e,"keyup"),o).pipe(m(()=>e.value),K());return z([n,o]).pipe(m(([i,a])=>({value:i,focus:a})),B(1))}function ai(e,{worker$:t}){let r=new g,o=r.pipe(X(),ne(!0));z([t.pipe(Ae(It)),r],(i,a)=>a).pipe(Z("value")).subscribe(({value:i})=>t.next({type:2,data:i})),r.pipe(Z("focus")).subscribe(({focus:i})=>{i&&Je("search",i)}),d(e.form,"reset").pipe(U(o)).subscribe(()=>e.focus());let n=P("header [for=__search]");return d(n,"click").subscribe(()=>e.focus()),ns(e,{worker$:t}).pipe(y(i=>r.next(i)),L(()=>r.complete()),m(i=>R({ref:e},i)),B(1))}function si(e,{worker$:t,query$:r}){let o=new g,n=tn(e.parentElement).pipe(b(Boolean)),i=e.parentElement,a=P(":scope > :first-child",e),s=P(":scope > :last-child",e);Ve("search").subscribe(l=>s.setAttribute("role",l?"list":"presentation")),o.pipe(ee(r),Ur(t.pipe(Ae(It)))).subscribe(([{items:l},{value:f}])=>{switch(l.length){case 0:a.textContent=f.length?ye("search.result.none"):ye("search.result.placeholder");break;case 1:a.textContent=ye("search.result.one");break;default:let u=sr(l.length);a.textContent=ye("search.result.other",u)}});let p=o.pipe(y(()=>s.innerHTML=""),v(({items:l})=>S(I(...l.slice(0,10)),I(...l.slice(10)).pipe(Ye(4),Vr(n),v(([f])=>f)))),m(Tn),pe());return p.subscribe(l=>s.appendChild(l)),p.pipe(oe(l=>{let f=fe("details",l);return typeof f=="undefined"?M:d(f,"toggle").pipe(U(o),m(()=>f))})).subscribe(l=>{l.open===!1&&l.offsetTop<=i.scrollTop&&i.scrollTo({top:l.offsetTop})}),t.pipe(b(dr),m(({data:l})=>l)).pipe(y(l=>o.next(l)),L(()=>o.complete()),m(l=>R({ref:e},l)))}function is(e,{query$:t}){return t.pipe(m(({value:r})=>{let o=xe();return o.hash="",r=r.replace(/\s+/g,"+").replace(/&/g,"%26").replace(/=/g,"%3D"),o.search=`q=${r}`,{url:o}}))}function ci(e,t){let r=new g,o=r.pipe(X(),ne(!0));return 
r.subscribe(({url:n})=>{e.setAttribute("data-clipboard-text",e.href),e.href=`${n}`}),d(e,"click").pipe(U(o)).subscribe(n=>n.preventDefault()),is(e,t).pipe(y(n=>r.next(n)),L(()=>r.complete()),m(n=>R({ref:e},n)))}function pi(e,{worker$:t,keyboard$:r}){let o=new g,n=Se("search-query"),i=S(d(n,"keydown"),d(n,"focus")).pipe(be(se),m(()=>n.value),K());return o.pipe(We(i),m(([{suggest:s},p])=>{let c=p.split(/([\s-]+)/);if(s!=null&&s.length&&c[c.length-1]){let l=s[s.length-1];l.startsWith(c[c.length-1])&&(c[c.length-1]=l)}else c.length=0;return c})).subscribe(s=>e.innerHTML=s.join("").replace(/\s/g," ")),r.pipe(b(({mode:s})=>s==="search")).subscribe(s=>{switch(s.type){case"ArrowRight":e.innerText.length&&n.selectionStart===n.value.length&&(n.value=e.innerText);break}}),t.pipe(b(dr),m(({data:s})=>s)).pipe(y(s=>o.next(s)),L(()=>o.complete()),m(()=>({ref:e})))}function li(e,{index$:t,keyboard$:r}){let o=Te();try{let n=ni(o.search,t),i=Se("search-query",e),a=Se("search-result",e);d(e,"click").pipe(b(({target:p})=>p instanceof Element&&!!p.closest("a"))).subscribe(()=>Je("search",!1)),r.pipe(b(({mode:p})=>p==="search")).subscribe(p=>{let c=Re();switch(p.type){case"Enter":if(c===i){let l=new Map;for(let f of $(":first-child [href]",a)){let u=f.firstElementChild;l.set(f,parseFloat(u.getAttribute("data-md-score")))}if(l.size){let[[f]]=[...l].sort(([,u],[,h])=>h-u);f.click()}p.claim()}break;case"Escape":case"Tab":Je("search",!1),i.blur();break;case"ArrowUp":case"ArrowDown":if(typeof c=="undefined")i.focus();else{let l=[i,...$(":not(details) > [href], summary, details[open] [href]",a)],f=Math.max(0,(Math.max(0,l.indexOf(c))+l.length+(p.type==="ArrowUp"?-1:1))%l.length);l[f].focus()}p.claim();break;default:i!==Re()&&i.focus()}}),r.pipe(b(({mode:p})=>p==="global")).subscribe(p=>{switch(p.type){case"f":case"s":case"/":i.focus(),i.select(),p.claim();break}});let s=ai(i,{worker$:n});return S(s,si(a,{worker$:n,query$:s})).pipe(Pe(...ae("search-share",e).map(p=>ci(p,{query$:s})),...ae("search-suggest",e).map(p=>pi(p,{worker$:n,keyboard$:r}))))}catch(n){return e.hidden=!0,Ke}}function mi(e,{index$:t,location$:r}){return z([t,r.pipe(Q(xe()),b(o=>!!o.searchParams.get("h")))]).pipe(m(([o,n])=>oi(o.config)(n.searchParams.get("h"))),m(o=>{var a;let n=new Map,i=document.createNodeIterator(e,NodeFilter.SHOW_TEXT);for(let s=i.nextNode();s;s=i.nextNode())if((a=s.parentElement)!=null&&a.offsetHeight){let p=s.textContent,c=o(p);c.length>p.length&&n.set(s,c)}for(let[s,p]of n){let{childNodes:c}=E("span",null,p);s.replaceWith(...Array.from(c))}return{ref:e,nodes:n}}))}function as(e,{viewport$:t,main$:r}){let o=e.closest(".md-grid"),n=o.offsetTop-o.parentElement.offsetTop;return z([r,t]).pipe(m(([{offset:i,height:a},{offset:{y:s}}])=>(a=a+Math.min(n,Math.max(0,s-i))-n,{height:a,locked:s>=i+n})),K((i,a)=>i.height===a.height&&i.locked===a.locked))}function Jr(e,o){var n=o,{header$:t}=n,r=io(n,["header$"]);let i=P(".md-sidebar__scrollwrap",e),{y:a}=Ue(i);return C(()=>{let s=new g,p=s.pipe(X(),ne(!0)),c=s.pipe(Le(0,me));return c.pipe(ee(t)).subscribe({next([{height:l},{height:f}]){i.style.height=`${l-2*a}px`,e.style.top=`${f}px`},complete(){i.style.height="",e.style.top=""}}),c.pipe(Ae()).subscribe(()=>{for(let l of $(".md-nav__link--active[href]",e)){if(!l.clientHeight)continue;let f=l.closest(".md-sidebar__scrollwrap");if(typeof f!="undefined"){let u=l.offsetTop-f.offsetTop,{height:h}=ce(f);f.scrollTo({top:u-h/2})}}}),ue($("label[tabindex]",e)).pipe(oe(l=>d(l,"click").pipe(be(se),m(()=>l),U(p)))).subscribe(l=>{let 
f=P(`[id="${l.htmlFor}"]`);P(`[aria-labelledby="${l.id}"]`).setAttribute("aria-expanded",`${f.checked}`)}),as(e,r).pipe(y(l=>s.next(l)),L(()=>s.complete()),m(l=>R({ref:e},l)))})}function fi(e,t){if(typeof t!="undefined"){let r=`https://api.github.com/repos/${e}/${t}`;return Ct(Ne(`${r}/releases/latest`).pipe(ve(()=>M),m(o=>({version:o.tag_name})),Be({})),Ne(r).pipe(ve(()=>M),m(o=>({stars:o.stargazers_count,forks:o.forks_count})),Be({}))).pipe(m(([o,n])=>R(R({},o),n)))}else{let r=`https://api.github.com/users/${e}`;return Ne(r).pipe(m(o=>({repositories:o.public_repos})),Be({}))}}function ui(e,t){let r=`https://${e}/api/v4/projects/${encodeURIComponent(t)}`;return Ne(r).pipe(ve(()=>M),m(({star_count:o,forks_count:n})=>({stars:o,forks:n})),Be({}))}function di(e){let t=e.match(/^.+github\.com\/([^/]+)\/?([^/]+)?/i);if(t){let[,r,o]=t;return fi(r,o)}if(t=e.match(/^.+?([^/]*gitlab[^/]+)\/(.+?)\/?$/i),t){let[,r,o]=t;return ui(r,o)}return M}var ss;function cs(e){return ss||(ss=C(()=>{let t=__md_get("__source",sessionStorage);if(t)return I(t);if(ae("consent").length){let o=__md_get("__consent");if(!(o&&o.github))return M}return di(e.href).pipe(y(o=>__md_set("__source",o,sessionStorage)))}).pipe(ve(()=>M),b(t=>Object.keys(t).length>0),m(t=>({facts:t})),B(1)))}function hi(e){let t=P(":scope > :last-child",e);return C(()=>{let r=new g;return r.subscribe(({facts:o})=>{t.appendChild(Sn(o)),t.classList.add("md-source__repository--active")}),cs(e).pipe(y(o=>r.next(o)),L(()=>r.complete()),m(o=>R({ref:e},o)))})}function ps(e,{viewport$:t,header$:r}){return ge(document.body).pipe(v(()=>mr(e,{header$:r,viewport$:t})),m(({offset:{y:o}})=>({hidden:o>=10})),Z("hidden"))}function bi(e,t){return C(()=>{let r=new g;return r.subscribe({next({hidden:o}){e.hidden=o},complete(){e.hidden=!1}}),(G("navigation.tabs.sticky")?I({hidden:!1}):ps(e,t)).pipe(y(o=>r.next(o)),L(()=>r.complete()),m(o=>R({ref:e},o)))})}function ls(e,{viewport$:t,header$:r}){let o=new Map,n=$(".md-nav__link",e);for(let s of n){let p=decodeURIComponent(s.hash.substring(1)),c=fe(`[id="${p}"]`);typeof c!="undefined"&&o.set(s,c)}let i=r.pipe(Z("height"),m(({height:s})=>{let p=Se("main"),c=P(":scope > :first-child",p);return s+.8*(c.offsetTop-p.offsetTop)}),pe());return ge(document.body).pipe(Z("height"),v(s=>C(()=>{let p=[];return I([...o].reduce((c,[l,f])=>{for(;p.length&&o.get(p[p.length-1]).tagName>=f.tagName;)p.pop();let u=f.offsetTop;for(;!u&&f.parentElement;)f=f.parentElement,u=f.offsetTop;let h=f.offsetParent;for(;h;h=h.offsetParent)u+=h.offsetTop;return c.set([...p=[...p,l]].reverse(),u)},new Map))}).pipe(m(p=>new Map([...p].sort(([,c],[,l])=>c-l))),We(i),v(([p,c])=>t.pipe(jr(([l,f],{offset:{y:u},size:h})=>{let w=u+h.height>=Math.floor(s.height);for(;f.length;){let[,A]=f[0];if(A-c=u&&!w)f=[l.pop(),...f];else break}return[l,f]},[[],[...p]]),K((l,f)=>l[0]===f[0]&&l[1]===f[1])))))).pipe(m(([s,p])=>({prev:s.map(([c])=>c),next:p.map(([c])=>c)})),Q({prev:[],next:[]}),Ye(2,1),m(([s,p])=>s.prev.length{let i=new g,a=i.pipe(X(),ne(!0));if(i.subscribe(({prev:s,next:p})=>{for(let[c]of p)c.classList.remove("md-nav__link--passed"),c.classList.remove("md-nav__link--active");for(let[c,[l]]of s.entries())l.classList.add("md-nav__link--passed"),l.classList.toggle("md-nav__link--active",c===s.length-1)}),G("toc.follow")){let s=S(t.pipe(_e(1),m(()=>{})),t.pipe(_e(250),m(()=>"smooth")));i.pipe(b(({prev:p})=>p.length>0),We(o.pipe(be(se))),ee(s)).subscribe(([[{prev:p}],c])=>{let[l]=p[p.length-1];if(l.offsetHeight){let f=cr(l);if(typeof f!="undefined"){let 
u=l.offsetTop-f.offsetTop,{height:h}=ce(f);f.scrollTo({top:u-h/2,behavior:c})}}})}return G("navigation.tracking")&&t.pipe(U(a),Z("offset"),_e(250),Ce(1),U(n.pipe(Ce(1))),st({delay:250}),ee(i)).subscribe(([,{prev:s}])=>{let p=xe(),c=s[s.length-1];if(c&&c.length){let[l]=c,{hash:f}=new URL(l.href);p.hash!==f&&(p.hash=f,history.replaceState({},"",`${p}`))}else p.hash="",history.replaceState({},"",`${p}`)}),ls(e,{viewport$:t,header$:r}).pipe(y(s=>i.next(s)),L(()=>i.complete()),m(s=>R({ref:e},s)))})}function ms(e,{viewport$:t,main$:r,target$:o}){let n=t.pipe(m(({offset:{y:a}})=>a),Ye(2,1),m(([a,s])=>a>s&&s>0),K()),i=r.pipe(m(({active:a})=>a));return z([i,n]).pipe(m(([a,s])=>!(a&&s)),K(),U(o.pipe(Ce(1))),ne(!0),st({delay:250}),m(a=>({hidden:a})))}function gi(e,{viewport$:t,header$:r,main$:o,target$:n}){let i=new g,a=i.pipe(X(),ne(!0));return i.subscribe({next({hidden:s}){e.hidden=s,s?(e.setAttribute("tabindex","-1"),e.blur()):e.removeAttribute("tabindex")},complete(){e.style.top="",e.hidden=!0,e.removeAttribute("tabindex")}}),r.pipe(U(a),Z("height")).subscribe(({height:s})=>{e.style.top=`${s+16}px`}),d(e,"click").subscribe(s=>{s.preventDefault(),window.scrollTo({top:0})}),ms(e,{viewport$:t,main$:o,target$:n}).pipe(y(s=>i.next(s)),L(()=>i.complete()),m(s=>R({ref:e},s)))}function xi({document$:e,viewport$:t}){e.pipe(v(()=>$(".md-ellipsis")),oe(r=>tt(r).pipe(U(e.pipe(Ce(1))),b(o=>o),m(()=>r),we(1))),b(r=>r.offsetWidth{let o=r.innerText,n=r.closest("a")||r;return n.title=o,lt(n,{viewport$:t}).pipe(U(e.pipe(Ce(1))),L(()=>n.removeAttribute("title")))})).subscribe(),e.pipe(v(()=>$(".md-status")),oe(r=>lt(r,{viewport$:t}))).subscribe()}function yi({document$:e,tablet$:t}){e.pipe(v(()=>$(".md-toggle--indeterminate")),y(r=>{r.indeterminate=!0,r.checked=!1}),oe(r=>d(r,"change").pipe(Dr(()=>r.classList.contains("md-toggle--indeterminate")),m(()=>r))),ee(t)).subscribe(([r,o])=>{r.classList.remove("md-toggle--indeterminate"),o&&(r.checked=!1)})}function fs(){return/(iPad|iPhone|iPod)/.test(navigator.userAgent)}function Ei({document$:e}){e.pipe(v(()=>$("[data-md-scrollfix]")),y(t=>t.removeAttribute("data-md-scrollfix")),b(fs),oe(t=>d(t,"touchstart").pipe(m(()=>t)))).subscribe(t=>{let r=t.scrollTop;r===0?t.scrollTop=1:r+t.offsetHeight===t.scrollHeight&&(t.scrollTop=r-1)})}function wi({viewport$:e,tablet$:t}){z([Ve("search"),t]).pipe(m(([r,o])=>r&&!o),v(r=>I(r).pipe(Ge(r?400:100))),ee(e)).subscribe(([r,{offset:{y:o}}])=>{if(r)document.body.setAttribute("data-md-scrolllock",""),document.body.style.top=`-${o}px`;else{let n=-1*parseInt(document.body.style.top,10);document.body.removeAttribute("data-md-scrolllock"),document.body.style.top="",n&&window.scrollTo(0,n)}})}Object.entries||(Object.entries=function(e){let t=[];for(let r of Object.keys(e))t.push([r,e[r]]);return t});Object.values||(Object.values=function(e){let t=[];for(let r of Object.keys(e))t.push(e[r]);return t});typeof Element!="undefined"&&(Element.prototype.scrollTo||(Element.prototype.scrollTo=function(e,t){typeof e=="object"?(this.scrollLeft=e.left,this.scrollTop=e.top):(this.scrollLeft=e,this.scrollTop=t)}),Element.prototype.replaceWith||(Element.prototype.replaceWith=function(...e){let t=this.parentNode;if(t){e.length===0&&t.removeChild(this);for(let r=e.length-1;r>=0;r--){let o=e[r];typeof o=="string"?o=document.createTextNode(o):o.parentNode&&o.parentNode.removeChild(o),r?t.insertBefore(this.previousSibling,o):t.replaceChild(o,this)}}}));function us(){return location.protocol==="file:"?wt(`${new 
URL("search/search_index.js",Xr.base)}`).pipe(m(()=>__index),B(1)):Ne(new URL("search/search_index.json",Xr.base))}document.documentElement.classList.remove("no-js");document.documentElement.classList.add("js");var ot=Yo(),jt=nn(),Ot=cn(jt),Zr=on(),Oe=bn(),hr=$t("(min-width: 960px)"),Si=$t("(min-width: 1220px)"),Oi=pn(),Xr=Te(),Mi=document.forms.namedItem("search")?us():Ke,eo=new g;Bn({alert$:eo});var to=new g;G("navigation.instant")&&Zn({location$:jt,viewport$:Oe,progress$:to}).subscribe(ot);var Ti;((Ti=Xr.version)==null?void 0:Ti.provider)==="mike"&&ii({document$:ot});S(jt,Ot).pipe(Ge(125)).subscribe(()=>{Je("drawer",!1),Je("search",!1)});Zr.pipe(b(({mode:e})=>e==="global")).subscribe(e=>{switch(e.type){case"p":case",":let t=fe("link[rel=prev]");typeof t!="undefined"&&pt(t);break;case"n":case".":let r=fe("link[rel=next]");typeof r!="undefined"&&pt(r);break;case"Enter":let o=Re();o instanceof HTMLLabelElement&&o.click()}});xi({viewport$:Oe,document$:ot});yi({document$:ot,tablet$:hr});Ei({document$:ot});wi({viewport$:Oe,tablet$:hr});var rt=Nn(Se("header"),{viewport$:Oe}),Ft=ot.pipe(m(()=>Se("main")),v(e=>Qn(e,{viewport$:Oe,header$:rt})),B(1)),ds=S(...ae("consent").map(e=>xn(e,{target$:Ot})),...ae("dialog").map(e=>Dn(e,{alert$:eo})),...ae("header").map(e=>zn(e,{viewport$:Oe,header$:rt,main$:Ft})),...ae("palette").map(e=>Kn(e)),...ae("progress").map(e=>Yn(e,{progress$:to})),...ae("search").map(e=>li(e,{index$:Mi,keyboard$:Zr})),...ae("source").map(e=>hi(e))),hs=C(()=>S(...ae("announce").map(e=>gn(e)),...ae("content").map(e=>Un(e,{viewport$:Oe,target$:Ot,print$:Oi})),...ae("content").map(e=>G("search.highlight")?mi(e,{index$:Mi,location$:jt}):M),...ae("header-title").map(e=>qn(e,{viewport$:Oe,header$:rt})),...ae("sidebar").map(e=>e.getAttribute("data-md-type")==="navigation"?Nr(Si,()=>Jr(e,{viewport$:Oe,header$:rt,main$:Ft})):Nr(hr,()=>Jr(e,{viewport$:Oe,header$:rt,main$:Ft}))),...ae("tabs").map(e=>bi(e,{viewport$:Oe,header$:rt})),...ae("toc").map(e=>vi(e,{viewport$:Oe,header$:rt,main$:Ft,target$:Ot})),...ae("top").map(e=>gi(e,{viewport$:Oe,header$:rt,main$:Ft,target$:Ot})))),Li=ot.pipe(v(()=>hs),Pe(ds),B(1));Li.subscribe();window.document$=ot;window.location$=jt;window.target$=Ot;window.keyboard$=Zr;window.viewport$=Oe;window.tablet$=hr;window.screen$=Si;window.print$=Oi;window.alert$=eo;window.progress$=to;window.component$=Li;})(); +//# sourceMappingURL=bundle.5cfa9459.min.js.map + diff --git a/assets/javascripts/bundle.5cfa9459.min.js.map b/assets/javascripts/bundle.5cfa9459.min.js.map new file mode 100644 index 00000000..c1f9b4cc --- /dev/null +++ b/assets/javascripts/bundle.5cfa9459.min.js.map @@ -0,0 +1,7 @@ +{ + "version": 3, + "sources": ["node_modules/focus-visible/dist/focus-visible.js", "node_modules/clipboard/dist/clipboard.js", "node_modules/escape-html/index.js", "src/templates/assets/javascripts/bundle.ts", "node_modules/rxjs/node_modules/tslib/tslib.es6.js", "node_modules/rxjs/src/internal/util/isFunction.ts", "node_modules/rxjs/src/internal/util/createErrorClass.ts", "node_modules/rxjs/src/internal/util/UnsubscriptionError.ts", "node_modules/rxjs/src/internal/util/arrRemove.ts", "node_modules/rxjs/src/internal/Subscription.ts", "node_modules/rxjs/src/internal/config.ts", "node_modules/rxjs/src/internal/scheduler/timeoutProvider.ts", "node_modules/rxjs/src/internal/util/reportUnhandledError.ts", "node_modules/rxjs/src/internal/util/noop.ts", "node_modules/rxjs/src/internal/NotificationFactories.ts", "node_modules/rxjs/src/internal/util/errorContext.ts", 
"node_modules/rxjs/src/internal/Subscriber.ts", "node_modules/rxjs/src/internal/symbol/observable.ts", "node_modules/rxjs/src/internal/util/identity.ts", "node_modules/rxjs/src/internal/util/pipe.ts", "node_modules/rxjs/src/internal/Observable.ts", "node_modules/rxjs/src/internal/util/lift.ts", "node_modules/rxjs/src/internal/operators/OperatorSubscriber.ts", "node_modules/rxjs/src/internal/scheduler/animationFrameProvider.ts", "node_modules/rxjs/src/internal/util/ObjectUnsubscribedError.ts", "node_modules/rxjs/src/internal/Subject.ts", "node_modules/rxjs/src/internal/BehaviorSubject.ts", "node_modules/rxjs/src/internal/scheduler/dateTimestampProvider.ts", "node_modules/rxjs/src/internal/ReplaySubject.ts", "node_modules/rxjs/src/internal/scheduler/Action.ts", "node_modules/rxjs/src/internal/scheduler/intervalProvider.ts", "node_modules/rxjs/src/internal/scheduler/AsyncAction.ts", "node_modules/rxjs/src/internal/Scheduler.ts", "node_modules/rxjs/src/internal/scheduler/AsyncScheduler.ts", "node_modules/rxjs/src/internal/scheduler/async.ts", "node_modules/rxjs/src/internal/scheduler/QueueAction.ts", "node_modules/rxjs/src/internal/scheduler/QueueScheduler.ts", "node_modules/rxjs/src/internal/scheduler/queue.ts", "node_modules/rxjs/src/internal/scheduler/AnimationFrameAction.ts", "node_modules/rxjs/src/internal/scheduler/AnimationFrameScheduler.ts", "node_modules/rxjs/src/internal/scheduler/animationFrame.ts", "node_modules/rxjs/src/internal/observable/empty.ts", "node_modules/rxjs/src/internal/util/isScheduler.ts", "node_modules/rxjs/src/internal/util/args.ts", "node_modules/rxjs/src/internal/util/isArrayLike.ts", "node_modules/rxjs/src/internal/util/isPromise.ts", "node_modules/rxjs/src/internal/util/isInteropObservable.ts", "node_modules/rxjs/src/internal/util/isAsyncIterable.ts", "node_modules/rxjs/src/internal/util/throwUnobservableError.ts", "node_modules/rxjs/src/internal/symbol/iterator.ts", "node_modules/rxjs/src/internal/util/isIterable.ts", "node_modules/rxjs/src/internal/util/isReadableStreamLike.ts", "node_modules/rxjs/src/internal/observable/innerFrom.ts", "node_modules/rxjs/src/internal/util/executeSchedule.ts", "node_modules/rxjs/src/internal/operators/observeOn.ts", "node_modules/rxjs/src/internal/operators/subscribeOn.ts", "node_modules/rxjs/src/internal/scheduled/scheduleObservable.ts", "node_modules/rxjs/src/internal/scheduled/schedulePromise.ts", "node_modules/rxjs/src/internal/scheduled/scheduleArray.ts", "node_modules/rxjs/src/internal/scheduled/scheduleIterable.ts", "node_modules/rxjs/src/internal/scheduled/scheduleAsyncIterable.ts", "node_modules/rxjs/src/internal/scheduled/scheduleReadableStreamLike.ts", "node_modules/rxjs/src/internal/scheduled/scheduled.ts", "node_modules/rxjs/src/internal/observable/from.ts", "node_modules/rxjs/src/internal/observable/of.ts", "node_modules/rxjs/src/internal/observable/throwError.ts", "node_modules/rxjs/src/internal/util/EmptyError.ts", "node_modules/rxjs/src/internal/util/isDate.ts", "node_modules/rxjs/src/internal/operators/map.ts", "node_modules/rxjs/src/internal/util/mapOneOrManyArgs.ts", "node_modules/rxjs/src/internal/util/argsArgArrayOrObject.ts", "node_modules/rxjs/src/internal/util/createObject.ts", "node_modules/rxjs/src/internal/observable/combineLatest.ts", "node_modules/rxjs/src/internal/operators/mergeInternals.ts", "node_modules/rxjs/src/internal/operators/mergeMap.ts", "node_modules/rxjs/src/internal/operators/mergeAll.ts", "node_modules/rxjs/src/internal/operators/concatAll.ts", 
"node_modules/rxjs/src/internal/observable/concat.ts", "node_modules/rxjs/src/internal/observable/defer.ts", "node_modules/rxjs/src/internal/observable/fromEvent.ts", "node_modules/rxjs/src/internal/observable/fromEventPattern.ts", "node_modules/rxjs/src/internal/observable/timer.ts", "node_modules/rxjs/src/internal/observable/merge.ts", "node_modules/rxjs/src/internal/observable/never.ts", "node_modules/rxjs/src/internal/util/argsOrArgArray.ts", "node_modules/rxjs/src/internal/operators/filter.ts", "node_modules/rxjs/src/internal/observable/zip.ts", "node_modules/rxjs/src/internal/operators/audit.ts", "node_modules/rxjs/src/internal/operators/auditTime.ts", "node_modules/rxjs/src/internal/operators/bufferCount.ts", "node_modules/rxjs/src/internal/operators/catchError.ts", "node_modules/rxjs/src/internal/operators/scanInternals.ts", "node_modules/rxjs/src/internal/operators/combineLatest.ts", "node_modules/rxjs/src/internal/operators/combineLatestWith.ts", "node_modules/rxjs/src/internal/operators/debounce.ts", "node_modules/rxjs/src/internal/operators/debounceTime.ts", "node_modules/rxjs/src/internal/operators/defaultIfEmpty.ts", "node_modules/rxjs/src/internal/operators/take.ts", "node_modules/rxjs/src/internal/operators/ignoreElements.ts", "node_modules/rxjs/src/internal/operators/mapTo.ts", "node_modules/rxjs/src/internal/operators/delayWhen.ts", "node_modules/rxjs/src/internal/operators/delay.ts", "node_modules/rxjs/src/internal/operators/distinctUntilChanged.ts", "node_modules/rxjs/src/internal/operators/distinctUntilKeyChanged.ts", "node_modules/rxjs/src/internal/operators/throwIfEmpty.ts", "node_modules/rxjs/src/internal/operators/endWith.ts", "node_modules/rxjs/src/internal/operators/finalize.ts", "node_modules/rxjs/src/internal/operators/first.ts", "node_modules/rxjs/src/internal/operators/takeLast.ts", "node_modules/rxjs/src/internal/operators/merge.ts", "node_modules/rxjs/src/internal/operators/mergeWith.ts", "node_modules/rxjs/src/internal/operators/repeat.ts", "node_modules/rxjs/src/internal/operators/scan.ts", "node_modules/rxjs/src/internal/operators/share.ts", "node_modules/rxjs/src/internal/operators/shareReplay.ts", "node_modules/rxjs/src/internal/operators/skip.ts", "node_modules/rxjs/src/internal/operators/skipUntil.ts", "node_modules/rxjs/src/internal/operators/startWith.ts", "node_modules/rxjs/src/internal/operators/switchMap.ts", "node_modules/rxjs/src/internal/operators/takeUntil.ts", "node_modules/rxjs/src/internal/operators/takeWhile.ts", "node_modules/rxjs/src/internal/operators/tap.ts", "node_modules/rxjs/src/internal/operators/throttle.ts", "node_modules/rxjs/src/internal/operators/throttleTime.ts", "node_modules/rxjs/src/internal/operators/withLatestFrom.ts", "node_modules/rxjs/src/internal/operators/zip.ts", "node_modules/rxjs/src/internal/operators/zipWith.ts", "src/templates/assets/javascripts/browser/document/index.ts", "src/templates/assets/javascripts/browser/element/_/index.ts", "src/templates/assets/javascripts/browser/element/focus/index.ts", "src/templates/assets/javascripts/browser/element/hover/index.ts", "src/templates/assets/javascripts/utilities/h/index.ts", "src/templates/assets/javascripts/utilities/round/index.ts", "src/templates/assets/javascripts/browser/script/index.ts", "src/templates/assets/javascripts/browser/element/size/_/index.ts", "src/templates/assets/javascripts/browser/element/size/content/index.ts", "src/templates/assets/javascripts/browser/element/offset/_/index.ts", 
"src/templates/assets/javascripts/browser/element/offset/content/index.ts", "src/templates/assets/javascripts/browser/element/visibility/index.ts", "src/templates/assets/javascripts/browser/toggle/index.ts", "src/templates/assets/javascripts/browser/keyboard/index.ts", "src/templates/assets/javascripts/browser/location/_/index.ts", "src/templates/assets/javascripts/browser/location/hash/index.ts", "src/templates/assets/javascripts/browser/media/index.ts", "src/templates/assets/javascripts/browser/request/index.ts", "src/templates/assets/javascripts/browser/viewport/offset/index.ts", "src/templates/assets/javascripts/browser/viewport/size/index.ts", "src/templates/assets/javascripts/browser/viewport/_/index.ts", "src/templates/assets/javascripts/browser/viewport/at/index.ts", "src/templates/assets/javascripts/browser/worker/index.ts", "src/templates/assets/javascripts/_/index.ts", "src/templates/assets/javascripts/components/_/index.ts", "src/templates/assets/javascripts/components/announce/index.ts", "src/templates/assets/javascripts/components/consent/index.ts", "src/templates/assets/javascripts/templates/tooltip/index.tsx", "src/templates/assets/javascripts/templates/annotation/index.tsx", "src/templates/assets/javascripts/templates/clipboard/index.tsx", "src/templates/assets/javascripts/templates/search/index.tsx", "src/templates/assets/javascripts/templates/source/index.tsx", "src/templates/assets/javascripts/templates/tabbed/index.tsx", "src/templates/assets/javascripts/templates/table/index.tsx", "src/templates/assets/javascripts/templates/version/index.tsx", "src/templates/assets/javascripts/components/tooltip2/index.ts", "src/templates/assets/javascripts/components/content/annotation/_/index.ts", "src/templates/assets/javascripts/components/content/annotation/list/index.ts", "src/templates/assets/javascripts/components/content/annotation/block/index.ts", "src/templates/assets/javascripts/components/content/code/_/index.ts", "src/templates/assets/javascripts/components/content/details/index.ts", "src/templates/assets/javascripts/components/content/mermaid/index.css", "src/templates/assets/javascripts/components/content/mermaid/index.ts", "src/templates/assets/javascripts/components/content/table/index.ts", "src/templates/assets/javascripts/components/content/tabs/index.ts", "src/templates/assets/javascripts/components/content/_/index.ts", "src/templates/assets/javascripts/components/dialog/index.ts", "src/templates/assets/javascripts/components/tooltip/index.ts", "src/templates/assets/javascripts/components/header/_/index.ts", "src/templates/assets/javascripts/components/header/title/index.ts", "src/templates/assets/javascripts/components/main/index.ts", "src/templates/assets/javascripts/components/palette/index.ts", "src/templates/assets/javascripts/components/progress/index.ts", "src/templates/assets/javascripts/integrations/clipboard/index.ts", "src/templates/assets/javascripts/integrations/sitemap/index.ts", "src/templates/assets/javascripts/integrations/instant/index.ts", "src/templates/assets/javascripts/integrations/search/highlighter/index.ts", "src/templates/assets/javascripts/integrations/search/worker/message/index.ts", "src/templates/assets/javascripts/integrations/search/worker/_/index.ts", "src/templates/assets/javascripts/integrations/version/index.ts", "src/templates/assets/javascripts/components/search/query/index.ts", "src/templates/assets/javascripts/components/search/result/index.ts", "src/templates/assets/javascripts/components/search/share/index.ts", 
"src/templates/assets/javascripts/components/search/suggest/index.ts", "src/templates/assets/javascripts/components/search/_/index.ts", "src/templates/assets/javascripts/components/search/highlight/index.ts", "src/templates/assets/javascripts/components/sidebar/index.ts", "src/templates/assets/javascripts/components/source/facts/github/index.ts", "src/templates/assets/javascripts/components/source/facts/gitlab/index.ts", "src/templates/assets/javascripts/components/source/facts/_/index.ts", "src/templates/assets/javascripts/components/source/_/index.ts", "src/templates/assets/javascripts/components/tabs/index.ts", "src/templates/assets/javascripts/components/toc/index.ts", "src/templates/assets/javascripts/components/top/index.ts", "src/templates/assets/javascripts/patches/ellipsis/index.ts", "src/templates/assets/javascripts/patches/indeterminate/index.ts", "src/templates/assets/javascripts/patches/scrollfix/index.ts", "src/templates/assets/javascripts/patches/scrolllock/index.ts", "src/templates/assets/javascripts/polyfills/index.ts"], + "sourcesContent": ["(function (global, factory) {\n typeof exports === 'object' && typeof module !== 'undefined' ? factory() :\n typeof define === 'function' && define.amd ? define(factory) :\n (factory());\n}(this, (function () { 'use strict';\n\n /**\n * Applies the :focus-visible polyfill at the given scope.\n * A scope in this case is either the top-level Document or a Shadow Root.\n *\n * @param {(Document|ShadowRoot)} scope\n * @see https://github.com/WICG/focus-visible\n */\n function applyFocusVisiblePolyfill(scope) {\n var hadKeyboardEvent = true;\n var hadFocusVisibleRecently = false;\n var hadFocusVisibleRecentlyTimeout = null;\n\n var inputTypesAllowlist = {\n text: true,\n search: true,\n url: true,\n tel: true,\n email: true,\n password: true,\n number: true,\n date: true,\n month: true,\n week: true,\n time: true,\n datetime: true,\n 'datetime-local': true\n };\n\n /**\n * Helper function for legacy browsers and iframes which sometimes focus\n * elements like document, body, and non-interactive SVG.\n * @param {Element} el\n */\n function isValidFocusTarget(el) {\n if (\n el &&\n el !== document &&\n el.nodeName !== 'HTML' &&\n el.nodeName !== 'BODY' &&\n 'classList' in el &&\n 'contains' in el.classList\n ) {\n return true;\n }\n return false;\n }\n\n /**\n * Computes whether the given element should automatically trigger the\n * `focus-visible` class being added, i.e. 
whether it should always match\n * `:focus-visible` when focused.\n * @param {Element} el\n * @return {boolean}\n */\n function focusTriggersKeyboardModality(el) {\n var type = el.type;\n var tagName = el.tagName;\n\n if (tagName === 'INPUT' && inputTypesAllowlist[type] && !el.readOnly) {\n return true;\n }\n\n if (tagName === 'TEXTAREA' && !el.readOnly) {\n return true;\n }\n\n if (el.isContentEditable) {\n return true;\n }\n\n return false;\n }\n\n /**\n * Add the `focus-visible` class to the given element if it was not added by\n * the author.\n * @param {Element} el\n */\n function addFocusVisibleClass(el) {\n if (el.classList.contains('focus-visible')) {\n return;\n }\n el.classList.add('focus-visible');\n el.setAttribute('data-focus-visible-added', '');\n }\n\n /**\n * Remove the `focus-visible` class from the given element if it was not\n * originally added by the author.\n * @param {Element} el\n */\n function removeFocusVisibleClass(el) {\n if (!el.hasAttribute('data-focus-visible-added')) {\n return;\n }\n el.classList.remove('focus-visible');\n el.removeAttribute('data-focus-visible-added');\n }\n\n /**\n * If the most recent user interaction was via the keyboard;\n * and the key press did not include a meta, alt/option, or control key;\n * then the modality is keyboard. Otherwise, the modality is not keyboard.\n * Apply `focus-visible` to any current active element and keep track\n * of our keyboard modality state with `hadKeyboardEvent`.\n * @param {KeyboardEvent} e\n */\n function onKeyDown(e) {\n if (e.metaKey || e.altKey || e.ctrlKey) {\n return;\n }\n\n if (isValidFocusTarget(scope.activeElement)) {\n addFocusVisibleClass(scope.activeElement);\n }\n\n hadKeyboardEvent = true;\n }\n\n /**\n * If at any point a user clicks with a pointing device, ensure that we change\n * the modality away from keyboard.\n * This avoids the situation where a user presses a key on an already focused\n * element, and then clicks on a different element, focusing it with a\n * pointing device, while we still think we're in keyboard modality.\n * @param {Event} e\n */\n function onPointerDown(e) {\n hadKeyboardEvent = false;\n }\n\n /**\n * On `focus`, add the `focus-visible` class to the target if:\n * - the target received focus as a result of keyboard navigation, or\n * - the event target is an element that will likely require interaction\n * via the keyboard (e.g. 
a text box)\n * @param {Event} e\n */\n function onFocus(e) {\n // Prevent IE from focusing the document or HTML element.\n if (!isValidFocusTarget(e.target)) {\n return;\n }\n\n if (hadKeyboardEvent || focusTriggersKeyboardModality(e.target)) {\n addFocusVisibleClass(e.target);\n }\n }\n\n /**\n * On `blur`, remove the `focus-visible` class from the target.\n * @param {Event} e\n */\n function onBlur(e) {\n if (!isValidFocusTarget(e.target)) {\n return;\n }\n\n if (\n e.target.classList.contains('focus-visible') ||\n e.target.hasAttribute('data-focus-visible-added')\n ) {\n // To detect a tab/window switch, we look for a blur event followed\n // rapidly by a visibility change.\n // If we don't see a visibility change within 100ms, it's probably a\n // regular focus change.\n hadFocusVisibleRecently = true;\n window.clearTimeout(hadFocusVisibleRecentlyTimeout);\n hadFocusVisibleRecentlyTimeout = window.setTimeout(function() {\n hadFocusVisibleRecently = false;\n }, 100);\n removeFocusVisibleClass(e.target);\n }\n }\n\n /**\n * If the user changes tabs, keep track of whether or not the previously\n * focused element had .focus-visible.\n * @param {Event} e\n */\n function onVisibilityChange(e) {\n if (document.visibilityState === 'hidden') {\n // If the tab becomes active again, the browser will handle calling focus\n // on the element (Safari actually calls it twice).\n // If this tab change caused a blur on an element with focus-visible,\n // re-apply the class when the user switches back to the tab.\n if (hadFocusVisibleRecently) {\n hadKeyboardEvent = true;\n }\n addInitialPointerMoveListeners();\n }\n }\n\n /**\n * Add a group of listeners to detect usage of any pointing devices.\n * These listeners will be added when the polyfill first loads, and anytime\n * the window is blurred, so that they are active when the window regains\n * focus.\n */\n function addInitialPointerMoveListeners() {\n document.addEventListener('mousemove', onInitialPointerMove);\n document.addEventListener('mousedown', onInitialPointerMove);\n document.addEventListener('mouseup', onInitialPointerMove);\n document.addEventListener('pointermove', onInitialPointerMove);\n document.addEventListener('pointerdown', onInitialPointerMove);\n document.addEventListener('pointerup', onInitialPointerMove);\n document.addEventListener('touchmove', onInitialPointerMove);\n document.addEventListener('touchstart', onInitialPointerMove);\n document.addEventListener('touchend', onInitialPointerMove);\n }\n\n function removeInitialPointerMoveListeners() {\n document.removeEventListener('mousemove', onInitialPointerMove);\n document.removeEventListener('mousedown', onInitialPointerMove);\n document.removeEventListener('mouseup', onInitialPointerMove);\n document.removeEventListener('pointermove', onInitialPointerMove);\n document.removeEventListener('pointerdown', onInitialPointerMove);\n document.removeEventListener('pointerup', onInitialPointerMove);\n document.removeEventListener('touchmove', onInitialPointerMove);\n document.removeEventListener('touchstart', onInitialPointerMove);\n document.removeEventListener('touchend', onInitialPointerMove);\n }\n\n /**\n * When the polfyill first loads, assume the user is in keyboard modality.\n * If any event is received from a pointing device (e.g. 
mouse, pointer,\n * touch), turn off keyboard modality.\n * This accounts for situations where focus enters the page from the URL bar.\n * @param {Event} e\n */\n function onInitialPointerMove(e) {\n // Work around a Safari quirk that fires a mousemove on whenever the\n // window blurs, even if you're tabbing out of the page. \u00AF\\_(\u30C4)_/\u00AF\n if (e.target.nodeName && e.target.nodeName.toLowerCase() === 'html') {\n return;\n }\n\n hadKeyboardEvent = false;\n removeInitialPointerMoveListeners();\n }\n\n // For some kinds of state, we are interested in changes at the global scope\n // only. For example, global pointer input, global key presses and global\n // visibility change should affect the state at every scope:\n document.addEventListener('keydown', onKeyDown, true);\n document.addEventListener('mousedown', onPointerDown, true);\n document.addEventListener('pointerdown', onPointerDown, true);\n document.addEventListener('touchstart', onPointerDown, true);\n document.addEventListener('visibilitychange', onVisibilityChange, true);\n\n addInitialPointerMoveListeners();\n\n // For focus and blur, we specifically care about state changes in the local\n // scope. This is because focus / blur events that originate from within a\n // shadow root are not re-dispatched from the host element if it was already\n // the active element in its own scope:\n scope.addEventListener('focus', onFocus, true);\n scope.addEventListener('blur', onBlur, true);\n\n // We detect that a node is a ShadowRoot by ensuring that it is a\n // DocumentFragment and also has a host property. This check covers native\n // implementation and polyfill implementation transparently. If we only cared\n // about the native implementation, we could just check if the scope was\n // an instance of a ShadowRoot.\n if (scope.nodeType === Node.DOCUMENT_FRAGMENT_NODE && scope.host) {\n // Since a ShadowRoot is a special kind of DocumentFragment, it does not\n // have a root element to add a class to. So, we add this attribute to the\n // host element instead:\n scope.host.setAttribute('data-js-focus-visible', '');\n } else if (scope.nodeType === Node.DOCUMENT_NODE) {\n document.documentElement.classList.add('js-focus-visible');\n document.documentElement.setAttribute('data-js-focus-visible', '');\n }\n }\n\n // It is important to wrap all references to global window and document in\n // these checks to support server-side rendering use cases\n // @see https://github.com/WICG/focus-visible/issues/199\n if (typeof window !== 'undefined' && typeof document !== 'undefined') {\n // Make the polyfill helper globally available. 
This can be used as a signal\n // to interested libraries that wish to coordinate with the polyfill for e.g.,\n // applying the polyfill to a shadow root:\n window.applyFocusVisiblePolyfill = applyFocusVisiblePolyfill;\n\n // Notify interested libraries of the polyfill's presence, in case the\n // polyfill was loaded lazily:\n var event;\n\n try {\n event = new CustomEvent('focus-visible-polyfill-ready');\n } catch (error) {\n // IE11 does not support using CustomEvent as a constructor directly:\n event = document.createEvent('CustomEvent');\n event.initCustomEvent('focus-visible-polyfill-ready', false, false, {});\n }\n\n window.dispatchEvent(event);\n }\n\n if (typeof document !== 'undefined') {\n // Apply the polyfill to the global document, so that no JavaScript\n // coordination is required to use the polyfill in the top-level document:\n applyFocusVisiblePolyfill(document);\n }\n\n})));\n", "/*!\n * clipboard.js v2.0.11\n * https://clipboardjs.com/\n *\n * Licensed MIT \u00A9 Zeno Rocha\n */\n(function webpackUniversalModuleDefinition(root, factory) {\n\tif(typeof exports === 'object' && typeof module === 'object')\n\t\tmodule.exports = factory();\n\telse if(typeof define === 'function' && define.amd)\n\t\tdefine([], factory);\n\telse if(typeof exports === 'object')\n\t\texports[\"ClipboardJS\"] = factory();\n\telse\n\t\troot[\"ClipboardJS\"] = factory();\n})(this, function() {\nreturn /******/ (function() { // webpackBootstrap\n/******/ \tvar __webpack_modules__ = ({\n\n/***/ 686:\n/***/ (function(__unused_webpack_module, __webpack_exports__, __webpack_require__) {\n\n\"use strict\";\n\n// EXPORTS\n__webpack_require__.d(__webpack_exports__, {\n \"default\": function() { return /* binding */ clipboard; }\n});\n\n// EXTERNAL MODULE: ./node_modules/tiny-emitter/index.js\nvar tiny_emitter = __webpack_require__(279);\nvar tiny_emitter_default = /*#__PURE__*/__webpack_require__.n(tiny_emitter);\n// EXTERNAL MODULE: ./node_modules/good-listener/src/listen.js\nvar listen = __webpack_require__(370);\nvar listen_default = /*#__PURE__*/__webpack_require__.n(listen);\n// EXTERNAL MODULE: ./node_modules/select/src/select.js\nvar src_select = __webpack_require__(817);\nvar select_default = /*#__PURE__*/__webpack_require__.n(src_select);\n;// CONCATENATED MODULE: ./src/common/command.js\n/**\n * Executes a given operation type.\n * @param {String} type\n * @return {Boolean}\n */\nfunction command(type) {\n try {\n return document.execCommand(type);\n } catch (err) {\n return false;\n }\n}\n;// CONCATENATED MODULE: ./src/actions/cut.js\n\n\n/**\n * Cut action wrapper.\n * @param {String|HTMLElement} target\n * @return {String}\n */\n\nvar ClipboardActionCut = function ClipboardActionCut(target) {\n var selectedText = select_default()(target);\n command('cut');\n return selectedText;\n};\n\n/* harmony default export */ var actions_cut = (ClipboardActionCut);\n;// CONCATENATED MODULE: ./src/common/create-fake-element.js\n/**\n * Creates a fake textarea element with a value.\n * @param {String} value\n * @return {HTMLElement}\n */\nfunction createFakeElement(value) {\n var isRTL = document.documentElement.getAttribute('dir') === 'rtl';\n var fakeElement = document.createElement('textarea'); // Prevent zooming on iOS\n\n fakeElement.style.fontSize = '12pt'; // Reset box model\n\n fakeElement.style.border = '0';\n fakeElement.style.padding = '0';\n fakeElement.style.margin = '0'; // Move element out of screen horizontally\n\n fakeElement.style.position = 'absolute';\n fakeElement.style[isRTL ? 
'right' : 'left'] = '-9999px'; // Move element to the same position vertically\n\n var yPosition = window.pageYOffset || document.documentElement.scrollTop;\n fakeElement.style.top = \"\".concat(yPosition, \"px\");\n fakeElement.setAttribute('readonly', '');\n fakeElement.value = value;\n return fakeElement;\n}\n;// CONCATENATED MODULE: ./src/actions/copy.js\n\n\n\n/**\n * Create fake copy action wrapper using a fake element.\n * @param {String} target\n * @param {Object} options\n * @return {String}\n */\n\nvar fakeCopyAction = function fakeCopyAction(value, options) {\n var fakeElement = createFakeElement(value);\n options.container.appendChild(fakeElement);\n var selectedText = select_default()(fakeElement);\n command('copy');\n fakeElement.remove();\n return selectedText;\n};\n/**\n * Copy action wrapper.\n * @param {String|HTMLElement} target\n * @param {Object} options\n * @return {String}\n */\n\n\nvar ClipboardActionCopy = function ClipboardActionCopy(target) {\n var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {\n container: document.body\n };\n var selectedText = '';\n\n if (typeof target === 'string') {\n selectedText = fakeCopyAction(target, options);\n } else if (target instanceof HTMLInputElement && !['text', 'search', 'url', 'tel', 'password'].includes(target === null || target === void 0 ? void 0 : target.type)) {\n // If input type doesn't support `setSelectionRange`. Simulate it. https://developer.mozilla.org/en-US/docs/Web/API/HTMLInputElement/setSelectionRange\n selectedText = fakeCopyAction(target.value, options);\n } else {\n selectedText = select_default()(target);\n command('copy');\n }\n\n return selectedText;\n};\n\n/* harmony default export */ var actions_copy = (ClipboardActionCopy);\n;// CONCATENATED MODULE: ./src/actions/default.js\nfunction _typeof(obj) { \"@babel/helpers - typeof\"; if (typeof Symbol === \"function\" && typeof Symbol.iterator === \"symbol\") { _typeof = function _typeof(obj) { return typeof obj; }; } else { _typeof = function _typeof(obj) { return obj && typeof Symbol === \"function\" && obj.constructor === Symbol && obj !== Symbol.prototype ? \"symbol\" : typeof obj; }; } return _typeof(obj); }\n\n\n\n/**\n * Inner function which performs selection from either `text` or `target`\n * properties and then executes copy or cut operations.\n * @param {Object} options\n */\n\nvar ClipboardActionDefault = function ClipboardActionDefault() {\n var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};\n // Defines base properties passed from constructor.\n var _options$action = options.action,\n action = _options$action === void 0 ? 'copy' : _options$action,\n container = options.container,\n target = options.target,\n text = options.text; // Sets the `action` to be performed which can be either 'copy' or 'cut'.\n\n if (action !== 'copy' && action !== 'cut') {\n throw new Error('Invalid \"action\" value, use either \"copy\" or \"cut\"');\n } // Sets the `target` property using an element that will be have its content copied.\n\n\n if (target !== undefined) {\n if (target && _typeof(target) === 'object' && target.nodeType === 1) {\n if (action === 'copy' && target.hasAttribute('disabled')) {\n throw new Error('Invalid \"target\" attribute. Please use \"readonly\" instead of \"disabled\" attribute');\n }\n\n if (action === 'cut' && (target.hasAttribute('readonly') || target.hasAttribute('disabled'))) {\n throw new Error('Invalid \"target\" attribute. 
You can\\'t cut text from elements with \"readonly\" or \"disabled\" attributes');\n }\n } else {\n throw new Error('Invalid \"target\" value, use a valid Element');\n }\n } // Define selection strategy based on `text` property.\n\n\n if (text) {\n return actions_copy(text, {\n container: container\n });\n } // Defines which selection strategy based on `target` property.\n\n\n if (target) {\n return action === 'cut' ? actions_cut(target) : actions_copy(target, {\n container: container\n });\n }\n};\n\n/* harmony default export */ var actions_default = (ClipboardActionDefault);\n;// CONCATENATED MODULE: ./src/clipboard.js\nfunction clipboard_typeof(obj) { \"@babel/helpers - typeof\"; if (typeof Symbol === \"function\" && typeof Symbol.iterator === \"symbol\") { clipboard_typeof = function _typeof(obj) { return typeof obj; }; } else { clipboard_typeof = function _typeof(obj) { return obj && typeof Symbol === \"function\" && obj.constructor === Symbol && obj !== Symbol.prototype ? \"symbol\" : typeof obj; }; } return clipboard_typeof(obj); }\n\nfunction _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError(\"Cannot call a class as a function\"); } }\n\nfunction _defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if (\"value\" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } }\n\nfunction _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); return Constructor; }\n\nfunction _inherits(subClass, superClass) { if (typeof superClass !== \"function\" && superClass !== null) { throw new TypeError(\"Super expression must either be null or a function\"); } subClass.prototype = Object.create(superClass && superClass.prototype, { constructor: { value: subClass, writable: true, configurable: true } }); if (superClass) _setPrototypeOf(subClass, superClass); }\n\nfunction _setPrototypeOf(o, p) { _setPrototypeOf = Object.setPrototypeOf || function _setPrototypeOf(o, p) { o.__proto__ = p; return o; }; return _setPrototypeOf(o, p); }\n\nfunction _createSuper(Derived) { var hasNativeReflectConstruct = _isNativeReflectConstruct(); return function _createSuperInternal() { var Super = _getPrototypeOf(Derived), result; if (hasNativeReflectConstruct) { var NewTarget = _getPrototypeOf(this).constructor; result = Reflect.construct(Super, arguments, NewTarget); } else { result = Super.apply(this, arguments); } return _possibleConstructorReturn(this, result); }; }\n\nfunction _possibleConstructorReturn(self, call) { if (call && (clipboard_typeof(call) === \"object\" || typeof call === \"function\")) { return call; } return _assertThisInitialized(self); }\n\nfunction _assertThisInitialized(self) { if (self === void 0) { throw new ReferenceError(\"this hasn't been initialised - super() hasn't been called\"); } return self; }\n\nfunction _isNativeReflectConstruct() { if (typeof Reflect === \"undefined\" || !Reflect.construct) return false; if (Reflect.construct.sham) return false; if (typeof Proxy === \"function\") return true; try { Date.prototype.toString.call(Reflect.construct(Date, [], function () {})); return true; } catch (e) { return false; } }\n\nfunction _getPrototypeOf(o) { _getPrototypeOf = Object.setPrototypeOf ? 
Object.getPrototypeOf : function _getPrototypeOf(o) { return o.__proto__ || Object.getPrototypeOf(o); }; return _getPrototypeOf(o); }\n\n\n\n\n\n\n/**\n * Helper function to retrieve attribute value.\n * @param {String} suffix\n * @param {Element} element\n */\n\nfunction getAttributeValue(suffix, element) {\n var attribute = \"data-clipboard-\".concat(suffix);\n\n if (!element.hasAttribute(attribute)) {\n return;\n }\n\n return element.getAttribute(attribute);\n}\n/**\n * Base class which takes one or more elements, adds event listeners to them,\n * and instantiates a new `ClipboardAction` on each click.\n */\n\n\nvar Clipboard = /*#__PURE__*/function (_Emitter) {\n _inherits(Clipboard, _Emitter);\n\n var _super = _createSuper(Clipboard);\n\n /**\n * @param {String|HTMLElement|HTMLCollection|NodeList} trigger\n * @param {Object} options\n */\n function Clipboard(trigger, options) {\n var _this;\n\n _classCallCheck(this, Clipboard);\n\n _this = _super.call(this);\n\n _this.resolveOptions(options);\n\n _this.listenClick(trigger);\n\n return _this;\n }\n /**\n * Defines if attributes would be resolved using internal setter functions\n * or custom functions that were passed in the constructor.\n * @param {Object} options\n */\n\n\n _createClass(Clipboard, [{\n key: \"resolveOptions\",\n value: function resolveOptions() {\n var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};\n this.action = typeof options.action === 'function' ? options.action : this.defaultAction;\n this.target = typeof options.target === 'function' ? options.target : this.defaultTarget;\n this.text = typeof options.text === 'function' ? options.text : this.defaultText;\n this.container = clipboard_typeof(options.container) === 'object' ? options.container : document.body;\n }\n /**\n * Adds a click event listener to the passed trigger.\n * @param {String|HTMLElement|HTMLCollection|NodeList} trigger\n */\n\n }, {\n key: \"listenClick\",\n value: function listenClick(trigger) {\n var _this2 = this;\n\n this.listener = listen_default()(trigger, 'click', function (e) {\n return _this2.onClick(e);\n });\n }\n /**\n * Defines a new `ClipboardAction` on each click event.\n * @param {Event} e\n */\n\n }, {\n key: \"onClick\",\n value: function onClick(e) {\n var trigger = e.delegateTarget || e.currentTarget;\n var action = this.action(trigger) || 'copy';\n var text = actions_default({\n action: action,\n container: this.container,\n target: this.target(trigger),\n text: this.text(trigger)\n }); // Fires an event based on the copy operation result.\n\n this.emit(text ? 
'success' : 'error', {\n action: action,\n text: text,\n trigger: trigger,\n clearSelection: function clearSelection() {\n if (trigger) {\n trigger.focus();\n }\n\n window.getSelection().removeAllRanges();\n }\n });\n }\n /**\n * Default `action` lookup function.\n * @param {Element} trigger\n */\n\n }, {\n key: \"defaultAction\",\n value: function defaultAction(trigger) {\n return getAttributeValue('action', trigger);\n }\n /**\n * Default `target` lookup function.\n * @param {Element} trigger\n */\n\n }, {\n key: \"defaultTarget\",\n value: function defaultTarget(trigger) {\n var selector = getAttributeValue('target', trigger);\n\n if (selector) {\n return document.querySelector(selector);\n }\n }\n /**\n * Allow fire programmatically a copy action\n * @param {String|HTMLElement} target\n * @param {Object} options\n * @returns Text copied.\n */\n\n }, {\n key: \"defaultText\",\n\n /**\n * Default `text` lookup function.\n * @param {Element} trigger\n */\n value: function defaultText(trigger) {\n return getAttributeValue('text', trigger);\n }\n /**\n * Destroy lifecycle.\n */\n\n }, {\n key: \"destroy\",\n value: function destroy() {\n this.listener.destroy();\n }\n }], [{\n key: \"copy\",\n value: function copy(target) {\n var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {\n container: document.body\n };\n return actions_copy(target, options);\n }\n /**\n * Allow fire programmatically a cut action\n * @param {String|HTMLElement} target\n * @returns Text cutted.\n */\n\n }, {\n key: \"cut\",\n value: function cut(target) {\n return actions_cut(target);\n }\n /**\n * Returns the support of the given action, or all actions if no action is\n * given.\n * @param {String} [action]\n */\n\n }, {\n key: \"isSupported\",\n value: function isSupported() {\n var action = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : ['copy', 'cut'];\n var actions = typeof action === 'string' ? 
[action] : action;\n var support = !!document.queryCommandSupported;\n actions.forEach(function (action) {\n support = support && !!document.queryCommandSupported(action);\n });\n return support;\n }\n }]);\n\n return Clipboard;\n}((tiny_emitter_default()));\n\n/* harmony default export */ var clipboard = (Clipboard);\n\n/***/ }),\n\n/***/ 828:\n/***/ (function(module) {\n\nvar DOCUMENT_NODE_TYPE = 9;\n\n/**\n * A polyfill for Element.matches()\n */\nif (typeof Element !== 'undefined' && !Element.prototype.matches) {\n var proto = Element.prototype;\n\n proto.matches = proto.matchesSelector ||\n proto.mozMatchesSelector ||\n proto.msMatchesSelector ||\n proto.oMatchesSelector ||\n proto.webkitMatchesSelector;\n}\n\n/**\n * Finds the closest parent that matches a selector.\n *\n * @param {Element} element\n * @param {String} selector\n * @return {Function}\n */\nfunction closest (element, selector) {\n while (element && element.nodeType !== DOCUMENT_NODE_TYPE) {\n if (typeof element.matches === 'function' &&\n element.matches(selector)) {\n return element;\n }\n element = element.parentNode;\n }\n}\n\nmodule.exports = closest;\n\n\n/***/ }),\n\n/***/ 438:\n/***/ (function(module, __unused_webpack_exports, __webpack_require__) {\n\nvar closest = __webpack_require__(828);\n\n/**\n * Delegates event to a selector.\n *\n * @param {Element} element\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @param {Boolean} useCapture\n * @return {Object}\n */\nfunction _delegate(element, selector, type, callback, useCapture) {\n var listenerFn = listener.apply(this, arguments);\n\n element.addEventListener(type, listenerFn, useCapture);\n\n return {\n destroy: function() {\n element.removeEventListener(type, listenerFn, useCapture);\n }\n }\n}\n\n/**\n * Delegates event to a selector.\n *\n * @param {Element|String|Array} [elements]\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @param {Boolean} useCapture\n * @return {Object}\n */\nfunction delegate(elements, selector, type, callback, useCapture) {\n // Handle the regular Element usage\n if (typeof elements.addEventListener === 'function') {\n return _delegate.apply(null, arguments);\n }\n\n // Handle Element-less usage, it defaults to global delegation\n if (typeof type === 'function') {\n // Use `document` as the first parameter, then apply arguments\n // This is a short way to .unshift `arguments` without running into deoptimizations\n return _delegate.bind(null, document).apply(null, arguments);\n }\n\n // Handle Selector-based usage\n if (typeof elements === 'string') {\n elements = document.querySelectorAll(elements);\n }\n\n // Handle Array-like based usage\n return Array.prototype.map.call(elements, function (element) {\n return _delegate(element, selector, type, callback, useCapture);\n });\n}\n\n/**\n * Finds closest match and invokes callback.\n *\n * @param {Element} element\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @return {Function}\n */\nfunction listener(element, selector, type, callback) {\n return function(e) {\n e.delegateTarget = closest(e.target, selector);\n\n if (e.delegateTarget) {\n callback.call(element, e);\n }\n }\n}\n\nmodule.exports = delegate;\n\n\n/***/ }),\n\n/***/ 879:\n/***/ (function(__unused_webpack_module, exports) {\n\n/**\n * Check if argument is a HTML element.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.node = function(value) {\n return value !== undefined\n && 
value instanceof HTMLElement\n && value.nodeType === 1;\n};\n\n/**\n * Check if argument is a list of HTML elements.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.nodeList = function(value) {\n var type = Object.prototype.toString.call(value);\n\n return value !== undefined\n && (type === '[object NodeList]' || type === '[object HTMLCollection]')\n && ('length' in value)\n && (value.length === 0 || exports.node(value[0]));\n};\n\n/**\n * Check if argument is a string.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.string = function(value) {\n return typeof value === 'string'\n || value instanceof String;\n};\n\n/**\n * Check if argument is a function.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.fn = function(value) {\n var type = Object.prototype.toString.call(value);\n\n return type === '[object Function]';\n};\n\n\n/***/ }),\n\n/***/ 370:\n/***/ (function(module, __unused_webpack_exports, __webpack_require__) {\n\nvar is = __webpack_require__(879);\nvar delegate = __webpack_require__(438);\n\n/**\n * Validates all params and calls the right\n * listener function based on its target type.\n *\n * @param {String|HTMLElement|HTMLCollection|NodeList} target\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listen(target, type, callback) {\n if (!target && !type && !callback) {\n throw new Error('Missing required arguments');\n }\n\n if (!is.string(type)) {\n throw new TypeError('Second argument must be a String');\n }\n\n if (!is.fn(callback)) {\n throw new TypeError('Third argument must be a Function');\n }\n\n if (is.node(target)) {\n return listenNode(target, type, callback);\n }\n else if (is.nodeList(target)) {\n return listenNodeList(target, type, callback);\n }\n else if (is.string(target)) {\n return listenSelector(target, type, callback);\n }\n else {\n throw new TypeError('First argument must be a String, HTMLElement, HTMLCollection, or NodeList');\n }\n}\n\n/**\n * Adds an event listener to a HTML element\n * and returns a remove listener function.\n *\n * @param {HTMLElement} node\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenNode(node, type, callback) {\n node.addEventListener(type, callback);\n\n return {\n destroy: function() {\n node.removeEventListener(type, callback);\n }\n }\n}\n\n/**\n * Add an event listener to a list of HTML elements\n * and returns a remove listener function.\n *\n * @param {NodeList|HTMLCollection} nodeList\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenNodeList(nodeList, type, callback) {\n Array.prototype.forEach.call(nodeList, function(node) {\n node.addEventListener(type, callback);\n });\n\n return {\n destroy: function() {\n Array.prototype.forEach.call(nodeList, function(node) {\n node.removeEventListener(type, callback);\n });\n }\n }\n}\n\n/**\n * Add an event listener to a selector\n * and returns a remove listener function.\n *\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenSelector(selector, type, callback) {\n return delegate(document.body, selector, type, callback);\n}\n\nmodule.exports = listen;\n\n\n/***/ }),\n\n/***/ 817:\n/***/ (function(module) {\n\nfunction select(element) {\n var selectedText;\n\n if (element.nodeName === 'SELECT') {\n element.focus();\n\n selectedText = element.value;\n }\n else if (element.nodeName === 'INPUT' || element.nodeName 
=== 'TEXTAREA') {\n var isReadOnly = element.hasAttribute('readonly');\n\n if (!isReadOnly) {\n element.setAttribute('readonly', '');\n }\n\n element.select();\n element.setSelectionRange(0, element.value.length);\n\n if (!isReadOnly) {\n element.removeAttribute('readonly');\n }\n\n selectedText = element.value;\n }\n else {\n if (element.hasAttribute('contenteditable')) {\n element.focus();\n }\n\n var selection = window.getSelection();\n var range = document.createRange();\n\n range.selectNodeContents(element);\n selection.removeAllRanges();\n selection.addRange(range);\n\n selectedText = selection.toString();\n }\n\n return selectedText;\n}\n\nmodule.exports = select;\n\n\n/***/ }),\n\n/***/ 279:\n/***/ (function(module) {\n\nfunction E () {\n // Keep this empty so it's easier to inherit from\n // (via https://github.com/lipsmack from https://github.com/scottcorgan/tiny-emitter/issues/3)\n}\n\nE.prototype = {\n on: function (name, callback, ctx) {\n var e = this.e || (this.e = {});\n\n (e[name] || (e[name] = [])).push({\n fn: callback,\n ctx: ctx\n });\n\n return this;\n },\n\n once: function (name, callback, ctx) {\n var self = this;\n function listener () {\n self.off(name, listener);\n callback.apply(ctx, arguments);\n };\n\n listener._ = callback\n return this.on(name, listener, ctx);\n },\n\n emit: function (name) {\n var data = [].slice.call(arguments, 1);\n var evtArr = ((this.e || (this.e = {}))[name] || []).slice();\n var i = 0;\n var len = evtArr.length;\n\n for (i; i < len; i++) {\n evtArr[i].fn.apply(evtArr[i].ctx, data);\n }\n\n return this;\n },\n\n off: function (name, callback) {\n var e = this.e || (this.e = {});\n var evts = e[name];\n var liveEvents = [];\n\n if (evts && callback) {\n for (var i = 0, len = evts.length; i < len; i++) {\n if (evts[i].fn !== callback && evts[i].fn._ !== callback)\n liveEvents.push(evts[i]);\n }\n }\n\n // Remove event from queue to prevent memory leak\n // Suggested by https://github.com/lazd\n // Ref: https://github.com/scottcorgan/tiny-emitter/commit/c6ebfaa9bc973b33d110a84a307742b7cf94c953#commitcomment-5024910\n\n (liveEvents.length)\n ? 
e[name] = liveEvents\n : delete e[name];\n\n return this;\n }\n};\n\nmodule.exports = E;\nmodule.exports.TinyEmitter = E;\n\n\n/***/ })\n\n/******/ \t});\n/************************************************************************/\n/******/ \t// The module cache\n/******/ \tvar __webpack_module_cache__ = {};\n/******/ \t\n/******/ \t// The require function\n/******/ \tfunction __webpack_require__(moduleId) {\n/******/ \t\t// Check if module is in cache\n/******/ \t\tif(__webpack_module_cache__[moduleId]) {\n/******/ \t\t\treturn __webpack_module_cache__[moduleId].exports;\n/******/ \t\t}\n/******/ \t\t// Create a new module (and put it into the cache)\n/******/ \t\tvar module = __webpack_module_cache__[moduleId] = {\n/******/ \t\t\t// no module.id needed\n/******/ \t\t\t// no module.loaded needed\n/******/ \t\t\texports: {}\n/******/ \t\t};\n/******/ \t\n/******/ \t\t// Execute the module function\n/******/ \t\t__webpack_modules__[moduleId](module, module.exports, __webpack_require__);\n/******/ \t\n/******/ \t\t// Return the exports of the module\n/******/ \t\treturn module.exports;\n/******/ \t}\n/******/ \t\n/************************************************************************/\n/******/ \t/* webpack/runtime/compat get default export */\n/******/ \t!function() {\n/******/ \t\t// getDefaultExport function for compatibility with non-harmony modules\n/******/ \t\t__webpack_require__.n = function(module) {\n/******/ \t\t\tvar getter = module && module.__esModule ?\n/******/ \t\t\t\tfunction() { return module['default']; } :\n/******/ \t\t\t\tfunction() { return module; };\n/******/ \t\t\t__webpack_require__.d(getter, { a: getter });\n/******/ \t\t\treturn getter;\n/******/ \t\t};\n/******/ \t}();\n/******/ \t\n/******/ \t/* webpack/runtime/define property getters */\n/******/ \t!function() {\n/******/ \t\t// define getter functions for harmony exports\n/******/ \t\t__webpack_require__.d = function(exports, definition) {\n/******/ \t\t\tfor(var key in definition) {\n/******/ \t\t\t\tif(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n/******/ \t\t\t\t\tObject.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n/******/ \t\t\t\t}\n/******/ \t\t\t}\n/******/ \t\t};\n/******/ \t}();\n/******/ \t\n/******/ \t/* webpack/runtime/hasOwnProperty shorthand */\n/******/ \t!function() {\n/******/ \t\t__webpack_require__.o = function(obj, prop) { return Object.prototype.hasOwnProperty.call(obj, prop); }\n/******/ \t}();\n/******/ \t\n/************************************************************************/\n/******/ \t// module exports must be returned from runtime so entry inlining is disabled\n/******/ \t// startup\n/******/ \t// Load entry module and return exports\n/******/ \treturn __webpack_require__(686);\n/******/ })()\n.default;\n});", "/*!\n * escape-html\n * Copyright(c) 2012-2013 TJ Holowaychuk\n * Copyright(c) 2015 Andreas Lubbe\n * Copyright(c) 2015 Tiancheng \"Timothy\" Gu\n * MIT Licensed\n */\n\n'use strict';\n\n/**\n * Module variables.\n * @private\n */\n\nvar matchHtmlRegExp = /[\"'&<>]/;\n\n/**\n * Module exports.\n * @public\n */\n\nmodule.exports = escapeHtml;\n\n/**\n * Escape special characters in the given string of html.\n *\n * @param {string} string The string to escape for inserting into HTML\n * @return {string}\n * @public\n */\n\nfunction escapeHtml(string) {\n var str = '' + string;\n var match = matchHtmlRegExp.exec(str);\n\n if (!match) {\n return str;\n }\n\n var escape;\n var html = '';\n var index = 0;\n 
var lastIndex = 0;\n\n for (index = match.index; index < str.length; index++) {\n switch (str.charCodeAt(index)) {\n case 34: // \"\n escape = '"';\n break;\n case 38: // &\n escape = '&';\n break;\n case 39: // '\n escape = ''';\n break;\n case 60: // <\n escape = '<';\n break;\n case 62: // >\n escape = '>';\n break;\n default:\n continue;\n }\n\n if (lastIndex !== index) {\n html += str.substring(lastIndex, index);\n }\n\n lastIndex = index + 1;\n html += escape;\n }\n\n return lastIndex !== index\n ? html + str.substring(lastIndex, index)\n : html;\n}\n", "/*\n * Copyright (c) 2016-2024 Martin Donath \n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to\n * deal in the Software without restriction, including without limitation the\n * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or\n * sell copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS\n * IN THE SOFTWARE.\n */\n\nimport \"focus-visible\"\n\nimport {\n EMPTY,\n NEVER,\n Observable,\n Subject,\n defer,\n delay,\n filter,\n map,\n merge,\n mergeWith,\n shareReplay,\n switchMap\n} from \"rxjs\"\n\nimport { configuration, feature } from \"./_\"\nimport {\n at,\n getActiveElement,\n getOptionalElement,\n requestJSON,\n setLocation,\n setToggle,\n watchDocument,\n watchKeyboard,\n watchLocation,\n watchLocationTarget,\n watchMedia,\n watchPrint,\n watchScript,\n watchViewport\n} from \"./browser\"\nimport {\n getComponentElement,\n getComponentElements,\n mountAnnounce,\n mountBackToTop,\n mountConsent,\n mountContent,\n mountDialog,\n mountHeader,\n mountHeaderTitle,\n mountPalette,\n mountProgress,\n mountSearch,\n mountSearchHiglight,\n mountSidebar,\n mountSource,\n mountTableOfContents,\n mountTabs,\n watchHeader,\n watchMain\n} from \"./components\"\nimport {\n SearchIndex,\n setupClipboardJS,\n setupInstantNavigation,\n setupVersionSelector\n} from \"./integrations\"\nimport {\n patchEllipsis,\n patchIndeterminate,\n patchScrollfix,\n patchScrolllock\n} from \"./patches\"\nimport \"./polyfills\"\n\n/* ----------------------------------------------------------------------------\n * Functions - @todo refactor\n * ------------------------------------------------------------------------- */\n\n/**\n * Fetch search index\n *\n * @returns Search index observable\n */\nfunction fetchSearchIndex(): Observable {\n if (location.protocol === \"file:\") {\n return watchScript(\n `${new URL(\"search/search_index.js\", config.base)}`\n )\n .pipe(\n // @ts-ignore - @todo fix typings\n map(() => __index),\n shareReplay(1)\n )\n } else {\n return requestJSON(\n new URL(\"search/search_index.json\", config.base)\n )\n }\n}\n\n/* ----------------------------------------------------------------------------\n * Application\n * 
------------------------------------------------------------------------- */\n\n/* Yay, JavaScript is available */\ndocument.documentElement.classList.remove(\"no-js\")\ndocument.documentElement.classList.add(\"js\")\n\n/* Set up navigation observables and subjects */\nconst document$ = watchDocument()\nconst location$ = watchLocation()\nconst target$ = watchLocationTarget(location$)\nconst keyboard$ = watchKeyboard()\n\n/* Set up media observables */\nconst viewport$ = watchViewport()\nconst tablet$ = watchMedia(\"(min-width: 960px)\")\nconst screen$ = watchMedia(\"(min-width: 1220px)\")\nconst print$ = watchPrint()\n\n/* Retrieve search index, if search is enabled */\nconst config = configuration()\nconst index$ = document.forms.namedItem(\"search\")\n ? fetchSearchIndex()\n : NEVER\n\n/* Set up Clipboard.js integration */\nconst alert$ = new Subject()\nsetupClipboardJS({ alert$ })\n\n/* Set up progress indicator */\nconst progress$ = new Subject()\n\n/* Set up instant navigation, if enabled */\nif (feature(\"navigation.instant\"))\n setupInstantNavigation({ location$, viewport$, progress$ })\n .subscribe(document$)\n\n/* Set up version selector */\nif (config.version?.provider === \"mike\")\n setupVersionSelector({ document$ })\n\n/* Always close drawer and search on navigation */\nmerge(location$, target$)\n .pipe(\n delay(125)\n )\n .subscribe(() => {\n setToggle(\"drawer\", false)\n setToggle(\"search\", false)\n })\n\n/* Set up global keyboard handlers */\nkeyboard$\n .pipe(\n filter(({ mode }) => mode === \"global\")\n )\n .subscribe(key => {\n switch (key.type) {\n\n /* Go to previous page */\n case \"p\":\n case \",\":\n const prev = getOptionalElement(\"link[rel=prev]\")\n if (typeof prev !== \"undefined\")\n setLocation(prev)\n break\n\n /* Go to next page */\n case \"n\":\n case \".\":\n const next = getOptionalElement(\"link[rel=next]\")\n if (typeof next !== \"undefined\")\n setLocation(next)\n break\n\n /* Expand navigation, see https://bit.ly/3ZjG5io */\n case \"Enter\":\n const active = getActiveElement()\n if (active instanceof HTMLLabelElement)\n active.click()\n }\n })\n\n/* Set up patches */\npatchEllipsis({ viewport$, document$ })\npatchIndeterminate({ document$, tablet$ })\npatchScrollfix({ document$ })\npatchScrolllock({ viewport$, tablet$ })\n\n/* Set up header and main area observable */\nconst header$ = watchHeader(getComponentElement(\"header\"), { viewport$ })\nconst main$ = document$\n .pipe(\n map(() => getComponentElement(\"main\")),\n switchMap(el => watchMain(el, { viewport$, header$ })),\n shareReplay(1)\n )\n\n/* Set up control component observables */\nconst control$ = merge(\n\n /* Consent */\n ...getComponentElements(\"consent\")\n .map(el => mountConsent(el, { target$ })),\n\n /* Dialog */\n ...getComponentElements(\"dialog\")\n .map(el => mountDialog(el, { alert$ })),\n\n /* Header */\n ...getComponentElements(\"header\")\n .map(el => mountHeader(el, { viewport$, header$, main$ })),\n\n /* Color palette */\n ...getComponentElements(\"palette\")\n .map(el => mountPalette(el)),\n\n /* Progress bar */\n ...getComponentElements(\"progress\")\n .map(el => mountProgress(el, { progress$ })),\n\n /* Search */\n ...getComponentElements(\"search\")\n .map(el => mountSearch(el, { index$, keyboard$ })),\n\n /* Repository information */\n ...getComponentElements(\"source\")\n .map(el => mountSource(el))\n)\n\n/* Set up content component observables */\nconst content$ = defer(() => merge(\n\n /* Announcement bar */\n ...getComponentElements(\"announce\")\n 
.map(el => mountAnnounce(el)),\n\n /* Content */\n ...getComponentElements(\"content\")\n .map(el => mountContent(el, { viewport$, target$, print$ })),\n\n /* Search highlighting */\n ...getComponentElements(\"content\")\n .map(el => feature(\"search.highlight\")\n ? mountSearchHiglight(el, { index$, location$ })\n : EMPTY\n ),\n\n /* Header title */\n ...getComponentElements(\"header-title\")\n .map(el => mountHeaderTitle(el, { viewport$, header$ })),\n\n /* Sidebar */\n ...getComponentElements(\"sidebar\")\n .map(el => el.getAttribute(\"data-md-type\") === \"navigation\"\n ? at(screen$, () => mountSidebar(el, { viewport$, header$, main$ }))\n : at(tablet$, () => mountSidebar(el, { viewport$, header$, main$ }))\n ),\n\n /* Navigation tabs */\n ...getComponentElements(\"tabs\")\n .map(el => mountTabs(el, { viewport$, header$ })),\n\n /* Table of contents */\n ...getComponentElements(\"toc\")\n .map(el => mountTableOfContents(el, {\n viewport$, header$, main$, target$\n })),\n\n /* Back-to-top button */\n ...getComponentElements(\"top\")\n .map(el => mountBackToTop(el, { viewport$, header$, main$, target$ }))\n))\n\n/* Set up component observables */\nconst component$ = document$\n .pipe(\n switchMap(() => content$),\n mergeWith(control$),\n shareReplay(1)\n )\n\n/* Subscribe to all components */\ncomponent$.subscribe()\n\n/* ----------------------------------------------------------------------------\n * Exports\n * ------------------------------------------------------------------------- */\n\nwindow.document$ = document$ /* Document observable */\nwindow.location$ = location$ /* Location subject */\nwindow.target$ = target$ /* Location target observable */\nwindow.keyboard$ = keyboard$ /* Keyboard observable */\nwindow.viewport$ = viewport$ /* Viewport observable */\nwindow.tablet$ = tablet$ /* Media tablet observable */\nwindow.screen$ = screen$ /* Media screen observable */\nwindow.print$ = print$ /* Media print observable */\nwindow.alert$ = alert$ /* Alert subject */\nwindow.progress$ = progress$ /* Progress indicator subject */\nwindow.component$ = component$ /* Component observable */\n", "/*! *****************************************************************************\r\nCopyright (c) Microsoft Corporation.\r\n\r\nPermission to use, copy, modify, and/or distribute this software for any\r\npurpose with or without fee is hereby granted.\r\n\r\nTHE SOFTWARE IS PROVIDED \"AS IS\" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH\r\nREGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY\r\nAND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,\r\nINDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM\r\nLOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR\r\nOTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR\r\nPERFORMANCE OF THIS SOFTWARE.\r\n***************************************************************************** */\r\n/* global Reflect, Promise */\r\n\r\nvar extendStatics = function(d, b) {\r\n extendStatics = Object.setPrototypeOf ||\r\n ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||\r\n function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };\r\n return extendStatics(d, b);\r\n};\r\n\r\nexport function __extends(d, b) {\r\n if (typeof b !== \"function\" && b !== null)\r\n throw new TypeError(\"Class extends value \" + String(b) + \" is not a constructor or null\");\r\n extendStatics(d, b);\r\n function __() { this.constructor = d; }\r\n d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());\r\n}\r\n\r\nexport var __assign = function() {\r\n __assign = Object.assign || function __assign(t) {\r\n for (var s, i = 1, n = arguments.length; i < n; i++) {\r\n s = arguments[i];\r\n for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p)) t[p] = s[p];\r\n }\r\n return t;\r\n }\r\n return __assign.apply(this, arguments);\r\n}\r\n\r\nexport function __rest(s, e) {\r\n var t = {};\r\n for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p) && e.indexOf(p) < 0)\r\n t[p] = s[p];\r\n if (s != null && typeof Object.getOwnPropertySymbols === \"function\")\r\n for (var i = 0, p = Object.getOwnPropertySymbols(s); i < p.length; i++) {\r\n if (e.indexOf(p[i]) < 0 && Object.prototype.propertyIsEnumerable.call(s, p[i]))\r\n t[p[i]] = s[p[i]];\r\n }\r\n return t;\r\n}\r\n\r\nexport function __decorate(decorators, target, key, desc) {\r\n var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;\r\n if (typeof Reflect === \"object\" && typeof Reflect.decorate === \"function\") r = Reflect.decorate(decorators, target, key, desc);\r\n else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;\r\n return c > 3 && r && Object.defineProperty(target, key, r), r;\r\n}\r\n\r\nexport function __param(paramIndex, decorator) {\r\n return function (target, key) { decorator(target, key, paramIndex); }\r\n}\r\n\r\nexport function __metadata(metadataKey, metadataValue) {\r\n if (typeof Reflect === \"object\" && typeof Reflect.metadata === \"function\") return Reflect.metadata(metadataKey, metadataValue);\r\n}\r\n\r\nexport function __awaiter(thisArg, _arguments, P, generator) {\r\n function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }\r\n return new (P || (P = Promise))(function (resolve, reject) {\r\n function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }\r\n function rejected(value) { try { step(generator[\"throw\"](value)); } catch (e) { reject(e); } }\r\n function step(result) { result.done ? 
resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }\r\n step((generator = generator.apply(thisArg, _arguments || [])).next());\r\n });\r\n}\r\n\r\nexport function __generator(thisArg, body) {\r\n var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;\r\n return g = { next: verb(0), \"throw\": verb(1), \"return\": verb(2) }, typeof Symbol === \"function\" && (g[Symbol.iterator] = function() { return this; }), g;\r\n function verb(n) { return function (v) { return step([n, v]); }; }\r\n function step(op) {\r\n if (f) throw new TypeError(\"Generator is already executing.\");\r\n while (_) try {\r\n if (f = 1, y && (t = op[0] & 2 ? y[\"return\"] : op[0] ? y[\"throw\"] || ((t = y[\"return\"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;\r\n if (y = 0, t) op = [op[0] & 2, t.value];\r\n switch (op[0]) {\r\n case 0: case 1: t = op; break;\r\n case 4: _.label++; return { value: op[1], done: false };\r\n case 5: _.label++; y = op[1]; op = [0]; continue;\r\n case 7: op = _.ops.pop(); _.trys.pop(); continue;\r\n default:\r\n if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }\r\n if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }\r\n if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }\r\n if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }\r\n if (t[2]) _.ops.pop();\r\n _.trys.pop(); continue;\r\n }\r\n op = body.call(thisArg, _);\r\n } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }\r\n if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };\r\n }\r\n}\r\n\r\nexport var __createBinding = Object.create ? (function(o, m, k, k2) {\r\n if (k2 === undefined) k2 = k;\r\n Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });\r\n}) : (function(o, m, k, k2) {\r\n if (k2 === undefined) k2 = k;\r\n o[k2] = m[k];\r\n});\r\n\r\nexport function __exportStar(m, o) {\r\n for (var p in m) if (p !== \"default\" && !Object.prototype.hasOwnProperty.call(o, p)) __createBinding(o, m, p);\r\n}\r\n\r\nexport function __values(o) {\r\n var s = typeof Symbol === \"function\" && Symbol.iterator, m = s && o[s], i = 0;\r\n if (m) return m.call(o);\r\n if (o && typeof o.length === \"number\") return {\r\n next: function () {\r\n if (o && i >= o.length) o = void 0;\r\n return { value: o && o[i++], done: !o };\r\n }\r\n };\r\n throw new TypeError(s ? 
\"Object is not iterable.\" : \"Symbol.iterator is not defined.\");\r\n}\r\n\r\nexport function __read(o, n) {\r\n var m = typeof Symbol === \"function\" && o[Symbol.iterator];\r\n if (!m) return o;\r\n var i = m.call(o), r, ar = [], e;\r\n try {\r\n while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);\r\n }\r\n catch (error) { e = { error: error }; }\r\n finally {\r\n try {\r\n if (r && !r.done && (m = i[\"return\"])) m.call(i);\r\n }\r\n finally { if (e) throw e.error; }\r\n }\r\n return ar;\r\n}\r\n\r\n/** @deprecated */\r\nexport function __spread() {\r\n for (var ar = [], i = 0; i < arguments.length; i++)\r\n ar = ar.concat(__read(arguments[i]));\r\n return ar;\r\n}\r\n\r\n/** @deprecated */\r\nexport function __spreadArrays() {\r\n for (var s = 0, i = 0, il = arguments.length; i < il; i++) s += arguments[i].length;\r\n for (var r = Array(s), k = 0, i = 0; i < il; i++)\r\n for (var a = arguments[i], j = 0, jl = a.length; j < jl; j++, k++)\r\n r[k] = a[j];\r\n return r;\r\n}\r\n\r\nexport function __spreadArray(to, from, pack) {\r\n if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {\r\n if (ar || !(i in from)) {\r\n if (!ar) ar = Array.prototype.slice.call(from, 0, i);\r\n ar[i] = from[i];\r\n }\r\n }\r\n return to.concat(ar || Array.prototype.slice.call(from));\r\n}\r\n\r\nexport function __await(v) {\r\n return this instanceof __await ? (this.v = v, this) : new __await(v);\r\n}\r\n\r\nexport function __asyncGenerator(thisArg, _arguments, generator) {\r\n if (!Symbol.asyncIterator) throw new TypeError(\"Symbol.asyncIterator is not defined.\");\r\n var g = generator.apply(thisArg, _arguments || []), i, q = [];\r\n return i = {}, verb(\"next\"), verb(\"throw\"), verb(\"return\"), i[Symbol.asyncIterator] = function () { return this; }, i;\r\n function verb(n) { if (g[n]) i[n] = function (v) { return new Promise(function (a, b) { q.push([n, v, a, b]) > 1 || resume(n, v); }); }; }\r\n function resume(n, v) { try { step(g[n](v)); } catch (e) { settle(q[0][3], e); } }\r\n function step(r) { r.value instanceof __await ? Promise.resolve(r.value.v).then(fulfill, reject) : settle(q[0][2], r); }\r\n function fulfill(value) { resume(\"next\", value); }\r\n function reject(value) { resume(\"throw\", value); }\r\n function settle(f, v) { if (f(v), q.shift(), q.length) resume(q[0][0], q[0][1]); }\r\n}\r\n\r\nexport function __asyncDelegator(o) {\r\n var i, p;\r\n return i = {}, verb(\"next\"), verb(\"throw\", function (e) { throw e; }), verb(\"return\"), i[Symbol.iterator] = function () { return this; }, i;\r\n function verb(n, f) { i[n] = o[n] ? function (v) { return (p = !p) ? { value: __await(o[n](v)), done: n === \"return\" } : f ? f(v) : v; } : f; }\r\n}\r\n\r\nexport function __asyncValues(o) {\r\n if (!Symbol.asyncIterator) throw new TypeError(\"Symbol.asyncIterator is not defined.\");\r\n var m = o[Symbol.asyncIterator], i;\r\n return m ? m.call(o) : (o = typeof __values === \"function\" ? 
__values(o) : o[Symbol.iterator](), i = {}, verb(\"next\"), verb(\"throw\"), verb(\"return\"), i[Symbol.asyncIterator] = function () { return this; }, i);\r\n function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }\r\n function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }\r\n}\r\n\r\nexport function __makeTemplateObject(cooked, raw) {\r\n if (Object.defineProperty) { Object.defineProperty(cooked, \"raw\", { value: raw }); } else { cooked.raw = raw; }\r\n return cooked;\r\n};\r\n\r\nvar __setModuleDefault = Object.create ? (function(o, v) {\r\n Object.defineProperty(o, \"default\", { enumerable: true, value: v });\r\n}) : function(o, v) {\r\n o[\"default\"] = v;\r\n};\r\n\r\nexport function __importStar(mod) {\r\n if (mod && mod.__esModule) return mod;\r\n var result = {};\r\n if (mod != null) for (var k in mod) if (k !== \"default\" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);\r\n __setModuleDefault(result, mod);\r\n return result;\r\n}\r\n\r\nexport function __importDefault(mod) {\r\n return (mod && mod.__esModule) ? mod : { default: mod };\r\n}\r\n\r\nexport function __classPrivateFieldGet(receiver, state, kind, f) {\r\n if (kind === \"a\" && !f) throw new TypeError(\"Private accessor was defined without a getter\");\r\n if (typeof state === \"function\" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError(\"Cannot read private member from an object whose class did not declare it\");\r\n return kind === \"m\" ? f : kind === \"a\" ? f.call(receiver) : f ? f.value : state.get(receiver);\r\n}\r\n\r\nexport function __classPrivateFieldSet(receiver, state, value, kind, f) {\r\n if (kind === \"m\") throw new TypeError(\"Private method is not writable\");\r\n if (kind === \"a\" && !f) throw new TypeError(\"Private accessor was defined without a setter\");\r\n if (typeof state === \"function\" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError(\"Cannot write private member to an object whose class did not declare it\");\r\n return (kind === \"a\" ? f.call(receiver, value) : f ? f.value = value : state.set(receiver, value)), value;\r\n}\r\n", "/**\n * Returns true if the object is a function.\n * @param value The value to check\n */\nexport function isFunction(value: any): value is (...args: any[]) => any {\n return typeof value === 'function';\n}\n", "/**\n * Used to create Error subclasses until the community moves away from ES5.\n *\n * This is because compiling from TypeScript down to ES5 has issues with subclassing Errors\n * as well as other built-in types: https://github.com/Microsoft/TypeScript/issues/12123\n *\n * @param createImpl A factory function to create the actual constructor implementation. 
This allows us to implement single and repeat\n // actions via the same code path, without adding API surface area, as well\n // as mimic traditional recursion but across asynchronous boundaries.\n //\n // However, JS runtimes and timers distinguish between intervals achieved by\n // serial `setTimeout` calls vs. a single `setInterval` call. An interval of\n // serial `setTimeout` calls can be individually delayed, which delays\n // scheduling the next `setTimeout`, and so on. `setInterval` attempts to\n // guarantee the interval callback will be invoked more precisely to the\n // interval period, regardless of load.\n //\n // Therefore, we use `setInterval` to schedule single and repeat actions.\n // If the action reschedules itself with the same delay, the interval is not\n // canceled. If the action doesn't reschedule, or reschedules with a\n // different delay, the interval will be canceled after scheduled callback\n // execution.\n //\n if (id != null) {\n this.id = this.recycleAsyncId(scheduler, id, delay);\n }\n\n // Set the pending flag indicating that this action has been scheduled, or\n // has recursively rescheduled itself.\n this.pending = true;\n\n this.delay = delay;\n // If this action has already an async Id, don't request a new one.\n this.id = this.id ?? this.requestAsyncId(scheduler, this.id, delay);\n\n return this;\n }\n\n protected requestAsyncId(scheduler: AsyncScheduler, _id?: TimerHandle, delay: number = 0): TimerHandle {\n return intervalProvider.setInterval(scheduler.flush.bind(scheduler, this), delay);\n }\n\n protected recycleAsyncId(_scheduler: AsyncScheduler, id?: TimerHandle, delay: number | null = 0): TimerHandle | undefined {\n // If this action is rescheduled with the same delay time, don't clear the interval id.\n if (delay != null && this.delay === delay && this.pending === false) {\n return id;\n }\n // Otherwise, if the action's delay time is different from the current delay,\n // or the action has been rescheduled before it's executed, clear the interval id\n if (id != null) {\n intervalProvider.clearInterval(id);\n }\n\n return undefined;\n }\n\n /**\n * Immediately executes this action and the `work` it contains.\n * @return {any}\n */\n public execute(state: T, delay: number): any {\n if (this.closed) {\n return new Error('executing a cancelled action');\n }\n\n this.pending = false;\n const error = this._execute(state, delay);\n if (error) {\n return error;\n } else if (this.pending === false && this.id != null) {\n // Dequeue if the action didn't reschedule itself. Don't call\n // unsubscribe(), because the action could reschedule later.\n // For example:\n // ```\n // scheduler.schedule(function doWork(counter) {\n // /* ... I'm a busy worker bee ... */\n // var originalAction = this;\n // /* wait 100ms before rescheduling the action */\n // setTimeout(function () {\n // originalAction.schedule(counter + 1);\n // }, 100);\n // }, 1000);\n // ```\n this.id = this.recycleAsyncId(this.scheduler, this.id, null);\n }\n }\n\n protected _execute(state: T, _delay: number): any {\n let errored: boolean = false;\n let errorValue: any;\n try {\n this.work(state);\n } catch (e) {\n errored = true;\n // HACK: Since code elsewhere is relying on the \"truthiness\" of the\n // return here, we can't have it return \"\" or 0 or false.\n // TODO: Clean this up when we refactor schedulers mid-version-8 or so.\n errorValue = e ? 
e : new Error('Scheduled action threw falsy error');\n }\n if (errored) {\n this.unsubscribe();\n return errorValue;\n }\n }\n\n unsubscribe() {\n if (!this.closed) {\n const { id, scheduler } = this;\n const { actions } = scheduler;\n\n this.work = this.state = this.scheduler = null!;\n this.pending = false;\n\n arrRemove(actions, this);\n if (id != null) {\n this.id = this.recycleAsyncId(scheduler, id, null);\n }\n\n this.delay = null!;\n super.unsubscribe();\n }\n }\n}\n", "import { Action } from './scheduler/Action';\nimport { Subscription } from './Subscription';\nimport { SchedulerLike, SchedulerAction } from './types';\nimport { dateTimestampProvider } from './scheduler/dateTimestampProvider';\n\n/**\n * An execution context and a data structure to order tasks and schedule their\n * execution. Provides a notion of (potentially virtual) time, through the\n * `now()` getter method.\n *\n * Each unit of work in a Scheduler is called an `Action`.\n *\n * ```ts\n * class Scheduler {\n * now(): number;\n * schedule(work, delay?, state?): Subscription;\n * }\n * ```\n *\n * @class Scheduler\n * @deprecated Scheduler is an internal implementation detail of RxJS, and\n * should not be used directly. Rather, create your own class and implement\n * {@link SchedulerLike}. Will be made internal in v8.\n */\nexport class Scheduler implements SchedulerLike {\n public static now: () => number = dateTimestampProvider.now;\n\n constructor(private schedulerActionCtor: typeof Action, now: () => number = Scheduler.now) {\n this.now = now;\n }\n\n /**\n * A getter method that returns a number representing the current time\n * (at the time this function was called) according to the scheduler's own\n * internal clock.\n * @return {number} A number that represents the current time. May or may not\n * have a relation to wall-clock time. May or may not refer to a time unit\n * (e.g. milliseconds).\n */\n public now: () => number;\n\n /**\n * Schedules a function, `work`, for execution. May happen at some point in\n * the future, according to the `delay` parameter, if specified. 
May be passed\n * some context object, `state`, which will be passed to the `work` function.\n *\n * The given arguments will be processed an stored as an Action object in a\n * queue of actions.\n *\n * @param {function(state: ?T): ?Subscription} work A function representing a\n * task, or some unit of work to be executed by the Scheduler.\n * @param {number} [delay] Time to wait before executing the work, where the\n * time unit is implicit and defined by the Scheduler itself.\n * @param {T} [state] Some contextual data that the `work` function uses when\n * called by the Scheduler.\n * @return {Subscription} A subscription in order to be able to unsubscribe\n * the scheduled work.\n */\n public schedule(work: (this: SchedulerAction, state?: T) => void, delay: number = 0, state?: T): Subscription {\n return new this.schedulerActionCtor(this, work).schedule(state, delay);\n }\n}\n", "import { Scheduler } from '../Scheduler';\nimport { Action } from './Action';\nimport { AsyncAction } from './AsyncAction';\nimport { TimerHandle } from './timerHandle';\n\nexport class AsyncScheduler extends Scheduler {\n public actions: Array> = [];\n /**\n * A flag to indicate whether the Scheduler is currently executing a batch of\n * queued actions.\n * @type {boolean}\n * @internal\n */\n public _active: boolean = false;\n /**\n * An internal ID used to track the latest asynchronous task such as those\n * coming from `setTimeout`, `setInterval`, `requestAnimationFrame`, and\n * others.\n * @type {any}\n * @internal\n */\n public _scheduled: TimerHandle | undefined;\n\n constructor(SchedulerAction: typeof Action, now: () => number = Scheduler.now) {\n super(SchedulerAction, now);\n }\n\n public flush(action: AsyncAction): void {\n const { actions } = this;\n\n if (this._active) {\n actions.push(action);\n return;\n }\n\n let error: any;\n this._active = true;\n\n do {\n if ((error = action.execute(action.state, action.delay))) {\n break;\n }\n } while ((action = actions.shift()!)); // exhaust the scheduler queue\n\n this._active = false;\n\n if (error) {\n while ((action = actions.shift()!)) {\n action.unsubscribe();\n }\n throw error;\n }\n }\n}\n", "import { AsyncAction } from './AsyncAction';\nimport { AsyncScheduler } from './AsyncScheduler';\n\n/**\n *\n * Async Scheduler\n *\n * Schedule task as if you used setTimeout(task, duration)\n *\n * `async` scheduler schedules tasks asynchronously, by putting them on the JavaScript\n * event loop queue. 
It is best used to delay tasks in time or to schedule tasks repeating\n * in intervals.\n *\n * If you just want to \"defer\" task, that is to perform it right after currently\n * executing synchronous code ends (commonly achieved by `setTimeout(deferredTask, 0)`),\n * better choice will be the {@link asapScheduler} scheduler.\n *\n * ## Examples\n * Use async scheduler to delay task\n * ```ts\n * import { asyncScheduler } from 'rxjs';\n *\n * const task = () => console.log('it works!');\n *\n * asyncScheduler.schedule(task, 2000);\n *\n * // After 2 seconds logs:\n * // \"it works!\"\n * ```\n *\n * Use async scheduler to repeat task in intervals\n * ```ts\n * import { asyncScheduler } from 'rxjs';\n *\n * function task(state) {\n * console.log(state);\n * this.schedule(state + 1, 1000); // `this` references currently executing Action,\n * // which we reschedule with new state and delay\n * }\n *\n * asyncScheduler.schedule(task, 3000, 0);\n *\n * // Logs:\n * // 0 after 3s\n * // 1 after 4s\n * // 2 after 5s\n * // 3 after 6s\n * ```\n */\n\nexport const asyncScheduler = new AsyncScheduler(AsyncAction);\n\n/**\n * @deprecated Renamed to {@link asyncScheduler}. Will be removed in v8.\n */\nexport const async = asyncScheduler;\n", "import { AsyncAction } from './AsyncAction';\nimport { Subscription } from '../Subscription';\nimport { QueueScheduler } from './QueueScheduler';\nimport { SchedulerAction } from '../types';\nimport { TimerHandle } from './timerHandle';\n\nexport class QueueAction extends AsyncAction {\n constructor(protected scheduler: QueueScheduler, protected work: (this: SchedulerAction, state?: T) => void) {\n super(scheduler, work);\n }\n\n public schedule(state?: T, delay: number = 0): Subscription {\n if (delay > 0) {\n return super.schedule(state, delay);\n }\n this.delay = delay;\n this.state = state;\n this.scheduler.flush(this);\n return this;\n }\n\n public execute(state: T, delay: number): any {\n return delay > 0 || this.closed ? super.execute(state, delay) : this._execute(state, delay);\n }\n\n protected requestAsyncId(scheduler: QueueScheduler, id?: TimerHandle, delay: number = 0): TimerHandle {\n // If delay exists and is greater than 0, or if the delay is null (the\n // action wasn't rescheduled) but was originally scheduled as an async\n // action, then recycle as an async action.\n\n if ((delay != null && delay > 0) || (delay == null && this.delay > 0)) {\n return super.requestAsyncId(scheduler, id, delay);\n }\n\n // Otherwise flush the scheduler starting with this action.\n scheduler.flush(this);\n\n // HACK: In the past, this was returning `void`. However, `void` isn't a valid\n // `TimerHandle`, and generally the return value here isn't really used. So the\n // compromise is to return `0` which is both \"falsy\" and a valid `TimerHandle`,\n // as opposed to refactoring every other instanceo of `requestAsyncId`.\n return 0;\n }\n}\n", "import { AsyncScheduler } from './AsyncScheduler';\n\nexport class QueueScheduler extends AsyncScheduler {\n}\n", "import { QueueAction } from './QueueAction';\nimport { QueueScheduler } from './QueueScheduler';\n\n/**\n *\n * Queue Scheduler\n *\n * Put every next task on a queue, instead of executing it immediately\n *\n * `queue` scheduler, when used with delay, behaves the same as {@link asyncScheduler} scheduler.\n *\n * When used without delay, it schedules given task synchronously - executes it right when\n * it is scheduled. 
However when called recursively, that is when inside the scheduled task,\n * another task is scheduled with queue scheduler, instead of executing immediately as well,\n * that task will be put on a queue and wait for current one to finish.\n *\n * This means that when you execute task with `queue` scheduler, you are sure it will end\n * before any other task scheduled with that scheduler will start.\n *\n * ## Examples\n * Schedule recursively first, then do something\n * ```ts\n * import { queueScheduler } from 'rxjs';\n *\n * queueScheduler.schedule(() => {\n * queueScheduler.schedule(() => console.log('second')); // will not happen now, but will be put on a queue\n *\n * console.log('first');\n * });\n *\n * // Logs:\n * // \"first\"\n * // \"second\"\n * ```\n *\n * Reschedule itself recursively\n * ```ts\n * import { queueScheduler } from 'rxjs';\n *\n * queueScheduler.schedule(function(state) {\n * if (state !== 0) {\n * console.log('before', state);\n * this.schedule(state - 1); // `this` references currently executing Action,\n * // which we reschedule with new state\n * console.log('after', state);\n * }\n * }, 0, 3);\n *\n * // In scheduler that runs recursively, you would expect:\n * // \"before\", 3\n * // \"before\", 2\n * // \"before\", 1\n * // \"after\", 1\n * // \"after\", 2\n * // \"after\", 3\n *\n * // But with queue it logs:\n * // \"before\", 3\n * // \"after\", 3\n * // \"before\", 2\n * // \"after\", 2\n * // \"before\", 1\n * // \"after\", 1\n * ```\n */\n\nexport const queueScheduler = new QueueScheduler(QueueAction);\n\n/**\n * @deprecated Renamed to {@link queueScheduler}. Will be removed in v8.\n */\nexport const queue = queueScheduler;\n", "import { AsyncAction } from './AsyncAction';\nimport { AnimationFrameScheduler } from './AnimationFrameScheduler';\nimport { SchedulerAction } from '../types';\nimport { animationFrameProvider } from './animationFrameProvider';\nimport { TimerHandle } from './timerHandle';\n\nexport class AnimationFrameAction extends AsyncAction {\n constructor(protected scheduler: AnimationFrameScheduler, protected work: (this: SchedulerAction, state?: T) => void) {\n super(scheduler, work);\n }\n\n protected requestAsyncId(scheduler: AnimationFrameScheduler, id?: TimerHandle, delay: number = 0): TimerHandle {\n // If delay is greater than 0, request as an async action.\n if (delay !== null && delay > 0) {\n return super.requestAsyncId(scheduler, id, delay);\n }\n // Push the action to the end of the scheduler queue.\n scheduler.actions.push(this);\n // If an animation frame has already been requested, don't request another\n // one. If an animation frame hasn't been requested yet, request one. Return\n // the current animation frame request id.\n return scheduler._scheduled || (scheduler._scheduled = animationFrameProvider.requestAnimationFrame(() => scheduler.flush(undefined)));\n }\n\n protected recycleAsyncId(scheduler: AnimationFrameScheduler, id?: TimerHandle, delay: number = 0): TimerHandle | undefined {\n // If delay exists and is greater than 0, or if the delay is null (the\n // action wasn't rescheduled) but was originally scheduled as an async\n // action, then recycle as an async action.\n if (delay != null ? 
delay > 0 : this.delay > 0) {\n return super.recycleAsyncId(scheduler, id, delay);\n }\n // If the scheduler queue has no remaining actions with the same async id,\n // cancel the requested animation frame and set the scheduled flag to\n // undefined so the next AnimationFrameAction will request its own.\n const { actions } = scheduler;\n if (id != null && actions[actions.length - 1]?.id !== id) {\n animationFrameProvider.cancelAnimationFrame(id as number);\n scheduler._scheduled = undefined;\n }\n // Return undefined so the action knows to request a new async id if it's rescheduled.\n return undefined;\n }\n}\n", "import { AsyncAction } from './AsyncAction';\nimport { AsyncScheduler } from './AsyncScheduler';\n\nexport class AnimationFrameScheduler extends AsyncScheduler {\n public flush(action?: AsyncAction): void {\n this._active = true;\n // The async id that effects a call to flush is stored in _scheduled.\n // Before executing an action, it's necessary to check the action's async\n // id to determine whether it's supposed to be executed in the current\n // flush.\n // Previous implementations of this method used a count to determine this,\n // but that was unsound, as actions that are unsubscribed - i.e. cancelled -\n // are removed from the actions array and that can shift actions that are\n // scheduled to be executed in a subsequent flush into positions at which\n // they are executed within the current flush.\n const flushId = this._scheduled;\n this._scheduled = undefined;\n\n const { actions } = this;\n let error: any;\n action = action || actions.shift()!;\n\n do {\n if ((error = action.execute(action.state, action.delay))) {\n break;\n }\n } while ((action = actions[0]) && action.id === flushId && actions.shift());\n\n this._active = false;\n\n if (error) {\n while ((action = actions[0]) && action.id === flushId && actions.shift()) {\n action.unsubscribe();\n }\n throw error;\n }\n }\n}\n", "import { AnimationFrameAction } from './AnimationFrameAction';\nimport { AnimationFrameScheduler } from './AnimationFrameScheduler';\n\n/**\n *\n * Animation Frame Scheduler\n *\n * Perform task when `window.requestAnimationFrame` would fire\n *\n * When `animationFrame` scheduler is used with delay, it will fall back to {@link asyncScheduler} scheduler\n * behaviour.\n *\n * Without delay, `animationFrame` scheduler can be used to create smooth browser animations.\n * It makes sure scheduled task will happen just before next browser content repaint,\n * thus performing animations as efficiently as possible.\n *\n * ## Example\n * Schedule div height animation\n * ```ts\n * // html:
\n * import { animationFrameScheduler } from 'rxjs';\n *\n * const div = document.querySelector('div');\n *\n * animationFrameScheduler.schedule(function(height) {\n * div.style.height = height + \"px\";\n *\n * this.schedule(height + 1); // `this` references currently executing Action,\n * // which we reschedule with new state\n * }, 0, 0);\n *\n * // You will see a div element growing in height\n * ```\n */\n\nexport const animationFrameScheduler = new AnimationFrameScheduler(AnimationFrameAction);\n\n/**\n * @deprecated Renamed to {@link animationFrameScheduler}. Will be removed in v8.\n */\nexport const animationFrame = animationFrameScheduler;\n", "import { Observable } from '../Observable';\nimport { SchedulerLike } from '../types';\n\n/**\n * A simple Observable that emits no items to the Observer and immediately\n * emits a complete notification.\n *\n * Just emits 'complete', and nothing else.\n *\n * ![](empty.png)\n *\n * A simple Observable that only emits the complete notification. It can be used\n * for composing with other Observables, such as in a {@link mergeMap}.\n *\n * ## Examples\n *\n * Log complete notification\n *\n * ```ts\n * import { EMPTY } from 'rxjs';\n *\n * EMPTY.subscribe({\n * next: () => console.log('Next'),\n * complete: () => console.log('Complete!')\n * });\n *\n * // Outputs\n * // Complete!\n * ```\n *\n * Emit the number 7, then complete\n *\n * ```ts\n * import { EMPTY, startWith } from 'rxjs';\n *\n * const result = EMPTY.pipe(startWith(7));\n * result.subscribe(x => console.log(x));\n *\n * // Outputs\n * // 7\n * ```\n *\n * Map and flatten only odd numbers to the sequence `'a'`, `'b'`, `'c'`\n *\n * ```ts\n * import { interval, mergeMap, of, EMPTY } from 'rxjs';\n *\n * const interval$ = interval(1000);\n * const result = interval$.pipe(\n * mergeMap(x => x % 2 === 1 ? of('a', 'b', 'c') : EMPTY),\n * );\n * result.subscribe(x => console.log(x));\n *\n * // Results in the following to the console:\n * // x is equal to the count on the interval, e.g. (0, 1, 2, 3, ...)\n * // x will occur every 1000ms\n * // if x % 2 is equal to 1, print a, b, c (each on its own)\n * // if x % 2 is not equal to 1, nothing will be output\n * ```\n *\n * @see {@link Observable}\n * @see {@link NEVER}\n * @see {@link of}\n * @see {@link throwError}\n */\nexport const EMPTY = new Observable((subscriber) => subscriber.complete());\n\n/**\n * @param scheduler A {@link SchedulerLike} to use for scheduling\n * the emission of the complete notification.\n * @deprecated Replaced with the {@link EMPTY} constant or {@link scheduled} (e.g. `scheduled([], scheduler)`). Will be removed in v8.\n */\nexport function empty(scheduler?: SchedulerLike) {\n return scheduler ? emptyScheduled(scheduler) : EMPTY;\n}\n\nfunction emptyScheduled(scheduler: SchedulerLike) {\n return new Observable((subscriber) => scheduler.schedule(() => subscriber.complete()));\n}\n", "import { SchedulerLike } from '../types';\nimport { isFunction } from './isFunction';\n\nexport function isScheduler(value: any): value is SchedulerLike {\n return value && isFunction(value.schedule);\n}\n", "import { SchedulerLike } from '../types';\nimport { isFunction } from './isFunction';\nimport { isScheduler } from './isScheduler';\n\nfunction last(arr: T[]): T | undefined {\n return arr[arr.length - 1];\n}\n\nexport function popResultSelector(args: any[]): ((...args: unknown[]) => unknown) | undefined {\n return isFunction(last(args)) ? 
args.pop() : undefined;\n}\n\nexport function popScheduler(args: any[]): SchedulerLike | undefined {\n return isScheduler(last(args)) ? args.pop() : undefined;\n}\n\nexport function popNumber(args: any[], defaultValue: number): number {\n return typeof last(args) === 'number' ? args.pop()! : defaultValue;\n}\n", "export const isArrayLike = ((x: any): x is ArrayLike => x && typeof x.length === 'number' && typeof x !== 'function');", "import { isFunction } from \"./isFunction\";\n\n/**\n * Tests to see if the object is \"thennable\".\n * @param value the object to test\n */\nexport function isPromise(value: any): value is PromiseLike {\n return isFunction(value?.then);\n}\n", "import { InteropObservable } from '../types';\nimport { observable as Symbol_observable } from '../symbol/observable';\nimport { isFunction } from './isFunction';\n\n/** Identifies an input as being Observable (but not necessary an Rx Observable) */\nexport function isInteropObservable(input: any): input is InteropObservable {\n return isFunction(input[Symbol_observable]);\n}\n", "import { isFunction } from './isFunction';\n\nexport function isAsyncIterable(obj: any): obj is AsyncIterable {\n return Symbol.asyncIterator && isFunction(obj?.[Symbol.asyncIterator]);\n}\n", "/**\n * Creates the TypeError to throw if an invalid object is passed to `from` or `scheduled`.\n * @param input The object that was passed.\n */\nexport function createInvalidObservableTypeError(input: any) {\n // TODO: We should create error codes that can be looked up, so this can be less verbose.\n return new TypeError(\n `You provided ${\n input !== null && typeof input === 'object' ? 'an invalid object' : `'${input}'`\n } where a stream was expected. You can provide an Observable, Promise, ReadableStream, Array, AsyncIterable, or Iterable.`\n );\n}\n", "export function getSymbolIterator(): symbol {\n if (typeof Symbol !== 'function' || !Symbol.iterator) {\n return '@@iterator' as any;\n }\n\n return Symbol.iterator;\n}\n\nexport const iterator = getSymbolIterator();\n", "import { iterator as Symbol_iterator } from '../symbol/iterator';\nimport { isFunction } from './isFunction';\n\n/** Identifies an input as being an Iterable */\nexport function isIterable(input: any): input is Iterable {\n return isFunction(input?.[Symbol_iterator]);\n}\n", "import { ReadableStreamLike } from '../types';\nimport { isFunction } from './isFunction';\n\nexport async function* readableStreamLikeToAsyncGenerator(readableStream: ReadableStreamLike): AsyncGenerator {\n const reader = readableStream.getReader();\n try {\n while (true) {\n const { value, done } = await reader.read();\n if (done) {\n return;\n }\n yield value!;\n }\n } finally {\n reader.releaseLock();\n }\n}\n\nexport function isReadableStreamLike(obj: any): obj is ReadableStreamLike {\n // We don't want to use instanceof checks because they would return\n // false for instances from another Realm, like an +
An introduction to RLLTE.

Why RLLTE?

  • 🧬 Long-term evolution for providing latest algorithms and tricks;
  • 🏞️ Complete ecosystem for task design, model training, evaluation, and deployment (TensorRT, CANN, ...);
  • 🧱 Module-oriented design for complete decoupling of RL algorithms;
  • 🚀 Optimized workflow for full hardware acceleration;
  • ⚙️ Support custom environments and modules;
  • 🖥️ Support multiple computing devices like GPU and NPU;
  • 💾 Large number of reusable benchmarks (RLLTE Hub);
  • 👨‍✈️ Large language model-empowered copilot (RLLTE Copilot).

A PyTorch for RL

+

RLLTE decouples RL algorithms into minimal primitives and provides standard modules for development.

+

See Fast Algorithm Development for detailed examples.
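To make this decoupling concrete, here is a minimal sketch that swaps the encoder of a built-in agent for another standard module through the .set interface, reusing the make_dmc_env, DrQv2, and EspeholtResidualEncoder components referenced in these docs. It is an illustration only: the encoder arguments (observation_space, feature_dim=50), the tag name, and the short training budget are assumptions, and feature_dim must match the feature dimension the agent is configured with.

# A minimal sketch of module swapping; assumes `rllte-core[envs]` is installed.
from rllte.env import make_dmc_env
from rllte.agent import DrQv2
from rllte.xploit.encoder import EspeholtResidualEncoder

if __name__ == "__main__":
    device = "cuda:0"
    # Build the task environment as in the quick-start example (eval_env is optional).
    env = make_dmc_env(env_id="cartpole_balance", device=device)
    # Create a built-in agent with its default, fully decoupled modules.
    agent = DrQv2(env=env, device=device, tag="drqv2_encoder_swap")
    # Swap in a different standard encoder; feature_dim=50 is an assumed value
    # that must agree with the feature dimension the agent expects.
    encoder = EspeholtResidualEncoder(observation_space=env.observation_space, feature_dim=50)
    agent.set(encoder=encoder)
    # Train as usual; the policy, storage, and distribution modules are unchanged.
    agent.train(num_train_steps=5000, log_interval=1000)

Because every module follows the same prototype interface, the same .set call can likewise replace policies, storages, distributions, or augmentations without touching the rest of the agent.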

+
+ +
+ +

Project Evolution

+

RLLTE selects RL algorithms based on the following tenets:

+
  • Generality is the most important;
  • Improvements in sample efficiency or generalization ability;
  • Excellent performance on recognized benchmarks;
  • Promising tools for RL.
+

Cite Us

+

If you use RLLTE in your research, please cite this project like this: +

@article{yuan2023rllte,
  title={RLLTE: Long-Term Evolution Project of Reinforcement Learning},
  author={Mingqi Yuan and Zequn Zhang and Yang Xu and Shihao Luo and Bo Li and Xin Jin and Wenjun Zeng},
  year={2023},
  journal={arXiv preprint arXiv:2309.16382}
}
+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/license/index.html b/license/index.html new file mode 100644 index 00000000..f462b242 --- /dev/null +++ b/license/index.html @@ -0,0 +1,4034 @@ + + + + + + + + + + + + + + + + + + + + + + + + + License - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ +
+ + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

License

+ +

MIT License

+

Copyright (c) 2023 Reinforcement Learning Evolution Foundation

+

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

+

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

+

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/mkgendocs.yml b/mkgendocs.yml new file mode 100644 index 00000000..03d51e16 --- /dev/null +++ b/mkgendocs.yml @@ -0,0 +1,387 @@ +sources_dir: docs/api_docs +templates_dir: +repo: https://github.com/RLE-Foundation/rllte +version: main +pages: + - page: common/preprocessing.md + source: rllte/common/preprocessing.py + functions: + - process_observation_space + - process_action_space + - get_flattened_obs_dim + - is_image_space_channels_first + - is_image_space + - preprocess_obs + - page: common/initialization.md + source: rllte/common/initialization.py + functions: + - get_init_fn + - _xavier_normal + - _xavier_uniform + - _orthogonal + - _identity + - page: common/prototype/base_agent.md + source: rllte/common/prototype/base_agent.py + classes: + - BaseAgent + - page: common/prototype/base_augmentation.md + source: rllte/common/prototype/base_augmentation.py + classes: + - BaseAugmentation + - page: common/prototype/base_distribution.md + source: rllte/common/prototype/base_distribution.py + classes: + - BaseDistribution + - page: common/prototype/base_encoder.md + source: rllte/common/prototype/base_encoder.py + classes: + - BaseEncoder + - page: common/prototype/base_reward.md + source: rllte/common/prototype/base_reward.py + classes: + - BaseIntrinsicRewardModule + - page: common/prototype/base_policy.md + source: rllte/common/prototype/base_policy.py + classes: + - BasePolicy + - page: common/prototype/base_storage.md + source: rllte/common/prototype/base_storage.py + classes: + - BaseStorage + - page: common/prototype/distributed_agent.md + source: rllte/common/prototype/distributed_agent.py + classes: + - DistributedAgent + - page: common/logger.md + source: rllte/common/logger.py + classes: + - Logger + - page: common/prototype/off_policy_agent.md + source: rllte/common/prototype/off_policy_agent.py + classes: + - OffPolicyAgent + - page: common/prototype/on_policy_agent.md + source: rllte/common/prototype/on_policy_agent.py + classes: + - OnPolicyAgent + - page: common/timer.md + source: rllte/common/timer.py + classes: + - Timer + - page: xploit/encoder/espeholt_residual_encoder.md + source: rllte/xploit/encoder/espeholt_residual_encoder.py + classes: + - EspeholtResidualEncoder + - page: xploit/encoder/identity_encoder.md + source: rllte/xploit/encoder/identity_encoder.py + classes: + - IdentityEncoder + - page: xploit/encoder/mnih_cnn_encoder.md + source: rllte/xploit/encoder/mnih_cnn_encoder.py + classes: + - MnihCnnEncoder + - page: xploit/encoder/pathak_cnn_encoder.md + source: rllte/xploit/encoder/pathak_cnn_encoder.py + classes: + - PathakCnnEncoder + - page: xploit/encoder/raffin_combined_encoder.md + source: rllte/xploit/encoder/raffin_combined_encoder.py + classes: + - RaffinCombinedEncoder + - page: xploit/encoder/tassa_cnn_encoder.md + source: rllte/xploit/encoder/tassa_cnn_encoder.py + classes: + - TassaCnnEncoder + - page: xploit/encoder/vanilla_mlp_encoder.md + source: rllte/xploit/encoder/vanilla_mlp_encoder.py + classes: + - VanillaMlpEncoder + - page: agent/legacy/a2c.md + source: rllte/agent/legacy/a2c.py + classes: + - A2C + - page: agent/daac.md + source: rllte/agent/daac.py + classes: + - DAAC + - page: agent/legacy/ddpg.md + source: rllte/agent/legacy/ddpg.py + classes: + - DDPG + - page: agent/legacy/dqn.md + source: rllte/agent/legacy/dqn.py + classes: + - DQN + - page: agent/drac.md + source: rllte/agent/drac.py + classes: + - DrAC + - page: agent/drdaac.md + source: 
rllte/agent/drdaac.py + classes: + - DrDAAC + - page: agent/ppg.md + source: rllte/agent/ppg.py + classes: + - PPG + - page: agent/drqv2.md + source: rllte/agent/drqv2.py + classes: + - DrQv2 + - page: agent/impala.md + source: rllte/agent/impala.py + classes: + - IMPALA + - page: agent/legacy/ppo.md + source: rllte/agent/legacy/ppo.py + classes: + - PPO + - page: agent/legacy/sac.md + source: rllte/agent/legacy/sac.py + classes: + - SAC + - page: agent/legacy/sacd.md + source: rllte/agent/legacy/sacd.py + classes: + - SACDiscrete + - page: xploit/storage/dict_replay_storage.md + source: rllte/xploit/storage/dict_replay_storage.py + classes: + - DictReplayStorage + - page: xploit/storage/dict_rollout_storage.md + source: rllte/xploit/storage/dict_rollout_storage.py + classes: + - DictRolloutStorage + - page: xploit/storage/her_replay_storage.md + source: rllte/xploit/storage/her_replay_storage.py + classes: + - HerReplayStorage + - page: xploit/storage/nstep_replay_storage.md + source: rllte/xploit/storage/nstep_replay_storage.py + classes: + - NStepReplayStorage + - page: xploit/storage/prioritized_replay_storage.md + source: rllte/xploit/storage/prioritized_replay_storage.py + classes: + - PrioritizedReplayStorage + - page: xploit/storage/vanilla_distributed_storage.md + source: rllte/xploit/storage/vanilla_distributed_storage.py + classes: + - VanillaDistributedStorage + - page: xploit/storage/vanilla_replay_storage.md + source: rllte/xploit/storage/vanilla_replay_storage.py + classes: + - VanillaReplayStorage + - page: xploit/storage/vanilla_rollout_storage.md + source: rllte/xploit/storage/vanilla_rollout_storage.py + classes: + - VanillaRolloutStorage + - page: xploit/policy/distributed_actor_learner.md + source: rllte/xploit/policy/distributed_actor_learner.py + classes: + - DistributedActorLearner + - page: xploit/policy/off_policy_det_actor_double_critic.md + source: rllte/xploit/policy/off_policy_det_actor_double_critic.py + classes: + - OffPolicyDetActorDoubleCritic + - page: xploit/policy/off_policy_double_actor_double_critic.md + source: rllte/xploit/policy/off_policy_double_actor_double_critic.py + classes: + - OffPolicyDoubleActorDoubleCritic + - page: xploit/policy/off_policy_double_qnetwork.md + source: rllte/xploit/policy/off_policy_double_qnetwork.py + classes: + - OffPolicyDoubleQNetwork + - page: xploit/policy/off_policy_stoch_actor_double_critic.md + source: rllte/xploit/policy/off_policy_stoch_actor_double_critic.py + classes: + - OffPolicyStochActorDoubleCritic + - page: xploit/policy/on_policy_decoupled_actor_critic.md + source: rllte/xploit/policy/on_policy_decoupled_actor_critic.py + classes: + - OnPolicyDecoupledActorCritic + - page: xploit/policy/on_policy_shared_actor_critic.md + source: rllte/xploit/policy/on_policy_shared_actor_critic.py + classes: + - OnPolicySharedActorCritic + - page: hub/atari.md + source: rllte/hub/atari.py + classes: + - Atari + - page: hub/dmc.md + source: rllte/hub/dmc.py + classes: + - DMControl + - page: hub/procgen.md + source: rllte/hub/procgen.py + classes: + - Procgen + - page: hub/minigrid.md + source: rllte/hub/minigrid.py + classes: + - MiniGrid + - page: xplore/reward/girm.md + source: rllte/xplore/reward/girm.py + classes: + - GIRM + - page: xplore/reward/icm.md + source: rllte/xplore/reward/icm.py + classes: + - ICM + - page: xplore/reward/ngu.md + source: rllte/xplore/reward/ngu.py + classes: + - NGU + - page: xplore/reward/pseudo_counts.md + source: rllte/xplore/reward/pseudo_counts.py + classes: + - PseudoCounts + - 
page: xplore/reward/re3.md + source: rllte/xplore/reward/re3.py + classes: + - RE3 + - page: xplore/reward/revd.md + source: rllte/xplore/reward/revd.py + classes: + - REVD + - page: xplore/reward/ride.md + source: rllte/xplore/reward/ride.py + classes: + - RIDE + - page: xplore/reward/rise.md + source: rllte/xplore/reward/rise.py + classes: + - RISE + - page: xplore/reward/rnd.md + source: rllte/xplore/reward/rnd.py + classes: + - RND + - page: xplore/augmentation/gaussian_noise.md + source: rllte/xplore/augmentation/gaussian_noise.py + classes: + - GaussianNoise + - page: xplore/augmentation/grayscale.md + source: rllte/xplore/augmentation/grayscale.py + classes: + - GrayScale + - page: xplore/augmentation/identity.md + source: rllte/xplore/augmentation/identity.py + classes: + - Identity + - page: xplore/augmentation/random_amplitude_scaling.md + source: rllte/xplore/augmentation/random_amplitude_scaling.py + classes: + - RandomAmplitudeScaling + - page: xplore/augmentation/random_colorjitter.md + source: rllte/xplore/augmentation/random_colorjitter.py + classes: + - RandomColorJitter + - page: xplore/augmentation/random_convolution.md + source: rllte/xplore/augmentation/random_convolution.py + classes: + - RandomConvolution + - page: xplore/augmentation/random_crop.md + source: rllte/xplore/augmentation/random_crop.py + classes: + - RandomCrop + - page: xplore/augmentation/random_cutout.md + source: rllte/xplore/augmentation/random_cutout.py + classes: + - RandomCutout + - page: xplore/augmentation/random_cutoutcolor.md + source: rllte/xplore/augmentation/random_cutoutcolor.py + classes: + - RandomCutoutColor + - page: xplore/augmentation/random_flip.md + source: rllte/xplore/augmentation/random_flip.py + classes: + - RandomFlip + - page: xplore/augmentation/random_rotate.md + source: rllte/xplore/augmentation/random_rotate.py + classes: + - RandomRotate + - page: xplore/augmentation/random_shift.md + source: rllte/xplore/augmentation/random_shift.py + classes: + - RandomShift + - page: xplore/augmentation/random_translate.md + source: rllte/xplore/augmentation/random_translate.py + classes: + - RandomTranslate + - page: xplore/distribution/bernoulli.md + source: rllte/xplore/distribution/bernoulli.py + classes: + - Bernoulli + - page: xplore/distribution/categorical.md + source: rllte/xplore/distribution/categorical.py + classes: + - Categorical + - page: xplore/distribution/diagonal_gaussian.md + source: rllte/xplore/distribution/diagonal_gaussian.py + classes: + - DiagonalGaussian + - page: xplore/distribution/multi_categorical.md + source: rllte/xplore/distribution/multi_categorical.py + classes: + - MultiCategorical + - page: xplore/distribution/normal_noise.md + source: rllte/xplore/distribution/normal_noise.py + classes: + - NormalNoise + - page: xplore/distribution/ornstein_uhlenbeck_noise.md + source: rllte/xplore/distribution/ornstein_uhlenbeck_noise.py + classes: + - OrnsteinUhlenbeckNoise + - page: xplore/distribution/squashed_normal.md + source: rllte/xplore/distribution/squashed_normal.py + classes: + - SquashedNormal + - page: xplore/distribution/truncated_normal_noise.md + source: rllte/xplore/distribution/truncated_normal_noise.py + classes: + - TruncatedNormalNoise + - page: env/bullet/__init__.md + source: rllte/env/bullet/__init__.py + functions: + - make_bullet_env + - page: env/dmc/__init__.md + source: rllte/env/dmc/__init__.py + functions: + - make_dmc_env + - page: env/atari/__init__.md + source: rllte/env/atari/__init__.py + functions: + - make_atari_env + - 
make_envpool_atari_env + - page: env/minigrid/__init__.md + source: rllte/env/minigrid/__init__.py + functions: + - make_minigrid_env + - page: env/procgen/__init__.md + source: rllte/env/procgen/__init__.py + functions: + - make_procgen_env + - make_envpool_procgen_env + - page: env/utils.md + source: rllte/env/utils.py + functions: + - make_rllte_env + - page: evaluation/comparison.md + source: rllte/evaluation/comparison.py + classes: + - Comparison + - page: evaluation/performance.md + source: rllte/evaluation/performance.py + classes: + - Performance + - page: evaluation/utils.md + source: rllte/evaluation/utils.py + functions: + - min_max_normalize + - page: evaluation/visualization.md + source: rllte/evaluation/visualization.py + functions: + - plot_interval_estimates + - plot_performance_profile + - plot_probability_improvement + - plot_sample_efficiency_curve diff --git a/search/search_index.json b/search/search_index.json new file mode 100644 index 00000000..71490242 --- /dev/null +++ b/search/search_index.json @@ -0,0 +1 @@ +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"RLLTE: Long-Term Evolution Project of Reinforcement Learning","text":"

Inspired by the long-term evolution (LTE) standard project in telecommunications, RLLTE aims to provide development components and standards for advancing RL research and applications. Beyond delivering top-notch algorithm implementations, RLLTE also serves as a toolkit for developing algorithms.

An introduction to RLLTE."},{"location":"#why-rllte","title":"Why RLLTE?","text":"
  • \ud83e\uddec Long-term evolution for providing latest algorithms and tricks;
  • \ud83c\udfde\ufe0f Complete ecosystem for task design, model training, evaluation, and deployment (TensorRT, CANN, ...);
  • \ud83e\uddf1 Module-oriented design for complete decoupling of RL algorithms;
  • \ud83d\ude80 Optimized workflow for full hardware acceleration;
  • \u2699\ufe0f Support custom environments and modules;
  • \ud83d\udda5\ufe0f Support multiple computing devices like GPU and NPU;
  • \ud83d\udcbe Large number of reusable benchmarks (RLLTE Hub);
  • \ud83d\udc68\u200d\u2708\ufe0f Large language model-empowered copilot (RLLTE Copilot).
"},{"location":"#a-pytorch-for-rl","title":"A PyTorch for RL","text":"

RLLTE decouples RL algorithms into minimal primitives and provides standard modules for development.

See Fast Algorithm Development for detailed examples.

"},{"location":"#project-evolution","title":"Project Evolution","text":"

RLLTE selects RL algorithms based on the following tenets:

  • Generality is the most important;
  • Improvements in sample efficiency or generalization ability;
  • Excellent performance on recognized benchmarks;
  • Promising tools for RL.
"},{"location":"#cite-us","title":"Cite Us","text":"

If you use RLLTE in your research, please cite this project like this:

@article{yuan2023rllte,\n  title={RLLTE: Long-Term Evolution Project of Reinforcement Learning}, \n  author={Mingqi Yuan and Zequn Zhang and Yang Xu and Shihao Luo and Bo Li and Xin Jin and Wenjun Zeng},\n  year={2023},\n  journal={arXiv preprint arXiv:2309.16382}\n}\n

"},{"location":"README-zh-Hans/","title":"README zh Hans","text":"RLLTE: \u5f3a\u5316\u5b66\u4e60\u957f\u671f\u6f14\u8fdb\u8ba1\u5212 \u8bba\u6587 | \u6587\u6863 | \u793a\u4f8b | \u8bba\u575b | \u57fa\u7ebf | [English](README.md) | [\u4e2d\u6587](docs/README-zh-Hans.md) |"},{"location":"README-zh-Hans/#contents","title":"Contents","text":"
  • \u6982\u8ff0
  • \u5feb\u901f\u5165\u95e8
  • \u5b89\u88c5
  • \u5feb\u901f\u8bad\u7ec3
    • \u8fd0\u7528NVIDIA GPU
    • \u8fd0\u7528HUAWEI NPU
  • \u4e09\u6b65\u521b\u5efa\u60a8\u7684\u5f3a\u5316\u5b66\u4e60\u667a\u80fd\u4f53
  • \u7b97\u6cd5\u89e3\u8026\u4e0e\u6a21\u5757\u66ff\u4ee3
  • \u529f\u80fd\u5217\u8868 (\u90e8\u5206)
  • \u5f3a\u5316\u5b66\u4e60\u667a\u80fd\u4f53
  • \u5185\u5728\u5956\u52b1\u6a21\u5757
  • RLLTE\u751f\u6001\u73af\u5883
  • API \u6587\u6863
  • \u5f15\u7528\u9879\u76ee
  • \u5982\u4f55\u8d21\u732e
  • \u81f4\u8c22
"},{"location":"README-zh-Hans/#_1","title":"\u6982\u8ff0","text":"

\u53d7\u901a\u4fe1\u9886\u57df\u957f\u671f\u6f14\u8fdb\uff08LTE\uff09\u6807\u51c6\u9879\u76ee\u7684\u542f\u53d1\uff0cRLLTE\u65e8\u5728\u63d0\u4f9b\u7528\u4e8e\u63a8\u8fdbRL\u7814\u7a76\u548c\u5e94\u7528\u7684\u5f00\u53d1\u7ec4\u4ef6\u548c\u5de5\u7a0b\u6807\u51c6\u3002\u9664\u4e86\u63d0\u4f9b\u4e00\u6d41\u7684\u7b97\u6cd5\u5b9e\u73b0\u5916\uff0cRLLTE\u8fd8\u80fd\u591f\u5145\u5f53\u5f00\u53d1\u7b97\u6cd5\u7684\u5de5\u5177\u5305\u3002

RLLTE\u7b80\u4ecb.

RLLTE\u9879\u76ee\u7279\u8272\uff1a - \ud83e\uddec \u957f\u671f\u6f14\u8fdb\u4ee5\u63d0\u4f9b\u6700\u65b0\u7684\u5f3a\u5316\u5b66\u4e60\u7b97\u6cd5\u4e0e\u6280\u5de7\uff1b - \ud83c\udfde\ufe0f \u4e30\u5bcc\u5b8c\u5907\u7684\u9879\u76ee\u751f\u6001\uff0c\u652f\u6301\u4efb\u52a1\u8bbe\u8ba1\u3001\u6a21\u578b\u8bad\u7ec3\u3001\u6a21\u578b\u8bc4\u4f30\u4ee5\u53ca\u6a21\u578b\u90e8\u7f72 (TensorRT, CANN, ...)\uff1b - \ud83e\uddf1 \u9ad8\u5ea6\u6a21\u5757\u5316\u7684\u8bbe\u8ba1\u4ee5\u5b9e\u73b0RL\u7b97\u6cd5\u7684\u5b8c\u5168\u89e3\u8026\uff1b - \ud83d\ude80 \u4f18\u5316\u7684\u5de5\u4f5c\u6d41\u7528\u4e8e\u786c\u4ef6\u52a0\u901f\uff1b - \u2699\ufe0f \u652f\u6301\u81ea\u5b9a\u4e49\u73af\u5883\u548c\u6a21\u5757\uff1b - \ud83d\udda5\ufe0f \u652f\u6301\u5305\u62ecGPU\u548cNPU\u7684\u591a\u79cd\u7b97\u529b\u8bbe\u5907\uff1b - \ud83d\udcbe \u5927\u91cf\u53ef\u91cd\u7528\u7684\u57fa\u7ebf\u6570\u636e (rllte-hub)\uff1b - \ud83d\udc68\u200d\u2708\ufe0f \u57fa\u4e8e\u5927\u8bed\u8a00\u6a21\u578b\u6253\u9020\u7684Copilot\u3002

\u9879\u76ee\u7ed3\u6784\u5982\u4e0b:

\u6709\u5173\u8fd9\u4e9b\u6a21\u5757\u7684\u8be6\u7ec6\u63cf\u8ff0\uff0c\u8bf7\u53c2\u9605API\u6587\u6863\u3002

"},{"location":"README-zh-Hans/#_2","title":"\u5feb\u901f\u5165\u95e8","text":""},{"location":"README-zh-Hans/#_3","title":"\u5b89\u88c5","text":"
  • \u524d\u7f6e\u6761\u4ef6

\u5f53\u524d\uff0c\u6211\u4eec\u5efa\u8bae\u4f7f\u7528Python>=3.8\uff0c\u7528\u6237\u53ef\u4ee5\u901a\u8fc7\u4ee5\u4e0b\u65b9\u5f0f\u521b\u5efa\u865a\u62df\u73af\u5883\uff1a

conda create -n rllte python=3.8\n

  • \u901a\u8fc7 pip

\u6253\u5f00\u7ec8\u7aef\u901a\u8fc7pip\u5b89\u88c5 rllte\uff1a

pip install rllte-core # \u5b89\u88c5\u57fa\u672c\u6a21\u5757\npip install rllte-core[envs] # \u5b89\u88c5\u9884\u8bbe\u7684\u4efb\u52a1\u73af\u5883\n

  • \u901a\u8fc7 git

\u5f00\u542f\u7ec8\u7aef\u4ece[GitHub]\u4e2d\u590d\u5236\u4ed3\u5e93(https://github.com/RLE-Foundation/rllte)\uff1a

git clone https://github.com/RLE-Foundation/rllte.git\n
\u5728\u8fd9\u4e4b\u540e, \u8fd0\u884c\u4ee5\u4e0b\u547d\u4ee4\u884c\u5b89\u88c5\u6240\u9700\u7684\u5305\uff1a
pip install -e . # \u5b89\u88c5\u57fa\u672c\u6a21\u5757\npip install -e .[envs] # \u5b89\u88c5\u9884\u8bbe\u7684\u4efb\u52a1\u73af\u5883\n

\u66f4\u8be6\u7ec6\u7684\u5b89\u88c5\u8bf4\u660e, \u8bf7\u53c2\u9605, \u5165\u95e8\u6307\u5357.

"},{"location":"README-zh-Hans/#_4","title":"\u5feb\u901f\u8bad\u7ec3\u5185\u7f6e\u7b97\u6cd5","text":"

RLLTE\u4e3a\u5e7f\u53d7\u8ba4\u53ef\u7684\u5f3a\u5316\u5b66\u4e60\u7b97\u6cd5\u63d0\u4f9b\u4e86\u9ad8\u8d28\u91cf\u7684\u5b9e\u73b0\uff0c\u5e76\u4e14\u8bbe\u8ba1\u4e86\u7b80\u5355\u53cb\u597d\u7684\u754c\u9762\u7528\u4e8e\u5e94\u7528\u6784\u5efa\u3002

"},{"location":"README-zh-Hans/#nvidia-gpu","title":"\u4f7f\u7528NVIDIA GPU","text":"

\u5047\u5982\u6211\u4eec\u8981\u7528 DrQ-v2\u7b97\u6cd5\u89e3\u51b3 DeepMind Control Suite\u4efb\u52a1, \u53ea\u9700\u7f16\u5199\u5982\u4e0b train.py\u6587\u4ef6\uff1a

# import `env` and `agent` module\nfrom rllte.env import make_dmc_env \nfrom rllte.agent import DrQv2\n\nif __name__ == \"__main__\":\n    device = \"cuda:0\"\n    # \u521b\u5efa env, `eval_env` \u53ef\u9009\n    env = make_dmc_env(env_id=\"cartpole_balance\", device=device)\n    eval_env = make_dmc_env(env_id=\"cartpole_balance\", device=device)\n    # \u521b\u5efa agent\n    agent = DrQv2(env=env, eval_env=eval_env, device=device, tag=\"drqv2_dmc_pixel\")\n    # \u5f00\u59cb\u8bad\u7ec3\n    agent.train(num_train_steps=500000, log_interval=1000)\n
\u8fd0\u884ctrain.py\u6587\u4ef6\uff0c\u5c06\u4f1a\u5f97\u5230\u5982\u4e0b\u8f93\u51fa\uff1a

"},{"location":"README-zh-Hans/#huawei-npu","title":"\u4f7f\u7528HUAWEI NPU","text":"

\u4e0e\u4e0a\u8ff0\u6848\u4f8b\u7c7b\u4f3c, \u5982\u679c\u9700\u8981\u5728 HUAWEI NPU \u4e0a\u8bad\u7ec3\u667a\u80fd\u4f53\uff0c\u53ea\u9700\u5c06 cuda \u66ff\u6362\u4e3a npu\uff1a

device = \"cuda:0\" -> device = \"npu:0\"\n

"},{"location":"README-zh-Hans/#_5","title":"\u4e09\u6b65\u521b\u5efa\u60a8\u7684\u5f3a\u5316\u5b66\u4e60\u667a\u80fd\u4f53","text":"

\u501f\u52a9RLLTE\uff0c\u5f00\u53d1\u8005\u53ea\u9700\u4e09\u6b65\u5c31\u53ef\u4ee5\u5b9e\u73b0\u4e00\u4e2a\u5f3a\u5316\u5b66\u4e60\u7b97\u6cd5\u3002\u63a5\u4e0b\u6765\u8fd9\u4e2a\u4f8b\u5b50\u5c06\u5c55\u793a\u5982\u4f55\u5b9e\u73b0 Advantage Actor-Critic (A2C) \u7b97\u6cd5\u7528\u4e8e\u89e3\u51b3 Atari \u6e38\u620f\uff1a - \u9996\u5148\uff0c\u8c03\u7528\u7b97\u6cd5\u539f\u578b\uff1a

from rllte.common.prototype import OnPolicyAgent\n
- \u5176\u6b21\uff0c\u5bfc\u5165\u5fc5\u8981\u7684\u6a21\u5757\uff1a
from rllte.xploit.encoder import MnihCnnEncoder\nfrom rllte.xploit.policy import OnPolicySharedActorCritic\nfrom rllte.xploit.storage import VanillaRolloutStorage\nfrom rllte.xplore.distribution import Categorical\n
- \u8fd0\u884c\u9009\u5b9a\u7b56\u7565\u7684 .describe \u51fd\u6570\uff0c\u8fd0\u884c\u7ed3\u679c\u5982\u4e0b\uff1a
OnPolicySharedActorCritic.describe()\n# Output:\n# ================================================================================\n# Name       : OnPolicySharedActorCritic\n# Structure  : self.encoder (shared by actor and critic), self.actor, self.critic\n# Forward    : obs -> self.encoder -> self.actor -> actions\n#            : obs -> self.encoder -> self.critic -> values\n#            : actions -> log_probs\n# Optimizers : self.optimizers['opt'] -> (self.encoder, self.actor, self.critic)\n# ================================================================================\n
\u8fd9\u5c06\u4f1a\u5c55\u793a\u5f53\u524d\u7b56\u7565\u7684\u6570\u636e\u7ed3\u6784\u3002\u6700\u540e\uff0c\u5c06\u4e0a\u8ff0\u6a21\u5757\u6574\u5408\u5230\u4e00\u8d77\u5e76\u4e14\u7f16\u5199 .update \u51fd\u6570:
from torch import nn\nimport torch as th\n\nclass A2C(OnPolicyAgent):\n    def __init__(self, env, tag, seed, device, num_steps) -> None:\n        super().__init__(env=env, tag=tag, seed=seed, device=device, num_steps=num_steps)\n        # \u521b\u5efa\u6a21\u5757\n        encoder = MnihCnnEncoder(observation_space=env.observation_space, feature_dim=512)\n        policy = OnPolicySharedActorCritic(observation_space=env.observation_space,\n                                           action_space=env.action_space,\n                                           feature_dim=512,\n                                           opt_class=th.optim.Adam,\n                                           opt_kwargs=dict(lr=2.5e-4, eps=1e-5),\n                                           init_fn=\"xavier_uniform\"\n                                           )\n        storage = VanillaRolloutStorage(observation_space=env.observation_space,\n                                        action_space=env.action_space,\n                                        device=device,\n                                        storage_size=self.num_steps,\n                                        num_envs=self.num_envs,\n                                        batch_size=256\n                                        )\n        # \u8bbe\u5b9a\u6240\u6709\u6a21\u5757\n        self.set(encoder=encoder, policy=policy, storage=storage, distribution=Categorical)\n\n    def update(self):\n        for _ in range(4):\n            for batch in self.storage.sample():\n                # \u8bc4\u4f30\u91c7\u6837\u7684\u52a8\u4f5c\n                new_values, new_log_probs, entropy = self.policy.evaluate_actions(obs=batch.observations, actions=batch.actions)\n                # \u7b56\u7565\u635f\u5931\n                policy_loss = - (batch.adv_targ * new_log_probs).mean()\n                # \u4ef7\u503c\u635f\u5931\n                value_loss = 0.5 * (new_values.flatten() - batch.returns).pow(2).mean()\n                # \u66f4\u65b0\n                self.policy.optimizers['opt'].zero_grad(set_to_none=True)\n                (value_loss * 0.5 + policy_loss - entropy * 0.01).backward()\n                nn.utils.clip_grad_norm_(self.policy.parameters(), 0.5)\n                self.policy.optimizers['opt'].step()\n
\u7136\u540e\uff0c\u4f7f\u7528\u4ee5\u4e0b\u4ee3\u7801\u8bad\u7ec3\u8be5\u667a\u80fd\u4f53\uff1a
from rllte.env import make_atari_env\nif __name__ == \"__main__\":\n    device = \"cuda\"\n    env = make_atari_env(\"PongNoFrameskip-v4\", num_envs=8, seed=0, device=device)\n    agent = A2C(env=env, tag=\"a2c_atari\", seed=0, device=device, num_steps=128)\n    agent.train(num_train_steps=10000000)\n
\u4e0a\u8ff0\u4f8b\u5b50\u8868\u660e\uff0c\u5229\u7528 RLLTE \u53ea\u9700\u5c11\u6570\u51e0\u884c\u4ee3\u7801\u4fbf\u53ef\u4ee5\u5f97\u5230\u4e00\u4e2a\u5f3a\u5316\u5b66\u4e60\u667a\u80fd\u4f53\u3002

"},{"location":"README-zh-Hans/#_6","title":"\u7b97\u6cd5\u89e3\u8026\u4e0e\u6a21\u5757\u66ff\u4ee3","text":"

RLLTE \u8bb8\u53ef\u5f00\u53d1\u8005\u5c06\u9884\u8bbe\u597d\u7684\u6a21\u5757\u66ff\u6362\uff0c\u4ee5\u4fbf\u4e8e\u8fdb\u884c\u7b97\u6cd5\u6027\u80fd\u6bd4\u8f83\u548c\u4f18\u5316\u3002\u5f00\u53d1\u8005\u53ef\u4ee5\u5c06\u9884\u8bbe\u6a21\u5757\u66ff\u6362\u6210\u522b\u7684\u7c7b\u578b\u7684\u5185\u7f6e\u6a21\u5757\u6216\u8005\u81ea\u5b9a\u4e49\u6a21\u5757\u3002\u5047\u8bbe\u6211\u4eec\u60f3\u8981\u5bf9\u6bd4\u4e0d\u540c\u7f16\u7801\u5668\u7684\u6548\u679c\uff0c\u53ea\u9700\u8981\u8c03\u7528\u5176\u4e2d .set \u51fd\u6570\uff1a

from rllte.xploit.encoder import EspeholtResidualEncoder\nencoder = EspeholtResidualEncoder(...)\nagent.set(encoder=encoder)\n
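
For a more complete picture, a sketch of such a swap with illustrative constructor arguments is shown below; the agent, environment, and argument values here are assumptions for illustration, not prescribed settings:

from rllte.agent import PPO
from rllte.env import make_atari_env
from rllte.xploit.encoder import EspeholtResidualEncoder

if __name__ == "__main__":
    device = "cuda:0"
    env = make_atari_env("PongNoFrameskip-v4", num_envs=8, seed=0, device=device)
    agent = PPO(env=env, device=device, tag="ppo_atari_residual_encoder")
    # replace the default encoder; feature_dim=512 is an assumed value
    encoder = EspeholtResidualEncoder(observation_space=env.observation_space, feature_dim=512)
    agent.set(encoder=encoder)
    agent.train(num_train_steps=5000)
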
The RLLTE framework is extremely convenient and gives developers the greatest possible freedom. For more detailed instructions, please refer to the tutorials.

"},{"location":"README-zh-Hans/#_7","title":"\u529f\u80fd\u5217\u8868 (\u90e8\u5206)","text":""},{"location":"README-zh-Hans/#_8","title":"\u5f3a\u5316\u5b66\u4e60\u667a\u80fd\u4f53","text":"\u7c7b\u578b \u7b97\u6cd5 \u8fde\u7eed \u79bb\u6563 \u591a\u91cd\u4e8c\u5143 \u591a\u91cd\u79bb\u6563 \u591a\u8fdb\u7a0b NPU \ud83d\udcb0 \ud83d\udd2d On-Policy A2C \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u274c On-Policy PPO \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u274c On-Policy DrAC \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f On-Policy DAAC \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u274c On-Policy DrDAAC \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f On-Policy PPG \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u274c \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u274c Off-Policy DQN \u2714\ufe0f \u274c \u274c \u274c \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u274c Off-Policy DDPG \u2714\ufe0f \u274c \u274c \u274c \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u274c Off-Policy SAC \u2714\ufe0f \u274c \u274c \u274c \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u274c Off-Policy TD3 \u2714\ufe0f \u274c \u274c \u274c \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u274c Off-Policy DrQ-v2 \u2714\ufe0f \u274c \u274c \u274c \u274c \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f Distributed IMPALA \u2714\ufe0f \u2714\ufe0f \u274c \u274c \u2714\ufe0f \u274c \u274c \u274c
  • 🐌: under development;
  • 💰: supports intrinsic reward shaping;
  • 🔭: supports observation augmentation.
"},{"location":"README-zh-Hans/#_9","title":"\u5185\u5728\u5956\u52b1\u6a21\u5757","text":"\u7c7b\u578b \u6a21\u5757 Count-based PseudoCounts, RND Curiosity-driven ICM, GIRM, RIDE Memory-based NGU Information theory-based RE3, RISE, REVD

For detailed examples, please refer to Tutorials: Use Intrinsic Reward and Observation Augmentation.

"},{"location":"README-zh-Hans/#rllte","title":"RLLTE \u751f\u6001\u73af\u5883","text":"

Explore the RLLTE ecosystem to accelerate your research:

  • Hub: provides fast training APIs and reusable benchmarks;
  • Evaluation: provides reliable metrics for model evaluation;
  • Env: provides well-packaged environments;
  • Deployment: provides convenient interfaces for model deployment;
  • Pre-training: provides various pre-training methods for RL;
  • Copilot: provides a large language model-based copilot.
"},{"location":"README-zh-Hans/#api","title":"API \u6587\u6863","text":"

Please refer to our handy API documentation: https://docs.rllte.dev/

"},{"location":"README-zh-Hans/#_10","title":"\u5982\u4f55\u8d21\u732e","text":"

Contributions to the project are welcome! Before you start writing code, please read CONTRIBUTING.md.

"},{"location":"README-zh-Hans/#_11","title":"\u5f15\u7528\u9879\u76ee","text":"

If you want to cite RLLTE in your research, please use the following format:

@software{rllte,\n  author = {Mingqi Yuan and Zequn Zhang and Yang Xu and Shihao Luo and Bo Li and Xin Jin and Wenjun Zeng},\n  title = {RLLTE: Long-Term Evolution Project of Reinforcement Learning},\n  url = {https://github.com/RLE-Foundation/rllte},\n  year = {2023},\n}\n

"},{"location":"README-zh-Hans/#_12","title":"\u81f4\u8c22","text":"

This project is supported by The Hong Kong Polytechnic University, the Eastern Institute for Advanced Study, and the FLW-Foundation. The Eastern Institute of Technology High-Performance Computing Center provides the GPU computing platform, and HUAWEI Ascend provides the NPU computing platform. Part of the code of this project is borrowed from other excellent open-source projects; see ACKNOWLEDGMENT.md.

"},{"location":"api/","title":"Overview","text":""},{"location":"api/#architecture","title":"Architecture","text":""},{"location":"api/#agent-implemented-rl-algorithms-using-rllte-modules","title":"Agent: Implemented RL algorithms using RLLTE modules.","text":"Type Algo. Box Dis. M.B. M.D. M.P. NPU \ud83d\udcb0 \ud83d\udd2d On-Policy A2C \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u274c On-Policy PPO \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u274c On-Policy DrAC \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f On-Policy DAAC \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u274c On-Policy DrDAAC \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f On-Policy PPG \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u274c \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u274c Off-Policy DQN \u2714\ufe0f \u274c \u274c \u274c \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u274c Off-Policy DDPG \u2714\ufe0f \u274c \u274c \u274c \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u274c Off-Policy SAC \u2714\ufe0f \u274c \u274c \u274c \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u274c Off-Policy SAC-Discrete \u274c \u2714\ufe0f \u274c \u274c \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u274c Off-Policy TD3 \u2714\ufe0f \u274c \u274c \u274c \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u274c Off-Policy DrQ-v2 \u2714\ufe0f \u274c \u274c \u274c \u274c \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f Distributed IMPALA \u2714\ufe0f \u2714\ufe0f \u274c \u274c \u2714\ufe0f \u274c \u274c \u274c
  • Dis., M.B., M.D.: Discrete, MultiBinary, and MultiDiscrete action spaces;
  • M.P.: Multi-processing;
  • \ud83d\udc0c: Developing;
  • \ud83d\udcb0: Support intrinsic reward shaping;
  • \ud83d\udd2d: Support observation augmentation.
"},{"location":"api/#xploit-modules-that-focus-on-exploitation-in-rl","title":"Xploit: Modules that focus on exploitation in RL.","text":"

Policy: Policies for interaction and learning.

Module Type Remark OnPolicySharedActorCritic On-policy Actor-Critic networks with a shared encoder. OnPolicyDecoupledActorCritic On-policy Actor-Critic networks with two separate encoders. OffPolicyDoubleQNetwork Off-policy Double Q-network. OffPolicyDoubleActorDoubleCritic Off-policy Double deterministic actor network and double-critic network. OffPolicyDetActorDoubleCritic Off-policy Deterministic actor network and double-critic network. OffPolicyStochActorDoubleCritic Off-policy Stochastic actor network and double-critic network. DistributedActorLearner Distributed Memory-shared actor and learner networks

Encoder: Neural network-based encoders for processing observations.

Module Input Reference Target Task EspeholtResidualEncoder Images Paper Atari or Procgen games MnihCnnEncoder Images Paper Atari games TassaCnnEncoder Images Paper DeepMind Control Suite: pixel PathakCnnEncoder Images Paper Atari or MiniGrid games IdentityEncoder States N/A DeepMind Control Suite: state VanillaMlpEncoder States N/A DeepMind Control Suite: state RaffinCombinedEncoder Dict Paper Highway
  • Naming Rule: Surname of the first author + Backbone + Encoder
  • Target Task: The testing tasks in their paper or potential tasks.
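
As a quick illustration of the encoder interface, the sketch below instantiates one of the encoders listed above on a fake image observation space and checks the feature dimension; the observation shape, batch size, and feature_dim are illustrative assumptions:

import numpy as np
import torch as th
import gymnasium as gym
from rllte.xploit.encoder import MnihCnnEncoder

# hypothetical Atari-like observation space: 4 stacked 84x84 frames, channels-first
obs_space = gym.spaces.Box(low=0, high=255, shape=(4, 84, 84), dtype=np.uint8)
encoder = MnihCnnEncoder(observation_space=obs_space, feature_dim=512)

# encoders behave like torch.nn modules: a batch of observations in, a batch of features out
features = encoder(th.rand(8, 4, 84, 84))
print(features.shape)  # expected: torch.Size([8, 512])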

Storage: Experience storage and sampling.

Module Type Remark VanillaRolloutStorage On-policy DictRolloutStorage On-policy VanillaReplayStorage Off-policy DictReplayStorage Off-policy NStepReplayStorage Off-policy PrioritizedReplayStorage Off-policy HerReplayStorage Off-policy VanillaDistributedStorage Distributed"},{"location":"api/#xplore-modules-that-focus-on-exploration-in-rl","title":"Xplore: Modules that focus on exploration in RL.","text":"

Augmentation: PyTorch.nn-like modules for observation augmentation.

Module Input Reference GaussianNoise States Paper RandomAmplitudeScaling States Paper GrayScale Images Paper RandomColorJitter Images Paper RandomConvolution Images Paper RandomCrop Images Paper RandomCutout Images Paper RandomCutoutColor Images Paper RandomFlip Images Paper RandomRotate Images Paper RandomShift Images Paper RandomTranslate Images Paper
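
Since these augmentations are torch.nn-like modules, they can also be applied to a batch of observations directly; a minimal sketch follows (the import path and the pad argument are assumptions to be checked against the API reference):

import torch as th
from rllte.xplore.augmentation import RandomShift

# a fake batch of stacked-frame image observations: (batch, channels, height, width)
obs = th.rand(8, 9, 84, 84)
aug = RandomShift(pad=4)   # pad=4 is an assumed argument name and value
augmented = aug(obs)
print(augmented.shape)     # the augmentation preserves the input shape: torch.Size([8, 9, 84, 84])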

Distribution: Distributions for sampling actions.

Module Type Reference NormalNoise Noise Paper OrnsteinUhlenbeckNoise Noise Paper TruncatedNormalNoise Noise Paper Bernoulli Distribution Paper Categorical Distribution Paper MultiCategorical Distribution Paper DiagonalGaussian Distribution Paper SquashedNormal Distribution Paper
  • In RLLTE, the action noise is implemented via a Distribution manner to realize unification.
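
Because noises and distributions share one interface, the exploration noise of an off-policy agent can be swapped like any other module. The sketch below assumes that .set accepts the distribution class itself, mirroring the distribution=Categorical usage in the A2C example earlier; the agent, task, and step count are illustrative:

from rllte.agent import DDPG
from rllte.env import make_dmc_env
from rllte.xplore.distribution import OrnsteinUhlenbeckNoise

if __name__ == "__main__":
    device = "cuda:0"
    env = make_dmc_env(env_id="cheetah_run", device=device)
    agent = DDPG(env=env, device=device, tag="ddpg_ou_noise")
    # swap the default exploration noise for Ornstein-Uhlenbeck noise (pass the class, not an instance)
    agent.set(distribution=OrnsteinUhlenbeckNoise)
    agent.train(num_train_steps=5000)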

Reward: Intrinsic reward modules for enhancing exploration.

Type Modules Count-based PseudoCounts, RND Curiosity-driven ICM, GIRM, RIDE Memory-based NGU Information theory-based RE3, RISE, REVD

See Tutorials: Use Intrinsic Reward and Observation Augmentation for usage examples.
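
A rough sketch of how one of these modules might be attached to an agent is shown below; the import path, the RE3 constructor arguments, and the reward keyword of .set are assumptions that should be checked against the tutorial:

from rllte.agent import DrAC
from rllte.env import make_procgen_env
from rllte.xplore.reward import RE3

if __name__ == "__main__":
    device = "cuda:0"
    env = make_procgen_env(env_id="bigfish", num_envs=64, device=device, seed=1)
    agent = DrAC(env=env, device=device, tag="drac_bigfish_re3", seed=1)
    # build the intrinsic reward module and hand it to the agent via .set
    re3 = RE3(observation_space=env.observation_space,
              action_space=env.action_space,
              device=device)
    agent.set(reward=re3)
    agent.train(num_train_steps=5000)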

"},{"location":"api/#env-packaged-environments-eg-atari-games-for-fast-invocation","title":"Env: Packaged environments (e.g., Atari games) for fast invocation.","text":"Function Name Remark Reference make_atari_env Atari Games Discrete control Paper make_bullet_env PyBullet Robotics Environments Continuous control Paper make_dmc_env DeepMind Control Suite Continuous control Paper make_minigrid_env MiniGrid Games Discrete control Paper make_procgen_env Procgen Games Discrete control Paper make_robosuite_env Robosuite Robotics Environments Continuous control Paper"},{"location":"api/#copilot-large-language-model-empowered-copilot","title":"Copilot: Large language model-empowered copilot.","text":"

See Copilot.

"},{"location":"api/#hub-fast-training-apis-and-reusable-benchmarks","title":"Hub: Fast training APIs and reusable benchmarks.","text":"

See Benchmarks.

"},{"location":"api/#evaluation-reasonable-and-reliable-metrics-for-algorithm-evaluation","title":"Evaluation: Reasonable and reliable metrics for algorithm evaluation.","text":"

See Tutorials: Model Evaluation.

"},{"location":"api/#pre-training-methods-of-pre-training-in-rl","title":"Pre-training: Methods of pre-training in RL.","text":"

See Tutorials: Pre-training.
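
A minimal sketch of switching an agent into the documented pre-training mode is shown below; the environment and step count are illustrative, and in practice pre-training is usually paired with an intrinsic reward module as sketched in the Xplore section above:

from rllte.agent import A2C
from rllte.env import make_atari_env

if __name__ == "__main__":
    device = "cuda:0"
    env = make_atari_env("PongNoFrameskip-v4", num_envs=8, seed=0, device=device)
    # pretraining=True turns on the pre-training mode documented in the agent APIs
    agent = A2C(env=env, device=device, tag="a2c_atari_pretraining", pretraining=True)
    agent.train(num_train_steps=5000)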

"},{"location":"api/#deployment-convenient-apis-for-model-deployment","title":"Deployment: Convenient APIs for model deployment.","text":"

See Tutorials: Model Deployment.

"},{"location":"api_old/","title":"Api old","text":""},{"location":"api_old/#common-auxiliary-modules-like-trainer-and-logger","title":"Common: Auxiliary modules like trainer and logger.","text":"
  • Engine: Engine for building Hsuanwu application.
  • Logger: Logger for managing output information.
"},{"location":"api_old/#xploit-modules-that-focus-on-exploitation-in-rl","title":"Xploit: Modules that focus on exploitation in RL.","text":"
  • Agent: Agent for interacting and learning.
Type Algorithm On-Policy A2C\ud83d\udda5\ufe0f\u26d3\ufe0f\ud83d\udcb0,PPO\ud83d\udda5\ufe0f\u26d3\ufe0f\ud83d\udcb0 DAAC\ud83d\udda5\ufe0f\u26d3\ufe0f\ud83d\udcb0,DrAC\ud83d\udda5\ufe0f\u26d3\ufe0f\ud83d\udcb0\ud83d\udd2d,DrDAAC\ud83d\udda5\ufe0f\u26d3\ufe0f\ud83d\udcb0\ud83d\udd2d Off-Policy DQN\ud83d\udda5\ufe0f\u26d3\ufe0f\ud83d\udcb0,DDPG\ud83d\udda5\ufe0f\u26d3\ufe0f\ud83d\udcb0,SAC\ud83d\udda5\ufe0f\u26d3\ufe0f\ud83d\udcb0 DrQ-v2\ud83d\udda5\ufe0f\u26d3\ufe0f\ud83d\udcb0\ud83d\udd2d Distributed IMPALA\u26d3\ufe0f
  • \ud83d\udda5\ufe0f: Support Neural-network processing unit.
  • \u26d3\ufe0f: Multi Processing.
  • \ud83d\udcb0: Support intrinsic reward shaping.
  • \ud83d\udd2d: Support observation augmentation.
Module Recurrent Box Discrete MultiBinary Multi Processing NPU Paper Citations SAC \u274c \u2714\ufe0f \u274c \u274c \u274c \u2714\ufe0f Link 5077\u2b50 DrQ \u274c \u2714\ufe0f \u274c \u274c \u274c \u2714\ufe0f Link 433\u2b50 DDPG \u274c \u2714\ufe0f \u274c \u274c \u274c \u2714\ufe0f Link 11819\u2b50 DrQ-v2 \u274c \u2714\ufe0f \u274c \u274c \u274c \u2714\ufe0f Link 100\u2b50 DAAC \u274c \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f Link 56\u2b50 PPO \u274c \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f Link 11155\u2b50 DrAC \u274c \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f Link 29\u2b50 IMPALA \u2714\ufe0f \u2714\ufe0f \u2714\ufe0f \u274c \u2714\ufe0f \u2714\ufe0f Link 1219\u2b50

Tips of Agent

  • \ud83d\udc0c: Developing.
  • NPU: Support Neural-network processing unit.
  • Recurrent: Support recurrent neural network.
  • Box: An N-dimensional box that contains every point in the action space.
  • Discrete: A list of possible actions, where each timestep only one of the actions can be used.
  • MultiBinary: A list of possible actions, where each timestep any of the actions can be used in any combination.
  • Encoder: Neural network-based encoder for processing observations.
Module Input Reference Target Task EspeholtResidualEncoder Images IMPALA: Scalable Distributed Deep-RL with Importance Weighted Actor-Learner Architectures Atari or Procgen games. IdentityEncoder States N/A DeepMind Control Suite: state MnihCnnEncoder Images Playing Atari with Deep Reinforcement Learning Atari games. TassaCnnEncoder Images DeepMind Control Suite DeepMind Control Suite: pixel PathakCnnEncoder Images Curiosity-Driven Exploration by Self-Supervised Prediction Atari or MiniGrid games VanillaMlpEncoder States N/A DeepMind Control Suite: state

Tips of Encoder

  • Naming Rule: 'Surname of the first author' + 'Backbone' + 'Encoder'
  • Input: Input type.
  • Target Task: The testing tasks in their paper or potential tasks.
  • Storage: Storage for storing collected experiences.
Module Remark VanillaRolloutStorage On-Policy RL VanillaReplayStorage Off-Policy RL NStepReplayStorage Off-Policy RL PrioritizedReplayStorage Off-Policy RL DistributedStorage Distributed RL"},{"location":"api_old/#xplore-modules-that-focus-on-exploration-in-rl","title":"Xplore: Modules that focus on exploration in RL.","text":"
  • Augmentation: PyTorch.nn-like modules for observation augmentation.
Module Input Reference GaussianNoise States Reinforcement Learning with Augmented Data RandomAmplitudeScaling States Reinforcement Learning with Augmented Data GrayScale Images Reinforcement Learning with Augmented Data RandomColorJitter Images Reinforcement Learning with Augmented Data RandomConvolution Images Reinforcement Learning with Augmented Data RandomCrop Images Reinforcement Learning with Augmented Data RandomCutout Images Reinforcement Learning with Augmented Data RandomCutoutColor Images Reinforcement Learning with Augmented Data RandomFlip Images Reinforcement Learning with Augmented Data RandomRotate Images Reinforcement Learning with Augmented Data RandomShift Images Mastering Visual Continuous Control: Improved Data-Augmented Reinforcement Learning RandomTranslate Images Reinforcement Learning with Augmented Data
  • Distribution: Distributions for sampling actions.
Module Type Reference NormalNoise Noise torch.distributions OrnsteinUhlenbeckNoise Noise Continuous Control with Deep Reinforcement Learning TruncatedNormalNoise Noise Mastering Visual Continuous Control: Improved Data-Augmented Reinforcement Learning Bernoulli Distribution torch.distributions Categorical Distribution torch.distributions DiagonalGaussian Distribution torch.distributions SquashedNormal Distribution torch.distributions

Tips of Distribution

  • In Hsuanwu, the action noise is implemented via a Distribution manner to realize unification.
  • Reward: Intrinsic reward modules for enhancing exploration.
Module Remark Repr. Visual Reference PseudoCounts Count-Based exploration \u2714\ufe0f \u2714\ufe0f Never Give Up: Learning Directed Exploration Strategies ICM Curiosity-driven exploration \u2714\ufe0f \u2714\ufe0f Curiosity-Driven Exploration by Self-Supervised Prediction RND Count-based exploration \u274c \u2714\ufe0f Exploration by Random Network Distillation GIRM Curiosity-driven exploration \u2714\ufe0f \u2714\ufe0f Intrinsic Reward Driven Imitation Learning via Generative Model NGU Memory-based exploration \u2714\ufe0f \u2714\ufe0f Never Give Up: Learning Directed Exploration Strategies RIDE Procedurally-generated environment \u2714\ufe0f \u2714\ufe0f RIDE: Rewarding Impact-Driven Exploration for Procedurally-Generated Environments RE3 Entropy Maximization \u274c \u2714\ufe0f State Entropy Maximization with Random Encoders for Efficient Exploration RISE Entropy Maximization \u274c \u2714\ufe0f R\u00e9nyi State Entropy Maximization for Exploration Acceleration in Reinforcement Learning REVD Divergence Maximization \u274c \u2714\ufe0f Rewarding Episodic Visitation Discrepancy for Exploration in Reinforcement Learning

Tips of Reward

  • \ud83d\udc0c: Developing.
  • Repr.: The method involves representation learning.
  • Visual: The method works well in visual RL.

See Tutorials: Use intrinsic reward and observation augmentation for usage examples.

"},{"location":"api_old/#evaluation-reasonable-and-reliable-metrics-for-algorithm-evaluation","title":"Evaluation: Reasonable and reliable metrics for algorithm evaluation.","text":"

See Tutorials: Evaluate your model.

"},{"location":"api_old/#env-packaged-environments-eg-atari-games-for-fast-invocation","title":"Env: Packaged environments (e.g., Atari games) for fast invocation.","text":"Module Name Remark Reference make_atari_env Atari Games Discrete control The Arcade Learning Environment: An Evaluation Platform for General Agents make_bullet_env PyBullet Robotics Environments Continuous control Pybullet: A Python Module for Physics Simulation for Games, Robotics and Machine Learning make_dmc_env DeepMind Control Suite Continuous control DeepMind Control Suite make_minigrid_env MiniGrid Games Discrete control Minimalistic Gridworld Environment for Gymnasium make_procgen_env Procgen Games Discrete control Leveraging Procedural Generation to Benchmark Reinforcement Learning make_robosuite_env Robosuite Robotics Environments Continuous control Robosuite: A Modular Simulation Framework and Benchmark for Robot Learning"},{"location":"api_old/#pre-training-methods-of-pre-training-in-rl","title":"Pre-training: Methods of pre-training in RL.","text":"

See Tutorials: Pre-training in Hsuanwu.

"},{"location":"api_old/#deployment-methods-of-model-deployment-in-rl","title":"Deployment: Methods of model deployment in RL.","text":"

See Tutorials: Deploy your model in inference devices.

"},{"location":"benchmarks/","title":"Benchmarks","text":"

rllte-hub provides a large number of reusable datasets and models of representative RL benchmarks. All the files are deposited on the Hugging Face platform; view them at

  • https://hub.rllte.dev/ or
  • https://huggingface.co/RLE-Foundation.
Module Remark rllte.hub.datasets Provide test scores and learning curves of various RL algorithms on different benchmarks. rllte.hub.models Provide trained models of various RL algorithms on different benchmarks. rllte.hub.applications Provide fast APIs for training RL agents on recognized benchmarks."},{"location":"benchmarks/#support-list","title":"Support list","text":"Benchmark Algorithm Remark Reference Atari Games PPO 50M, \ud83d\udcaf\ud83d\udcca\ud83e\udd16 Paper SAC 1M, \ud83d\udcaf\ud83d\udcca\ud83e\udd16 Paper DeepMind Control (Pixel) DrQ-v2 1M, \ud83d\udcaf\ud83d\udcca\ud83e\udd16 Paper DeepMind Control (State) SAC 10M, \ud83d\udcaf\ud83d\udcca\ud83e\udd16 DDPG 10M, \ud83d\udcaf\ud83d\udcca\ud83e\udd16 Procgen Games PPO 25M, \ud83d\udcaf\ud83d\udcca\ud83e\udd16 Paper DAAC 25M, \ud83d\udcaf\ud83d\udcca\ud83e\udd16 Paper MiniGrid Games

Tip

  • \ud83d\udc0c: Incoming.
  • (25M): 25 million training steps.
  • \ud83d\udcafScores: Available final scores.
  • \ud83d\udccaCurves: Available training curves.
  • \ud83e\udd16Models: Available trained models.
"},{"location":"benchmarks/#datasets","title":"Datasets","text":""},{"location":"benchmarks/#load_scores","title":".load_scores","text":"

Suppose we want to evaluate algorithm performance on the Procgen benchmark. Here is an example:

example.py

from rllte.hub.datasets import Procgen\n\nprocgen = Procgen()\nprocgen_scores = procgen.load_scores()\nprint(procgen_scores['ppo'].shape)\n\n# Output:\n# (10, 16)\n
For each algorithm, this will return an NdArray of size (10 x 16), where scores[n][m] represents the score of run n on task m.
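
Aggregate statistics can be computed directly from this array; here is a small sketch using NumPy only (the interquartile mean below is a plain trimmed mean over all run-task scores, not the stratified-bootstrap estimate provided by the evaluation tools):

import numpy as np
from rllte.hub.datasets import Procgen

scores = Procgen().load_scores()["ppo"]                  # shape (10, 16): 10 runs x 16 tasks
print("mean score:  ", scores.mean())
print("median score:", np.median(scores.mean(axis=1)))   # median of per-run means

# interquartile mean (IQM): mean of the middle 50% of all run-task scores
flat = np.sort(scores.reshape(-1))
q = len(flat) // 4
print("IQM score:   ", flat[q:len(flat) - q].mean())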

"},{"location":"benchmarks/#load_curves","title":".load_curves","text":"

Meanwhile, .load_curves will return the learning curves as a Python Dict like:

curves = {\n    \"ppo\": {\n        \"train\": {\"bigfish\": np.ndarray(shape=(Number of seeds, Number of points)), ...}, \n        \"eval\": {\"bigfish\": np.ndarray(shape=(Number of seeds, Number of points)), ...}, \n    },\n    \"daac\": {\n        \"train\": {\"bigfish\": np.ndarray(shape=(Number of seeds, Number of points)), ...}, \n        \"eval\": {\"bigfish\": np.ndarray(shape=(Number of seeds, Number of points)), ...}, \n    },\n    ...\n}\n
A code example for loading curves of the Procgen benchmark: example.py
from rllte.hub.datasets import Procgen\n\nif __name__ == \"__main__\":\n    # load data\n    procgen = Procgen()\n    curves = procgen.load_curves()\n\n    print(curves['ppo']['train']['bigfish'].shape)\n    print(curves['ppo']['eval']['bigfish'].shape)\n\n# Output:\n# (10, 1525)\n# (10, 153)\n

"},{"location":"benchmarks/#models","title":"Models","text":"

Suppose we want to load a PPO agent trained on the Procgen benchmark. Here is an example:

example.py
from rllte.hub.models import Procgen\nfrom rllte.env import make_procgen_env\nimport torch as th\nimport numpy as np\n\nif __name__ == \"__main__\":\n    # env setup\n    device = \"cuda:0\"\n    env_id = \"starpilot\"\n    seed = 1\n    # download the model\n    procgen = Procgen()\n    agent = procgen.load_models(agent=\"ppo\",\n                                env_id=env_id,\n                                seed=seed,\n                                device=device)\n    # create env\n    env = make_procgen_env(env_id=env_id, device=device, num_envs=1, seed=seed)\n    # evaluate the model\n    obs, infos = env.reset(seed=seed)\n    # run the model\n    episode_rewards, episode_steps = list(), list()\n    while len(episode_rewards) < 10:\n        # the exported model outputs logits of the action distribution\n        action = th.softmax(agent(obs), dim=1).argmax(dim=1)\n        obs, rewards, terminateds, truncateds, infos = env.step(action)\n\n        if \"episode\" in infos:\n            indices = np.nonzero(infos[\"episode\"][\"l\"])\n            episode_rewards.extend(infos[\"episode\"][\"r\"][indices].tolist())\n            episode_steps.extend(infos[\"episode\"][\"l\"][indices].tolist())\n\n    print(f\"mean episode reward: {np.mean(episode_rewards)}\")\n    print(f\"mean episode length: {np.mean(episode_steps)}\")\n\n# output:\nmean episode reward: 30.0\nmean episode length: 296.1\n
"},{"location":"benchmarks/#applications","title":"Applications","text":"

Suppose we want to train a PPO agent on the Procgen benchmark; it suffices to write a train.py like:

from rllte.hub.applications import Procgen\n\napp = Procgen(agent=\"PPO\", env_id=\"coinrun\", seed=1, device=\"cuda\")\napp.train(num_train_steps=2.5e+7)\n
All the results of rllte.hub.datasets and rllte.hub.models were trained via rllte.hub.applications, and all the hyper-parameters can be found in the reference.

"},{"location":"changelog/","title":"Changelog","text":""},{"location":"changelog/#v001","title":"v0.0.1","text":"

05/05/2023 (Version 0.0.1)

Version 0.0.1 published.

  • New features:
    • Agent: SAC, DrQ, DDPG, DrQ-v2, PPO, DAAC, DrAC, PPG, IMPALA
    • Encoder: EspeholtResidualEncoder, IdentityEncoder, MnihCnnEncoder, TassaCnnEncoder, VanillaMlpEncoder
    • Storage: DecoupledRolloutStorage, VanillaRolloutStorage, VanillaReplayStorage, NStepReplayStorage, PrioritizedReplayStorage, DistributedStorage
    • Augmentation: GaussianNoise, RandomAmplitudeScaling, RandomShift, ...
    • Distribution: TruncatedNormalNoise, Bernoulli, Categorical, DiagonalGaussian, ...
    • Reward: PseudoCounts, ICM, RND, RE3, ...
"},{"location":"changelog/#initialization","title":"Initialization","text":"

19/01/2023

  • Repository initialization and first commit.
"},{"location":"contributing/","title":"Contributing to rllte","text":"

Thank you for using and contributing to the rllte project!!!\ud83d\udc4b\ud83d\udc4b\ud83d\udc4b Before you begin writing code, it is important that you share your intention to contribute with the team, based on the type of contribution:

  1. You want to propose a new feature and implement it:

    • Post about your intended feature in an issue, and we shall discuss the design and implementation. Once we agree that the plan looks good, go ahead and implement it.
  2. You want to implement a feature or bug-fix for an outstanding issue:

    • Search for your issue in the rllte issue list.
    • Pick an issue and comment that you'd like to work on the feature or bug-fix.
    • If you need more context on a particular issue, please ask and we shall provide.

Once you implement and test your feature or bug-fix, please submit a Pull Request to https://github.com/RLE-Foundation/rllte.

"},{"location":"contributing/#get-rllte","title":"Get rllte","text":"

Open up a terminal and clone the repository from GitHub with git:

git clone https://github.com/RLE-Foundation/rllte.git\ncd rllte/\n
After that, run the following command to install the package and its dependencies:
pip install -e .[all]\n

"},{"location":"contributing/#codestyle","title":"Codestyle","text":"

We use black codestyle (max line length of 127 characters) together with isort to sort the imports. For the documentation, we use the default line length of 88 characters per line.

Please run make format to reformat your code. You can check the codestyle using make check-codestyle and make lint.

Please document each function/method and add type annotations using the following Google-style docstring template:

def function_with_types_in_docstring(param1: type1, param2: type2):\n    \"\"\"Example function with types documented in the docstring.\n\n    `PEP 484`_ type annotations are supported. If attribute, parameter, and\n    return types are annotated according to `PEP 484`_, they do not need to be\n    included in the docstring:\n\n    Args:\n        param1 (type1): The first parameter.\n        param2 (type2): The second parameter.\n\n    Returns:\n        bool: The return value. True for success, False otherwise.\n\n    .. _PEP 484:\n        https://www.python.org/dev/peps/pep-0484/\n\n    \"\"\"\n

"},{"location":"contributing/#pull-request-pr","title":"Pull Request (PR)","text":"

Before proposing a PR, please open an issue where the feature can be discussed. This prevents duplicate PRs from being proposed and also eases the code review process. Each PR needs to be reviewed and accepted by at least one of the maintainers (@yuanmingqi, @ShihaoLuo). A PR must pass the Continuous Integration tests to be merged into the master branch.

See the Pull Request Template.

"},{"location":"contributing/#tests","title":"Tests","text":"

All new features must add tests in the tests/ folder to ensure that everything works fine. We use pytest; a minimal sketch of such a test is shown below. Also, when a bug fix is proposed, tests should be added to avoid regressions.
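
A minimal sketch of such a test (the file name, environment id, and batch-dimension assertion are illustrative assumptions):

# tests/test_make_atari_env.py -- a hypothetical test module
from rllte.env import make_atari_env


def test_atari_env_reset():
    # factory arguments follow the examples in the docs; values are illustrative
    envs = make_atari_env("PongNoFrameskip-v4", num_envs=2, seed=0, device="cpu")
    obs, infos = envs.reset(seed=0)
    # vectorized envs are assumed to return batched observations, one per parallel env
    assert obs.shape[0] == 2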

To run tests with pytest:

make pytest\n

Type checking with pytype and mypy:

make type\n

Codestyle check with black, isort and ruff:

make check-codestyle\nmake lint\n

To run type, format and lint in one command:

make commit-checks\n

"},{"location":"contributing/#acknowledgement","title":"Acknowledgement","text":"

This contributing guide is based on the Stable-Baselines3 one.

"},{"location":"copilot/","title":"Copilot","text":"

Copilot is the first attempt to integrate an LLM into an RL framework, aiming to help developers reduce the learning cost and facilitate application construction. We follow the design of LocalGPT, which interacts privately with documents using the power of GPT. The source documents are first ingested by an instructor embedding tool to create a local vector database. After that, a local LLM is used to understand questions and create answers based on the database. In practice, we utilize Vicuna-7B as the base model and build the database using various corpora, including API documentation, tutorials, and RL references. The powerful understanding ability of the LLM enables the copilot to accurately answer questions about the use of the framework and any other questions about RL. Moreover, no additional training is required, and users are free to replace the base model according to their computing power. In future work, we will further enrich the corpus and add a code-completion function to build a more intelligent copilot for RL.

  • GitHub Repository: https://github.com/RLE-Foundation/rllte-copilot
  • Hugging Face Space: Coming soon...
"},{"location":"getting_started/","title":"Getting Started","text":""},{"location":"getting_started/#installation","title":"Installation","text":""},{"location":"getting_started/#prerequisites","title":"Prerequisites","text":"

Currently, we recommend Python>=3.8, and users can create a virtual environment by

conda create -n rllte python=3.8\n

"},{"location":"getting_started/#with-pip-recommended","title":"with pip recommended","text":"

RLLTE has been published as a Python package on PyPI and can be installed with pip, ideally inside a virtual environment. Open up a terminal and install RLLTE with:

pip install rllte-core # basic installation\npip install rllte-core[envs] # for pre-defined environments\n
"},{"location":"getting_started/#with-git","title":"with git","text":"

Open up a terminal and clone the repository from GitHub with git:

git clone https://github.com/RLE-Foundation/rllte.git\n
After that, run the following command to install the package and its dependencies:
pip install -e . # basic installation\npip install -e .[envs] # for pre-defined environments\n

"},{"location":"getting_started/#pytorch-installation","title":"PyTorch Installation","text":"

RLLTE currently supports two kinds of computing devices for acceleration, namely NVIDIA GPU and HUAWEI NPU. Thus, users need to install different versions of PyTorch to adapt to different devices.

"},{"location":"getting_started/#with-nvidia-gpu","title":"with NVIDIA GPU","text":"

Open up a terminal and install PyTorch with:

pip3 install torch==2.0.0 torchvision\n
More information can be found in Get Started.

"},{"location":"getting_started/#with-huawei-npu","title":"with HUAWEI NPU","text":"

Tip

Ascend NPU only supports aarch64!

  • Install the dependencies for PyTorch:
    pip3 install pyyaml wheel\n
  • Download the .whl package of PyTorch from Kunpeng file sharing center and install it:

    wget https://repo.huaweicloud.com/kunpeng/archive/Ascend/PyTorch/torch-1.11.0-cp39-cp39-linux_aarch64.whl\npip3 install torch-1.11.0-cp39-cp39-linux_aarch64.whl\n

  • Install torch_npu:

    wget https://gitee.com/ascend/pytorch/releases/download/v5.0.rc1-pytorch1.11.0/torch_npu-1.11.0-cp39-cp39m-linux_aarch64.whl\npip3 install torch_npu-1.11.0-cp39-cp39m-linux_aarch64.whl\n

  • Install apex [Optional]:

    wget https://gitee.com/ascend/apex/releases/download/v5.0.rc1-pytorch1.11.0/apex-0.1_ascend-cp39-cp39m-linux_aarch64.whl\npip3 install apex-0.1_ascend-cp39-cp39m-linux_aarch64.whl\n
    Training with mixed precision can improve model performance. Depending on the scenario, you can either introduce the Apex mixed-precision module or use the AMP module integrated into AscendPyTorch 1.8.1 and later. The Apex module provides four function modes to suit different mixed-precision training scenarios; AMP corresponds to only one of these functions but can be used directly without being introduced separately. For details about how to use the AMP and Apex modules, see "Mixed Precision Description" in the PyTorch Network Model Porting and Training Guide.
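
After installing torch and torch_npu, a quick smoke test (device index 0 is assumed) confirms that the NPU is visible to PyTorch before launching any RLLTE training:

import torch
import torch_npu  # registers the "npu" device type with PyTorch

print(torch.npu.is_available())   # should print True on a correctly configured Ascend machine
x = torch.rand(2, 3).to("npu:0")  # move a tensor to the first NPU
print(x.device)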

"},{"location":"hub/","title":"RLLTE Hub: Large-Scale and Comprehensive Data Hub for RL","text":""},{"location":"hub/#support-list","title":"Support list","text":"Benchmark Algorithm Remark Reference Atari Games PPO 10M, \ud83d\udcaf\ud83d\udcca\ud83e\udd16 Paper DeepMind Control (Pixel) DrQ-v2 1M, \ud83d\udcaf\ud83d\udcca\ud83e\udd16 Paper DeepMind Control (State) SAC 10M for Humanoid, 2M else, \ud83d\udcaf\ud83d\udcca\ud83e\udd16 DDPG \ud83d\udc0c Procgen Games PPO 25M, \ud83d\udcaf\ud83d\udcca\ud83e\udd16 Paper DAAC \ud83d\udc0c Paper MiniGrid Games \ud83d\udc0c \ud83d\udc0c \ud83d\udc0c

Tip

  • \ud83d\udc0c: Incoming.
  • (25M): 25 million training steps.
  • \ud83d\udcafScores: Available final scores.
  • \ud83d\udccaCurves: Available training curves.
  • \ud83e\udd16Models: Available trained models.
"},{"location":"hub/#trained-rl-models","title":"Trained RL Models","text":"

The following example illustrates how to download a PPO agent trained on the Atari benchmark:

from rllte.hub import Atari\n\nagent = Atari().load_models(agent='ppo',\n                            env_id='BeamRider-v5',\n                            seed=0,\n                            device='cuda')\nprint(agent)\n

Use the trained agent to play the game:

from rllte.env import make_envpool_atari_env\nfrom rllte.common.utils import get_episode_statistics\nimport numpy as np\nimport torch as th\n\nenvs = make_envpool_atari_env(env_id=\"BeamRider-v5\",\n                              num_envs=1,\n                              seed=0,\n                              device=\"cuda\",\n                              asynchronous=False)\n\nobs, infos = envs.reset(seed=0)\nepisode_rewards, episode_steps = list(), list()\nwhile len(episode_rewards) < 10:\n    # The agent outputs logits of the action distribution\n    actions = th.softmax(agent(obs), dim=1).argmax(dim=1)\n    obs, rewards, terminateds, truncateds, infos = envs.step(actions)\n\n    eps_r, eps_l = get_episode_statistics(infos)\n    episode_rewards.extend(eps_r)\n    episode_steps.extend(eps_l)\n\nprint(f\"mean episode reward: {np.mean(episode_rewards)}\")\nprint(f\"mean episode length: {np.mean(episode_steps)}\")\n\n# Output:\n# mean episode reward: 3249.8\n# mean episode length: 3401.1\n

"},{"location":"hub/#rl-training-logs","title":"RL Training Logs","text":"

Download training logs of various RL algorithms on well-recognized benchmarks for academic research.

"},{"location":"hub/#training-curves","title":"Training Curves","text":"

The following example illustrates how to download training curves of the SAC agent on the DeepMind Control Suite benchmark:

from rllte.hub import DMControl\n\ncurves = DMControl().load_curves(agent='sac', env_id=\"cheetah_run\")\n
This will return a Python Dict of NumPy arrays like:
curves\n\u251c\u2500\u2500 train: np.ndarray(shape=(N_SEEDS, N_POINTS))\n\u2514\u2500\u2500 eval:  np.ndarray(shape=(N_SEEDS, N_POINTS))\n

Visualize the training curves:
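
For example, a sketch of plotting the mean curve with a one-standard-deviation band across seeds using Matplotlib (the x-axis is left in evaluation-point units because the logging interval is not part of the returned arrays):

import numpy as np
import matplotlib.pyplot as plt
from rllte.hub import DMControl

curves = DMControl().load_curves(agent="sac", env_id="cheetah_run")
train = curves["train"]                     # shape (N_SEEDS, N_POINTS)
steps = np.arange(train.shape[1])
mean, std = train.mean(axis=0), train.std(axis=0)

plt.plot(steps, mean, label="sac / cheetah_run (train)")
plt.fill_between(steps, mean - std, mean + std, alpha=0.3)  # +/- one std across seeds
plt.xlabel("evaluation point")
plt.ylabel("episode return")
plt.legend()
plt.show()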

"},{"location":"hub/#test-scores","title":"Test Scores","text":"

Similarly, download the final test scores via

scores = DMControl().load_scores(agent='sac', env_id=\"cheetah_run\")\n
This will return a data array with shape (N_SEEDS, N_POINTS).

"},{"location":"hub/#rl-training-applications","title":"RL Training Applications","text":"

Developers can also train RL agents on well-recognized benchmarks rapidly using simple interfaces. Suppose we want to train a PPO agent on the Procgen benchmark; it suffices to write a train.py like:

from rllte.hub import Procgen\n\napp = Procgen().load_apis(agent=\"PPO\", env_id=\"coinrun\", seed=1, device=\"cuda\")\napp.train(num_train_steps=2.5e+7)\n
All the curves, scores, and models were trained via .load_apis(), and all the hyper-parameters can be found in the reference of the support list.

"},{"location":"license/","title":"License","text":"

MIT License

Copyright (c) 2023 Reinforcement Learning Evolution Foundation

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

"},{"location":"verification/","title":"Verification","text":""},{"location":"verification/#software","title":"Software","text":"

To ensure that RLLTE is installed correctly, we can verify the installation by running a single training script:

python -m rllte.verification\n
If successful, you will see the following output:
[08/03/2023 07:30:21 PM] - [INFO.] - Invoking RLLTE Engine...\n[08/03/2023 07:30:21 PM] - [INFO.] - ================================================================================\n[08/03/2023 07:30:21 PM] - [INFO.] - Tag               : verification\n[08/03/2023 07:30:21 PM] - [INFO.] - Device            : CPU\n[08/03/2023 07:30:21 PM] - [DEBUG] - Agent             : PPO\n[08/03/2023 07:30:21 PM] - [DEBUG] - Encoder           : IdentityEncoder\n[08/03/2023 07:30:21 PM] - [DEBUG] - Policy            : OnPolicySharedActorCritic\n[08/03/2023 07:30:21 PM] - [DEBUG] - Storage           : VanillaRolloutStorage\n[08/03/2023 07:30:21 PM] - [DEBUG] - Distribution      : Categorical\n[08/03/2023 07:30:21 PM] - [DEBUG] - Augmentation      : False\n[08/03/2023 07:30:21 PM] - [DEBUG] - Intrinsic Reward  : False\n[08/03/2023 07:30:21 PM] - [DEBUG] - ================================================================================\n[08/03/2023 07:30:22 PM] - [TRAIN] - S: 512         | E: 4           | L: 428         | R: -427.000    | FPS: 1457.513  | T: 0:00:00    \n[08/03/2023 07:30:22 PM] - [TRAIN] - S: 640         | E: 5           | L: 428         | R: -427.000    | FPS: 1513.510  | T: 0:00:00    \n[08/03/2023 07:30:22 PM] - [TRAIN] - S: 768         | E: 6           | L: 353         | R: -352.000    | FPS: 1551.423  | T: 0:00:00    \n[08/03/2023 07:30:22 PM] - [TRAIN] - S: 896         | E: 7           | L: 353         | R: -352.000    | FPS: 1581.616  | T: 0:00:00    \n[08/03/2023 07:30:22 PM] - [INFO.] - Training Accomplished!\n[08/03/2023 07:30:22 PM] - [INFO.] - Model saved at: /export/yuanmingqi/code/rllte/logs/verification/2023-08-03-07-30-21/model\nVERIFICATION PASSED!\n

"},{"location":"verification/#hardware","title":"Hardware","text":"

Additionally, to check whether your GPU driver and CUDA are enabled and accessible by PyTorch, run the following commands, which return whether the CUDA driver is available:

import torch\ntorch.cuda.is_available()\n

For HUAWEI NPU:

import torch\nimport torch_npu\ntorch.npu.is_available()\n
"},{"location":"api_docs/agent/daac/","title":"DAAC","text":""},{"location":"api_docs/agent/daac/#daac","title":"DAAC","text":"

source

DAAC(\n   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,\n   device: str = 'cpu', pretraining: bool = False, num_steps: int = 128,\n   feature_dim: int = 512, batch_size: int = 256, lr: float = 0.00025, eps: float = 1e-05,\n   hidden_dim: int = 256, clip_range: float = 0.2, clip_range_vf: float = 0.2,\n   policy_epochs: int = 1, value_freq: int = 1, value_epochs: int = 9, vf_coef: float = 0.5,\n   ent_coef: float = 0.01, adv_coef: float = 0.25, max_grad_norm: float = 0.5,\n   discount: float = 0.999, init_fn: str = 'xavier_uniform'\n)\n

Decoupled Advantage Actor-Critic (DAAC) agent. Based on: https://github.com/rraileanu/idaac

Args

  • env (VecEnv) : Vectorized environments for training.
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • tag (str) : An experiment tag.
  • seed (int) : Random seed for reproduction.
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • pretraining (bool) : Turn on the pre-training mode.
  • num_steps (int) : The sample length per rollout.
  • feature_dim (int) : Number of features extracted by the encoder.
  • batch_size (int) : Number of samples per batch to load.
  • lr (float) : The learning rate.
  • eps (float) : Term added to the denominator to improve numerical stability.
  • hidden_dim (int) : The size of the hidden layers.
  • clip_range (float) : Clipping parameter.
  • clip_range_vf (float) : Clipping parameter for the value function.
  • policy_epochs (int) : Times of updating the policy network.
  • value_freq (int) : Update frequency of the value network.
  • value_epochs (int) : Times of updating the value network.
  • vf_coef (float) : Weighting coefficient of value loss.
  • ent_coef (float) : Weighting coefficient of entropy bonus.
  • adv_coef (float) : Weighting coefficient of advantage loss.
  • max_grad_norm (float) : Maximum norm of gradients.
  • discount (float) : Discount factor.
  • init_fn (str) : Parameters initialization method.

Returns

DAAC agent instance.
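
A minimal usage sketch for this constructor (the environment choice and hyper-parameters are illustrative, not recommended settings):

from rllte.agent import DAAC
from rllte.env import make_procgen_env

if __name__ == "__main__":
    device = "cuda:0"
    env = make_procgen_env(env_id="starpilot", num_envs=64, device=device, seed=1)
    agent = DAAC(env=env, device=device, tag="daac_starpilot", seed=1, num_steps=256)
    agent.train(num_train_steps=25000000)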

Methods:

"},{"location":"api_docs/agent/daac/#update","title":".update","text":"

source

.update()\n

Update function that returns training metrics such as policy loss, value loss, etc.

"},{"location":"api_docs/agent/drac/","title":"DrAC","text":""},{"location":"api_docs/agent/drac/#drac","title":"DrAC","text":"

source

DrAC(\n   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,\n   device: str = 'cpu', pretraining: bool = False, num_steps: int = 128,\n   feature_dim: int = 512, batch_size: int = 256, lr: float = 0.00025, eps: float = 1e-05,\n   hidden_dim: int = 512, clip_range: float = 0.1, clip_range_vf: float = 0.1,\n   n_epochs: int = 4, vf_coef: float = 0.5, ent_coef: float = 0.01, aug_coef: float = 0.1,\n   max_grad_norm: float = 0.5, discount: float = 0.999, init_fn: str = 'orthogonal'\n)\n

Data Regularized Actor-Critic (DrAC) agent. Based on: https://github.com/rraileanu/auto-drac

Args

  • env (VecEnv) : Vectorized environments for training.
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • tag (str) : An experiment tag.
  • seed (int) : Random seed for reproduction.
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • pretraining (bool) : Turn on the pre-training mode.
  • num_steps (int) : The sample length per rollout.
  • feature_dim (int) : Number of features extracted by the encoder.
  • batch_size (int) : Number of samples per batch to load.
  • lr (float) : The learning rate.
  • eps (float) : Term added to the denominator to improve numerical stability.
  • hidden_dim (int) : The size of the hidden layers.
  • clip_range (float) : Clipping parameter.
  • clip_range_vf (float) : Clipping parameter for the value function.
  • n_epochs (int) : Times of updating the policy.
  • vf_coef (float) : Weighting coefficient of value loss.
  • ent_coef (float) : Weighting coefficient of entropy bonus.
  • aug_coef (float) : Weighting coefficient of augmentation loss.
  • max_grad_norm (float) : Maximum norm of gradients.
  • discount (float) : Discount factor.
  • init_fn (str) : Parameters initialization method.

Returns

DrAC agent instance.

Methods:

"},{"location":"api_docs/agent/drac/#update","title":".update","text":"

source

.update()\n

Update function that returns training metrics such as policy loss, value loss, etc.

"},{"location":"api_docs/agent/drdaac/","title":"DrDAAC","text":""},{"location":"api_docs/agent/drdaac/#drdaac","title":"DrDAAC","text":"

source

DrDAAC(\n   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,\n   device: str = 'cpu', pretraining: bool = False, num_steps: int = 128,\n   feature_dim: int = 512, batch_size: int = 256, lr: float = 0.00025, eps: float = 1e-05,\n   hidden_dim: int = 256, clip_range: float = 0.2, clip_range_vf: float = 0.2,\n   policy_epochs: int = 1, value_freq: int = 1, value_epochs: int = 9, vf_coef: float = 0.5,\n   ent_coef: float = 0.01, aug_coef: float = 0.1, adv_coef: float = 0.25,\n   max_grad_norm: float = 0.5, discount: float = 0.999, init_fn: str = 'xavier_uniform'\n)\n

Data-Regularized extension of Decoupled Advantage Actor-Critic (DAAC) agent. Based on: https://github.com/rraileanu/idaac

Args

  • env (VecEnv) : Vectorized environments for training.
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • tag (str) : An experiment tag.
  • seed (int) : Random seed for reproduction.
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • pretraining (bool) : Turn on the pre-training mode.
  • num_steps (int) : The sample length per rollout.
  • feature_dim (int) : Number of features extracted by the encoder.
  • batch_size (int) : Number of samples per batch to load.
  • lr (float) : The learning rate.
  • eps (float) : Term added to the denominator to improve numerical stability.
  • hidden_dim (int) : The size of the hidden layers.
  • clip_range (float) : Clipping parameter.
  • clip_range_vf (float) : Clipping parameter for the value function.
  • policy_epochs (int) : Times of updating the policy network.
  • value_freq (int) : Update frequency of the value network.
  • value_epochs (int) : Times of updating the value network.
  • vf_coef (float) : Weighting coefficient of value loss.
  • ent_coef (float) : Weighting coefficient of entropy bonus.
  • aug_coef (float) : Weighting coefficient of augmentation loss.
  • adv_coef (float) : Weighting coefficient of advantage loss.
  • max_grad_norm (float) : Maximum norm of gradients.
  • discount (float) : Discount factor.
  • init_fn (str) : Parameters initialization method.

Returns

DrDAAC agent instance.

Methods:

"},{"location":"api_docs/agent/drdaac/#update","title":".update","text":"

source

.update()\n

Update function that returns training metrics such as policy loss, value loss, etc.

"},{"location":"api_docs/agent/drqv2/","title":"DrQv2","text":""},{"location":"api_docs/agent/drqv2/#drqv2","title":"DrQv2","text":"

source

DrQv2(\n   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,\n   device: str = 'cpu', pretraining: bool = False, num_init_steps: int = 2000,\n   storage_size: int = 1000000, feature_dim: int = 50, batch_size: int = 256,\n   lr: float = 0.0001, eps: float = 1e-08, hidden_dim: int = 1024,\n   critic_target_tau: float = 0.01, update_every_steps: int = 2,\n   stddev_clip: float = 0.3, init_fn: str = 'orthogonal'\n)\n

Data Regularized Q-v2 (DrQv2) agent. Based on: https://github.com/facebookresearch/drqv2

Args

  • env (VecEnv) : Vectorized environments for training.
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • tag (str) : An experiment tag.
  • seed (int) : Random seed for reproduction.
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • pretraining (bool) : Turn on the pre-training mode.
  • num_init_steps (int) : Number of initial exploration steps.
  • storage_size (int) : The capacity of the storage.
  • feature_dim (int) : Number of features extracted by the encoder.
  • batch_size (int) : Number of samples per batch to load.
  • lr (float) : The learning rate.
  • eps (float) : Term added to the denominator to improve numerical stability.
  • hidden_dim (int) : The size of the hidden layers.
  • critic_target_tau (float) : The critic Q-function soft-update rate.
  • update_every_steps (int) : The agent update frequency.
  • stddev_clip (float) : The exploration std clip range.
  • init_fn (str) : Parameters initialization method.

Returns

DrQv2 agent instance.

Methods:

"},{"location":"api_docs/agent/drqv2/#update","title":".update","text":"

source

.update()\n

Update the agent and return training metrics such as actor loss, critic loss, etc.

"},{"location":"api_docs/agent/drqv2/#update_critic","title":".update_critic","text":"

source

.update_critic(\n   obs: th.Tensor, actions: th.Tensor, rewards: th.Tensor, discount: th.Tensor,\n   next_obs: th.Tensor\n)\n

Update the critic network.

Args

  • obs (th.Tensor) : Observations.
  • actions (th.Tensor) : Actions.
  • rewards (th.Tensor) : Rewards.
  • discount (th.Tensor) : Discounts.
  • next_obs (th.Tensor) : Next observations.

Returns

None.

"},{"location":"api_docs/agent/drqv2/#update_actor","title":".update_actor","text":"

source

.update_actor(\n   obs: th.Tensor\n)\n

Update the actor network.

Args

  • obs (th.Tensor) : Observations.

Returns

None.

"},{"location":"api_docs/agent/impala/","title":"IMPALA","text":""},{"location":"api_docs/agent/impala/#impala","title":"IMPALA","text":"

source

IMPALA(\n   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,\n   device: str = 'cpu', num_steps: int = 80, num_actors: int = 45, num_learners: int = 4,\n   num_storages: int = 60, feature_dim: int = 512, batch_size: int = 4, lr: float = 0.0004,\n   eps: float = 0.01, hidden_dim: int = 512, use_lstm: bool = False, ent_coef: float = 0.01,\n   baseline_coef: float = 0.5, max_grad_norm: float = 40, discount: float = 0.99,\n   init_fn: str = 'identity'\n)\n

Importance Weighted Actor-Learner Architecture (IMPALA). Based on: https://github.com/facebookresearch/torchbeast/blob/main/torchbeast/monobeast.py

Args

  • env (VecEnv) : Vectorized environments for training.
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • tag (str) : An experiment tag.
  • seed (int) : Random seed for reproduction.
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • num_steps (int) : The sample length per rollout.
  • num_actors (int) : Number of actors.
  • num_learners (int) : Number of learners.
  • num_storages (int) : Number of storages.
  • feature_dim (int) : Number of features extracted by the encoder.
  • batch_size (int) : Number of samples per batch to load.
  • lr (float) : The learning rate.
  • eps (float) : Term added to the denominator to improve numerical stability.
  • hidden_dim (int) : The size of the hidden layers.
  • use_lstm (bool) : Use LSTM in the policy network or not.
  • ent_coef (float) : Weighting coefficient of entropy bonus.
  • baseline_coef (float) : Weighting coefficient of baseline value loss.
  • max_grad_norm (float) : Maximum norm of gradients.
  • discount (float) : Discount factor.
  • init_fn (str) : Parameters initialization method.

Returns

IMPALA agent instance.

Methods:

"},{"location":"api_docs/agent/impala/#update","title":".update","text":"

source

.update(\n   batch: Dict, lock = threading.Lock()\n)\n

Update the learner model.

Args

  • batch (Batch) : Batch samples.
  • lock (Lock) : Thread lock.

Returns

Training metrics.

"},{"location":"api_docs/agent/ppg/","title":"PPG","text":""},{"location":"api_docs/agent/ppg/#ppg","title":"PPG","text":"

source

PPG(\n   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,\n   device: str = 'cpu', pretraining: bool = False, num_steps: int = 128,\n   feature_dim: int = 512, batch_size: int = 256, lr: float = 0.00025, eps: float = 1e-05,\n   hidden_dim: int = 512, clip_range: float = 0.2, clip_range_vf: float = 0.2,\n   vf_coef: float = 0.5, ent_coef: float = 0.01, max_grad_norm: float = 0.5,\n   policy_epochs: int = 32, aux_epochs: int = 6, kl_coef: float = 1.0,\n   num_aux_mini_batch: int = 4, num_aux_grad_accum: int = 1, discount: float = 0.999,\n   init_fn: str = 'xavier_uniform'\n)\n

Phasic Policy Gradient (PPG). Based on: https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ppg_procgen.py

Args

  • env (VecEnv) : Vectorized environments for training.
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • tag (str) : An experiment tag.
  • seed (int) : Random seed for reproduction.
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • pretraining (bool) : Turn on the pre-training mode.
  • num_steps (int) : The sample length per rollout.
  • feature_dim (int) : Number of features extracted by the encoder.
  • batch_size (int) : Number of samples per batch to load.
  • lr (float) : The learning rate.
  • eps (float) : Term added to the denominator to improve numerical stability.
  • hidden_dim (int) : The size of the hidden layers.
  • clip_range (float) : Clipping parameter.
  • clip_range_vf (float) : Clipping parameter for the value function.
  • vf_coef (float) : Weighting coefficient of value loss.
  • ent_coef (float) : Weighting coefficient of entropy bonus.
  • max_grad_norm (float) : Maximum norm of gradients.
  • policy_epochs (int) : Number of iterations in the policy phase.
  • aux_epochs (int) : Number of iterations in the auxiliary phase.
  • kl_coef (float) : Weighting coefficient of divergence loss.
  • num_aux_grad_accum (int) : Number of gradient accumulation for auxiliary phase update.
  • discount (float) : Discount factor.
  • init_fn (str) : Parameters initialization method.
  • num_aux_mini_batch (int) : Number of mini-batches in the auxiliary phase.

Returns

PPG agent instance.

Methods:

"},{"location":"api_docs/agent/ppg/#update","title":".update","text":"

source

.update()\n

Update function that returns training metrics such as policy loss, value loss, etc.

"},{"location":"api_docs/agent/legacy/a2c/","title":"A2C","text":""},{"location":"api_docs/agent/legacy/a2c/#a2c","title":"A2C","text":"

source

A2C(\n   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,\n   device: str = 'cpu', pretraining: bool = False, num_steps: int = 128,\n   feature_dim: int = 512, batch_size: int = 256, lr: float = 0.00025, eps: float = 1e-05,\n   hidden_dim: int = 512, n_epochs: int = 4, vf_coef: float = 0.5, ent_coef: float = 0.01,\n   max_grad_norm: float = 0.5, discount: float = 0.99, init_fn: str = 'orthogonal'\n)\n

Advantage Actor-Critic (A2C) agent. Based on: https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail

Args

  • env (VecEnv) : Vectorized environments for training.
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • tag (str) : An experiment tag.
  • seed (int) : Random seed for reproduction.
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • pretraining (bool) : Turn on the pre-training mode.
  • num_steps (int) : The sample length per rollout.
  • feature_dim (int) : Number of features extracted by the encoder.
  • batch_size (int) : Number of samples per batch to load.
  • lr (float) : The learning rate.
  • eps (float) : Term added to the denominator to improve numerical stability.
  • hidden_dim (int) : The size of the hidden layers.
  • n_epochs (int) : Times of updating the policy.
  • vf_coef (float) : Weighting coefficient of value loss.
  • ent_coef (float) : Weighting coefficient of entropy bonus.
  • max_grad_norm (float) : Maximum norm of gradients.
  • discount (float) : Discount factor.
  • init_fn (str) : Parameters initialization method.

Returns

A2C agent instance.

Methods:

"},{"location":"api_docs/agent/legacy/a2c/#update","title":".update","text":"

source

.update()\n

Update function that returns training metrics such as policy loss, value loss, etc.

"},{"location":"api_docs/agent/legacy/ddpg/","title":"DDPG","text":""},{"location":"api_docs/agent/legacy/ddpg/#ddpg","title":"DDPG","text":"

source

DDPG(\n   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,\n   device: str = 'cpu', pretraining: bool = False, num_init_steps: int = 2000,\n   storage_size: int = 1000000, feature_dim: int = 50, batch_size: int = 256,\n   lr: float = 0.0001, eps: float = 1e-08, hidden_dim: int = 1024,\n   critic_target_tau: float = 0.01, update_every_steps: int = 2, discount: float = 0.99,\n   stddev_clip: float = 0.3, init_fn: str = 'orthogonal'\n)\n

Deep Deterministic Policy Gradient (DDPG) agent.

Args

  • env (VecEnv) : Vectorized environments for training.
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • tag (str) : An experiment tag.
  • seed (int) : Random seed for reproduction.
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • pretraining (bool) : Turn on the pre-training mode.
  • num_init_steps (int) : Number of initial exploration steps.
  • storage_size (int) : The capacity of the storage.
  • feature_dim (int) : Number of features extracted by the encoder.
  • batch_size (int) : Number of samples per batch to load.
  • lr (float) : The learning rate.
  • eps (float) : Term added to the denominator to improve numerical stability.
  • hidden_dim (int) : The size of the hidden layers.
  • critic_target_tau (float) : The critic Q-function soft-update rate.
  • update_every_steps (int) : The agent update frequency.
  • discount (float) : Discount factor.
  • stddev_clip (float) : The exploration std clip range.
  • init_fn (str) : Parameters initialization method.

Returns

DDPG agent instance.

Methods:

"},{"location":"api_docs/agent/legacy/ddpg/#update","title":".update","text":"

source

.update()\n

Update the agent and return training metrics such as actor loss, critic loss, etc.

"},{"location":"api_docs/agent/legacy/ddpg/#update_critic","title":".update_critic","text":"

source

.update_critic(\n   obs: th.Tensor, actions: th.Tensor, rewards: th.Tensor, terminateds: th.Tensor,\n   truncateds: th.Tensor, next_obs: th.Tensor\n)\n

Update the critic network.

Args

  • obs (th.Tensor) : Observations.
  • actions (th.Tensor) : Actions.
  • rewards (th.Tensor) : Rewards.
  • terminateds (th.Tensor) : Terminateds.
  • truncateds (th.Tensor) : Truncateds.
  • next_obs (th.Tensor) : Next observations.

Returns

None.

"},{"location":"api_docs/agent/legacy/ddpg/#update_actor","title":".update_actor","text":"

source

.update_actor(\n   obs: th.Tensor\n)\n

Update the actor network.

Args

  • obs (th.Tensor) : Observations.

Returns

None.
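
A minimal training sketch for a continuous-control task, assuming DDPG is exported from rllte.agent and make_dmc_env from rllte.env (import paths and step counts are illustrative):

# train_ddpg.py -- illustrative sketch
from rllte.agent import DDPG          # assumed export path
from rllte.env import make_dmc_env    # assumed export path

if __name__ == "__main__":
    device = "cpu"
    # state-based DeepMind Control Suite task with continuous actions
    env = make_dmc_env(env_id="cartpole_balance", num_envs=1, device=device, seed=1)
    eval_env = make_dmc_env(env_id="cartpole_balance", num_envs=1, device=device, seed=1)
    agent = DDPG(env=env, eval_env=eval_env, tag="ddpg_dmc_state", device=device)
    agent.train(num_train_steps=100000, log_interval=1000)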

"},{"location":"api_docs/agent/legacy/dqn/","title":"DQN","text":""},{"location":"api_docs/agent/legacy/dqn/#dqn","title":"DQN","text":"

source

DQN(\n   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,\n   device: str = 'cpu', pretraining: bool = False, num_init_steps: int = 2000,\n   storage_size: int = 10000, feature_dim: int = 50, batch_size: int = 32,\n   lr: float = 0.001, eps: float = 1e-08, hidden_dim: int = 1024, tau: float = 1.0,\n   update_every_steps: int = 4, target_update_freq: int = 1000, discount: float = 0.99,\n   init_fn: str = 'orthogonal'\n)\n

Deep Q-Network (DQN) agent.

Args

  • env (VecEnv) : Vectorized environments for training.
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • tag (str) : An experiment tag.
  • seed (int) : Random seed for reproduction.
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • pretraining (bool) : Turn on the pre-training mode.
  • num_init_steps (int) : Number of initial exploration steps.
  • storage_size (int) : The capacity of the storage.
  • feature_dim (int) : Number of features extracted by the encoder.
  • batch_size (int) : Number of samples per batch to load.
  • lr (float) : The learning rate.
  • eps (float) : Term added to the denominator to improve numerical stability.
  • hidden_dim (int) : The size of the hidden layers.
  • tau (float) : The Q-function soft-update rate.
  • update_every_steps (int) : The update frequency of the policy.
  • target_update_freq (int) : The frequency of target Q-network update.
  • discount (float) : Discount factor.
  • init_fn (str) : Parameters initialization method.

Returns

DQN agent instance.

Methods:

"},{"location":"api_docs/agent/legacy/dqn/#update","title":".update","text":"

source

.update()\n

Update the agent and return training metrics such as actor loss, critic loss, etc.
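
A minimal training sketch, assuming DQN is exported from rllte.agent and make_atari_env from rllte.env (paths and step counts are illustrative):

# train_dqn.py -- illustrative sketch
from rllte.agent import DQN           # assumed export path
from rllte.env import make_atari_env  # assumed export path

if __name__ == "__main__":
    device = "cpu"
    # a single Atari environment with a discrete action space
    env = make_atari_env(env_id="Alien-v5", num_envs=1, device=device, seed=1)
    # the defaults above (storage_size=10000, batch_size=32, ...) are kept
    agent = DQN(env=env, tag="dqn_atari", device=device)
    agent.train(num_train_steps=500000, log_interval=1000)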

"},{"location":"api_docs/agent/legacy/ppo/","title":"PPO","text":""},{"location":"api_docs/agent/legacy/ppo/#ppo","title":"PPO","text":"

source

PPO(\n   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,\n   device: str = 'cpu', pretraining: bool = False, num_steps: int = 128,\n   feature_dim: int = 512, batch_size: int = 256, lr: float = 0.00025, eps: float = 1e-05,\n   hidden_dim: int = 512, clip_range: float = 0.1, clip_range_vf: Optional[float] = 0.1,\n   n_epochs: int = 4, vf_coef: float = 0.5, ent_coef: float = 0.01,\n   max_grad_norm: float = 0.5, discount: float = 0.999, init_fn: str = 'orthogonal'\n)\n

Proximal Policy Optimization (PPO) agent. Based on: https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail

Args

  • env (VecEnv) : Vectorized environments for training.
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • tag (str) : An experiment tag.
  • seed (int) : Random seed for reproduction.
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • pretraining (bool) : Turn on the pre-training mode.
  • num_steps (int) : The sample length of per rollout.
  • feature_dim (int) : Number of features extracted by the encoder.
  • batch_size (int) : Number of samples per batch to load.
  • lr (float) : The learning rate.
  • eps (float) : Term added to the denominator to improve numerical stability.
  • hidden_dim (int) : The size of the hidden layers.
  • clip_range (float) : Clipping parameter.
  • clip_range_vf (Optional[float]) : Clipping parameter for the value function.
  • n_epochs (int) : Times of updating the policy.
  • vf_coef (float) : Weighting coefficient of value loss.
  • ent_coef (float) : Weighting coefficient of entropy bonus.
  • max_grad_norm (float) : Maximum norm of gradients.
  • discount (float) : Discount factor.
  • init_fn (str) : Parameters initialization method.

Returns

PPO agent instance.

Methods:

"},{"location":"api_docs/agent/legacy/ppo/#update","title":".update","text":"

source

.update()\n

Update function that returns training metrics such as policy loss, value loss, etc.
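
A minimal training sketch on Procgen, assuming PPO is exported from rllte.agent and make_procgen_env from rllte.env, and that the Procgen extras are installed (paths and step counts are illustrative):

# train_ppo.py -- illustrative sketch
from rllte.agent import PPO              # assumed export path
from rllte.env import make_procgen_env   # assumed export path

if __name__ == "__main__":
    device = "cuda:0"
    # 64 vectorized Procgen environments on the "easy" distribution
    env = make_procgen_env(env_id="bigfish", num_envs=64, device=device, seed=1)
    agent = PPO(env=env, tag="ppo_procgen", device=device)
    # anneal_lr is accepted by the on-policy trainer (see OnPolicyAgent.train below)
    agent.train(num_train_steps=2500000, log_interval=100, anneal_lr=True)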

"},{"location":"api_docs/agent/legacy/sac/","title":"SAC","text":""},{"location":"api_docs/agent/legacy/sac/#sac","title":"SAC","text":"

source

SAC(\n   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,\n   device: str = 'cpu', pretraining: bool = False, num_init_steps: int = 5000,\n   storage_size: int = 10000000, feature_dim: int = 50, batch_size: int = 1024,\n   lr: float = 0.0001, eps: float = 1e-08, hidden_dim: int = 1024,\n   actor_update_freq: int = 1, critic_target_tau: float = 0.005,\n   critic_target_update_freq: int = 2, log_std_range: Tuple[float, ...] = (-5.0, 2),\n   betas: Tuple[float, float] = (0.9, 0.999), temperature: float = 0.1,\n   fixed_temperature: bool = False, discount: float = 0.99, init_fn: str = 'orthogonal'\n)\n

Soft Actor-Critic (SAC) agent. Based on: https://github.com/denisyarats/pytorch_sac

Args

  • env (VecEnv) : Vectorized environments for training.
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • tag (str) : An experiment tag.
  • seed (int) : Random seed for reproduction.
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • pretraining (bool) : Turn on the pre-training mode.
  • num_init_steps (int) : Number of initial exploration steps.
  • storage_size (int) : The capacity of the storage.
  • feature_dim (int) : Number of features extracted by the encoder.
  • batch_size (int) : Number of samples per batch to load.
  • lr (float) : The learning rate.
  • eps (float) : Term added to the denominator to improve numerical stability.
  • hidden_dim (int) : The size of the hidden layers.
  • actor_update_freq (int) : The actor update frequency (in steps).
  • critic_target_tau (float) : The critic Q-function soft-update rate.
  • critic_target_update_freq (int) : The critic Q-function soft-update frequency (in steps).
  • log_std_range (Tuple[float]) : Range of std for sampling actions.
  • betas (Tuple[float]) : Coefficients used for computing running averages of gradient and its square.
  • temperature (float) : Initial temperature coefficient.
  • fixed_temperature (bool) : Fixed temperature or not.
  • discount (float) : Discount factor.
  • init_fn (str) : Parameters initialization method.

Returns

SAC agent instance.

Methods:

"},{"location":"api_docs/agent/legacy/sac/#alpha","title":".alpha","text":"

source

.alpha()\n

Get the temperature coefficient.

"},{"location":"api_docs/agent/legacy/sac/#update","title":".update","text":"

source

.update()\n

Update the agent and return training metrics such as actor loss, critic loss, etc.

"},{"location":"api_docs/agent/legacy/sac/#update_critic","title":".update_critic","text":"

source

.update_critic(\n   obs: th.Tensor, actions: th.Tensor, rewards: th.Tensor, terminateds: th.Tensor,\n   truncateds: th.Tensor, next_obs: th.Tensor\n)\n

Update the critic network.

Args

  • obs (th.Tensor) : Observations.
  • actions (th.Tensor) : Actions.
  • rewards (th.Tensor) : Rewards.
  • terminateds (th.Tensor) : Terminateds.
  • truncateds (th.Tensor) : Truncateds.
  • next_obs (th.Tensor) : Next observations.

Returns

None.

"},{"location":"api_docs/agent/legacy/sac/#update_actor_and_alpha","title":".update_actor_and_alpha","text":"

source

.update_actor_and_alpha(\n   obs: th.Tensor\n)\n

Update the actor network and temperature.

Args

  • obs (th.Tensor) : Observations.

Returns

None.
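
A minimal training sketch for state-based continuous control, assuming SAC is exported from rllte.agent and make_dmc_env from rllte.env (paths and step counts are illustrative):

# train_sac.py -- illustrative sketch
from rllte.agent import SAC           # assumed export path
from rllte.env import make_dmc_env    # assumed export path

if __name__ == "__main__":
    device = "cpu"
    env = make_dmc_env(env_id="cheetah_run", num_envs=1, device=device, seed=1)
    eval_env = make_dmc_env(env_id="cheetah_run", num_envs=1, device=device, seed=1)
    # the temperature is tuned automatically unless fixed_temperature=True
    agent = SAC(env=env, eval_env=eval_env, tag="sac_dmc_state", device=device)
    agent.train(num_train_steps=250000, log_interval=1000)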

"},{"location":"api_docs/agent/legacy/sacd/","title":"SACDiscrete","text":""},{"location":"api_docs/agent/legacy/sacd/#sacdiscrete","title":"SACDiscrete","text":"

source

SACDiscrete(\n   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,\n   device: str = 'cpu', pretraining: bool = False, num_init_steps: int = 10000,\n   storage_size: int = 100000, feature_dim: int = 50, batch_size: int = 256,\n   lr: float = 0.0005, eps: float = 1e-08, hidden_dim: int = 256,\n   actor_update_freq: int = 1, critic_target_tau: float = 0.01,\n   critic_target_update_freq: int = 4, betas: Tuple[float, float] = (0.9, 0.999),\n   temperature: float = 0.0, fixed_temperature: bool = False,\n   target_entropy_ratio: float = 0.98, discount: float = 0.99,\n   init_fn: str = 'orthogonal'\n)\n

Soft Actor-Critic Discrete (SAC-Discrete) agent.

Args

  • env (VecEnv) : Vectorized environments for training.
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • tag (str) : An experiment tag.
  • seed (int) : Random seed for reproduction.
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • pretraining (bool) : Turn on the pre-training mode.
  • num_init_steps (int) : Number of initial exploration steps.
  • storage_size (int) : The capacity of the storage.
  • feature_dim (int) : Number of features extracted by the encoder.
  • batch_size (int) : Number of samples per batch to load.
  • lr (float) : The learning rate.
  • eps (float) : Term added to the denominator to improve numerical stability.
  • hidden_dim (int) : The size of the hidden layers.
  • actor_update_freq (int) : The actor update frequency (in steps).
  • critic_target_tau (float) : The critic Q-function soft-update rate.
  • critic_target_update_freq (int) : The critic Q-function soft-update frequency (in steps).
  • betas (Tuple[float]) : Coefficients used for computing running averages of gradient and its square.
  • temperature (float) : Initial temperature coefficient.
  • fixed_temperature (bool) : Fixed temperature or not.
  • target_entropy_ratio (float) : Target entropy ratio.
  • discount (float) : Discount factor.
  • init_fn (str) : Parameters initialization method.

Returns

SAC-Discrete agent instance.

Methods:

"},{"location":"api_docs/agent/legacy/sacd/#alpha","title":".alpha","text":"

source

.alpha()\n

Get the temperature coefficient.

"},{"location":"api_docs/agent/legacy/sacd/#update","title":".update","text":"

source

.update()\n

Update the agent and return training metrics such as actor loss, critic loss, etc.

"},{"location":"api_docs/agent/legacy/sacd/#deal_with_zero_probs","title":".deal_with_zero_probs","text":"

source

.deal_with_zero_probs(\n   action_probs: th.Tensor\n)\n

Deal with situation of 0.0 probabilities.

Args

  • action_probs (th.Tensor) : Action probabilities.

Returns

Action probabilities and its log values.

"},{"location":"api_docs/agent/legacy/sacd/#update_critic","title":".update_critic","text":"

source

.update_critic(\n   obs: th.Tensor, actions: th.Tensor, rewards: th.Tensor, terminateds: th.Tensor,\n   truncateds: th.Tensor, next_obs: th.Tensor\n)\n

Update the critic network.

Args

  • obs (th.Tensor) : Observations.
  • actions (th.Tensor) : Actions.
  • rewards (th.Tensor) : Rewards.
  • terminateds (th.Tensor) : Terminateds.
  • truncateds (th.Tensor) : Truncateds.
  • next_obs (th.Tensor) : Next observations.

Returns

None.

"},{"location":"api_docs/agent/legacy/sacd/#update_actor_and_alpha","title":".update_actor_and_alpha","text":"

source

.update_actor_and_alpha(\n   obs: th.Tensor\n)\n

Update the actor network and temperature.

Args

  • obs (th.Tensor) : Observations.

Returns

None.

"},{"location":"api_docs/common/initialization/","title":"Initialization","text":""},{"location":"api_docs/common/initialization/#get_init_fn","title":"get_init_fn","text":"

source

.get_init_fn(\n   method: str = 'orthogonal'\n)\n

Returns a network initialization function.

Args

  • method (str) : Initialization method name.

Returns

Initialization function.

"},{"location":"api_docs/common/initialization/#_xavier_normal","title":"_xavier_normal","text":"

source

._xavier_normal(\n   m\n)\n

Xavier normal initialization.

"},{"location":"api_docs/common/initialization/#_xavier_uniform","title":"_xavier_uniform","text":"

source

._xavier_uniform(\n   m\n)\n

Xavier uniform initialization.

"},{"location":"api_docs/common/initialization/#_orthogonal","title":"_orthogonal","text":"

source

._orthogonal(\n   m\n)\n

Orthogonal initialization.

"},{"location":"api_docs/common/initialization/#_identity","title":"_identity","text":"

source

._identity(\n   m\n)\n

Identity initialization.
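
A short sketch of how these helpers are typically combined with torch modules; the import path rllte.common.initialization is inferred from the page location, and applying the returned callable via .apply is an assumption based on the per-module signatures above:

import torch.nn as nn
from rllte.common.initialization import get_init_fn  # assumed import path

# build a small network and initialize its parameters orthogonally
net = nn.Sequential(nn.Linear(8, 64), nn.ReLU(), nn.Linear(64, 2))
init_fn = get_init_fn(method="orthogonal")
net.apply(init_fn)  # applies the returned `fn(m)` callable to every submodule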

"},{"location":"api_docs/common/logger/","title":"Logger","text":""},{"location":"api_docs/common/logger/#logger","title":"Logger","text":"

source

Logger(\n   log_dir: Path\n)\n

The logger class.

Args

  • log_dir (Path) : The logging location.

Returns

Logger instance.

Methods:

"},{"location":"api_docs/common/logger/#record","title":".record","text":"

source

.record(\n   key: str, value: Any\n)\n

Record the metric.

Args

  • key (str) : The key of the metric.
  • value (Any) : The value of the metric.

Returns

None.

"},{"location":"api_docs/common/logger/#parse_train_msg","title":".parse_train_msg","text":"

source

.parse_train_msg(\n   msg: Dict\n)\n

Parse the training message.

Args

  • msg (Dict) : The training message.

Returns

The formatted string.

"},{"location":"api_docs/common/logger/#parse_eval_msg","title":".parse_eval_msg","text":"

source

.parse_eval_msg(\n   msg: Dict\n)\n

Parse the evaluation message.

Args

  • msg (Dict) : The evaluation message.

Returns

The formatted string.

"},{"location":"api_docs/common/logger/#time_stamp","title":".time_stamp","text":"

source

.time_stamp()\n

Return the current time stamp.

"},{"location":"api_docs/common/logger/#info","title":".info","text":"

source

.info(\n   msg: str\n)\n

Output msg with 'info' level.

Args

  • msg (str) : Message to be printed.

Returns

None.

"},{"location":"api_docs/common/logger/#debug","title":".debug","text":"

source

.debug(\n   msg: str\n)\n

Output msg with 'debug' level.

Args

  • msg (str) : Message to be printed.

Returns

None.

"},{"location":"api_docs/common/logger/#error","title":".error","text":"

source

.error(\n   msg: str\n)\n

Output msg with 'error' level.

Args

  • msg (str) : Message to be printed.

Returns

None.

"},{"location":"api_docs/common/logger/#train","title":".train","text":"

source

.train(\n   msg: Dict\n)\n

Output msg with 'train' level.

Args

  • msg (Dict) : Message to be printed.

Returns

None.

"},{"location":"api_docs/common/logger/#eval","title":".eval","text":"

source

.eval(\n   msg: Dict\n)\n

Output msg with 'eval' level.

Args

  • msg (Dict) : Message to be printed.

Returns

None.
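
A minimal standalone usage sketch; the import path rllte.common.logger is inferred from the page location, and the exact output format depends on parse_train_msg/parse_eval_msg:

from pathlib import Path
from rllte.common.logger import Logger  # assumed import path

log_dir = Path("./logs/demo")
log_dir.mkdir(parents=True, exist_ok=True)  # make sure the logging location exists

logger = Logger(log_dir=log_dir)
logger.info("Training started.")
logger.record(key="step", value=1000)  # record a scalar metric
logger.debug(f"Current time stamp: {logger.time_stamp()}")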

"},{"location":"api_docs/common/preprocessing/","title":"Preprocessing","text":""},{"location":"api_docs/common/preprocessing/#process_observation_space","title":"process_observation_space","text":"

source

.process_observation_space(\n   observation_space: gym.Space\n)\n

Process the observation space.

Args

  • observation_space (gym.Space) : Observation space.

Returns

Information of the observation space.

"},{"location":"api_docs/common/preprocessing/#process_action_space","title":"process_action_space","text":"

source

.process_action_space(\n   action_space: gym.Space\n)\n

Get the dimension of the action space.

Args

  • action_space (gym.Space) : Action space.

Returns

Information of the action space.

"},{"location":"api_docs/common/preprocessing/#get_flattened_obs_dim","title":"get_flattened_obs_dim","text":"

source

.get_flattened_obs_dim(\n   observation_space: spaces.Space\n)\n

Get the dimension of the observation space when flattened. It does not apply to image observation space. Borrowed from: https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/preprocessing.py#L169

Args

  • observation_space (spaces.Space) : Observation space.

Returns

The dimension of the observation space when flattened.

"},{"location":"api_docs/common/preprocessing/#is_image_space_channels_first","title":"is_image_space_channels_first","text":"

source

.is_image_space_channels_first(\n   observation_space: spaces.Box\n)\n

Check if an image observation space (see is_image_space) is channels-first (CxHxW, True) or channels-last (HxWxC, False). Uses the heuristic that the channel dimension is the smallest of the three; if the second dimension is the smallest, an exception is raised (not supported).

Borrowed from: https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/preprocessing.py#L10

Args

  • observation_space (spaces.Box) : Observation space.

Returns

True if observation space is channels-first image, False if channels-last.

"},{"location":"api_docs/common/preprocessing/#is_image_space","title":"is_image_space","text":"

source

.is_image_space(\n   observation_space: gym.Space, check_channels: bool = False,\n   normalized_image: bool = False\n)\n

Check if an observation space has the shape, limits, and dtype of a valid image. The check is conservative, so it returns False if there is any doubt. Valid images: RGB, RGBD, GrayScale with values in [0, 255]

Borrowed from: https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/preprocessing.py#L27

Args

  • observation_space (gym.Space) : Observation space.
  • check_channels (bool) : Whether or not to check the number of channels, e.g., with frame-stacking, the observation space may have more channels than expected.
  • normalized_image (bool) : Whether to assume that the image is already normalized or not (this disables dtype and bounds checks): when True, it only checks that the space is a Box and has 3 dimensions. Otherwise, it checks that it has expected dtype (uint8) and bounds (values in [0, 255]).

Returns

True if the observation space is a valid image space, False otherwise.

"},{"location":"api_docs/common/preprocessing/#preprocess_obs","title":"preprocess_obs","text":"

source

.preprocess_obs(\n   obs: th.Tensor, observation_space: gym.Space\n)\n

Observations preprocessing function. Borrowed from: https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/preprocessing.py#L92

Args

  • obs (th.Tensor) : Observation.
  • observation_space (gym.Space) : Observation space.

Returns

The preprocessed observations.
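
A small sketch of how these helpers fit together; the import path rllte.common.preprocessing is inferred from the page location:

import gymnasium as gym
import numpy as np
import torch as th
from rllte.common.preprocessing import is_image_space, preprocess_obs  # assumed import path

# an 84x84, channels-first, stacked-frame image observation space
obs_space = gym.spaces.Box(low=0, high=255, shape=(4, 84, 84), dtype=np.uint8)
print(is_image_space(obs_space))  # True: Box, uint8, values in [0, 255]

# preprocess a batch of raw observations before feeding them to an encoder
obs = th.randint(0, 256, size=(8, 4, 84, 84), dtype=th.uint8)
processed = preprocess_obs(obs, obs_space)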

"},{"location":"api_docs/common/timer/","title":"Timer","text":""},{"location":"api_docs/common/timer/#timer","title":"Timer","text":"

source

The timer (calculagraph) class.

Methods:

"},{"location":"api_docs/common/timer/#reset","title":".reset","text":"

source

.reset()\n

Reset the calculagraph.

"},{"location":"api_docs/common/timer/#total_time","title":".total_time","text":"

source

.total_time()\n

Get the total time.

"},{"location":"api_docs/common/prototype/base_agent/","title":"BaseAgent","text":""},{"location":"api_docs/common/prototype/base_agent/#baseagent","title":"BaseAgent","text":"

source

BaseAgent(\n   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,\n   device: str = 'auto', pretraining: bool = False\n)\n

Base class of the agent.

Args

  • env (VecEnv) : Vectorized environments for training.
  • eval_env (VecEnv) : Vectorized environments for evaluation.
  • tag (str) : An experiment tag.
  • seed (int) : Random seed for reproduction.
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • pretraining (bool) : Turn on the pre-training mode or not.

Returns

Base agent instance.

Methods:

"},{"location":"api_docs/common/prototype/base_agent/#freeze","title":".freeze","text":"

source

.freeze(\n   **kwargs\n)\n

Freeze the agent and get ready for training.

"},{"location":"api_docs/common/prototype/base_agent/#check","title":".check","text":"

source

.check()\n

Check the compatibility of selected modules.

"},{"location":"api_docs/common/prototype/base_agent/#set","title":".set","text":"

source

.set(\n   encoder: Optional[Encoder] = None, policy: Optional[Policy] = None,\n   storage: Optional[Storage] = None, distribution: Optional[Distribution] = None,\n   augmentation: Optional[Augmentation] = None,\n   reward: Optional[IntrinsicRewardModule] = None\n)\n

Set a module for the agent.

Args

  • encoder (Optional[Encoder]) : An encoder of rllte.xploit.encoder or a custom encoder.
  • policy (Optional[Policy]) : A policy of rllte.xploit.policy or a custom policy.
  • storage (Optional[Storage]) : A storage of rllte.xploit.storage or a custom storage.
  • distribution (Optional[Distribution]) : A distribution of rllte.xplore.distribution or a custom distribution.
  • augmentation (Optional[Augmentation]) : An augmentation of rllte.xplore.augmentation or a custom augmentation.
  • reward (Optional[IntrinsicRewardModule]) : A reward of rllte.xplore.reward or a custom reward.

Returns

None.
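
For illustration, a module-replacement sketch in which a built-in encoder is swapped in before training. The import paths, the use of env.observation_space, and the encoder constructor arguments (mirroring BaseEncoder below) are assumptions; consult the module pages for the exact interfaces.

from rllte.agent import PPO                  # assumed export path
from rllte.env import make_atari_env         # assumed export path
from rllte.xploit.encoder import MnihCnnEncoder

device = "cpu"
env = make_atari_env(env_id="Alien-v5", num_envs=8, device=device, seed=1)
agent = PPO(env=env, tag="ppo_custom_encoder", device=device)

# replace the default encoder (feature_dim matches the agent default)
encoder = MnihCnnEncoder(observation_space=env.observation_space, feature_dim=512)
agent.set(encoder=encoder)
agent.train(num_train_steps=100000)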

"},{"location":"api_docs/common/prototype/base_agent/#mode","title":".mode","text":"

source

.mode(\n   training: bool = True\n)\n

Set the training mode.

Args

  • training (bool) : True (training) or False (evaluation).

Returns

None.

"},{"location":"api_docs/common/prototype/base_agent/#save","title":".save","text":"

source

.save()\n

Save the agent.

"},{"location":"api_docs/common/prototype/base_agent/#update","title":".update","text":"

source

.update(\n   *args, **kwargs\n)\n

Update function of the agent.

"},{"location":"api_docs/common/prototype/base_agent/#train","title":".train","text":"

source

.train(\n   num_train_steps: int, init_model_path: Optional[str], log_interval: int,\n   eval_interval: int, save_interval: int, num_eval_episodes: int, th_compile: bool\n)\n

Training function.

Args

  • num_train_steps (int) : The number of training steps.
  • init_model_path (Optional[str]) : The path of the initial model.
  • log_interval (int) : The interval of logging.
  • eval_interval (int) : The interval of evaluation.
  • save_interval (int) : The interval of saving model.
  • num_eval_episodes (int) : The number of evaluation episodes.
  • th_compile (bool) : Whether to use th.compile or not.

Returns

None.

"},{"location":"api_docs/common/prototype/base_agent/#eval","title":".eval","text":"

source

.eval(\n   num_eval_episodes: int\n)\n

Evaluation function.

Args

  • num_eval_episodes (int) : The number of evaluation episodes.

Returns

The evaluation results.

"},{"location":"api_docs/common/prototype/base_augmentation/","title":"BaseAugmentation","text":""},{"location":"api_docs/common/prototype/base_augmentation/#baseaugmentation","title":"BaseAugmentation","text":"

source

Base class of augmentation.

"},{"location":"api_docs/common/prototype/base_distribution/","title":"BaseDistribution","text":""},{"location":"api_docs/common/prototype/base_distribution/#basedistribution","title":"BaseDistribution","text":"

source

BaseDistribution(\n   *args, **kwargs\n)\n

Abstract base class of distributions. In rllte, the action noise is implemented as a distribution.

Methods:

"},{"location":"api_docs/common/prototype/base_distribution/#sample","title":".sample","text":"

source

.sample(\n   *args, **kwargs\n)\n

Generate samples.

"},{"location":"api_docs/common/prototype/base_encoder/","title":"BaseEncoder","text":""},{"location":"api_docs/common/prototype/base_encoder/#baseencoder","title":"BaseEncoder","text":"

source

BaseEncoder(\n   observation_space: gym.Space, feature_dim: int = 0\n)\n

Base class that represents a features extractor.

Args

  • observation_space (gym.Space) : The observation space.
  • feature_dim (int) : Number of features extracted.

Returns

The base encoder instance.

"},{"location":"api_docs/common/prototype/base_policy/","title":"BasePolicy","text":""},{"location":"api_docs/common/prototype/base_policy/#basepolicy","title":"BasePolicy","text":"

source

BasePolicy(\n   observation_space: gym.Space, action_space: gym.Space, feature_dim: int,\n   hidden_dim: int, opt_class: Type[th.optim.Optimizer] = th.optim.Adam,\n   opt_kwargs: Optional[Dict[str, Any]] = None, init_fn: str = 'orthogonal'\n)\n

Base class for all policies.

Args

  • observation_space (gym.Space) : Observation space.
  • action_space (gym.Space) : Action space.
  • feature_dim (int) : Number of features accepted.
  • hidden_dim (int) : Number of units per hidden layer.
  • opt_class (Type[th.optim.Optimizer]) : Optimizer class.
  • opt_kwargs (Dict[str, Any]) : Optimizer keyword arguments.
  • init_fn (str) : Parameters initialization method.

Returns

Base policy instance.

Methods:

"},{"location":"api_docs/common/prototype/base_policy/#optimizers","title":".optimizers","text":"

source

.optimizers()\n

Get optimizers.

"},{"location":"api_docs/common/prototype/base_policy/#describe","title":".describe","text":"

source

.describe()\n

Describe the policy.

"},{"location":"api_docs/common/prototype/base_policy/#describe_1","title":".describe","text":"

source

.describe()\n

Describe the policy.

"},{"location":"api_docs/common/prototype/base_policy/#forward","title":".forward","text":"

source

.forward(\n   obs: th.Tensor, training: bool = True\n)\n

Forward method.

Args

  • obs (th.Tensor) : Observation from the environment.
  • training (bool) : Whether the agent is being trained or not.

Returns

Sampled actions, estimated values, ..., depends on specific algorithms.

"},{"location":"api_docs/common/prototype/base_policy/#freeze","title":".freeze","text":"

source

.freeze(\n   *args, **kwargs\n)\n

Freeze the policy and start training.

"},{"location":"api_docs/common/prototype/base_policy/#save","title":".save","text":"

source

.save(\n   *args, **kwargs\n)\n

Save models.

"},{"location":"api_docs/common/prototype/base_policy/#load","title":".load","text":"

source

.load(\n   path: str, device: th.device\n)\n

Load initial parameters.

Args

  • path (str) : Import path.
  • device (th.device) : Device to use.

Returns

None.

"},{"location":"api_docs/common/prototype/base_reward/","title":"BaseIntrinsicRewardModule","text":""},{"location":"api_docs/common/prototype/base_reward/#baseintrinsicrewardmodule","title":"BaseIntrinsicRewardModule","text":"

source

BaseIntrinsicRewardModule(\n   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',\n   beta: float = 0.05, kappa: float = 2.5e-05\n)\n

Base class of intrinsic reward module.

Args

  • observation_space (gym.Space) : The observation space of environment.
  • action_space (gym.Space) : The action space of environment.
  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
  • beta (float) : The initial weighting coefficient of the intrinsic rewards.
  • kappa (float) : The decay rate.

Returns

Instance of the base intrinsic reward module.

Methods:

"},{"location":"api_docs/common/prototype/base_reward/#compute_irs","title":".compute_irs","text":"

source

.compute_irs(\n   samples: Dict, step: int = 0\n)\n

Compute the intrinsic rewards for current samples.

Args

  • samples (Dict) : The collected samples. A python dict like {obs (n_steps, n_envs, *obs_shape), actions (n_steps, n_envs, *action_shape), rewards (n_steps, n_envs), next_obs (n_steps, n_envs, *obs_shape)}.
  • step (int) : The global training step.

Returns

The intrinsic rewards.

"},{"location":"api_docs/common/prototype/base_reward/#update","title":".update","text":"

source

.update(\n   samples: Dict\n)\n

Update the intrinsic reward module if necessary.

Args

  • samples (Dict) : The collected samples. A python dict like {obs (n_steps, n_envs, *obs_shape), actions (n_steps, n_envs, *action_shape), rewards (n_steps, n_envs), next_obs (n_steps, n_envs, *obs_shape)}.

Returns

None.

"},{"location":"api_docs/common/prototype/base_reward/#add","title":".add","text":"

source

.add(\n   samples: Dict\n)\n

Add the samples to the intrinsic reward module if necessary. Used for modules like RE3 that have a storage component.

Args

  • samples (Dict) : The collected samples. A python dict like {obs (n_steps, n_envs, *obs_shape), actions (n_steps, n_envs, *action_shape), rewards (n_steps, n_envs), next_obs (n_steps, n_envs, *obs_shape)}.

Returns

None.

        "},{"location":"api_docs/common/prototype/base_storage/","title":"BaseStorage","text":""},{"location":"api_docs/common/prototype/base_storage/#basestorage","title":"BaseStorage","text":"

        source

        BaseStorage(\n   observation_space: gym.Space, action_space: gym.Space, device: str,\n   storage_size: int, batch_size: int, num_envs: int\n)\n

        Base class of the storage module.

        Args

        • observation_space (gym.Space) : The observation space of environment.
        • action_space (gym.Space) : The action space of environment.
        • device (str) : Device (cpu, cuda, ...) on which the code should be run.
        • storage_size (int) : The size of the storage.
        • batch_size (int) : Batch size of samples.
        • num_envs (int) : The number of parallel environments.

        Returns

        Instance of the base storage.

        Methods:

        "},{"location":"api_docs/common/prototype/base_storage/#to_torch","title":".to_torch","text":"

        source

        .to_torch(\n   x: np.ndarray\n)\n

        Convert numpy array to torch tensor.

        Args

        • x (np.ndarray) : Numpy array.

        Returns

        Torch tensor.

        "},{"location":"api_docs/common/prototype/base_storage/#reset","title":".reset","text":"

        source

        .reset()\n

        Reset the storage.

        "},{"location":"api_docs/common/prototype/base_storage/#add","title":".add","text":"

        source

        .add(\n   *args, **kwargs\n)\n

        Add samples to the storage.

        "},{"location":"api_docs/common/prototype/base_storage/#sample","title":".sample","text":"

        source

        .sample(\n   *args, **kwargs\n)\n

        Sample from the storage.

        "},{"location":"api_docs/common/prototype/base_storage/#update","title":".update","text":"

        source

        .update(\n   *args, **kwargs\n)\n

        Update the storage if necessary.

        "},{"location":"api_docs/common/prototype/distributed_agent/","title":"DistributedAgent","text":""},{"location":"api_docs/common/prototype/distributed_agent/#distributedagent","title":"DistributedAgent","text":"

        source

        DistributedAgent(\n   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,\n   device: str = 'cpu', num_steps: int = 80, num_actors: int = 45, num_learners: int = 4,\n   num_storages: int = 60, **kwargs\n)\n

        Trainer for distributed algorithms.

        Args

        • env (VecEnv) : Vectorized environments for training.
        • eval_env (VecEnv) : Vectorized environments for evaluation.
        • tag (str) : An experiment tag.
        • seed (int) : Random seed for reproduction.
        • device (str) : Device (cpu, cuda, ...) on which the code should be run.
        • pretraining (bool) : Turn on the pre-training mode or not.
        • num_steps (int) : The sample length of per rollout.
        • num_actors (int) : Number of actors.
        • num_learners (int) : Number of learners.
        • num_storages (int) : Number of storages.
        • kwargs : Arbitrary arguments such as batch_size and hidden_dim.

        Returns

        Distributed agent instance.

        Methods:

        "},{"location":"api_docs/common/prototype/distributed_agent/#run","title":".run","text":"

        source

        .run(\n   env: DistributedWrapper, actor_idx: int\n)\n

        Sample function of each actor. Implemented by individual algorithms.

        Args

        • env (DistributedWrapper) : A Gym-like environment wrapped by DistributedWrapper.
        • actor_idx (int) : The index of actor.

        Returns

        None.

        "},{"location":"api_docs/common/prototype/distributed_agent/#update","title":".update","text":"

        source

        .update(\n   *args, **kwargs\n)\n

        Update the agent. Implemented by individual algorithms.

        "},{"location":"api_docs/common/prototype/distributed_agent/#train","title":".train","text":"

        source

        .train(\n   num_train_steps: int, init_model_path: Optional[str] = None, log_interval: int = 1,\n   eval_interval: int = 5000, save_interval: int = 5000, num_eval_episodes: int = 10,\n   th_compile: bool = False\n)\n

        Training function.

        Args

        • num_train_steps (int) : The number of training steps.
        • init_model_path (Optional[str]) : The path of the initial model.
        • log_interval (int) : The interval of logging.
        • eval_interval (int) : The interval of evaluation.
        • save_interval (int) : The interval of saving model.
        • num_eval_episodes (int) : The number of evaluation episodes.
        • th_compile (bool) : Whether to use th.compile or not.

        Returns

        None.

        "},{"location":"api_docs/common/prototype/distributed_agent/#eval","title":".eval","text":"

        source

        .eval(\n   num_eval_episodes: int\n)\n

        Evaluation function.

        Args

        • num_eval_episodes (int) : The number of evaluation episodes.

        Returns

        The evaluation results.

        "},{"location":"api_docs/common/prototype/off_policy_agent/","title":"OffPolicyAgent","text":""},{"location":"api_docs/common/prototype/off_policy_agent/#offpolicyagent","title":"OffPolicyAgent","text":"

        source

        OffPolicyAgent(\n   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,\n   device: str = 'cpu', pretraining: bool = False, num_init_steps: int = 2000, **kwargs\n)\n

        Trainer for off-policy algorithms.

        Args

        • env (VecEnv) : Vectorized environments for training.
        • eval_env (Optional[VecEnv]) : Vectorized environments for evaluation.
        • tag (str) : An experiment tag.
        • seed (int) : Random seed for reproduction.
        • device (str) : Device (cpu, cuda, ...) on which the code should be run.
        • pretraining (bool) : Turn on the pre-training mode or not.
        • num_init_steps (int) : Number of initial exploration steps.
        • kwargs : Arbitrary arguments such as batch_size and hidden_dim.

        Returns

        Off-policy agent instance.

        Methods:

        "},{"location":"api_docs/common/prototype/off_policy_agent/#update","title":".update","text":"

        source

        .update()\n

        Update the agent. Implemented by individual algorithms.

        "},{"location":"api_docs/common/prototype/off_policy_agent/#train","title":".train","text":"

        source

        .train(\n   num_train_steps: int, init_model_path: Optional[str] = None, log_interval: int = 1,\n   eval_interval: int = 5000, save_interval: int = 5000, num_eval_episodes: int = 10,\n   th_compile: bool = False, anneal_lr: bool = False\n)\n

        Training function.

        Args

        • num_train_steps (int) : The number of training steps.
        • init_model_path (Optional[str]) : The path of the initial model.
        • log_interval (int) : The interval of logging.
        • eval_interval (int) : The interval of evaluation.
        • save_interval (int) : The interval of saving model.
        • num_eval_episodes (int) : The number of evaluation episodes.
        • th_compile (bool) : Whether to use th.compile or not.
        • anneal_lr (bool) : Whether to anneal the learning rate or not.

        Returns

        None.

        "},{"location":"api_docs/common/prototype/off_policy_agent/#eval","title":".eval","text":"

        source

        .eval(\n   num_eval_episodes: int\n)\n

        Evaluation function.

        Args

        • num_eval_episodes (int) : The number of evaluation episodes.

        Returns

        The evaluation results.

        "},{"location":"api_docs/common/prototype/on_policy_agent/","title":"OnPolicyAgent","text":""},{"location":"api_docs/common/prototype/on_policy_agent/#onpolicyagent","title":"OnPolicyAgent","text":"

        source

        OnPolicyAgent(\n   env: VecEnv, eval_env: Optional[VecEnv] = None, tag: str = 'default', seed: int = 1,\n   device: str = 'cpu', pretraining: bool = False, num_steps: int = 128\n)\n

        Trainer for on-policy algorithms.

        Args

        • env (VecEnv) : Vectorized environments for training.
        • eval_env (VecEnv) : Vectorized environments for evaluation.
        • tag (str) : An experiment tag.
        • seed (int) : Random seed for reproduction.
        • device (str) : Device (cpu, cuda, ...) on which the code should be run.
        • pretraining (bool) : Turn on the pre-training mode or not.
        • num_steps (int) : The sample length of per rollout.

        Returns

        On-policy agent instance.

        Methods:

        "},{"location":"api_docs/common/prototype/on_policy_agent/#update","title":".update","text":"

        source

        .update()\n

        Update the agent. Implemented by individual algorithms.

        "},{"location":"api_docs/common/prototype/on_policy_agent/#train","title":".train","text":"

        source

        .train(\n   num_train_steps: int, init_model_path: Optional[str] = None, log_interval: int = 1,\n   eval_interval: int = 100, save_interval: int = 100, num_eval_episodes: int = 10,\n   th_compile: bool = True, anneal_lr: bool = False\n)\n

        Training function.

        Args

        • num_train_steps (int) : The number of training steps.
        • init_model_path (Optional[str]) : The path of the initial model.
        • log_interval (int) : The interval of logging.
        • eval_interval (int) : The interval of evaluation.
        • save_interval (int) : The interval of saving model.
        • num_eval_episodes (int) : The number of evaluation episodes.
        • th_compile (bool) : Whether to use th.compile or not.
        • anneal_lr (bool) : Whether to anneal the learning rate or not.

        Returns

        None.

        "},{"location":"api_docs/common/prototype/on_policy_agent/#eval","title":".eval","text":"

        source

        .eval(\n   num_eval_episodes: int\n)\n

        Evaluation function.

        Args

        • num_eval_episodes (int) : The number of evaluation episodes.

        Returns

        The evaluation results.

        "},{"location":"api_docs/env/utils/","title":"make_rllte_env","text":""},{"location":"api_docs/env/utils/#make_rllte_env","title":"make_rllte_env","text":"

        source

        .make_rllte_env(\n   env_id: Union[str, Callable[..., gym.Env]], num_envs: int = 1, seed: int = 1,\n   device: str = 'cpu', asynchronous: bool = True, env_kwargs: Optional[Dict[str,\n   Any]] = None\n)\n

        Create environments that adapt to the rllte engine.

        Args

        • env_id (Union[str, Callable[..., gym.Env]]) : Either the env ID, the env class, or a callable returning an env.
        • num_envs (int) : Number of environments.
        • seed (int) : Random seed.
        • device (str) : Device to convert data.
        • asynchronous (bool) : True for AsyncVectorEnv and False for SyncVectorEnv.
        • env_kwargs : Optional keyword arguments to pass to the env constructor.

        Returns

        Environment wrapped by TorchVecEnvWrapper.
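
A brief sketch of vectorizing a standard Gymnasium task so rllte agents can consume it; the environment ID is only an illustration, and the export of make_rllte_env from rllte.env is assumed:

from rllte.env import make_rllte_env  # assumed export path

envs = make_rllte_env(
    env_id="CartPole-v1",   # any registered Gymnasium ID, an env class, or a callable
    num_envs=4,
    seed=1,
    device="cpu",
    asynchronous=False,     # SyncVectorEnv; set True for AsyncVectorEnv
)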

        "},{"location":"api_docs/env/atari/__init__/","title":"make_atari_env","text":""},{"location":"api_docs/env/atari/__init__/#make_atari_env","title":"make_atari_env","text":"

        source

        .make_atari_env(\n   env_id: str = 'Alien-v5', num_envs: int = 8, device: str = 'cpu', seed: int = 1,\n   frame_stack: int = 4, asynchronous: bool = True\n)\n

        Create Atari environments.

        Args

        • env_id (str) : Name of environment.
        • num_envs (int) : Number of environments.
        • device (str) : Device to convert the data.
        • seed (int) : Random seed.
        • frame_stack (int) : Number of stacked frames.
        • asynchronous (bool) : True for creating asynchronous environments, and False for creating synchronous environments.

        Returns

        The vectorized environments.

        "},{"location":"api_docs/env/atari/__init__/#make_envpool_atari_env","title":"make_envpool_atari_env","text":"

        source

        .make_envpool_atari_env(\n   env_id: str = 'Alien-v5', num_envs: int = 8, device: str = 'cpu', seed: int = 1,\n   asynchronous: bool = True\n)\n

        Create Atari environments with envpool.

        Args

        • env_id (str) : Name of environment.
        • num_envs (int) : Number of environments.
        • device (str) : Device to convert the data.
        • seed (int) : Random seed.
        • asynchronous (bool) : True for creating asynchronous environments, and False for creating synchronous environments.

        Returns

        The vectorized environments.

        "},{"location":"api_docs/env/bullet/__init__/","title":"make_bullet_env","text":""},{"location":"api_docs/env/bullet/__init__/#make_bullet_env","title":"make_bullet_env","text":"

        source

        .make_bullet_env(\n   env_id: str = 'AntBulletEnv-v0', num_envs: int = 1, device: str = 'cpu', seed: int = 0,\n   parallel: bool = True\n)\n

        Create PyBullet robotics environments.

        Args

        • env_id (str) : Name of environment.
        • num_envs (int) : Number of environments.
        • device (str) : Device to convert the data.
        • seed (int) : Random seed.
        • parallel (bool) : True for creating asynchronous environments, and False for creating synchronous environments.

        Returns

        The vectorized environments.

        "},{"location":"api_docs/env/dmc/__init__/","title":"make_dmc_env","text":""},{"location":"api_docs/env/dmc/__init__/#make_dmc_env","title":"make_dmc_env","text":"

        source

        .make_dmc_env(\n   env_id: str = 'humanoid_run', num_envs: int = 1, device: str = 'cpu', seed: int = 1,\n   visualize_reward: bool = True, from_pixels: bool = False, height: int = 84,\n   width: int = 84, frame_stack: int = 3, action_repeat: int = 1, asynchronous: bool = True\n)\n

        Create DeepMind Control Suite environments.

        Args

        • env_id (str) : Name of environment.
        • num_envs (int) : Number of environments.
        • device (str) : Device to convert the data.
        • seed (int) : Random seed.
        • visualize_reward (bool) : Opposite to from_pixels.
        • from_pixels (bool) : Provide image-based observations or not.
        • height (int) : Image observation height.
        • width (int) : Image observation width.
        • frame_stack (int) : Number of stacked frames.
        • action_repeat (int) : Number of action repeats.
        • asynchronous (bool) : True for creating asynchronous environments, and False for creating synchronous environments.

        Returns

        The vectorized environments.

        "},{"location":"api_docs/env/minigrid/__init__/","title":"init","text":""},{"location":"api_docs/env/minigrid/__init__/#make_minigrid_env","title":"make_minigrid_env","text":"

        source

        .make_minigrid_env(\n   env_id: str = 'MiniGrid-DoorKey-5x5-v0', num_envs: int = 8,\n   fully_observable: bool = True, fully_numerical: bool = False, seed: int = 0,\n   frame_stack: int = 1, device: str = 'cpu', asynchronous: bool = True\n)\n

        Create MiniGrid environments.

        Args

        • env_id (str) : Name of environment.
        • num_envs (int) : Number of environments.
        • fully_observable (bool) : Fully observable gridworld using a compact grid encoding instead of the agent view.
        • fully_numerical (bool) : Transforms the observation space (that has a textual component) to a fully numerical observation space, where the textual instructions are replaced by arrays representing the indices of each word in a fixed vocabulary.
        • seed (int) : Random seed.
        • frame_stack (int) : Number of stacked frames.
        • device (str) : Device to convert the data.
        • asynchronous (bool) : True for creating asynchronous environments, and False for creating synchronous environments.

        Returns

        The vectorized environments.

        "},{"location":"api_docs/env/procgen/__init__/","title":"make_procgen_env","text":""},{"location":"api_docs/env/procgen/__init__/#make_procgen_env","title":"make_procgen_env","text":"

        source

        .make_procgen_env(\n   env_id: str = 'bigfish', num_envs: int = 64, device: str = 'cpu', seed: int = 1,\n   gamma: float = 0.99, num_levels: int = 200, start_level: int = 0,\n   distribution_mode: str = 'easy'\n)\n

        Create Procgen environments.

        Args

        • env_id (str) : Name of environment.
        • num_envs (int) : Number of environments.
        • device (str) : Device to convert the data.
        • seed (int) : Random seed.
        • gamma (float) : A discount factor.
        • num_levels (int) : The number of unique levels that can be generated. Set to 0 to use unlimited levels.
        • start_level (int) : The lowest seed that will be used to generate levels. 'start_level' and 'num_levels' fully specify the set of possible levels.
        • distribution_mode (str) : What variant of the levels to use, the options are \"easy\", \"hard\", \"extreme\", \"memory\", \"exploration\".

        Returns

        The vectorized environment.

        "},{"location":"api_docs/env/procgen/__init__/#make_envpool_procgen_env","title":"make_envpool_procgen_env","text":"

        source

        .make_envpool_procgen_env(\n   env_id: str = 'bigfish', num_envs: int = 64, device: str = 'cpu', seed: int = 1,\n   gamma: float = 0.99, num_levels: int = 200, start_level: int = 0,\n   distribution_mode: str = 'easy', asynchronous: bool = True\n)\n

        Create Procgen environments with envpool.

        Args

        • env_id (str) : Name of environment.
        • num_envs (int) : Number of environments.
        • device (str) : Device to convert the data.
        • seed (int) : Random seed.
        • gamma (float) : A discount factor.
        • num_levels (int) : The number of unique levels that can be generated. Set to 0 to use unlimited levels.
        • start_level (int) : The lowest seed that will be used to generate levels. 'start_level' and 'num_levels' fully specify the set of possible levels.
        • distribution_mode (str) : What variant of the levels to use, the options are \"easy\", \"hard\", \"extreme\", \"memory\", \"exploration\".
        • asynchronous (bool) : True for creating asynchronous environments, and False for creating synchronous environments.

        Returns

        The vectorized environments.

        "},{"location":"api_docs/evaluation/comparison/","title":"Comparison","text":""},{"location":"api_docs/evaluation/comparison/#comparison","title":"Comparison","text":"

        source

        Comparison(\n   scores_x: np.ndarray, scores_y: np.ndarray, get_ci: bool = False,\n   method: str = 'percentile', reps: int = 2000, confidence_interval_size: float = 0.95,\n   random_state: Optional[random.RandomState] = None\n)\n

        Compare the performance between algorithms. Based on: https://github.com/google-research/rliable/blob/master/rliable/metrics.py

        Args

        • scores_x (np.ndarray) : A matrix of size (num_runs_x x num_tasks) where scores[n][m] represent the score on run n of task m for algorithm X.
        • scores_y (np.ndarray) : A matrix of size (num_runs_y x num_tasks) where scores[n][m] represent the score on run n of task m for algorithm Y.
        • get_ci (bool) : Compute CIs or not.
        • method (str) : One of basic, percentile, bc (identical to debiased, bias-corrected), or bca.
        • reps (int) : Number of bootstrap replications.
        • confidence_interval_size (float) : Coverage of confidence interval.
        • random_state (int) : If specified, ensures reproducibility in uncertainty estimates.

        Returns

        Comparer instance.

        Methods:

        "},{"location":"api_docs/evaluation/comparison/#compute_poi","title":".compute_poi","text":"

        source

        .compute_poi()\n

        Compute the overall probability of improvement of algorithm X over Y.

        "},{"location":"api_docs/evaluation/comparison/#get_interval_estimates","title":".get_interval_estimates","text":"

        source

        .get_interval_estimates(\n   scores_x: np.ndarray, scores_y: np.ndarray, metric: Callable\n)\n

        Computes interval estimation of the above performance evaluators.

        Args

        • scores_x (np.ndarray) : A matrix of size (num_runs_x x num_tasks) where scores[n][m] represent the score on run n of task m for algorithm X.
        • scores_y (np.ndarray) : A matrix of size (num_runs_y x num_tasks) where scores[n][m] represent the score on run n of task m for algorithm Y.
        • metric (Callable) : One of the above performance evaluators used for estimation.

        Returns

        Confidence intervals.
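
A minimal sketch with random scores; the import path rllte.evaluation is inferred from the api_docs/evaluation/ locations, and the data here are placeholders:

import numpy as np
from rllte.evaluation import Comparison  # assumed import path

# 10 runs x 5 tasks of normalized scores for two algorithms
scores_x = np.random.rand(10, 5)
scores_y = np.random.rand(10, 5)

comp = Comparison(scores_x=scores_x, scores_y=scores_y, get_ci=False)
poi = comp.compute_poi()  # overall probability of improvement of X over Y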

        "},{"location":"api_docs/evaluation/performance/","title":"Performance","text":""},{"location":"api_docs/evaluation/performance/#performance","title":"Performance","text":"

        source

        Performance(\n   scores: np.ndarray, get_ci: bool = False, method: str = 'percentile',\n   task_bootstrap: bool = False, reps: int = 50000,\n   confidence_interval_size: float = 0.95,\n   random_state: Optional[random.RandomState] = None\n)\n

        Evaluate the performance of an algorithm. Based on: https://github.com/google-research/rliable/blob/master/rliable/metrics.py

        Args

        • scores (np.ndarray) : A matrix of size (num_runs x num_tasks) where scores[n][m] represent the score on run n of task m.
        • get_ci (bool) : Compute CIs or not.
        • method (str) : One of basic, percentile, bc (identical to debiased, bias-corrected), or bca.
        • task_bootstrap (bool) : Whether to perform bootstrapping over tasks in addition to runs. Defaults to False. See StratifiedBootstrap for more details.
        • reps (int) : Number of bootstrap replications.
        • confidence_interval_size (float) : Coverage of confidence interval.
        • random_state (int) : If specified, ensures reproducibility in uncertainty estimates.

        Returns

        Performance evaluator.

        Methods:

        "},{"location":"api_docs/evaluation/performance/#aggregate_mean","title":".aggregate_mean","text":"

        source

        .aggregate_mean()\n

        Computes mean of sample mean scores per task.

        "},{"location":"api_docs/evaluation/performance/#aggregate_median","title":".aggregate_median","text":"

        source

        .aggregate_median()\n

        Computes median of sample mean scores per task.

        "},{"location":"api_docs/evaluation/performance/#aggregate_og","title":".aggregate_og","text":"

        source

        .aggregate_og(\n   gamma: float = 1.0\n)\n

        Computes optimality gap across all runs and tasks.

        Args

        • gamma (float) : Threshold for optimality gap. All scores above gamma are clipped to gamma.

        Returns

        Optimality gap at threshold gamma.

        "},{"location":"api_docs/evaluation/performance/#aggregate_iqm","title":".aggregate_iqm","text":"

        source

        .aggregate_iqm()\n

        Computes the interquartile mean across runs and tasks.

        "},{"location":"api_docs/evaluation/performance/#get_interval_estimates","title":".get_interval_estimates","text":"

        source

        .get_interval_estimates(\n   scores: np.ndarray, metric: Callable\n)\n

        Computes interval estimation of the above performance evaluators.

        Args

        • scores (np.ndarray) : A matrix of size (num_runs x num_tasks) where scores[n][m] represent the score on run n of task m.
        • metric (Callable) : One of the above performance evaluators used for estimation.

        Returns

        Confidence intervals.

        "},{"location":"api_docs/evaluation/performance/#create_performance_profile","title":".create_performance_profile","text":"

        source

        .create_performance_profile(\n   tau_list: Union[List[float], np.ndarray], use_score_distribution: bool = True\n)\n

        Method for calculating performance profiles.

        Args

        • tau_list (Union[List[float], np.ndarray]) : List or 1D numpy array of threshold values on which the profile is evaluated.
        • use_score_distribution (bool) : Whether to report score distributions or average score distributions.

        Returns

        Point and interval estimates of profiles evaluated at all thresholds in 'tau_list'.
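
Analogous to the Comparison sketch above, a minimal example with placeholder scores (the rllte.evaluation import path is assumed):

import numpy as np
from rllte.evaluation import Performance  # assumed import path

# 10 runs x 5 tasks of normalized scores for one algorithm
scores = np.random.rand(10, 5)

perf = Performance(scores=scores)
print(perf.aggregate_mean())         # mean of per-task mean scores
print(perf.aggregate_median())       # median of per-task mean scores
print(perf.aggregate_iqm())          # interquartile mean across runs and tasks
print(perf.aggregate_og(gamma=1.0))  # optimality gap at threshold 1.0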

        "},{"location":"api_docs/evaluation/utils/","title":"Utils","text":""},{"location":"api_docs/evaluation/utils/#min_max_normalize","title":"min_max_normalize","text":"

        source

        .min_max_normalize(\n   value: np.ndarray, min_scores: np.ndarray, max_scores: np.ndarray\n)\n

        Perform min-max normalization.

        "},{"location":"api_docs/evaluation/visualization/","title":"Visualization","text":""},{"location":"api_docs/evaluation/visualization/#plot_interval_estimates","title":"plot_interval_estimates","text":"

        source

        .plot_interval_estimates(\n   metrics_dict: Dict[str, Dict], metric_names: List[str], algorithms: List[str],\n   colors: Optional[List[str]] = None, color_palette: str = 'colorblind',\n   max_ticks: float = 4, subfigure_width: float = 3.4, row_height: float = 0.37,\n   interval_height: float = 0.6, xlabel_y_coordinate: float = -0.16,\n   xlabel: str = 'NormalizedScore', **kwargs\n)\n

        Plots various metrics of algorithms with stratified confidence intervals. Based on: https://github.com/google-research/rliable/blob/master/rliable/plot_utils.py See https://docs.rllte.dev/tutorials/evaluation/ for usage tutorials.

        Args

        • metrics_dict (Dict[str, Dict]) : The dictionary of various metrics of algorithms.
        • metric_names (List[str]) : Names of the metrics corresponding to metrics_dict.
        • algorithms (List[str]) : List of methods used for plotting.
        • colors (Optional[List[str]]) : Maps each method to a color. If None, then this mapping is created based on color_palette.
        • color_palette (str) : seaborn.color_palette object for mapping each method to a color.
        • max_ticks (float) : Find nice tick locations with no more than max_ticks. Passed to plt.MaxNLocator.
        • subfigure_width (float) : Width of each subfigure.
        • row_height (float) : Height of each row in a subfigure.
        • interval_height (float) : Height of confidence intervals.
        • xlabel_y_coordinate (float) : y-coordinate of the x-axis label.
        • xlabel (str) : Label for the x-axis.
        • kwargs : Arbitrary keyword arguments.

        Returns

        A matplotlib figure and an array of Axes.

        "},{"location":"api_docs/evaluation/visualization/#plot_performance_profile","title":"plot_performance_profile","text":"

        source

        .plot_performance_profile(\n   profile_dict: Dict[str, List], tau_list: np.ndarray,\n   use_non_linear_scaling: bool = False, figsize: Tuple[float, float] = (10.0, 5.0),\n   colors: Optional[List[str]] = None, color_palette: str = 'colorblind',\n   alpha: float = 0.15, xticks: Optional[Iterable] = None,\n   yticks: Optional[Iterable] = None,\n   xlabel: Optional[str] = 'NormalizedScore($\\\\tau$)',\n   ylabel: Optional[str] = 'Fractionofrunswithscore$>\\\\tau$',\n   linestyles: Optional[str] = None, **kwargs\n)\n

        Plots performance profiles with stratified confidence intervals. Based on: https://github.com/google-research/rliable/blob/master/rliable/plot_utils.py See https://docs.rllte.dev/tutorials/evaluation/ for usage tutorials.

        Args

        • profile_dict (Dict[str, List]) : A dictionary mapping a method to its performance.
        • tau_list (np.ndarray) : 1D numpy array of threshold values on which the profile is evaluated.
        • use_non_linear_scaling (bool) : Whether to scale the x-axis in proportion to the number of runs within any specified range.
        • figsize (Tuple[float]) : Size of the figure passed to matplotlib.subplots.
        • colors (Optional[List[str]]) : Maps each method to a color. If None, then this mapping is created based on color_palette.
        • color_palette (str) : seaborn.color_palette object for mapping each method to a color.
        • alpha (float) : Changes the transparency of the shaded regions corresponding to the confidence intervals.
        • xticks (Optional[Iterable]) : The list of x-axis tick locations. Passing an empty list removes all xticks.
        • yticks (Optional[Iterable]) : The list of y-axis tick locations between 0 and 1. If None, defaults to [0, 0.25, 0.5, 0.75, 1.0].
        • xlabel (str) : Label for the x-axis.
        • ylabel (str) : Label for the y-axis.
        • linestyles (str) : Maps each method to a linestyle. If None, then the 'solid' linestyle is used for all methods.
        • kwargs : Arbitrary keyword arguments for annotating and decorating the figure. For valid arguments, refer to _annotate_and_decorate_axis.

        Returns

        A matplotlib figure and axes.Axes which contains the plot for performance profiles.
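
        A hedged plotting sketch follows. It assumes the plot functions and the Performance class are importable from rllte.evaluation and that the output of create_performance_profile can be used directly as an entry of profile_dict; both points should be confirmed against the evaluation tutorial.

        ```python
        import numpy as np
        import matplotlib.pyplot as plt
        from rllte.evaluation import Performance, plot_performance_profile  # assumed import path

        tau_list = np.linspace(0.0, 1.0, 101)
        # The structure expected inside `profile_dict` is assumed to match the output
        # of `Performance.create_performance_profile`; the score matrices are synthetic.
        profile_dict = {
            "PPO": Performance(scores=np.random.rand(10, 5)).create_performance_profile(tau_list),
            "A2C": Performance(scores=np.random.rand(10, 5)).create_performance_profile(tau_list),
        }
        fig, ax = plot_performance_profile(profile_dict=profile_dict, tau_list=tau_list)
        plt.show()
        ```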

        "},{"location":"api_docs/evaluation/visualization/#plot_probability_improvement","title":"plot_probability_improvement","text":"

        source

.plot_probability_improvement(\n   poi_dict: Dict[str, List], pair_separator: str = '_', figsize: Tuple[float,\n   float] = (3.7, 2.1), colors: Optional[List[str]] = None,\n   color_palette: str = 'colorblind', alpha: float = 0.75, interval_height: float = 0.6,\n   xticks: Optional[Iterable] = [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],\n   xlabel: str = 'P(X>Y)', left_ylabel: str = 'Algorithm X',\n   right_ylabel: str = 'Algorithm Y', **kwargs\n)\n

        Plots probability of improvement with stratified confidence intervals. Based on: https://github.com/google-research/rliable/blob/master/rliable/plot_utils.py See https://docs.rllte.dev/tutorials/evaluation/ for usage tutorials.

        Args

        • poi_dict (Dict[str, List]) : The dictionary of probability of improvements of different algorithms pairs.
• pair_separator (str) : Each algorithm pair name in dictionaries above is joined by a string separator. For example, if the pairs are specified as 'X;Y', then the separator corresponds to ';'. Defaults to '_'.
        • figsize (Tuple[float]) : Size of the figure passed to matplotlib.subplots.
        • colors (Optional[List[str]]) : Maps each method to a color. If None, then this mapping is created based on color_palette.
        • color_palette (str) : seaborn.color_palette object for mapping each method to a color.
        • interval_height (float) : Height of confidence intervals.
        • alpha (float) : Changes the transparency of the shaded regions corresponding to the confidence intervals.
        • xticks (Optional[Iterable]) : The list of x-axis tick locations. Passing an empty list removes all xticks.
        • xlabel (str) : Label for the x-axis.
        • left_ylabel (str) : Label for the left y-axis. Defaults to 'Algorithm X'.
• right_ylabel (str) : Label for the right y-axis. Defaults to 'Algorithm Y'.
        • kwargs : Arbitrary keyword arguments for annotating and decorating the figure. For valid arguments, refer to _annotate_and_decorate_axis.

        Returns

        A matplotlib figure and axes.Axes which contains the plot for probability of improvement.

        "},{"location":"api_docs/evaluation/visualization/#plot_sample_efficiency_curve","title":"plot_sample_efficiency_curve","text":"

        source

.plot_sample_efficiency_curve(\n   sampling_dict: Dict[str, Dict], frames: np.ndarray, algorithms: List[str],\n   colors: Optional[List[str]] = None, color_palette: str = 'colorblind',\n   figsize: Tuple[float, float] = (3.7, 2.1),\n   xlabel: Optional[str] = 'Number of Frames (in millions)',\n   ylabel: Optional[str] = 'Aggregate Human Normalized Score',\n   labelsize: str = 'xx-large', ticklabelsize: str = 'xx-large', **kwargs\n)\n

        Plots an aggregate metric with CIs as a function of environment frames. Based on: https://github.com/google-research/rliable/blob/master/rliable/plot_utils.py See https://docs.rllte.dev/tutorials/evaluation/ for usage tutorials.

        Args

        • sampling_dict (Dict[str, Dict]) : A dictionary of values with stratified confidence intervals in different frames.
        • frames (np.ndarray) : Array containing environment frames to mark on the x-axis.
        • algorithms (List[str]) : List of methods used for plotting.
        • colors (Optional[List[str]]) : Maps each method to a color. If None, then this mapping is created based on color_palette.
        • color_palette (str) : seaborn.color_palette object for mapping each method to a color.
• figsize (Tuple[float]) : Size of the figure passed to matplotlib.subplots.
• xlabel (str) : Label for the x-axis.
• ylabel (str) : Label for the y-axis.
• labelsize (str) : Font size of the axis labels.
• ticklabelsize (str) : Font size of the tick labels.
        • kwargs : Arbitrary keyword arguments.

        Returns

        A matplotlib figure and an array of Axes.

        "},{"location":"api_docs/hub/atari/","title":"Atari","text":""},{"location":"api_docs/hub/atari/#atari","title":"Atari","text":"

        source

Scores and learning curves of various RL algorithms on the full Atari benchmark.

        Environment link: https://github.com/Farama-Foundation/Arcade-Learning-Environment Number of environments: 57 Number of training steps: 10,000,000 Number of seeds: 10 Added algorithms: [PPO]

        Methods:

        "},{"location":"api_docs/hub/atari/#load_scores","title":".load_scores","text":"

        source

        .load_scores(\n   env_id: str, agent: str\n)\n

        Returns final performance.

        Args

        • env_id (str) : Environment ID.
• agent (str) : Agent name.

        Returns

        Test scores data array with shape (N_SEEDS, N_POINTS).

        "},{"location":"api_docs/hub/atari/#load_curves","title":".load_curves","text":"

        source

        .load_curves(\n   env_id: str, agent: str\n)\n

Returns learning curves as a Dict of NumPy arrays.

        Args

        • env_id (str) : Environment ID.
• agent (str) : Agent name.

        Returns

Learning curves data with structure:
        • curves['train'] : np.ndarray(shape=(N_SEEDS, N_POINTS))
        • curves['eval'] : np.ndarray(shape=(N_SEEDS, N_POINTS))
        "},{"location":"api_docs/hub/atari/#load_models","title":".load_models","text":"

        source

        .load_models(\n   env_id: str, agent: str, seed: int, device: str = 'cpu'\n)\n

        Load the model from the hub.

        Args

        • env_id (str) : Environment ID.
        • agent (str) : Agent name.
        • seed (int) : The seed to load.
        • device (str) : The device to load the model on.

        Returns

        The loaded model.

        "},{"location":"api_docs/hub/atari/#load_apis","title":".load_apis","text":"

        source

        .load_apis(\n   env_id: str, agent: str, seed: int, device: str = 'cpu'\n)\n

Load a training API.

        Args

        • env_id (str) : Environment ID.
        • agent (str) : Agent name.
        • seed (int) : The seed to load.
        • device (str) : The device to load the model on.

        Returns

        The loaded API.
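
        A short loading sketch is given below. The import path rllte.hub.Atari, the example env_id, and the agent name are assumptions and should be checked against the hub documentation.

        ```python
        from rllte.hub import Atari  # assumed import path

        atari = Atari()
        scores = atari.load_scores(env_id="Alien-v5", agent="PPO")   # hypothetical env_id; shape (N_SEEDS, N_POINTS)
        curves = atari.load_curves(env_id="Alien-v5", agent="PPO")   # {'train': ..., 'eval': ...}
        model = atari.load_models(env_id="Alien-v5", agent="PPO", seed=0, device="cpu")
        ```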

        "},{"location":"api_docs/hub/dmc/","title":"DMControl","text":""},{"location":"api_docs/hub/dmc/#dmcontrol","title":"DMControl","text":"

        source

Scores and learning curves of various RL algorithms on the full DeepMind Control Suite benchmark.

Environment link: https://github.com/google-deepmind/dm_control Number of environments: 27 Number of training steps: 10,000,000 for humanoid, 2,000,000 for others Number of seeds: 10 Added algorithms: [SAC, DrQ-v2]

        Methods:

        "},{"location":"api_docs/hub/dmc/#get_obs_type","title":".get_obs_type","text":"

        source

        .get_obs_type(\n   agent: str\n)\n

        Returns the observation type of the agent.

        Args

        • agent (str) : Agent name.

        Returns

        Observation type.

        "},{"location":"api_docs/hub/dmc/#load_scores","title":".load_scores","text":"

        source

        .load_scores(\n   env_id: str, agent: str\n)\n

        Returns final performance.

        Args

        • env_id (str) : Environment ID.
• agent (str) : Agent name.

        Returns

        Test scores data array with shape (N_SEEDS, N_POINTS).

        "},{"location":"api_docs/hub/dmc/#load_curves","title":".load_curves","text":"

        source

        .load_curves(\n   env_id: str, agent: str\n)\n

Returns learning curves as a Dict of NumPy arrays.

        Args

        • env_id (str) : Environment ID.
• agent (str) : Agent name.

        Returns

Learning curves data with structure:
        • curves['train'] : np.ndarray(shape=(N_SEEDS, N_POINTS))
        • curves['eval'] : np.ndarray(shape=(N_SEEDS, N_POINTS))
        "},{"location":"api_docs/hub/dmc/#load_models","title":".load_models","text":"

        source

        .load_models(\n   env_id: str, agent: str, seed: int, device: str = 'cpu'\n)\n

        Load the model from the hub.

        Args

        • env_id (str) : Environment ID.
        • agent (str) : Agent name.
        • seed (int) : The seed to load.
        • device (str) : The device to load the model on.

        Returns

        The loaded model.

        "},{"location":"api_docs/hub/dmc/#load_apis","title":".load_apis","text":"

        source

        .load_apis(\n   env_id: str, agent: str, seed: int, device: str = 'cpu'\n)\n

Load a training API.

        Args

        • env_id (str) : Environment ID.
        • agent (str) : Agent name.
        • seed (int) : The seed to load.
        • device (str) : The device to load the model on.

        Returns

        The loaded API.
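
        A similar sketch for this hub, assuming the class is exposed as rllte.hub.DMControl; the env_id and agent names below are illustrative only.

        ```python
        from rllte.hub import DMControl  # assumed import path

        dmc = DMControl()
        obs_type = dmc.get_obs_type(agent="SAC")                          # e.g. 'state' or 'pixel'
        scores = dmc.load_scores(env_id="cartpole_balance", agent="SAC")  # shape (N_SEEDS, N_POINTS)
        ```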

        "},{"location":"api_docs/hub/minigrid/","title":"MiniGrid","text":""},{"location":"api_docs/hub/minigrid/#minigrid","title":"MiniGrid","text":"

        source

Scores and learning curves of various RL algorithms on the MiniGrid benchmark.

        Environment link: https://github.com/Farama-Foundation/Minigrid Number of environments: 16 Number of training steps: 1,000,000 Number of seeds: 10 Added algorithms: [A2C]

        Methods:

        "},{"location":"api_docs/hub/minigrid/#load_scores","title":".load_scores","text":"

        source

        .load_scores(\n   env_id: str, agent: str\n)\n

        Returns final performance.

        Args

        • env_id (str) : Environment ID.
• agent (str) : Agent name.

        Returns

        Test scores data array with shape (N_SEEDS, N_POINTS).

        "},{"location":"api_docs/hub/minigrid/#load_curves","title":".load_curves","text":"

        source

        .load_curves(\n   env_id: str, agent: str\n)\n

Returns learning curves as a Dict of NumPy arrays.

        Args

        • env_id (str) : Environment ID.
• agent (str) : Agent name.

        Returns

Learning curves data with structure:
        • curves['train'] : np.ndarray(shape=(N_SEEDS, N_POINTS))
        • curves['eval'] : np.ndarray(shape=(N_SEEDS, N_POINTS))
        "},{"location":"api_docs/hub/minigrid/#load_models","title":".load_models","text":"

        source

        .load_models(\n   env_id: str, agent: str, seed: int, device: str = 'cpu'\n)\n

        Load the model from the hub.

        Args

        • env_id (str) : Environment ID.
        • agent (str) : Agent name.
        • seed (int) : The seed to load.
        • device (str) : The device to load the model on.

        Returns

        The loaded model.

        "},{"location":"api_docs/hub/minigrid/#load_apis","title":".load_apis","text":"

        source

        .load_apis(\n   env_id: str, agent: str, seed: int, device: str = 'cpu'\n)\n

Load a training API.

        Args

        • env_id (str) : Environment ID.
        • agent (str) : Agent name.
        • seed (int) : The seed to load.
        • device (str) : The device to load the model on.

        Returns

        The loaded API.

        "},{"location":"api_docs/hub/procgen/","title":"Procgen","text":""},{"location":"api_docs/hub/procgen/#procgen","title":"Procgen","text":"

        source

Scores and learning curves of various RL algorithms on the full Procgen benchmark.

        Environment link: https://github.com/openai/procgen Number of environments: 16 Number of training steps: 25,000,000 Number of seeds: 10 Added algorithms: [PPO]

        Methods:

        "},{"location":"api_docs/hub/procgen/#load_scores","title":".load_scores","text":"

        source

        .load_scores(\n   env_id: str, agent: str\n)\n

        Returns final performance.

        Args

        • env_id (str) : Environment ID.
• agent (str) : Agent name.

        Returns

        Test scores data array with shape (N_SEEDS, N_POINTS).

        "},{"location":"api_docs/hub/procgen/#load_curves","title":".load_curves","text":"

        source

        .load_curves(\n   env_id: str, agent: str\n)\n

Returns learning curves as a Dict of NumPy arrays.

        Args

        • env_id (str) : Environment ID.
• agent (str) : Agent name.

        Returns

Learning curves data with structure:
        • curves['train'] : np.ndarray(shape=(N_SEEDS, N_POINTS))
        • curves['eval'] : np.ndarray(shape=(N_SEEDS, N_POINTS))
        "},{"location":"api_docs/hub/procgen/#load_models","title":".load_models","text":"

        source

        .load_models(\n   env_id: str, agent: str, seed: int, device: str = 'cpu'\n)\n

        Load the model from the hub.

        Args

        • env_id (str) : Environment ID.
        • agent (str) : Agent name.
        • seed (int) : The seed to load.
        • device (str) : The device to load the model on.

        Returns

        The loaded model.

        "},{"location":"api_docs/hub/procgen/#load_apis","title":".load_apis","text":"

        source

        .load_apis(\n   env_id: str, agent: str, seed: int, device: str = 'cpu'\n)\n

Load a training API.

        Args

        • env_id (str) : Environment ID.
        • agent (str) : Agent name.
        • seed (int) : The seed to load.
        • device (str) : The device to load the model on.

        Returns

        The loaded API.

        "},{"location":"api_docs/xploit/encoder/espeholt_residual_encoder/","title":"EspeholtResidualEncoder","text":""},{"location":"api_docs/xploit/encoder/espeholt_residual_encoder/#espeholtresidualencoder","title":"EspeholtResidualEncoder","text":"

        source

        EspeholtResidualEncoder(\n   observation_space: gym.Space, feature_dim: int = 0, net_arch: List[int] = [16, 32,\n   32]\n)\n

        ResNet-like encoder for processing image-based observations. Proposed by Espeholt L, Soyer H, Munos R, et al. Impala: Scalable distributed deep-rl with importance weighted actor-learner architectures[C]//International conference on machine learning. PMLR, 2018: 1407-1416. Target task: Atari games and Procgen games.

        Args

        • observation_space (gym.Space) : Observation space.
        • feature_dim (int) : Number of features extracted.
        • net_arch (List) : Architecture of the network. It represents the out channels of each residual layer. The length of this list is the number of residual layers.

        Returns

        ResNet-like encoder instance.

        Methods:

        "},{"location":"api_docs/xploit/encoder/espeholt_residual_encoder/#forward","title":".forward","text":"

        source

        .forward(\n   obs: th.Tensor\n)\n

        Forward method implementation.

        Args

        • obs (th.Tensor) : Observation tensor.

        Returns

        Encoded observation tensor.
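
        A small construction sketch; channel-first image observations are assumed, and the 3x64x64 shape and net_arch values are illustrative.

        ```python
        import gymnasium as gym
        import numpy as np
        import torch as th
        from rllte.xploit.encoder import EspeholtResidualEncoder

        # Channel-first image observations (assumed), e.g. Procgen-sized frames.
        obs_space = gym.spaces.Box(low=0, high=255, shape=(3, 64, 64), dtype=np.uint8)
        encoder = EspeholtResidualEncoder(observation_space=obs_space, feature_dim=256,
                                          net_arch=[16, 32, 32])  # three residual layers
        obs = th.as_tensor(obs_space.sample()[None]).float()      # add a batch dimension
        features = encoder(obs)                                   # encoded observation tensor
        ```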

        "},{"location":"api_docs/xploit/encoder/identity_encoder/","title":"IdentityEncoder","text":""},{"location":"api_docs/xploit/encoder/identity_encoder/#identityencoder","title":"IdentityEncoder","text":"

        source

        IdentityEncoder(\n   observation_space: gym.Space, feature_dim: int = 64\n)\n

        Identity encoder for state-based observations.

        Args

        • observation_space (gym.Space) : Observation space.
        • feature_dim (int) : Number of features extracted.

        Returns

        Identity encoder instance.

        Methods:

        "},{"location":"api_docs/xploit/encoder/identity_encoder/#forward","title":".forward","text":"

        source

        .forward(\n   obs: th.Tensor\n)\n

        Forward method implementation.

        Args

        • obs (th.Tensor) : Observation tensor.

        Returns

        Encoded observation tensor.

        "},{"location":"api_docs/xploit/encoder/mnih_cnn_encoder/","title":"MnihCnnEncoder","text":""},{"location":"api_docs/xploit/encoder/mnih_cnn_encoder/#mnihcnnencoder","title":"MnihCnnEncoder","text":"

        source

        MnihCnnEncoder(\n   observation_space: gym.Space, feature_dim: int = 0\n)\n

        Convolutional neural network (CNN)-based encoder for processing image-based observations. Proposed by Mnih V, Kavukcuoglu K, Silver D, et al. Playing atari with deep reinforcement learning[J]. arXiv preprint arXiv:1312.5602, 2013. Target task: Atari games.

        Args

        • observation_space (gym.Space) : Observation space.
        • feature_dim (int) : Number of features extracted.

        Returns

        CNN-based encoder instance.

        Methods:

        "},{"location":"api_docs/xploit/encoder/mnih_cnn_encoder/#forward","title":".forward","text":"

        source

        .forward(\n   obs: th.Tensor\n)\n

        Forward method implementation.

        Args

        • obs (th.Tensor) : Observation tensor.

        Returns

        Encoded observation tensor.

        "},{"location":"api_docs/xploit/encoder/pathak_cnn_encoder/","title":"PathakCnnEncoder","text":""},{"location":"api_docs/xploit/encoder/pathak_cnn_encoder/#pathakcnnencoder","title":"PathakCnnEncoder","text":"

        source

        PathakCnnEncoder(\n   observation_space: gym.Space, feature_dim: int = 0\n)\n

        Convolutional neural network (CNN)-based encoder for processing image-based observations. Proposed by Pathak D, Agrawal P, Efros A A, et al. Curiosity-driven exploration by self-supervised prediction[C]// International conference on machine learning. PMLR, 2017: 2778-2787. Target task: Atari and MiniGrid games.

        Args

        • observation_space (gym.Space) : Observation space.
        • feature_dim (int) : Number of features extracted.

        Returns

        CNN-based encoder instance.

        Methods:

        "},{"location":"api_docs/xploit/encoder/pathak_cnn_encoder/#forward","title":".forward","text":"

        source

        .forward(\n   obs: th.Tensor\n)\n

        Forward method implementation.

        Args

        • obs (th.Tensor) : Observation tensor.

        Returns

        Encoded observation tensor.

        "},{"location":"api_docs/xploit/encoder/raffin_combined_encoder/","title":"RaffinCombinedEncoder","text":""},{"location":"api_docs/xploit/encoder/raffin_combined_encoder/#raffincombinedencoder","title":"RaffinCombinedEncoder","text":"

        source

        RaffinCombinedEncoder(\n   observation_space: gym.Space, feature_dim: int = 256, cnn_output_dim: int = 256\n)\n

        Combined features extractor for Dict observation spaces. Based on: https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/torch_layers.py#L231

        Args

        • observation_space (gym.Space) : Observation space.
        • feature_dim (int) : Number of features extracted.
        • cnn_output_dim (int) : Number of features extracted by the CNN.

        Returns

Combined encoder instance.

        Methods:

        "},{"location":"api_docs/xploit/encoder/raffin_combined_encoder/#forward","title":".forward","text":"

        source

        .forward(\n   obs: Dict[str, th.Tensor]\n)\n

        Forward method implementation.

        Args

        • obs (Dict[str, th.Tensor]) : Observation tensor.

        Returns

        Encoded observation tensor.

        "},{"location":"api_docs/xploit/encoder/tassa_cnn_encoder/","title":"TassaCnnEncoder","text":""},{"location":"api_docs/xploit/encoder/tassa_cnn_encoder/#tassacnnencoder","title":"TassaCnnEncoder","text":"

        source

        TassaCnnEncoder(\n   observation_space: gym.Space, feature_dim: int = 50\n)\n

        Convolutional neural network (CNN)-based encoder for processing image-based observations. Proposed by Tassa Y, Doron Y, Muldal A, et al. Deepmind control suite[J]. arXiv preprint arXiv:1801.00690, 2018. Target task: DeepMind Control Suite.

        Args

        • observation_space (gym.Space) : Observation space.
        • feature_dim (int) : Number of features extracted by the encoder.

        Returns

        CNN-based encoder instance.

        Methods:

        "},{"location":"api_docs/xploit/encoder/tassa_cnn_encoder/#forward","title":".forward","text":"

        source

        .forward(\n   obs: th.Tensor\n)\n

        Forward method implementation.

        Args

        • obs (th.Tensor) : Observation tensor.

        Returns

        Encoded observation tensor.

        "},{"location":"api_docs/xploit/encoder/vanilla_mlp_encoder/","title":"VanillaMlpEncoder","text":""},{"location":"api_docs/xploit/encoder/vanilla_mlp_encoder/#vanillamlpencoder","title":"VanillaMlpEncoder","text":"

        source

        VanillaMlpEncoder(\n   observation_space: gym.Space, feature_dim: int = 64, hidden_dim: int = 64\n)\n

Multi-layer perceptron (MLP)-based encoder for processing state-based observations.

        Args

        • observation_space (gym.Space) : Observation space.
        • feature_dim (int) : Number of features extracted.
        • hidden_dim (int) : Number of hidden units in the hidden layer.

        Returns

MLP-based encoder instance.

        Methods:

        "},{"location":"api_docs/xploit/encoder/vanilla_mlp_encoder/#forward","title":".forward","text":"

        source

        .forward(\n   obs: th.Tensor\n)\n

        Forward method implementation.

        Args

        • obs (th.Tensor) : Observation tensor.

        Returns

        Encoded observation tensor.
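
        The state-based counterpart; the 17-dimensional observation space below is illustrative.

        ```python
        import gymnasium as gym
        import numpy as np
        import torch as th
        from rllte.xploit.encoder import VanillaMlpEncoder

        obs_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(17,), dtype=np.float32)
        encoder = VanillaMlpEncoder(observation_space=obs_space, feature_dim=64, hidden_dim=64)
        obs = th.as_tensor(obs_space.sample()[None])   # add a batch dimension
        features = encoder(obs)                        # encoded observation tensor
        ```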

        "},{"location":"api_docs/xploit/policy/distributed_actor_learner/","title":"DistributedActorLearner","text":""},{"location":"api_docs/xploit/policy/distributed_actor_learner/#distributedactorlearner","title":"DistributedActorLearner","text":"

        source

        DistributedActorLearner(\n   observation_space: gym.Space, action_space: gym.Space, feature_dim: int,\n   hidden_dim: int = 512, opt_class: Type[th.optim.Optimizer] = th.optim.Adam,\n   opt_kwargs: Optional[Dict[str, Any]] = None, init_fn: str = 'orthogonal',\n   use_lstm: bool = False\n)\n

        Actor-Learner network for IMPALA.

        Args

        • observation_space (gym.Space) : Observation space.
        • action_space (gym.Space) : Action space.
        • feature_dim (int) : Number of features accepted.
        • hidden_dim (int) : Number of units per hidden layer.
        • opt_class (Type[th.optim.Optimizer]) : Optimizer class.
        • opt_kwargs (Dict[str, Any]) : Optimizer keyword arguments.
        • init_fn (str) : Parameters initialization method.
        • use_lstm (bool) : Whether to use LSTM module.

        Returns

Actor-Learner network instance.

        Methods:

        "},{"location":"api_docs/xploit/policy/distributed_actor_learner/#describe","title":".describe","text":"

        source

        .describe()\n

        Describe the policy.

        "},{"location":"api_docs/xploit/policy/distributed_actor_learner/#explore","title":".explore","text":"

        source

        .explore(\n   obs: th.Tensor\n)\n

        Explore the environment and randomly generate actions.

        Args

        • obs (th.Tensor) : Observation from the environment.

        Returns

        Sampled actions.

        "},{"location":"api_docs/xploit/policy/distributed_actor_learner/#freeze","title":".freeze","text":"

        source

        .freeze(\n   encoder: nn.Module, dist: Distribution\n)\n

        Freeze all the elements like encoder and dist.

        Args

        • encoder (nn.Module) : Encoder network.
        • dist (Distribution) : Distribution.

        Returns

        None.

        "},{"location":"api_docs/xploit/policy/distributed_actor_learner/#forward","title":".forward","text":"

        source

        .forward(\n   *args\n)\n

        Only for inference.

        "},{"location":"api_docs/xploit/policy/distributed_actor_learner/#to","title":".to","text":"

        source

        .to(\n   device: th.device\n)\n

Only move the learner to the device and keep the actor on the CPU.

        Args

        • device (th.device) : Device to use.

        Returns

        None.

        "},{"location":"api_docs/xploit/policy/distributed_actor_learner/#save","title":".save","text":"

        source

        .save(\n   path: Path, pretraining: bool, global_step: int\n)\n

        Save models.

        Args

        • path (Path) : Save path.
        • pretraining (bool) : Pre-training mode.
        • global_step (int) : Global training step.

        Returns

        None.

        "},{"location":"api_docs/xploit/policy/distributed_actor_learner/#load","title":".load","text":"

        source

        .load(\n   path: str, device: th.device\n)\n

        Load initial parameters.

        Args

        • path (str) : Import path.
        • device (th.device) : Device to use.

        Returns

        None.

        "},{"location":"api_docs/xploit/policy/off_policy_det_actor_double_critic/","title":"OffPolicyDetActorDoubleCritic","text":""},{"location":"api_docs/xploit/policy/off_policy_det_actor_double_critic/#offpolicydetactordoublecritic","title":"OffPolicyDetActorDoubleCritic","text":"

        source

        OffPolicyDetActorDoubleCritic(\n   observation_space: gym.Space, action_space: gym.Space, feature_dim: int = 64,\n   hidden_dim: int = 1024, opt_class: Type[th.optim.Optimizer] = th.optim.Adam,\n   opt_kwargs: Optional[Dict[str, Any]] = None, init_fn: str = 'orthogonal'\n)\n

Deterministic actor network and double critic network for off-policy algorithms like DrQ-v2 and DDPG. Here, 'self.dist' refers to an action noise.

        Args

        • observation_space (gym.Space) : Observation space.
        • action_space (gym.Space) : Action space.
        • feature_dim (int) : Number of features accepted.
        • hidden_dim (int) : Number of units per hidden layer.
        • opt_class (Type[th.optim.Optimizer]) : Optimizer class.
        • opt_kwargs (Dict[str, Any]) : Optimizer keyword arguments.
        • init_fn (str) : Parameters initialization method.

        Returns

        Actor-Critic network.

        Methods:

        "},{"location":"api_docs/xploit/policy/off_policy_det_actor_double_critic/#describe","title":".describe","text":"

        source

        .describe()\n

        Describe the policy.

        "},{"location":"api_docs/xploit/policy/off_policy_det_actor_double_critic/#freeze","title":".freeze","text":"

        source

        .freeze(\n   encoder: nn.Module, dist: Distribution\n)\n

        Freeze all the elements like encoder and dist.

        Args

        • encoder (nn.Module) : Encoder network.
        • dist (Distribution) : Distribution.

        Returns

        None.

        "},{"location":"api_docs/xploit/policy/off_policy_det_actor_double_critic/#forward","title":".forward","text":"

        source

        .forward(\n   obs: th.Tensor, training: bool = True\n)\n

        Sample actions based on observations.

        Args

        • obs (th.Tensor) : Observations.
        • training (bool) : Training mode, True or False.

        Returns

        Sampled actions.

        "},{"location":"api_docs/xploit/policy/off_policy_det_actor_double_critic/#get_dist","title":".get_dist","text":"

        source

        .get_dist(\n   obs: th.Tensor\n)\n

        Get sample distribution.

        Args

        • obs (th.Tensor) : Observations.

        Returns

        RLLTE distribution.

        "},{"location":"api_docs/xploit/policy/off_policy_det_actor_double_critic/#save","title":".save","text":"

        source

        .save(\n   path: Path, pretraining: bool = False\n)\n

        Save models.

        Args

        • path (Path) : Save path.
        • pretraining (bool) : Pre-training mode.

        Returns

        None.

        "},{"location":"api_docs/xploit/policy/off_policy_double_actor_double_critic/","title":"OffPolicyDoubleActorDoubleCritic","text":""},{"location":"api_docs/xploit/policy/off_policy_double_actor_double_critic/#offpolicydoubleactordoublecritic","title":"OffPolicyDoubleActorDoubleCritic","text":"

        source

        OffPolicyDoubleActorDoubleCritic(\n   observation_space: gym.Space, action_space: gym.Space, feature_dim: int = 64,\n   hidden_dim: int = 1024, opt_class: Type[th.optim.Optimizer] = th.optim.Adam,\n   opt_kwargs: Optional[Dict[str, Any]] = None, init_fn: str = 'orthogonal'\n)\n

Double deterministic actor network and double critic network for off-policy algorithms like DDPG and TD3. Here, 'self.dist' refers to an action noise.

        Args

        • observation_space (gym.Space) : Observation space.
        • action_space (gym.Space) : Action space.
        • feature_dim (int) : Number of features accepted.
        • hidden_dim (int) : Number of units per hidden layer.
        • opt_class (Type[th.optim.Optimizer]) : Optimizer class.
        • opt_kwargs (Dict[str, Any]) : Optimizer keyword arguments.
        • init_fn (str) : Parameters initialization method.

        Returns

        Actor-Critic network.

        Methods:

        "},{"location":"api_docs/xploit/policy/off_policy_double_actor_double_critic/#describe","title":".describe","text":"

        source

        .describe()\n

        Describe the policy.

        "},{"location":"api_docs/xploit/policy/off_policy_double_actor_double_critic/#freeze","title":".freeze","text":"

        source

        .freeze(\n   encoder: nn.Module, dist: Distribution\n)\n

        Freeze all the elements like encoder and dist.

        Args

        • encoder (nn.Module) : Encoder network.
        • dist (Distribution) : Distribution.

        Returns

        None.

        "},{"location":"api_docs/xploit/policy/off_policy_double_actor_double_critic/#forward","title":".forward","text":"

        source

        .forward(\n   obs: th.Tensor, training: bool = True\n)\n

        Sample actions based on observations.

        Args

        • obs (th.Tensor) : Observations.
        • training (bool) : Training mode, True or False.

        Returns

        Sampled actions.

        "},{"location":"api_docs/xploit/policy/off_policy_double_actor_double_critic/#get_dist","title":".get_dist","text":"

        source

        .get_dist(\n   obs: th.Tensor\n)\n

        Get sample distribution.

        Args

        • obs (th.Tensor) : Observations.

        Returns

        RLLTE distribution.

        "},{"location":"api_docs/xploit/policy/off_policy_double_actor_double_critic/#save","title":".save","text":"

        source

        .save(\n   path: Path, pretraining: bool, global_step: int\n)\n

        Save models.

        Args

        • path (Path) : Save path.
        • pretraining (bool) : Pre-training mode.
        • global_step (int) : Global training step.

        Returns

        None.

        "},{"location":"api_docs/xploit/policy/off_policy_double_qnetwork/","title":"OffPolicyDoubleQNetwork","text":""},{"location":"api_docs/xploit/policy/off_policy_double_qnetwork/#offpolicydoubleqnetwork","title":"OffPolicyDoubleQNetwork","text":"

        source

        OffPolicyDoubleQNetwork(\n   observation_space: gym.Space, action_space: gym.Space, feature_dim: int = 64,\n   hidden_dim: int = 1024, opt_class: Type[th.optim.Optimizer] = th.optim.Adam,\n   opt_kwargs: Optional[Dict[str, Any]] = None, init_fn: str = 'orthogonal'\n)\n

Q-network for off-policy algorithms like DQN.

Structure: self.encoder (shared by actor and critic), self.qnet, self.qnet_target. Optimizers: self.opt -> (self.qnet, self.qnet_target).

        Args

        • observation_space (gym.Space) : Observation space.
        • action_space (gym.Space) : Action space.
        • feature_dim (int) : Number of features accepted.
        • hidden_dim (int) : Number of units per hidden layer.
        • opt_class (Type[th.optim.Optimizer]) : Optimizer class.
        • opt_kwargs (Dict[str, Any]) : Optimizer keyword arguments.
        • init_fn (str) : Parameters initialization method.

        Returns

Q-network instance.

        Methods:

        "},{"location":"api_docs/xploit/policy/off_policy_double_qnetwork/#describe","title":".describe","text":"

        source

        .describe()\n

        Describe the policy.

        "},{"location":"api_docs/xploit/policy/off_policy_double_qnetwork/#freeze","title":".freeze","text":"

        source

        .freeze(\n   encoder: nn.Module, dist: Distribution\n)\n

        Freeze all the elements like encoder and dist.

        Args

        • encoder (nn.Module) : Encoder network.
        • dist (Distribution) : Distribution class.

        Returns

        None.

        "},{"location":"api_docs/xploit/policy/off_policy_double_qnetwork/#forward","title":".forward","text":"

        source

        .forward(\n   obs: th.Tensor, training: bool = True\n)\n

        Sample actions based on observations.

        Args

        • obs (th.Tensor) : Observations.
        • training (bool) : Training mode, True or False.

        Returns

        Sampled actions.

        "},{"location":"api_docs/xploit/policy/off_policy_double_qnetwork/#save","title":".save","text":"

        source

        .save(\n   path: Path, pretraining: bool, global_step: int\n)\n

        Save models.

        Args

        • path (Path) : Save path.
        • pretraining (bool) : Pre-training mode.
        • global_step (int) : Global training step.

        Returns

        None.

        "},{"location":"api_docs/xploit/policy/off_policy_stoch_actor_double_critic/","title":"OffPolicyStochActorDoubleCritic","text":""},{"location":"api_docs/xploit/policy/off_policy_stoch_actor_double_critic/#offpolicystochactordoublecritic","title":"OffPolicyStochActorDoubleCritic","text":"

        source

        OffPolicyStochActorDoubleCritic(\n   observation_space: gym.Space, action_space: gym.Space, feature_dim: int = 64,\n   hidden_dim: int = 1024, opt_class: Type[th.optim.Optimizer] = th.optim.Adam,\n   opt_kwargs: Optional[Dict[str, Any]] = None, log_std_range: Tuple = (-5, 2),\n   init_fn: str = 'orthogonal'\n)\n

Stochastic actor network and double critic network for off-policy algorithms like SAC. Here, 'self.dist' refers to a sampling distribution instance.

        Args

        • observation_space (gym.Space) : Observation space.
        • action_space (gym.Space) : Action space.
        • feature_dim (int) : Number of features accepted.
        • hidden_dim (int) : Number of units per hidden layer.
        • opt_class (Type[th.optim.Optimizer]) : Optimizer class.
        • opt_kwargs (Dict[str, Any]) : Optimizer keyword arguments.
        • log_std_range (Tuple) : Range of log standard deviation.
        • init_fn (str) : Parameters initialization method.

        Returns

        Actor-Critic network.

        Methods:

        "},{"location":"api_docs/xploit/policy/off_policy_stoch_actor_double_critic/#describe","title":".describe","text":"

        source

        .describe()\n

        Describe the policy.

        "},{"location":"api_docs/xploit/policy/off_policy_stoch_actor_double_critic/#freeze","title":".freeze","text":"

        source

        .freeze(\n   encoder: nn.Module, dist: Distribution\n)\n

        Freeze all the elements like encoder and dist.

        Args

        • encoder (nn.Module) : Encoder network.
        • dist (Distribution) : Distribution class.

        Returns

        None.

        "},{"location":"api_docs/xploit/policy/off_policy_stoch_actor_double_critic/#forward","title":".forward","text":"

        source

        .forward(\n   obs: th.Tensor, training: bool = True\n)\n

        Sample actions based on observations.

        Args

        • obs (th.Tensor) : Observations.
        • training (bool) : Training mode, True or False.

        Returns

        Sampled actions.

        "},{"location":"api_docs/xploit/policy/off_policy_stoch_actor_double_critic/#get_dist","title":".get_dist","text":"

        source

        .get_dist(\n   obs: th.Tensor\n)\n

        Get sample distribution.

        Args

• obs (th.Tensor) : Observations.

        Returns

        Action distribution.

        "},{"location":"api_docs/xploit/policy/off_policy_stoch_actor_double_critic/#save","title":".save","text":"

        source

        .save(\n   path: Path, pretraining: bool, global_step: int\n)\n

        Save models.

        Args

        • path (Path) : Save path.
        • pretraining (bool) : Pre-training mode.
        • global_step (int) : Global training step.

        Returns

        None.

        "},{"location":"api_docs/xploit/policy/on_policy_decoupled_actor_critic/","title":"OnPolicyDecoupledActorCritic","text":""},{"location":"api_docs/xploit/policy/on_policy_decoupled_actor_critic/#onpolicydecoupledactorcritic","title":"OnPolicyDecoupledActorCritic","text":"

        source

        OnPolicyDecoupledActorCritic(\n   observation_space: gym.Space, action_space: gym.Space, feature_dim: int,\n   hidden_dim: int = 512, opt_class: Type[th.optim.Optimizer] = th.optim.Adam,\n   opt_kwargs: Optional[Dict[str, Any]] = None, init_fn: str = 'orthogonal'\n)\n

        Actor-Critic network for on-policy algorithms like DAAC.

        Args

        • observation_space (gym.Space) : Observation space.
        • action_space (gym.Space) : Action space.
        • feature_dim (int) : Number of features accepted.
        • hidden_dim (int) : Number of units per hidden layer.
        • opt_class (Type[th.optim.Optimizer]) : Optimizer class.
        • opt_kwargs (Dict[str, Any]) : Optimizer keyword arguments.
        • init_fn (str) : Parameters initialization method.

        Returns

        Actor-Critic network instance.

        Methods:

        "},{"location":"api_docs/xploit/policy/on_policy_decoupled_actor_critic/#describe","title":".describe","text":"

        source

        .describe()\n

        Describe the policy.

        "},{"location":"api_docs/xploit/policy/on_policy_decoupled_actor_critic/#freeze","title":".freeze","text":"

        source

        .freeze(\n   encoder: nn.Module, dist: Distribution\n)\n

        Freeze all the elements like encoder and dist.

        Args

        • encoder (nn.Module) : Encoder network.
        • dist (Distribution) : Distribution class.

        Returns

        None.

        "},{"location":"api_docs/xploit/policy/on_policy_decoupled_actor_critic/#forward","title":".forward","text":"

        source

        .forward(\n   obs: th.Tensor, training: bool = True\n)\n

        Get actions and estimated values for observations.

        Args

        • obs (th.Tensor) : Observations.
• training (bool) : Training mode, True or False.

        Returns

        Sampled actions, estimated values, and log of probabilities for observations when training is True, else only deterministic actions.

        "},{"location":"api_docs/xploit/policy/on_policy_decoupled_actor_critic/#get_value","title":".get_value","text":"

        source

        .get_value(\n   obs: th.Tensor\n)\n

        Get estimated values for observations.

        Args

        • obs (th.Tensor) : Observations.

        Returns

        Estimated values.

        "},{"location":"api_docs/xploit/policy/on_policy_decoupled_actor_critic/#evaluate_actions","title":".evaluate_actions","text":"

        source

        .evaluate_actions(\n   obs: th.Tensor, actions: th.Tensor\n)\n

        Evaluate actions according to the current policy given the observations.

        Args

        • obs (th.Tensor) : Sampled observations.
        • actions (th.Tensor) : Sampled actions.

        Returns

        Estimated values, log of the probability evaluated at actions, entropy of distribution.

        "},{"location":"api_docs/xploit/policy/on_policy_decoupled_actor_critic/#save","title":".save","text":"

        source

        .save(\n   path: Path, pretraining: bool, global_step: int\n)\n

        Save models.

        Args

        • path (Path) : Save path.
        • pretraining (bool) : Pre-training mode.
        • global_step (int) : Global training step.

        Returns

        None.

        "},{"location":"api_docs/xploit/policy/on_policy_shared_actor_critic/","title":"OnPolicySharedActorCritic","text":""},{"location":"api_docs/xploit/policy/on_policy_shared_actor_critic/#onpolicysharedactorcritic","title":"OnPolicySharedActorCritic","text":"

        source

        OnPolicySharedActorCritic(\n   observation_space: gym.Space, action_space: gym.Space, feature_dim: int,\n   hidden_dim: int = 512, opt_class: Type[th.optim.Optimizer] = th.optim.Adam,\n   opt_kwargs: Optional[Dict[str, Any]] = None, aux_critic: bool = False,\n   init_fn: str = 'orthogonal'\n)\n

        Actor-Critic network for on-policy algorithms like PPO and A2C.

        Args

        • observation_space (gym.Space) : Observation space.
        • action_space (gym.Space) : Action space.
        • feature_dim (int) : Number of features accepted.
        • hidden_dim (int) : Number of units per hidden layer.
        • opt_class (Type[th.optim.Optimizer]) : Optimizer class.
        • opt_kwargs (Dict[str, Any]) : Optimizer keyword arguments.
        • aux_critic (bool) : Use auxiliary critic or not, for PPG agent.
        • init_fn (str) : Parameters initialization method.

        Returns

        Actor-Critic network instance.

        Methods:

        "},{"location":"api_docs/xploit/policy/on_policy_shared_actor_critic/#describe","title":".describe","text":"

        source

        .describe()\n

        Describe the policy.

        "},{"location":"api_docs/xploit/policy/on_policy_shared_actor_critic/#freeze","title":".freeze","text":"

        source

        .freeze(\n   encoder: nn.Module, dist: Distribution\n)\n

        Freeze all the elements like encoder and dist.

        Args

        • encoder (nn.Module) : Encoder network.
        • dist (Distribution) : Distribution class.

        Returns

        None.

        "},{"location":"api_docs/xploit/policy/on_policy_shared_actor_critic/#forward","title":".forward","text":"

        source

        .forward(\n   obs: th.Tensor, training: bool = True\n)\n

        Get actions and estimated values for observations.

        Args

        • obs (th.Tensor) : Observations.
• training (bool) : Training mode, True or False.

        Returns

        Sampled actions, estimated values, and log of probabilities for observations when training is True, else only deterministic actions.

        "},{"location":"api_docs/xploit/policy/on_policy_shared_actor_critic/#get_value","title":".get_value","text":"

        source

        .get_value(\n   obs: th.Tensor\n)\n

        Get estimated values for observations.

        Args

        • obs (th.Tensor) : Observations.

        Returns

        Estimated values.

        "},{"location":"api_docs/xploit/policy/on_policy_shared_actor_critic/#evaluate_actions","title":".evaluate_actions","text":"

        source

        .evaluate_actions(\n   obs: th.Tensor, actions: th.Tensor\n)\n

        Evaluate actions according to the current policy given the observations.

        Args

        • obs (th.Tensor) : Sampled observations.
        • actions (th.Tensor) : Sampled actions.

        Returns

        Estimated values, log of the probability evaluated at actions, entropy of distribution.

        "},{"location":"api_docs/xploit/policy/on_policy_shared_actor_critic/#get_policy_outputs","title":".get_policy_outputs","text":"

        source

        .get_policy_outputs(\n   obs: th.Tensor\n)\n

        Get policy outputs for training.

        Args

        • obs (Tensor) : Observations.

        Returns

        Policy outputs like unnormalized probabilities for Discrete tasks.

        "},{"location":"api_docs/xploit/policy/on_policy_shared_actor_critic/#get_dist_and_aux_value","title":".get_dist_and_aux_value","text":"

        source

        .get_dist_and_aux_value(\n   obs: th.Tensor\n)\n

        Get probs and auxiliary estimated values for auxiliary phase update.

        Args

        • obs : Sampled observations.

        Returns

        Sample distribution, estimated values, auxiliary estimated values.

        "},{"location":"api_docs/xploit/policy/on_policy_shared_actor_critic/#save","title":".save","text":"

        source

        .save(\n   path: Path, pretraining: bool, global_step: int\n)\n

        Save models.

        Args

        • path (Path) : Save path.
        • pretraining (bool) : Pre-training mode.
        • global_step (int) : Global training step.

        Returns

        None.
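
        A hand-wiring sketch based on the documented .freeze and .forward methods; inside the built-in agents this wiring is performed automatically, and the exact return structure of forward follows the description above. The spaces and dimensions are illustrative.

        ```python
        import gymnasium as gym
        import numpy as np
        import torch as th
        from rllte.xploit.encoder import MnihCnnEncoder
        from rllte.xploit.policy import OnPolicySharedActorCritic
        from rllte.xplore.distribution import Categorical

        obs_space = gym.spaces.Box(low=0, high=255, shape=(4, 84, 84), dtype=np.uint8)
        action_space = gym.spaces.Discrete(7)

        policy = OnPolicySharedActorCritic(observation_space=obs_space,
                                           action_space=action_space,
                                           feature_dim=512)
        encoder = MnihCnnEncoder(observation_space=obs_space, feature_dim=512)
        policy.freeze(encoder=encoder, dist=Categorical)      # attach encoder and distribution class

        obs = th.as_tensor(obs_space.sample()[None]).float()  # batch of one observation
        outputs = policy(obs, training=True)                  # sampled actions, values, log-probs (see above)
        ```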

        "},{"location":"api_docs/xploit/storage/dict_replay_storage/","title":"DictReplayStorage","text":""},{"location":"api_docs/xploit/storage/dict_replay_storage/#dictreplaystorage","title":"DictReplayStorage","text":"

        source

        DictReplayStorage(\n   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',\n   storage_size: int = 1000000, batch_size: int = 1024, num_envs: int = 1\n)\n

        Dict replay storage for off-policy algorithms and dictionary observations.

        Args

        • observation_space (gym.Space) : Observation space.
        • action_space (gym.Space) : Action space.
        • device (str) : Device to convert the data.
        • storage_size (int) : The capacity of the storage.
        • batch_size (int) : Batch size of samples.
        • num_envs (int) : The number of parallel environments.

        Returns

        Dict replay storage.

        Methods:

        "},{"location":"api_docs/xploit/storage/dict_replay_storage/#reset","title":".reset","text":"

        source

        .reset()\n

        Reset the storage.

        "},{"location":"api_docs/xploit/storage/dict_replay_storage/#add","title":".add","text":"

        source

        .add(\n   observations: Dict[str, th.Tensor], actions: th.Tensor, rewards: th.Tensor,\n   terminateds: th.Tensor, truncateds: th.Tensor, infos: Dict[str, Any],\n   next_observations: Dict[str, th.Tensor]\n)\n

        Add sampled transitions into storage.

        Args

        • observations (Dict[str, th.Tensor]) : Observations.
        • actions (th.Tensor) : Actions.
        • rewards (th.Tensor) : Rewards.
        • terminateds (th.Tensor) : Termination flag.
        • truncateds (th.Tensor) : Truncation flag.
        • infos (Dict[str, Any]) : Additional information.
        • next_observations (Dict[str, th.Tensor]) : Next observations.

        Returns

        None.

        "},{"location":"api_docs/xploit/storage/dict_replay_storage/#sample","title":".sample","text":"

        source

        .sample()\n

        Sample from the storage.

        "},{"location":"api_docs/xploit/storage/dict_replay_storage/#update","title":".update","text":"

        source

        .update(\n   *args, **kwargs\n)\n

        Update the storage if necessary.

        "},{"location":"api_docs/xploit/storage/dict_rollout_storage/","title":"DictRolloutStorage","text":""},{"location":"api_docs/xploit/storage/dict_rollout_storage/#dictrolloutstorage","title":"DictRolloutStorage","text":"

        source

        DictRolloutStorage(\n   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',\n   storage_size: int = 256, batch_size: int = 64, num_envs: int = 8,\n   discount: float = 0.999, gae_lambda: float = 0.95\n)\n

        Dict Rollout storage for on-policy algorithms and dictionary observations.

        Args

        • observation_space (gym.Space) : The observation space of environment.
        • action_space (gym.Space) : The action space of environment.
        • device (str) : Device to convert the data.
• storage_size (int) : The capacity of the storage. Here it refers to the length of each rollout.
        • batch_size (int) : Batch size of samples.
        • num_envs (int) : The number of parallel environments.
        • discount (float) : The discount factor.
        • gae_lambda (float) : Weighting coefficient for generalized advantage estimation (GAE).

        Returns

        Dict rollout storage.

        Methods:

        "},{"location":"api_docs/xploit/storage/dict_rollout_storage/#reset","title":".reset","text":"

        source

        .reset()\n

        Reset the storage.

        "},{"location":"api_docs/xploit/storage/dict_rollout_storage/#add","title":".add","text":"

        source

        .add(\n   observations: Dict[str, th.Tensor], actions: th.Tensor, rewards: th.Tensor,\n   terminateds: th.Tensor, truncateds: th.Tensor, infos: Dict,\n   next_observations: Dict[str, th.Tensor], log_probs: th.Tensor,\n   values: th.Tensor\n)\n

        Add sampled transitions into storage.

        Args

        • observations (Dict[str, th.Tensor]) : Observations.
        • actions (th.Tensor) : Actions.
        • rewards (th.Tensor) : Rewards.
        • terminateds (th.Tensor) : Termination signals.
        • truncateds (th.Tensor) : Truncation signals.
        • infos (Dict) : Extra information.
        • next_observations (Dict[str, th.Tensor]) : Next observations.
        • log_probs (th.Tensor) : Log of the probability evaluated at actions.
        • values (th.Tensor) : Estimated values.

        Returns

        None.

        "},{"location":"api_docs/xploit/storage/dict_rollout_storage/#sample","title":".sample","text":"

        source

        .sample()\n

        Sample data from storage.

        "},{"location":"api_docs/xploit/storage/her_replay_storage/","title":"HerReplayStorage","text":""},{"location":"api_docs/xploit/storage/her_replay_storage/#herreplaystorage","title":"HerReplayStorage","text":"

        source

HerReplayStorage(\n   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',\n   storage_size: int = 1000000, num_envs: int = 1, batch_size: int = 1024,\n   goal_selection_strategy: str = 'future', num_goals: int = 4,\n   reward_fn: Callable = lambda x: x, copy_info_dict: bool = False\n)\n

        Hindsight experience replay (HER) storage for off-policy algorithms. Based on: https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/her/her_replay_buffer.py

        Args

        • observation_space (gym.Space) : Observation space.
        • action_space (gym.Space) : Action space.
        • device (str) : Device to convert the data.
        • storage_size (int) : The capacity of the storage.
        • num_envs (int) : The number of parallel environments.
        • batch_size (int) : Batch size of samples.
• goal_selection_strategy (str) : Goal selection strategy, one of [\"future\", \"final\", \"episode\"].
        • num_goals (int) : The number of goals to sample.
• reward_fn (Callable) : Function to compute new rewards based on state and goal, whose definition is the same as https://github.com/DLR-RM/stable-baselines3/blob/master/stable_baselines3/common/envs/bit_flipping_env.py#L190.
        • copy_info_dict (bool) : Whether to copy the info dictionary and pass it to the compute_reward() method.

        Returns

HER replay storage.

        Methods:

        "},{"location":"api_docs/xploit/storage/her_replay_storage/#add","title":".add","text":"

        source

        .add(\n   observations: Dict[str, th.Tensor], actions: th.Tensor, rewards: th.Tensor,\n   terminateds: th.Tensor, truncateds: th.Tensor, infos: Dict[str, Any],\n   next_observations: Dict[str, th.Tensor]\n)\n

        Add sampled transitions into storage.

        Args

        • observations (Dict[str, th.Tensor]) : Observations.
        • actions (th.Tensor) : Actions.
        • rewards (th.Tensor) : Rewards.
        • terminateds (th.Tensor) : Termination flag.
        • truncateds (th.Tensor) : Truncation flag.
        • infos (Dict[str, Any]) : Additional information.
        • next_observations (Dict[str, th.Tensor]) : Next observations.

        Returns

        None.

        "},{"location":"api_docs/xploit/storage/her_replay_storage/#sample","title":".sample","text":"

        source

        .sample()\n

        Sample from the storage.

        "},{"location":"api_docs/xploit/storage/her_replay_storage/#update","title":".update","text":"

        source

        .update(\n   *args, **kwargs\n)\n

        Update the storage if necessary.
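
        A construction sketch with a hypothetical goal-conditioned reward function. The Dict observation keys follow the usual goal-conditioned convention ('observation', 'achieved_goal', 'desired_goal'), and the reward_fn signature mirrors the bit_flipping_env compute_reward reference above; both are assumptions here.

        ```python
        import gymnasium as gym
        import numpy as np
        from rllte.xploit.storage import HerReplayStorage

        obs_space = gym.spaces.Dict({
            "observation": gym.spaces.Box(-1.0, 1.0, shape=(10,), dtype=np.float32),
            "achieved_goal": gym.spaces.Box(-1.0, 1.0, shape=(3,), dtype=np.float32),
            "desired_goal": gym.spaces.Box(-1.0, 1.0, shape=(3,), dtype=np.float32),
        })
        action_space = gym.spaces.Box(-1.0, 1.0, shape=(4,), dtype=np.float32)

        def reward_fn(achieved_goal, desired_goal, info):
            # Hypothetical sparse reward: 0 within a small tolerance, -1 otherwise.
            return -(np.linalg.norm(achieved_goal - desired_goal, axis=-1) > 0.05).astype(np.float32)

        storage = HerReplayStorage(observation_space=obs_space, action_space=action_space,
                                   device="cpu", goal_selection_strategy="future",
                                   num_goals=4, reward_fn=reward_fn)
        ```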

        "},{"location":"api_docs/xploit/storage/nstep_replay_storage/","title":"NStepReplayStorage","text":""},{"location":"api_docs/xploit/storage/nstep_replay_storage/#nstepreplaystorage","title":"NStepReplayStorage","text":"

        source

        NStepReplayStorage(\n   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',\n   storage_size: int = 1000000, num_envs: int = 1, batch_size: int = 256,\n   num_workers: int = 4, pin_memory: bool = True, n_step: int = 3, discount: float = 0.99,\n   fetch_every: int = 1000, save_snapshot: bool = False\n)\n

        N-step replay storage. Implemented based on: https://github.com/facebookresearch/drqv2/blob/main/replay_buffer.py

        Args

        • observation_space (gym.Space) : Observation space.
        • action_space (gym.Space) : Action space.
        • device (str) : Device to convert replay data.
• storage_size (int) : Max number of elements in the storage.
        • num_envs (int) : The number of parallel environments.
        • batch_size (int) : Batch size of samples.
        • num_workers (int) : Subprocesses to use for data loading.
        • pin_memory (bool) : Pin memory or not.
• n_step (int) : The number of transitions to consider when computing n-step returns.
        • discount (float) : The discount factor for future rewards.
        • fetch_every (int) : Loading interval.
        • save_snapshot (bool) : Save loaded file or not.

        Returns

        N-step replay storage.

        Methods:

        "},{"location":"api_docs/xploit/storage/nstep_replay_storage/#reset","title":".reset","text":"

        source

        .reset()\n

        Reset the storage.

        "},{"location":"api_docs/xploit/storage/nstep_replay_storage/#add","title":".add","text":"

        source

        .add(\n   observations: th.Tensor, actions: th.Tensor, rewards: th.Tensor,\n   terminateds: th.Tensor, truncateds: th.Tensor, infos: Dict[str, Any],\n   next_observations: th.Tensor\n)\n

        Add sampled transitions into storage.

        Args

        • observations (th.Tensor) : Observations.
        • actions (th.Tensor) : Actions.
        • rewards (th.Tensor) : Rewards.
        • terminateds (th.Tensor) : Termination flag.
        • truncateds (th.Tensor) : Truncation flag.
        • infos (Dict[str, Any]) : Additional information.
        • next_observations (th.Tensor) : Next observations.

        Returns

        None.

        "},{"location":"api_docs/xploit/storage/nstep_replay_storage/#replay_iter","title":".replay_iter","text":"

        source

        .replay_iter()\n

        Create iterable dataloader.

        "},{"location":"api_docs/xploit/storage/nstep_replay_storage/#sample","title":".sample","text":"

        source

        .sample()\n

        Sample from the storage.

        "},{"location":"api_docs/xploit/storage/nstep_replay_storage/#update","title":".update","text":"

        source

        .update(\n   *args\n)\n

        Update the storage if necessary.

        "},{"location":"api_docs/xploit/storage/prioritized_replay_storage/","title":"PrioritizedReplayStorage","text":""},{"location":"api_docs/xploit/storage/prioritized_replay_storage/#prioritizedreplaystorage","title":"PrioritizedReplayStorage","text":"

        source

        PrioritizedReplayStorage(\n   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',\n   storage_size: int = 1000000, batch_size: int = 1024, num_envs: int = 1,\n   alpha: float = 0.6, beta: float = 0.4\n)\n

        Prioritized replay storage with proportional prioritization for off-policy algorithms. Since the storage updates the priorities of the samples based on the TD error, users should include the indices and weights in the returned information of the .update method of the agent. An example is: return {\"indices\": indices, \"weights\": weights, ..., \"Actor Loss\": actor_loss, ...}

        Args

        • observation_space (gym.Space) : Observation space.
        • action_space (gym.Space) : Action space.
        • device (str) : Device to convert the data.
        • storage_size (int) : The capacity of the storage.
        • num_envs (int) : The number of parallel environments.
        • batch_size (int) : Batch size of samples.
        • alpha (float) : Prioritization value.
        • beta (float) : Importance sampling value.

        Returns

        Prioritized replay storage.
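To make the update contract described above concrete, here is a hedged sketch of an agent-side update that returns the sampled indices, weights, and fresh priorities. The batch fields, array shapes, and loss values are placeholders for illustration, not the library's actual agent code.

```python
import numpy as np

def agent_update_step(storage) -> dict:
    """Hypothetical agent update; returns the metrics the storage consumes."""
    batch = storage.sample()               # the sampled batch carries indices and IS weights
    # ... compute TD errors for `batch`, weighting the loss by its IS weights ...
    td_errors = np.random.rand(256)        # placeholder for the real per-sample TD errors
    return {
        "indices": np.arange(256),         # indices of the sampled transitions (placeholder)
        "weights": np.ones(256),           # importance-sampling weights (see the note above)
        "priorities": np.abs(td_errors),   # new priorities consumed by storage.update()
        "Critic Loss": float(td_errors.mean()),
    }

# the trainer then forwards the returned metrics to the storage:
# storage.update(metrics=agent_update_step(storage))
```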

        Methods:

        "},{"location":"api_docs/xploit/storage/prioritized_replay_storage/#reset","title":".reset","text":"

        source

        .reset()\n

        Reset the storage.

        "},{"location":"api_docs/xploit/storage/prioritized_replay_storage/#annealing_beta","title":".annealing_beta","text":"

        source

        .annealing_beta()\n

        Linearly increases beta from the initial value to 1 over global training steps.

        "},{"location":"api_docs/xploit/storage/prioritized_replay_storage/#add","title":".add","text":"

        source

        .add(\n   observations: th.Tensor, actions: th.Tensor, rewards: th.Tensor,\n   terminateds: th.Tensor, truncateds: th.Tensor, infos: Dict[str, Any],\n   next_observations: th.Tensor\n)\n

        Add sampled transitions into storage.

        Args

        • observations (th.Tensor) : Observations.
        • actions (th.Tensor) : Actions.
        • rewards (th.Tensor) : Rewards.
        • terminateds (th.Tensor) : Termination flag.
        • truncateds (th.Tensor) : Truncation flag.
        • infos (Dict[str, Any]) : Additional information.
        • next_observations (th.Tensor) : Next observations.

        Returns

        None.

        "},{"location":"api_docs/xploit/storage/prioritized_replay_storage/#sample","title":".sample","text":"

        source

        .sample()\n

        Sample from the storage.

        "},{"location":"api_docs/xploit/storage/prioritized_replay_storage/#update","title":".update","text":"

        source

        .update(\n   metrics: Dict\n)\n

        Update the priorities.

        Args

        • metrics (Dict) : Training metrics from the agent used to update the priorities: indices (np.ndarray): The indices of the current batch data. priorities (np.ndarray): The priorities of the current batch data.

        Returns

        None.

        "},{"location":"api_docs/xploit/storage/vanilla_distributed_storage/","title":"VanillaDistributedStorage","text":""},{"location":"api_docs/xploit/storage/vanilla_distributed_storage/#vanilladistributedstorage","title":"VanillaDistributedStorage","text":"

        source

        VanillaDistributedStorage(\n   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',\n   storage_size: int = 100, num_storages: int = 80, num_envs: int = 45,\n   batch_size: int = 32\n)\n

        Vanilla distributed storage for distributed algorithms like IMPALA.

        Args

        • observation_space (gym.Space) : The observation space of environment.
        • action_space (gym.Space) : The action space of environment.
        • device (str) : Device (cpu, cuda, ...) on which the code should be run.
        • storage_size (int) : The capacity of the storage. Here it refers to the length of each rollout.
        • num_storages (int) : The number of shared-memory storages.
        • num_envs (int) : The number of parallel environments.
        • batch_size (int) : The batch size.

        Returns

        Vanilla distributed storage.

        Methods:

        "},{"location":"api_docs/xploit/storage/vanilla_distributed_storage/#reset","title":".reset","text":"

        source

        .reset()\n

        Reset the storage.

        "},{"location":"api_docs/xploit/storage/vanilla_distributed_storage/#add","title":".add","text":"

        source

        .add(\n   idx: int, timestep: int, actor_output: Dict[str, Any], env_output: Dict[str,\n   Any]\n)\n

        Add sampled transitions into storage.

        Args

        • idx (int) : The index of storage.
        • timestep (int) : The timestep of rollout.
        • actor_output (Dict) : Actor output.
        • env_output (Dict) : Environment output.

        Returns

        None

        "},{"location":"api_docs/xploit/storage/vanilla_distributed_storage/#sample","title":".sample","text":"

        source

        .sample(\n   free_queue: mp.SimpleQueue, full_queue: mp.SimpleQueue, lock = threading.Lock()\n)\n

        Sample transitions from the storage.

        Args

        • free_queue (Queue) : Free queue for communication.
        • full_queue (Queue) : Full queue for communication.
        • lock (Lock) : Thread lock.

        Returns

        Batched samples.

        "},{"location":"api_docs/xploit/storage/vanilla_distributed_storage/#update","title":".update","text":"

        source

        .update(\n   *args, **kwargs\n)\n

        Update the storage.

        "},{"location":"api_docs/xploit/storage/vanilla_replay_storage/","title":"VanillaReplayStorage","text":""},{"location":"api_docs/xploit/storage/vanilla_replay_storage/#vanillareplaystorage","title":"VanillaReplayStorage","text":"

        source

        VanillaReplayStorage(\n   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',\n   storage_size: int = 1000000, batch_size: int = 1024, num_envs: int = 1\n)\n

        Vanilla replay storage for off-policy algorithms.

        Args

        • observation_space (gym.Space) : Observation space.
        • action_space (gym.Space) : Action space.
        • device (str) : Device to convert the data.
        • storage_size (int) : The capacity of the storage.
        • num_envs (int) : The number of parallel environments.
        • batch_size (int) : Batch size of samples.

        Returns

        Vanilla replay storage.
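A short usage sketch of the add/sample cycle follows; the import path, spaces, and tensor shapes are assumptions for illustration.

```python
import gymnasium as gym
import torch as th
from rllte.xploit.storage import VanillaReplayStorage  # assumed export path

obs_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(17,))
act_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(6,))
storage = VanillaReplayStorage(
    observation_space=obs_space, action_space=act_space,
    device="cpu", storage_size=100000, batch_size=256, num_envs=1,
)

# one transition per parallel environment and step (num_envs = 1 here)
storage.add(
    observations=th.rand(1, 17), actions=th.rand(1, 6), rewards=th.zeros(1),
    terminateds=th.zeros(1), truncateds=th.zeros(1), infos={},
    next_observations=th.rand(1, 17),
)
# in practice, sample only after enough transitions have been collected
batch = storage.sample()
```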

        Methods:

        "},{"location":"api_docs/xploit/storage/vanilla_replay_storage/#reset","title":".reset","text":"

        source

        .reset()\n

        Reset the storage.

        "},{"location":"api_docs/xploit/storage/vanilla_replay_storage/#add","title":".add","text":"

        source

        .add(\n   observations: th.Tensor, actions: th.Tensor, rewards: th.Tensor,\n   terminateds: th.Tensor, truncateds: th.Tensor, infos: Dict[str, Any],\n   next_observations: th.Tensor\n)\n

        Add sampled transitions into storage.

        Args

        • observations (th.Tensor) : Observations.
        • actions (th.Tensor) : Actions.
        • rewards (th.Tensor) : Rewards.
        • terminateds (th.Tensor) : Termination flag.
        • truncateds (th.Tensor) : Truncation flag.
        • infos (Dict[str, Any]) : Additional information.
        • next_observations (th.Tensor) : Next observations.

        Returns

        None.

        "},{"location":"api_docs/xploit/storage/vanilla_replay_storage/#sample","title":".sample","text":"

        source

        .sample()\n

        Sample from the storage.

        "},{"location":"api_docs/xploit/storage/vanilla_replay_storage/#update","title":".update","text":"

        source

        .update(\n   *args\n)\n

        Update the storage if necessary.

        "},{"location":"api_docs/xploit/storage/vanilla_rollout_storage/","title":"VanillaRolloutStorage","text":""},{"location":"api_docs/xploit/storage/vanilla_rollout_storage/#vanillarolloutstorage","title":"VanillaRolloutStorage","text":"

        source

        VanillaRolloutStorage(\n   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',\n   storage_size: int = 256, batch_size: int = 64, num_envs: int = 8,\n   discount: float = 0.999, gae_lambda: float = 0.95\n)\n

        Vanilla rollout storage for on-policy algorithms.

        Args

        • observation_space (gym.Space) : The observation space of environment.
        • action_space (gym.Space) : The action space of environment.
        • device (str) : Device to convert the data.
        • storage_size (int) : The capacity of the storage. Here it refers to the length of each rollout.
        • batch_size (int) : Batch size of samples.
        • num_envs (int) : The number of parallel environments.
        • discount (float) : The discount factor.
        • gae_lambda (float) : Weighting coefficient for generalized advantage estimation (GAE).

        Returns

        Vanilla rollout storage.
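The usual on-policy loop is sketched below: fill the rollout with .add, run GAE via .compute_returns_and_advantages, then draw mini-batches. The import path, tensor shapes, and the assumption that .sample() can be iterated over are mine, not from the source.

```python
import gymnasium as gym
import torch as th
from rllte.xploit.storage import VanillaRolloutStorage  # assumed export path

obs_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(4,))
act_space = gym.spaces.Discrete(2)
storage = VanillaRolloutStorage(
    observation_space=obs_space, action_space=act_space, device="cpu",
    storage_size=256, batch_size=64, num_envs=8,
    discount=0.999, gae_lambda=0.95,
)

for _ in range(256):  # one entry per step for all 8 parallel environments
    storage.add(
        observations=th.rand(8, 4), actions=th.randint(0, 2, (8, 1)),
        rewards=th.zeros(8), terminateds=th.zeros(8), truncateds=th.zeros(8),
        infos={}, next_observations=th.rand(8, 4),
        log_probs=th.zeros(8), values=th.zeros(8),
    )

storage.compute_returns_and_advantages(last_values=th.zeros(8))  # GAE over the rollout
for batch in storage.sample():  # mini-batches for the policy/value update
    pass
```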

        Methods:

        "},{"location":"api_docs/xploit/storage/vanilla_rollout_storage/#reset","title":".reset","text":"

        source

        .reset()\n

        Reset the storage.

        "},{"location":"api_docs/xploit/storage/vanilla_rollout_storage/#add","title":".add","text":"

        source

        .add(\n   observations: th.Tensor, actions: th.Tensor, rewards: th.Tensor,\n   terminateds: th.Tensor, truncateds: th.Tensor, infos: Dict,\n   next_observations: th.Tensor, log_probs: th.Tensor, values: th.Tensor\n)\n

        Add sampled transitions into storage.

        Args

        • observations (th.Tensor) : Observations.
        • actions (th.Tensor) : Actions.
        • rewards (th.Tensor) : Rewards.
        • terminateds (th.Tensor) : Termination signals.
        • truncateds (th.Tensor) : Truncation signals.
        • infos (Dict) : Extra information.
        • next_observations (th.Tensor) : Next observations.
        • log_probs (th.Tensor) : Log of the probability evaluated at actions.
        • values (th.Tensor) : Estimated values.

        Returns

        None.

        "},{"location":"api_docs/xploit/storage/vanilla_rollout_storage/#update","title":".update","text":"

        source

        .update()\n

        Update the terminal state of each env.

        "},{"location":"api_docs/xploit/storage/vanilla_rollout_storage/#compute_returns_and_advantages","title":".compute_returns_and_advantages","text":"

        source

        .compute_returns_and_advantages(\n   last_values: th.Tensor\n)\n

        Perform generalized advantage estimation (GAE).

        Args

        • last_values (th.Tensor) : Estimated values of the last step.

        Returns

        None.

        "},{"location":"api_docs/xploit/storage/vanilla_rollout_storage/#sample","title":".sample","text":"

        source

        .sample()\n

        Sample data from storage.

        "},{"location":"api_docs/xplore/augmentation/gaussian_noise/","title":"GaussianNoise","text":""},{"location":"api_docs/xplore/augmentation/gaussian_noise/#gaussiannoise","title":"GaussianNoise","text":"

        source

        GaussianNoise(\n   mu: float = 0, sigma: float = 1.0\n)\n

        Gaussian noise operation for processing state-based observations.

        Args

        • mu (float or th.Tensor) : Mean of the distribution.
        • sigma (float or th.Tensor) : Standard deviation of the distribution.

        Returns

        Augmented states.
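A minimal sketch of applying the augmentation to a batch of state vectors; the import path, the nn.Module-style call, and the shapes are assumptions.

```python
import torch as th
from rllte.xplore.augmentation import GaussianNoise  # assumed export path

aug = GaussianNoise(mu=0.0, sigma=0.1)
states = th.rand(32, 17)    # batch of state-based observations (assumed shape)
noisy_states = aug(states)  # forward pass adds zero-mean Gaussian noise
```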

        Methods:

        "},{"location":"api_docs/xplore/augmentation/gaussian_noise/#forward","title":".forward","text":"

        source

        .forward(\n   x: th.Tensor\n)\n

        "},{"location":"api_docs/xplore/augmentation/grayscale/","title":"GrayScale","text":""},{"location":"api_docs/xplore/augmentation/grayscale/#grayscale","title":"GrayScale","text":"

        source

        Grayscale operation for image augmentation.

        Methods:

        "},{"location":"api_docs/xplore/augmentation/grayscale/#forward","title":".forward","text":"

        source

        .forward(\n   x: th.Tensor\n)\n

        "},{"location":"api_docs/xplore/augmentation/identity/","title":"Identity","text":""},{"location":"api_docs/xplore/augmentation/identity/#identity","title":"Identity","text":"

        source

        Identity augmentation.

        Methods:

        "},{"location":"api_docs/xplore/augmentation/identity/#forward","title":".forward","text":"

        source

        .forward(\n   x: th.Tensor\n)\n

        "},{"location":"api_docs/xplore/augmentation/random_amplitude_scaling/","title":"RandomAmplitudeScaling","text":""},{"location":"api_docs/xplore/augmentation/random_amplitude_scaling/#randomamplitudescaling","title":"RandomAmplitudeScaling","text":"

        source

        RandomAmplitudeScaling(\n   low: float = 0.6, high: float = 1.2\n)\n

        Random amplitude scaling operation for processing state-based observations.

        Args

        • low (float) : lower range (inclusive).
        • high (float) : upper range (exclusive).

        Returns

        Augmented states.

        Methods:

        "},{"location":"api_docs/xplore/augmentation/random_amplitude_scaling/#forward","title":".forward","text":"

        source

        .forward(\n   x: th.Tensor\n)\n

        "},{"location":"api_docs/xplore/augmentation/random_colorjitter/","title":"RandomColorJitter","text":""},{"location":"api_docs/xplore/augmentation/random_colorjitter/#randomcolorjitter","title":"RandomColorJitter","text":"

        source

        RandomColorJitter(\n   brightness: float = 0.4, contrast: float = 0.4, saturation: float = 0.4,\n   hue: float = 0.5\n)\n

        Random ColorJitter operation for image augmentation.

        Args

        • brightness (float) : How much to jitter brightness. Should be a non-negative number.
        • contrast (float) : How much to jitter contrast. Should be a non-negative number.
        • saturation (float) : How much to jitter saturation. Should be a non-negative number.
        • hue (float) : How much to jitter hue. Should satisfy 0 <= hue <= 0.5 or -0.5 <= min <= max <= 0.5.

        Returns

        Augmented images.

        Methods:

        "},{"location":"api_docs/xplore/augmentation/random_colorjitter/#forward","title":".forward","text":"

        source

        .forward(\n   x: th.Tensor\n)\n

        "},{"location":"api_docs/xplore/augmentation/random_convolution/","title":"RandomConvolution","text":""},{"location":"api_docs/xplore/augmentation/random_convolution/#randomconvolution","title":"RandomConvolution","text":"

        source

        Random Convolution operation for image augmentation. Note that the input images should be normalized torch tensors.

        Methods:

        "},{"location":"api_docs/xplore/augmentation/random_convolution/#forward","title":".forward","text":"

        source

        .forward(\n   x: th.Tensor\n)\n

        "},{"location":"api_docs/xplore/augmentation/random_crop/","title":"RandomCrop","text":""},{"location":"api_docs/xplore/augmentation/random_crop/#randomcrop","title":"RandomCrop","text":"

        source

        RandomCrop(\n   pad: int = 4, out: int = 84\n)\n

        Random crop operation for processing image-based observations.

        Args

        • pad (int) : Padding size.
        • out (int) : Desired output size.

        Returns

        Augmented images.

        Methods:

        "},{"location":"api_docs/xplore/augmentation/random_crop/#forward","title":".forward","text":"

        source

        .forward(\n   x: th.Tensor\n)\n

        "},{"location":"api_docs/xplore/augmentation/random_cutout/","title":"RandomCutout","text":""},{"location":"api_docs/xplore/augmentation/random_cutout/#randomcutout","title":"RandomCutout","text":"

        source

        RandomCutout(\n   min_cut: int = 10, max_cut: int = 30\n)\n

        Random Cutout operation for image augmentation.

        Args

        • min_cut (int) : Min size of the cut shape.
        • max_cut (int) : Max size of the cut shape.

        Returns

        Augmented images.

        Methods:

        "},{"location":"api_docs/xplore/augmentation/random_cutout/#forward","title":".forward","text":"

        source

        .forward(\n   x: th.Tensor\n)\n

        "},{"location":"api_docs/xplore/augmentation/random_cutoutcolor/","title":"RandomCutoutColor","text":""},{"location":"api_docs/xplore/augmentation/random_cutoutcolor/#randomcutoutcolor","title":"RandomCutoutColor","text":"

        source

        RandomCutoutColor(\n   min_cut: int = 10, max_cut: int = 30\n)\n

        Random Cutout Color operation for image augmentation.

        Args

        • min_cut (int) : min size of the cut shape.
        • max_cut (int) : max size of the cut shape.

        Returns

        Augmented images.

        Methods:

        "},{"location":"api_docs/xplore/augmentation/random_cutoutcolor/#forward","title":".forward","text":"

        source

        .forward(\n   x: th.Tensor\n)\n

        "},{"location":"api_docs/xplore/augmentation/random_flip/","title":"RandomFlip","text":""},{"location":"api_docs/xplore/augmentation/random_flip/#randomflip","title":"RandomFlip","text":"

        source

        RandomFlip(\n   p: float = 0.2\n)\n

        Random flip operation for image augmentation.

        Args

        • p (float) : The probability of flipping each image in a batch.

        Returns

        Augmented images.

        Methods:

        "},{"location":"api_docs/xplore/augmentation/random_flip/#forward","title":".forward","text":"

        source

        .forward(\n   x: th.Tensor\n)\n

        "},{"location":"api_docs/xplore/augmentation/random_rotate/","title":"RandomRotate","text":""},{"location":"api_docs/xplore/augmentation/random_rotate/#randomrotate","title":"RandomRotate","text":"

        source

        RandomRotate(\n   p: float = 0.2\n)\n

        Random rotate operation for processing image-based observations.

        Args

        • p (float) : The probability of rotating each image in a batch.

        Returns

        Randomly rotated images in a batch.

        Methods:

        "},{"location":"api_docs/xplore/augmentation/random_rotate/#forward","title":".forward","text":"

        source

        .forward(\n   x: th.Tensor\n)\n

        "},{"location":"api_docs/xplore/augmentation/random_shift/","title":"RandomShift","text":""},{"location":"api_docs/xplore/augmentation/random_shift/#randomshift","title":"RandomShift","text":"

        source

        RandomShift(\n   pad: int = 4\n)\n

        Random shift operation for processing image-based observations.

        Args

        • pad (int) : Padding size.

        Returns

        Augmented images.
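For image observations, the DrQ-style random shift can be applied the same way; the import path, shapes, and module-call convention below are assumptions.

```python
import torch as th
from rllte.xplore.augmentation import RandomShift  # assumed export path

aug = RandomShift(pad=4)
frames = th.rand(32, 9, 84, 84)  # batch of stacked 84x84 frames (assumed shape)
shifted = aug(frames)            # each image is padded by 4 pixels and randomly shifted
```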

        Methods:

        "},{"location":"api_docs/xplore/augmentation/random_shift/#forward","title":".forward","text":"

        source

        .forward(\n   x: th.Tensor\n)\n

        "},{"location":"api_docs/xplore/augmentation/random_translate/","title":"RandomTranslate","text":""},{"location":"api_docs/xplore/augmentation/random_translate/#randomtranslate","title":"RandomTranslate","text":"

        source

        RandomTranslate(\n   size: int = 256, scale_factor: float = 0.75\n)\n

        Random translate operation for processing image-based observations.

        Args

        • size (int) : The scale size of the translated images.
        • scale_factor (float) : The scale factor ratio of the translated images. Should satisfy 0.0 <= scale_factor <= 1.0.

        Returns

        Augmented images.

        Methods:

        "},{"location":"api_docs/xplore/augmentation/random_translate/#forward","title":".forward","text":"

        source

        .forward(\n   x: th.Tensor\n)\n

        "},{"location":"api_docs/xplore/distribution/bernoulli/","title":"Bernoulli","text":""},{"location":"api_docs/xplore/distribution/bernoulli/#bernoulli","title":"Bernoulli","text":"

        source

        Bernoulli distribution for sampling actions for 'MultiBinary' tasks.

        Methods:

        "},{"location":"api_docs/xplore/distribution/bernoulli/#probs","title":".probs","text":"

        source

        .probs()\n

        Return probabilities.

        "},{"location":"api_docs/xplore/distribution/bernoulli/#logits","title":".logits","text":"

        source

        .logits()\n

        Returns the unnormalized log probabilities.

        "},{"location":"api_docs/xplore/distribution/bernoulli/#sample","title":".sample","text":"

        source

        .sample(\n   sample_shape: th.Size = th.Size()\n)\n

        Generates a sample_shape shaped sample or sample_shape shaped batch of samples if the distribution parameters are batched.

        Args

        • sample_shape (th.Size) : The size of the sample to be drawn.

        Returns

        A sample_shape shaped sample.

        "},{"location":"api_docs/xplore/distribution/bernoulli/#log_prob","title":".log_prob","text":"

        source

        .log_prob(\n   actions: th.Tensor\n)\n

        Returns the log of the probability density/mass function evaluated at actions.

        Args

        • actions (th.Tensor) : The actions to be evaluated.

        Returns

        The log_prob value.

        "},{"location":"api_docs/xplore/distribution/bernoulli/#entropy","title":".entropy","text":"

        source

        .entropy()\n

        Returns the Shannon entropy of distribution.

        "},{"location":"api_docs/xplore/distribution/bernoulli/#mode","title":".mode","text":"

        source

        .mode()\n

        Returns the mode of the distribution.

        "},{"location":"api_docs/xplore/distribution/bernoulli/#mean","title":".mean","text":"

        source

        .mean()\n

        Returns the mean of the distribution.

        "},{"location":"api_docs/xplore/distribution/categorical/","title":"Categorical","text":""},{"location":"api_docs/xplore/distribution/categorical/#categorical","title":"Categorical","text":"

        source

        Categorical distribution for sampling actions for 'Discrete' tasks.

        Methods:

        "},{"location":"api_docs/xplore/distribution/categorical/#probs","title":".probs","text":"

        source

        .probs()\n

        Return probabilities.

        "},{"location":"api_docs/xplore/distribution/categorical/#logits","title":".logits","text":"

        source

        .logits()\n

        Returns the unnormalized log probabilities.

        "},{"location":"api_docs/xplore/distribution/categorical/#sample","title":".sample","text":"

        source

        .sample(\n   sample_shape: th.Size = th.Size()\n)\n

        Generates a sample_shape shaped sample or sample_shape shaped batch of samples if the distribution parameters are batched.

        Args

        • sample_shape (th.Size) : The size of the sample to be drawn.

        Returns

        A sample_shape shaped sample.

        "},{"location":"api_docs/xplore/distribution/categorical/#log_prob","title":".log_prob","text":"

        source

        .log_prob(\n   actions: th.Tensor\n)\n

        Returns the log of the probability density/mass function evaluated at actions.

        Args

        • actions (th.Tensor) : The actions to be evaluated.

        Returns

        The log_prob value.

        "},{"location":"api_docs/xplore/distribution/categorical/#entropy","title":".entropy","text":"

        source

        .entropy()\n

        Returns the Shannon entropy of distribution.

        "},{"location":"api_docs/xplore/distribution/categorical/#mode","title":".mode","text":"

        source

        .mode()\n

        Returns the mode of the distribution.

        "},{"location":"api_docs/xplore/distribution/categorical/#mean","title":".mean","text":"

        source

        .mean()\n

        Returns the mean of the distribution.

        "},{"location":"api_docs/xplore/distribution/diagonal_gaussian/","title":"DiagonalGaussian","text":""},{"location":"api_docs/xplore/distribution/diagonal_gaussian/#diagonalgaussian","title":"DiagonalGaussian","text":"

        source

        Diagonal Gaussian distribution for 'Box' tasks.

        Methods:

        "},{"location":"api_docs/xplore/distribution/diagonal_gaussian/#sample","title":".sample","text":"

        source

        .sample(\n   sample_shape: th.Size = th.Size()\n)\n

        Generates a sample_shape shaped sample or sample_shape shaped batch of samples if the distribution parameters are batched.

        Args

        • sample_shape (th.Size) : The size of the sample to be drawn.

        Returns

        A sample_shape shaped sample.

        "},{"location":"api_docs/xplore/distribution/diagonal_gaussian/#rsample","title":".rsample","text":"

        source

        .rsample(\n   sample_shape: th.Size = th.Size()\n)\n

        Generates a sample_shape shaped reparameterized sample or sample_shape shaped batch of reparameterized samples if the distribution parameters are batched.

        Args

        • sample_shape (th.Size) : The size of the sample to be drawn.

        Returns

        A sample_shape shaped sample.

        "},{"location":"api_docs/xplore/distribution/diagonal_gaussian/#mean","title":".mean","text":"

        source

        .mean()\n

        Returns the mean of the distribution.

        "},{"location":"api_docs/xplore/distribution/diagonal_gaussian/#mode","title":".mode","text":"

        source

        .mode()\n

        Returns the mode of the distribution.

        "},{"location":"api_docs/xplore/distribution/diagonal_gaussian/#stddev","title":".stddev","text":"

        source

        .stddev()\n

        Returns the standard deviation of the distribution.

        "},{"location":"api_docs/xplore/distribution/diagonal_gaussian/#variance","title":".variance","text":"

        source

        .variance()\n

        Returns the variance of the distribution.

        "},{"location":"api_docs/xplore/distribution/diagonal_gaussian/#log_prob","title":".log_prob","text":"

        source

        .log_prob(\n   actions: th.Tensor\n)\n

        Returns the log of the probability density/mass function evaluated at actions.

        Args

        • actions (th.Tensor) : The actions to be evaluated.

        Returns

        The log_prob value.

        "},{"location":"api_docs/xplore/distribution/diagonal_gaussian/#entropy","title":".entropy","text":"

        source

        .entropy()\n

        Returns the Shannon entropy of distribution.

        "},{"location":"api_docs/xplore/distribution/multi_categorical/","title":"MultiCategorical","text":""},{"location":"api_docs/xplore/distribution/multi_categorical/#multicategorical","title":"MultiCategorical","text":"

        source

        Multi-categorical distribution for sampling actions for 'MultiDiscrete' tasks.

        Methods:

        "},{"location":"api_docs/xplore/distribution/multi_categorical/#probs","title":".probs","text":"

        source

        .probs()\n

        Return probabilities.

        "},{"location":"api_docs/xplore/distribution/multi_categorical/#logits","title":".logits","text":"

        source

        .logits()\n

        Returns the unnormalized log probabilities.

        "},{"location":"api_docs/xplore/distribution/multi_categorical/#sample","title":".sample","text":"

        source

        .sample(\n   sample_shape: th.Size = th.Size()\n)\n

        Generates a sample_shape shaped sample or sample_shape shaped batch of samples if the distribution parameters are batched.

        Args

        • sample_shape (th.Size) : The size of the sample to be drawn.

        Returns

        A sample_shape shaped sample.

        "},{"location":"api_docs/xplore/distribution/multi_categorical/#log_prob","title":".log_prob","text":"

        source

        .log_prob(\n   actions: th.Tensor\n)\n

        Returns the log of the probability density/mass function evaluated at actions.

        Args

        • actions (th.Tensor) : The actions to be evaluated.

        Returns

        The log_prob value.

        "},{"location":"api_docs/xplore/distribution/multi_categorical/#entropy","title":".entropy","text":"

        source

        .entropy()\n

        Returns the Shannon entropy of distribution.

        "},{"location":"api_docs/xplore/distribution/multi_categorical/#mode","title":".mode","text":"

        source

        .mode()\n

        Returns the mode of the distribution.

        "},{"location":"api_docs/xplore/distribution/multi_categorical/#mean","title":".mean","text":"

        source

        .mean()\n

        Returns the mean of the distribution.

        "},{"location":"api_docs/xplore/distribution/normal_noise/","title":"NormalNoise","text":""},{"location":"api_docs/xplore/distribution/normal_noise/#normalnoise","title":"NormalNoise","text":"

        source

        NormalNoise(\n   mu: Union[float, th.Tensor] = 0.0, sigma: Union[float, th.Tensor] = 1.0,\n   low: float = -1.0, high: float = 1.0, eps: float = 1e-06\n)\n

        Gaussian action noise.

        Args

        • mu (Union[float, th.Tensor]) : Mean of the noise.
        • sigma (Union[float, th.Tensor]) : Standard deviation of the noise.
        • low (float) : The lower bound of the noise.
        • high (float) : The upper bound of the noise.
        • eps (float) : A small value to avoid numerical instability.

        Returns

        Gaussian action noise instance.
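A hedged sketch of drawing clipped exploration noise around a deterministic action, in the spirit of DDPG/TD3-style exploration; the action shape and the interpretation of the returned sample are assumptions.

```python
import torch as th
from rllte.xplore.distribution import NormalNoise  # assumed export path

mu = th.zeros(1, 6)                   # deterministic actions from the actor (assumed shape)
dist = NormalNoise(mu=mu, sigma=0.2)  # Gaussian noise centred on the actions
actions = dist.sample(clip=0.3)       # noise clipped to [-0.3, 0.3] before use
```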

        Methods:

        "},{"location":"api_docs/xplore/distribution/normal_noise/#sample","title":".sample","text":"

        source

        .sample(\n   clip: Optional[float] = None, sample_shape: th.Size = th.Size()\n)\n

        Generates a sample_shape shaped sample or sample_shape shaped batch of samples if the distribution parameters are batched.

        Args

        • clip (Optional[float]) : The clip range of the sampled noises.
        • sample_shape (th.Size) : The size of the sample to be drawn.

        Returns

        A sample_shape shaped sample.

        "},{"location":"api_docs/xplore/distribution/normal_noise/#mean","title":".mean","text":"

        source

        .mean()\n

        Returns the mean of the distribution.

        "},{"location":"api_docs/xplore/distribution/normal_noise/#mode","title":".mode","text":"

        source

        .mode()\n

        Returns the mode of the distribution.

        "},{"location":"api_docs/xplore/distribution/ornstein_uhlenbeck_noise/","title":"OrnsteinUhlenbeckNoise","text":""},{"location":"api_docs/xplore/distribution/ornstein_uhlenbeck_noise/#ornsteinuhlenbecknoise","title":"OrnsteinUhlenbeckNoise","text":"

        source

        OrnsteinUhlenbeckNoise(\n   mu: Union[float, th.Tensor] = 0.0, sigma: Union[float, th.Tensor] = 1.0,\n   low: float = -1.0, high: float = 1.0, eps: float = 1e-06, theta: float = 0.15,\n   dt: float = 0.01\n)\n

        Ornstein-Uhlenbeck action noise. Based on http://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab

        Args

        • mu (Union[float, th.Tensor]) : Mean of the noise.
        • sigma (Union[float, th.Tensor]) : Standard deviation of the noise.
        • low (float) : The lower bound of the noise.
        • high (float) : The upper bound of the noise.
        • eps (float) : A small value to avoid numerical instability.
        • theta (float) : The rate of mean reversion.
        • dt (float) : Timestep for the noise.
        • stddev_schedule (str) : Use the exploration std schedule.
        • stddev_clip (float) : The exploration std clip range.

        Returns

        Ornstein-Uhlenbeck noise instance.

        Methods:

        "},{"location":"api_docs/xplore/distribution/ornstein_uhlenbeck_noise/#sample","title":".sample","text":"

        source

        .sample(\n   clip: Optional[float] = None, sample_shape: th.Size = th.Size()\n)\n

        Generates a sample_shape shaped sample or sample_shape shaped batch of samples if the distribution parameters are batched.

        Args

        • clip (Optional[float]) : The clip range of the sampled noises.
        • sample_shape (th.Size) : The size of the sample to be drawn.

        Returns

        A sample_shape shaped sample.

        "},{"location":"api_docs/xplore/distribution/ornstein_uhlenbeck_noise/#reset","title":".reset","text":"

        source

        .reset()\n

        Reset the noise.

        "},{"location":"api_docs/xplore/distribution/ornstein_uhlenbeck_noise/#mean","title":".mean","text":"

        source

        .mean()\n

        Returns the mean of the distribution.

        "},{"location":"api_docs/xplore/distribution/ornstein_uhlenbeck_noise/#mode","title":".mode","text":"

        source

        .mode()\n

        Returns the mode of the distribution.

        "},{"location":"api_docs/xplore/distribution/squashed_normal/","title":"SquashedNormal","text":""},{"location":"api_docs/xplore/distribution/squashed_normal/#squashednormal","title":"SquashedNormal","text":"

        source

        Squashed normal distribution for Box tasks.

        Methods:

        "},{"location":"api_docs/xplore/distribution/squashed_normal/#sample","title":".sample","text":"

        source

        .sample(\n   sample_shape: th.Size = th.Size()\n)\n

        Generates a sample_shape shaped sample or sample_shape shaped batch of samples if the distribution parameters are batched.

        Args

        • sample_shape (th.Size) : The size of the sample to be drawn.

        Returns

        A sample_shape shaped sample.

        "},{"location":"api_docs/xplore/distribution/squashed_normal/#rsample","title":".rsample","text":"

        source

        .rsample(\n   sample_shape: th.Size = th.Size()\n)\n

        Generates a sample_shape shaped reparameterized sample or sample_shape shaped batch of reparameterized samples if the distribution parameters are batched.

        Args

        • sample_shape (th.Size) : The size of the sample to be drawn.

        Returns

        A sample_shape shaped sample.

        "},{"location":"api_docs/xplore/distribution/squashed_normal/#mean","title":".mean","text":"

        source

        .mean()\n

        Return the transformed mean.

        "},{"location":"api_docs/xplore/distribution/squashed_normal/#mode","title":".mode","text":"

        source

        .mode()\n

        Returns the mode of the distribution.

        "},{"location":"api_docs/xplore/distribution/squashed_normal/#log_prob","title":".log_prob","text":"

        source

        .log_prob(\n   actions: th.Tensor\n)\n

        Scores the sample by inverting the transform(s) and computing the score using the score of the base distribution and the log abs det jacobian.

        Args

        • actions (th.Tensor) : The actions to be evaluated.

        Returns

        The log_prob value.
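For reference, assuming the squashing transform is tanh (standard for Box action spaces, though not stated here), the change-of-variables identity this method applies is:

$$
\log p_Y(a) = \log p_X(u) - \sum_i \log\left(1 - \tanh^2(u_i)\right), \qquad a = \tanh(u), \; u \sim \mathcal{N}(\mu, \operatorname{diag}(\sigma^2)).
$$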

        "},{"location":"api_docs/xplore/distribution/truncated_normal_noise/","title":"TruncatedNormalNoise","text":""},{"location":"api_docs/xplore/distribution/truncated_normal_noise/#truncatednormalnoise","title":"TruncatedNormalNoise","text":"

        source

        TruncatedNormalNoise(\n   mu: Union[float, th.Tensor] = 0.0, sigma: Union[float, th.Tensor] = 1.0,\n   low: float = -1.0, high: float = 1.0, eps: float = 1e-06,\n   stddev_schedule: str = 'linear(1.0, 0.1, 100000)'\n)\n

        Truncated normal action noise. See Section 3.1 of \"Mastering Visual Continuous Control: Improved Data-Augmented Reinforcement Learning\".

        Args

        • mu (Union[float, th.Tensor]) : Mean of the noise.
        • sigma (Union[float, th.Tensor]) : Standard deviation of the noise.
        • low (float) : The lower bound of the noise.
        • high (float) : The upper bound of the noise.
        • eps (float) : A small value to avoid numerical instability.
        • stddev_schedule (str) : The exploration std schedule. Available options are: linear(init, final, duration) and step_linear(init, final1, duration1, final2, duration2).

        Returns

        Truncated normal noise instance.
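A construction sketch showing the two schedule formats named above; the import path (alongside the other distributions) is an assumption.

```python
from rllte.xplore.distribution import TruncatedNormalNoise  # assumed export path

# std decays linearly from 1.0 to 0.1 over the first 100k steps
noise = TruncatedNormalNoise(stddev_schedule="linear(1.0, 0.1, 100000)")

# two-stage variant: 1.0 -> 0.1 over 100k steps, then down to 0.05 over 500k steps
noise2 = TruncatedNormalNoise(
    stddev_schedule="step_linear(1.0, 0.1, 100000, 0.05, 500000)"
)
```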

        Methods:

        "},{"location":"api_docs/xplore/distribution/truncated_normal_noise/#sample","title":".sample","text":"

        source

        .sample(\n   clip: Optional[float] = None, sample_shape: th.Size = th.Size()\n)\n

        Generates a sample_shape shaped sample or sample_shape shaped batch of samples if the distribution parameters are batched.

        Args

        • clip (Optional[float]) : The clip range of the sampled noises.
        • sample_shape (th.Size) : The size of the sample to be drawn.

        Returns

        A sample_shape shaped sample.

        "},{"location":"api_docs/xplore/distribution/truncated_normal_noise/#mean","title":".mean","text":"

        source

        .mean()\n

        Returns the mean of the distribution.

        "},{"location":"api_docs/xplore/distribution/truncated_normal_noise/#mode","title":".mode","text":"

        source

        .mode()\n

        Returns the mode of the distribution.

        "},{"location":"api_docs/xplore/reward/girm/","title":"GIRM","text":""},{"location":"api_docs/xplore/reward/girm/#girm","title":"GIRM","text":"

        source

        GIRM(\n   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',\n   beta: float = 0.05, kappa: float = 2.5e-05, latent_dim: int = 128, lr: float = 0.001,\n   batch_size: int = 64, lambd: float = 0.5, lambd_recon: float = 1.0,\n   lambd_action: float = 1.0, kld_loss_beta: float = 1.0\n)\n

        Intrinsic Reward Driven Imitation Learning via Generative Model (GIRM). See paper: http://proceedings.mlr.press/v119/yu20d/yu20d.pdf

        Args

        • observation_space (Space) : The observation space of environment.
        • action_space (Space) : The action space of environment.
        • device (str) : Device (cpu, cuda, ...) on which the code should be run.
        • beta (float) : The initial weighting coefficient of the intrinsic rewards.
        • kappa (float) : The decay rate.
        • latent_dim (int) : The dimension of encoding vectors.
        • lr (float) : The learning rate.
        • batch_size (int) : The batch size for update.
        • lambd (float) : The weighting coefficient for combining actions.
        • lambd_recon (float) : Weighting coefficient of the reconstruction loss.
        • lambd_action (float) : Weighting coefficient of the action loss.
        • kld_loss_beta (float) : Weighting coefficient of the divergence loss.

        Returns

        Instance of GIRM.

        Methods:

        "},{"location":"api_docs/xplore/reward/girm/#get_vae_loss","title":".get_vae_loss","text":"

        source

        .get_vae_loss(\n   recon_x: th.Tensor, x: th.Tensor, mean: th.Tensor, logvar: th.Tensor\n)\n

        Compute the vae loss.

        Args

        • recon_x (th.Tensor) : Reconstructed x.
        • x (th.Tensor) : Input x.
        • mean (th.Tensor) : Sample mean.
        • logvar (th.Tensor) : Log of the sample variance.

        Returns

        Loss values.

        "},{"location":"api_docs/xplore/reward/girm/#compute_irs","title":".compute_irs","text":"

        source

        .compute_irs(\n   samples: Dict, step: int = 0\n)\n

        Compute the intrinsic rewards for current samples.

        Args

        • samples (Dict) : The collected samples. A python dict like {obs (n_steps, n_envs, obs_shape) , actions (n_steps, n_envs, action_shape) , rewards (n_steps, n_envs) , next_obs (n_steps, n_envs, *obs_shape) }.
        • step (int) : The global training step.
        Returns

          The intrinsic rewards.

          "},{"location":"api_docs/xplore/reward/girm/#add","title":".add","text":"

          source

          .add(\n   samples: Dict\n)\n

          Add new samples to the intrinsic reward module.

          "},{"location":"api_docs/xplore/reward/girm/#update","title":".update","text":"

          source

          .update(\n   samples: Dict\n)\n

          Update the intrinsic reward module if necessary.

          Args

          • samples : The collected samples. A python dict like {obs (n_steps, n_envs, obs_shape) , actions (n_steps, n_envs, action_shape) , rewards (n_steps, n_envs) , next_obs (n_steps, n_envs, *obs_shape) }.

            Returns

            None

            "},{"location":"api_docs/xplore/reward/icm/","title":"ICM","text":""},{"location":"api_docs/xplore/reward/icm/#icm","title":"ICM","text":"

            source

            ICM(\n   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',\n   beta: float = 0.05, kappa: float = 2.5e-05, latent_dim: int = 128, lr: float = 0.001,\n   batch_size: int = 64\n)\n

            Curiosity-Driven Exploration by Self-Supervised Prediction. See paper: http://proceedings.mlr.press/v70/pathak17a/pathak17a.pdf

            Args

            • observation_space (Space) : The observation space of environment.
            • action_space (Space) : The action space of environment.
            • device (str) : Device (cpu, cuda, ...) on which the code should be run.
            • beta (float) : The initial weighting coefficient of the intrinsic rewards.
            • kappa (float) : The decay rate.
            • latent_dim (int) : The dimension of encoding vectors.
            • lr (float) : The learning rate.
            • batch_size (int) : The batch size for update.

            Returns

            Instance of ICM.
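A hedged sketch of computing intrinsic rewards with ICM; the samples dict follows the (n_steps, n_envs, *shape) layout from the docstring, while the import path, spaces, and shapes (n_steps=128, n_envs=8) are assumptions.

```python
import gymnasium as gym
import torch as th
from rllte.xplore.reward import ICM  # assumed export path

obs_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(4,))
act_space = gym.spaces.Discrete(2)
irs = ICM(observation_space=obs_space, action_space=act_space, device="cpu")

samples = {
    "obs": th.rand(128, 8, 4),
    "actions": th.randint(0, 2, (128, 8, 1)),
    "rewards": th.zeros(128, 8),
    "next_obs": th.rand(128, 8, 4),
}
intrinsic_rewards = irs.compute_irs(samples, step=0)  # expected shape: (n_steps, n_envs)
irs.update(samples)  # train the inverse/forward dynamics models
```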

            Methods:

            "},{"location":"api_docs/xplore/reward/icm/#compute_irs","title":".compute_irs","text":"

            source

            .compute_irs(\n   samples: Dict, step: int = 0\n)\n

            Compute the intrinsic rewards for current samples.

            Args

            • samples (Dict) : The collected samples. A python dict like {obs (n_steps, n_envs, obs_shape) , actions (n_steps, n_envs, action_shape) , rewards (n_steps, n_envs) , next_obs (n_steps, n_envs, *obs_shape) }.
            • step (int) : The global training step.
            Returns

              The intrinsic rewards.

              "},{"location":"api_docs/xplore/reward/icm/#add","title":".add","text":"

              source

              .add(\n   samples: Dict\n)\n

              Add new samples to the intrinsic reward module.

              "},{"location":"api_docs/xplore/reward/icm/#update","title":".update","text":"

              source

              .update(\n   samples: Dict\n)\n

              Update the intrinsic reward module if necessary.

              Args

              • samples : The collected samples. A python dict like {obs (n_steps, n_envs, obs_shape) , actions (n_steps, n_envs, action_shape) , rewards (n_steps, n_envs) , next_obs (n_steps, n_envs, *obs_shape) }.

                Returns

                None

                "},{"location":"api_docs/xplore/reward/ngu/","title":"NGU","text":""},{"location":"api_docs/xplore/reward/ngu/#ngu","title":"NGU","text":"

                source

                NGU(\n   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',\n   beta: float = 0.05, kappa: float = 2.5e-05, latent_dim: int = 128, lr: float = 0.001,\n   batch_size: int = 64, capacity: int = 1000, k: int = 10,\n   kernel_cluster_distance: float = 0.008, kernel_epsilon: float = 0.0001,\n   c: float = 0.001, sm: float = 8.0, mrs: float = 5.0\n)\n

                Never Give Up: Learning Directed Exploration Strategies (NGU). See paper: https://arxiv.org/pdf/2002.06038

                Args

                • observation_space (Space) : The observation space of environment.
                • action_space (Space) : The action space of environment.
                • device (str) : Device (cpu, cuda, ...) on which the code should be run.
                • beta (float) : The initial weighting coefficient of the intrinsic rewards.
                • kappa (float) : The decay rate.
                • latent_dim (int) : The dimension of encoding vectors.
                • lr (float) : The learning rate.
                • batch_size (int) : The batch size for update.
                • capacity (int) : The capacity of the episodic memory.
                • k (int) : Number of neighbors.
                • kernel_cluster_distance (float) : The kernel cluster distance.
                • kernel_epsilon (float) : The kernel constant.
                • c (float) : The pseudo-counts constant.
                • sm (float) : The kernel maximum similarity.
                • mrs (float) : The maximum reward scaling.

                Returns

                Instance of NGU.

                Methods:

                "},{"location":"api_docs/xplore/reward/ngu/#pseudo_counts","title":".pseudo_counts","text":"

                source

                .pseudo_counts(\n   e: th.Tensor\n)\n

                Pseudo counts.

                Args

                • e (th.Tensor) : Encoded observations.

                Returns

                Count values.

                "},{"location":"api_docs/xplore/reward/ngu/#compute_irs","title":".compute_irs","text":"

                source

                .compute_irs(\n   samples: Dict, step: int = 0\n)\n

                Compute the intrinsic rewards for current samples.

                Args

                • samples (Dict) : The collected samples. A python dict like {obs (n_steps, n_envs, obs_shape) , actions (n_steps, n_envs, action_shape) , rewards (n_steps, n_envs) , next_obs (n_steps, n_envs, *obs_shape) }.
                • step (int) : The global training step.
                Returns

                  The intrinsic rewards.

                  "},{"location":"api_docs/xplore/reward/ngu/#add","title":".add","text":"

                  source

                  .add(\n   samples: Dict\n)\n

                  Add new samples to the intrinsic reward module.

                  "},{"location":"api_docs/xplore/reward/ngu/#update","title":".update","text":"

                  source

                  .update(\n   samples: Dict\n)\n

                  Update the intrinsic reward module if necessary.

                  Args

                  • samples : The collected samples. A python dict like {obs (n_steps, n_envs, obs_shape) , actions (n_steps, n_envs, action_shape) , rewards (n_steps, n_envs) , next_obs (n_steps, n_envs, *obs_shape) }.

                    Returns

                    None

                    "},{"location":"api_docs/xplore/reward/pseudo_counts/","title":"PseudoCounts","text":""},{"location":"api_docs/xplore/reward/pseudo_counts/#pseudocounts","title":"PseudoCounts","text":"

                    source

                    PseudoCounts(\n   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',\n   beta: float = 0.05, kappa: float = 2.5e-05, latent_dim: int = 32, lr: float = 0.001,\n   batch_size: int = 64, capacity: int = 1000, k: int = 10,\n   kernel_cluster_distance: float = 0.008, kernel_epsilon: float = 0.0001,\n   c: float = 0.001, sm: float = 8.0\n)\n

                    Pseudo-counts based on \"Never Give Up: Learning Directed Exploration Strategies (NGU)\". See paper: https://arxiv.org/pdf/2002.06038

                    Args

                    • observation_space (Space) : The observation space of environment.
                    • action_space (Space) : The action space of environment.
                    • device (str) : Device (cpu, cuda, ...) on which the code should be run.
                    • beta (float) : The initial weighting coefficient of the intrinsic rewards.
                    • kappa (float) : The decay rate.
                    • latent_dim (int) : The dimension of encoding vectors.
                    • lr (float) : The learning rate.
                    • batch_size (int) : The batch size for update.
                    • capacity (int) : The capacity of the episodic memory.
                    • k (int) : Number of neighbors.
                    • kernel_cluster_distance (float) : The kernel cluster distance.
                    • kernel_epsilon (float) : The kernel constant.
                    • c (float) : The pseudo-counts constant.
                    • sm (float) : The kernel maximum similarity.

                    Returns

                    Instance of PseudoCounts.

                    Methods:

                    "},{"location":"api_docs/xplore/reward/pseudo_counts/#pseudo_counts","title":".pseudo_counts","text":"

                    source

                    .pseudo_counts(\n   e: th.Tensor\n)\n

                    Pseudo counts.

                    Args

                    • e (th.Tensor) : Encoded observations.

                    Returns

                    Count values.

                    "},{"location":"api_docs/xplore/reward/pseudo_counts/#compute_irs","title":".compute_irs","text":"

                    source

                    .compute_irs(\n   samples: Dict, step: int = 0\n)\n

                    Compute the intrinsic rewards for current samples.

                    Args

                    • samples (Dict) : The collected samples. A python dict like {obs (n_steps, n_envs, obs_shape) , actions (n_steps, n_envs, action_shape) , rewards (n_steps, n_envs) , next_obs (n_steps, n_envs, *obs_shape) }.
                    • step (int) : The global training step.
                    Returns

                      The intrinsic rewards.

                      "},{"location":"api_docs/xplore/reward/pseudo_counts/#add","title":".add","text":"

                      source

                      .add(\n   samples: Dict\n)\n

                      Add new samples to the intrinsic reward module.

                      "},{"location":"api_docs/xplore/reward/pseudo_counts/#update","title":".update","text":"

                      source

                      .update(\n   samples: Dict\n)\n

                      Update the intrinsic reward module if necessary.

                      Args

                      • samples : The collected samples. A python dict like {obs (n_steps, n_envs, obs_shape) , actions (n_steps, n_envs, action_shape) , rewards (n_steps, n_envs) , next_obs (n_steps, n_envs, *obs_shape) }.

                        Returns

                        None

                        "},{"location":"api_docs/xplore/reward/re3/","title":"RE3","text":""},{"location":"api_docs/xplore/reward/re3/#re3","title":"RE3","text":"

                        source

                        RE3(\n   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',\n   beta: float = 0.05, kappa: float = 2.5e-05, latent_dim: int = 128,\n   storage_size: int = 10000, num_envs: int = 1, k: int = 5, average_entropy: bool = False\n)\n

                        State Entropy Maximization with Random Encoders for Efficient Exploration (RE3). See paper: http://proceedings.mlr.press/v139/seo21a/seo21a.pdf

                        Args

                        • observation_space (Space) : The observation space of environment.
                        • action_space (Space) : The action space of environment.
                        • device (str) : Device (cpu, cuda, ...) on which the code should be run.
                        • beta (float) : The initial weighting coefficient of the intrinsic rewards.
                        • kappa (float) : The decay rate.
                        • latent_dim (int) : The dimension of encoding vectors.
                        • storage_size (int) : The size of the storage for random embeddings.
                        • num_envs (int) : The number of parallel environments.
                        • k (int) : Use the k-th neighbors.
                        • average_entropy (bool) : Use the average of entropy estimation.

                        Returns

                        Instance of RE3.

                        Methods:

                        "},{"location":"api_docs/xplore/reward/re3/#compute_irs","title":".compute_irs","text":"

                        source

                        .compute_irs(\n   samples: Dict, step: int = 0\n)\n

                        Compute the intrinsic rewards for current samples.

                        Args

                        • samples (Dict) : The collected samples. A python dict like {obs (n_steps, n_envs, obs_shape) , actions (n_steps, n_envs, action_shape) , rewards (n_steps, n_envs) , next_obs (n_steps, n_envs, *obs_shape) }.
                        • step (int) : The global training step.
                        Returns

                          The intrinsic rewards.

                          "},{"location":"api_docs/xplore/reward/re3/#update","title":".update","text":"

                          source

                          .update(\n   samples: Dict\n)\n

                          Update the intrinsic reward module if necessary.

                          Args

                          • samples : The collected samples. A python dict like {obs (n_steps, n_envs, obs_shape) , actions (n_steps, n_envs, action_shape) , rewards (n_steps, n_envs) , next_obs (n_steps, n_envs, *obs_shape) }.

                            Returns

                            None

                            "},{"location":"api_docs/xplore/reward/re3/#add","title":".add","text":"

                            source

                            .add(\n   samples: Dict\n)\n

                            Calculate the random embeddings and insert them into the storage.

                            Args

                            • samples : The collected samples. A python dict like {obs (n_steps, n_envs, obs_shape) , actions (n_steps, n_envs, action_shape) , rewards (n_steps, n_envs) , next_obs (n_steps, n_envs, *obs_shape) }.

                              Returns

                              None

                              "},{"location":"api_docs/xplore/reward/revd/","title":"REVD","text":""},{"location":"api_docs/xplore/reward/revd/#revd","title":"REVD","text":"

                              source

                              REVD(\n   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',\n   beta: float = 0.05, kappa: float = 2.5e-05, latent_dim: int = 128, alpha: float = 0.5,\n   k: int = 5, average_divergence: bool = False\n)\n

                              Rewarding Episodic Visitation Discrepancy for Exploration in Reinforcement Learning (REVD). See paper: https://openreview.net/pdf?id=V2pw1VYMrDo

                              Args

                              • observation_space (Space) : The observation space of environment.
                              • action_space (Space) : The action space of environment.
                              • device (str) : Device (cpu, cuda, ...) on which the code should be run.
                              • beta (float) : The initial weighting coefficient of the intrinsic rewards.
                              • kappa (float) : The decay rate.
                              • latent_dim (int) : The dimension of encoding vectors.
                              • alpha (float) : The order of R\u00e9nyi divergence.
                              • k (int) : Use the k-th neighbors.
                              • average_divergence (bool) : Use the average of divergence estimation.

                              Returns

                              Instance of REVD.

                              Methods:

                              "},{"location":"api_docs/xplore/reward/revd/#compute_irs","title":".compute_irs","text":"

                              source

                              .compute_irs(\n   samples: Dict, step: int = 0\n)\n

                              Compute the intrinsic rewards for current samples.

                              Args

                              • samples (Dict) : The collected samples. A python dict like {obs (n_steps, n_envs, obs_shape) , actions (n_steps, n_envs, action_shape) , rewards (n_steps, n_envs) , next_obs (n_steps, n_envs, *obs_shape) }.
                              • step (int) : The global training step.
                              Returns

                                The intrinsic rewards.

                                "},{"location":"api_docs/xplore/reward/revd/#add","title":".add","text":"

                                source

                                .add(\n   samples: Dict\n)\n

                                Add new samples to the intrinsic reward module.

                                "},{"location":"api_docs/xplore/reward/revd/#update","title":".update","text":"

                                source

                                .update(\n   samples: Dict\n)\n

                                Update the intrinsic reward module if necessary.

                                Args

                                • samples : The collected samples. A python dict like {obs (n_steps, n_envs, obs_shape) , actions (n_steps, n_envs, action_shape) , rewards (n_steps, n_envs) , next_obs (n_steps, n_envs, *obs_shape) }.

                                  Returns

                                  None
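A minimal usage sketch of REVD is shown below. This is a hedged example: the import path follows the API layout above (rllte.xplore.reward), while the spaces, the rollout shape, and the keys of the samples dict are illustrative assumptions based on the Args description.

import gymnasium as gym\nimport numpy as np\nimport torch as th\nfrom rllte.xplore.reward import REVD\n\n# illustrative spaces and rollout shape: (n_steps, n_envs) = (16, 2)\nobs_space = gym.spaces.Box(low=0.0, high=1.0, shape=(9, 84, 84), dtype=np.float32)\naction_space = gym.spaces.Discrete(7)\nrevd = REVD(observation_space=obs_space, action_space=action_space, device='cpu')\n# a synthetic batch of collected samples following the documented layout\nsamples = {\n    'obs': th.rand(16, 2, 9, 84, 84),\n    'actions': th.randint(0, 7, (16, 2)),\n    'rewards': th.rand(16, 2),\n    'next_obs': th.rand(16, 2, 9, 84, 84),\n}\n# intrinsic rewards with shape (n_steps, n_envs)\nintrinsic_rewards = revd.compute_irs(samples=samples, step=0)\n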

                                  "},{"location":"api_docs/xplore/reward/ride/","title":"RIDE","text":""},{"location":"api_docs/xplore/reward/ride/#ride","title":"RIDE","text":"

                                  source

                                  RIDE(\n   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',\n   beta: float = 0.05, kappa: float = 2.5e-05, latent_dim: int = 128, lr: float = 0.001,\n   batch_size: int = 64, capacity: int = 1000, k: int = 10,\n   kernel_cluster_distance: float = 0.008, kernel_epsilon: float = 0.0001,\n   c: float = 0.001, sm: float = 8.0\n)\n

                                  RIDE: Rewarding Impact-Driven Exploration for Procedurally-Generated Environments. See paper: https://arxiv.org/pdf/2002.12292

                                  Args

                                  • observation_space (Space) : The observation space of environment.
                                  • action_space (Space) : The action space of environment.
                                  • device (str) : Device (cpu, cuda, ...) on which the code should be run.
                                  • beta (float) : The initial weighting coefficient of the intrinsic rewards.
                                  • kappa (float) : The decay rate.
                                  • latent_dim (int) : The dimension of encoding vectors.
                                  • lr (float) : The learning rate.
                                  • batch_size (int) : The batch size for update.
• capacity (int) : The capacity of the episodic memory.
                                  • k (int) : Number of neighbors.
                                  • kernel_cluster_distance (float) : The kernel cluster distance.
                                  • kernel_epsilon (float) : The kernel constant.
                                  • c (float) : The pseudo-counts constant.
                                  • sm (float) : The kernel maximum similarity.

                                  Returns

                                  Instance of RIDE.

                                  Methods:

                                  "},{"location":"api_docs/xplore/reward/ride/#pseudo_counts","title":".pseudo_counts","text":"

                                  source

                                  .pseudo_counts(\n   e: th.Tensor\n)\n

                                  Pseudo counts.

                                  Args

                                  • e (th.Tensor) : Encoded observations.

                                  Returns

Count values.

                                  "},{"location":"api_docs/xplore/reward/ride/#compute_irs","title":".compute_irs","text":"

                                  source

                                  .compute_irs(\n   samples: Dict, step: int = 0\n)\n

                                  Compute the intrinsic rewards for current samples.

                                  Args

                                  • samples (Dict) : The collected samples. A python dict like {obs (n_steps, n_envs, obs_shape) , actions (n_steps, n_envs, action_shape) , rewards (n_steps, n_envs) , next_obs (n_steps, n_envs, *obs_shape) }.
                                  • step (int) : The global training step.
Returns

                                    The intrinsic rewards.

                                    "},{"location":"api_docs/xplore/reward/ride/#update","title":".update","text":"

                                    source

                                    .update(\n   samples: Dict\n)\n

                                    Update the intrinsic reward module if necessary.

                                    Args

                                    • samples : The collected samples. A python dict like {obs (n_steps, n_envs, obs_shape) , actions (n_steps, n_envs, action_shape) , rewards (n_steps, n_envs) , next_obs (n_steps, n_envs, *obs_shape) }.

                                      Returns

                                      None

                                      "},{"location":"api_docs/xplore/reward/ride/#add","title":".add","text":"

                                      source

                                      .add(\n   samples: Dict\n)\n

                                      Add new samples to the intrinsic reward module.
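A hedged sketch of driving RIDE during rollout collection is given below; the call order mirrors the methods documented above, while the spaces, shapes, and samples keys are illustrative assumptions.

import gymnasium as gym\nimport numpy as np\nimport torch as th\nfrom rllte.xplore.reward import RIDE\n\nobs_space = gym.spaces.Box(low=0.0, high=1.0, shape=(4, 84, 84), dtype=np.float32)\naction_space = gym.spaces.Discrete(6)\nride = RIDE(observation_space=obs_space, action_space=action_space, device='cpu')\n\nn_steps, n_envs = 16, 2\nsamples = {\n    'obs': th.rand(n_steps, n_envs, 4, 84, 84),\n    'actions': th.randint(0, 6, (n_steps, n_envs)),\n    'rewards': th.rand(n_steps, n_envs),\n    'next_obs': th.rand(n_steps, n_envs, 4, 84, 84),\n}\nride.add(samples=samples)                        # feed new samples to the module\nirs = ride.compute_irs(samples=samples, step=0)  # intrinsic rewards, shape (n_steps, n_envs)\nride.update(samples=samples)                     # update the module's networks if necessary\n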

                                      "},{"location":"api_docs/xplore/reward/rise/","title":"RISE","text":""},{"location":"api_docs/xplore/reward/rise/#rise","title":"RISE","text":"

                                      source

                                      RISE(\n   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',\n   beta: float = 0.05, kappa: float = 2.5e-05, latent_dim: int = 128,\n   storage_size: int = 10000, num_envs: int = 1, alpha: float = 0.5, k: int = 5,\n   average_entropy: bool = False\n)\n

                                      R\u00e9nyi State Entropy Maximization for Exploration Acceleration in Reinforcement Learning (RISE). See paper: https://ieeexplore.ieee.org/abstract/document/9802917/

                                      Args

                                      • observation_space (Space) : The observation space of environment.
                                      • action_space (Space) : The action space of environment.
                                      • device (str) : Device (cpu, cuda, ...) on which the code should be run.
                                      • beta (float) : The initial weighting coefficient of the intrinsic rewards.
                                      • kappa (float) : The decay rate.
                                      • latent_dim (int) : The dimension of encoding vectors.
                                      • storage_size (int) : The size of the storage for random embeddings.
                                      • num_envs (int) : The number of parallel environments.
• alpha (float) : The order of R\u00e9nyi entropy.
                                      • k (int) : Use the k-th neighbors.
                                      • average_entropy (bool) : Use the average of entropy estimation.

                                      Returns

                                      Instance of RISE.

                                      Methods:

                                      "},{"location":"api_docs/xplore/reward/rise/#compute_irs","title":".compute_irs","text":"

                                      source

                                      .compute_irs(\n   samples: Dict, step: int = 0\n)\n

                                      Compute the intrinsic rewards for current samples.

                                      Args

                                      • samples (Dict) : The collected samples. A python dict like {obs (n_steps, n_envs, obs_shape) , actions (n_steps, n_envs, action_shape) , rewards (n_steps, n_envs) , next_obs (n_steps, n_envs, *obs_shape) }.
                                      • step (int) : The global training step.
Returns

                                        The intrinsic rewards.

                                        "},{"location":"api_docs/xplore/reward/rise/#update","title":".update","text":"

                                        source

                                        .update(\n   samples: Dict\n)\n

                                        Update the intrinsic reward module if necessary.

                                        Args

                                        • samples : The collected samples. A python dict like {obs (n_steps, n_envs, obs_shape) , actions (n_steps, n_envs, action_shape) , rewards (n_steps, n_envs) , next_obs (n_steps, n_envs, *obs_shape) }.

                                          Returns

                                          None

                                          "},{"location":"api_docs/xplore/reward/rise/#add","title":".add","text":"

                                          source

                                          .add(\n   samples: Dict\n)\n

                                          Calculate the random embeddings and insert them into the storage.

                                          Args

                                          • samples : The collected samples. A python dict like {obs (n_steps, n_envs, obs_shape) , actions (n_steps, n_envs, action_shape) , rewards (n_steps, n_envs) , next_obs (n_steps, n_envs, *obs_shape) }.

                                            Returns

                                            None
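Below is a brief, hedged sketch of RISE. Its .add method fills the storage of random embeddings that .compute_irs then queries; the spaces, the storage settings, and the samples keys are illustrative assumptions.

import gymnasium as gym\nimport numpy as np\nimport torch as th\nfrom rllte.xplore.reward import RISE\n\nn_steps, n_envs = 16, 2\nobs_space = gym.spaces.Box(low=0.0, high=1.0, shape=(9, 84, 84), dtype=np.float32)\naction_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(7,), dtype=np.float32)\nrise = RISE(observation_space=obs_space, action_space=action_space, device='cpu',\n            storage_size=1000, num_envs=n_envs, k=3)\nsamples = {\n    'obs': th.rand(n_steps, n_envs, 9, 84, 84),\n    'actions': th.rand(n_steps, n_envs, 7),\n    'rewards': th.rand(n_steps, n_envs),\n    'next_obs': th.rand(n_steps, n_envs, 9, 84, 84),\n}\nrise.add(samples=samples)                        # store random embeddings of the observations\nirs = rise.compute_irs(samples=samples, step=0)  # Renyi-entropy-based intrinsic rewards\n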

                                            "},{"location":"api_docs/xplore/reward/rnd/","title":"RND","text":""},{"location":"api_docs/xplore/reward/rnd/#rnd","title":"RND","text":"

                                            source

                                            RND(\n   observation_space: gym.Space, action_space: gym.Space, device: str = 'cpu',\n   beta: float = 0.05, kappa: float = 2.5e-05, latent_dim: int = 128, lr: float = 0.001,\n   batch_size: int = 64\n)\n

                                            Exploration by Random Network Distillation (RND). See paper: https://arxiv.org/pdf/1810.12894.pdf

                                            Args

                                            • observation_space (Space) : The observation space of environment.
                                            • action_space (Space) : The action space of environment.
                                            • device (str) : Device (cpu, cuda, ...) on which the code should be run.
                                            • beta (float) : The initial weighting coefficient of the intrinsic rewards.
                                            • kappa (float) : The decay rate.
                                            • latent_dim (int) : The dimension of encoding vectors.
                                            • lr (float) : The learning rate.
                                            • batch_size (int) : The batch size for update.

                                            Returns

                                            Instance of RND.

                                            Methods:

                                            "},{"location":"api_docs/xplore/reward/rnd/#compute_irs","title":".compute_irs","text":"

                                            source

                                            .compute_irs(\n   samples: Dict, step: int = 0\n)\n

                                            Compute the intrinsic rewards for current samples.

                                            Args

                                            • samples (Dict) : The collected samples. A python dict like {obs (n_steps, n_envs, obs_shape) , actions (n_steps, n_envs, action_shape) , rewards (n_steps, n_envs) , next_obs (n_steps, n_envs, *obs_shape) }.
                                            • step (int) : The global training step.
Returns

                                              The intrinsic rewards.

                                              "},{"location":"api_docs/xplore/reward/rnd/#add","title":".add","text":"

                                              source

                                              .add(\n   samples: Dict\n)\n

                                              Add new samples to the intrinsic reward module.

                                              "},{"location":"api_docs/xplore/reward/rnd/#update","title":".update","text":"

                                              source

                                              .update(\n   samples: Dict\n)\n

                                              Update the intrinsic reward module if necessary.

                                              Args

                                              • samples : The collected samples. A python dict like {obs (n_steps, n_envs, obs_shape) , actions (n_steps, n_envs, action_shape) , rewards (n_steps, n_envs) , next_obs (n_steps, n_envs, *obs_shape) }.

                                                Returns

                                                None
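A hedged end-to-end sketch of plugging RND into a built-in agent is shown below. The import paths and the make_atari_env/PPO usage follow examples elsewhere in these docs, while passing the module through agent.set(reward=...) is an assumption based on the intrinsic-reward tutorial listed below.

from rllte.agent import PPO\nfrom rllte.env import make_atari_env\nfrom rllte.xplore.reward import RND\n\nif __name__ == \"__main__\":\n    device = \"cpu\"\n    env = make_atari_env(device=device, num_envs=8)\n    # create the intrinsic reward module\n    rnd = RND(observation_space=env.observation_space,\n              action_space=env.action_space,\n              device=device)\n    # create the agent and attach the reward module (kwarg name assumed)\n    agent = PPO(env=env, device=device, tag=\"ppo_atari_rnd\")\n    agent.set(reward=rnd)\n    agent.train(num_train_steps=5000)\n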

                                                "},{"location":"tutorials/","title":"WELCOME TO RLLTE TUTORIALS \ud83d\udc4b\ud83d\udc4b\ud83d\udc4b","text":""},{"location":"tutorials/#general","title":"General","text":"
                                                • RL Algorithm Decoupling
                                                • Fast Algorithm Development
                                                "},{"location":"tutorials/#model-training","title":"Model Training","text":"
                                                • Quick Start
                                                • Module Replacement for An Implemented Algorithm
                                                • Intrinsic Reward Shaping for Enhancing Exploration
                                                • Observation Augmentation for Sample Efficiency and Generalization
                                                • Pre-training with Intrinsic Rewards
                                                "},{"location":"tutorials/#model-evaluation","title":"Model Evaluation","text":"
                                                • Performance Evaluation of Single Algorithm
                                                • Performance Comparison of Multiple Algorithms
                                                • Metrics Visualization
                                                "},{"location":"tutorials/#model-deployment","title":"Model Deployment","text":"
                                                • with NVIDIA TensorRT
                                                • with HUAWEI CANN
                                                "},{"location":"tutorials/#customization","title":"Customization","text":"
                                                • Make A Custom Environment
                                                • Make A Custom Module
                                                "},{"location":"tutorials/custom/environment/","title":"Custom Environment","text":"Open in Colab View on GitHub"},{"location":"tutorials/custom/environment/#environment-definition","title":"Environment definition","text":"

To use custom environments in RLLTE, it suffices to follow the gymnasium interface and prepare your environment following Tutorials: Make Your Own Custom Environment. An example is: example.py

                                                import gymnasium as gym\nimport numpy as np\n\nclass CustomEnv(gym.Env):\n    def __init__(self, total_length) -> None:\n        super().__init__()\n        self.observation_space = gym.spaces.Box(\n            shape=(9, 84, 84),\n            high=255.0,\n            low=0.,\n            dtype=np.uint8\n        )\n        self.action_space = gym.spaces.Box(\n            shape=(7,),\n            high=1.,\n            low=-1.,\n            dtype=np.float32\n        )\n        self.total_length = total_length\n        self.count = 0\n\n    def step(self, action):\n        obs = self.observation_space.sample()\n        reward = np.random.rand()\n        if self.count < self.total_length:\n            terminated = truncated = False\n        else:\n            terminated = truncated = True\n        info = {\"discount\": 0.99}\n        self.count += 1\n\n        return obs, reward, terminated, truncated, info\n\n    def reset(self, seed=None, options=None):\n        self.count = 0\n        return self.observation_space.sample(), {\"discount\": 0.99}\n

                                                "},{"location":"tutorials/custom/environment/#use-make_rllte_env","title":"Use make_rllte_env","text":"

In RLLTE, the environments are assumed to be vectorized, and the make_rllte_env function is used to wrap the environments: example.py

                                                from rllte.env.utils import make_rllte_env\n# create vectorized environments\nenv = make_rllte_env(env_id=CustomEnv, \n                     device=device, \n                     env_kwargs={'total_length': 499} # set env arguments\n                     )\n
After that, you can use the custom environment in your application directly. train.py
                                                from rllte.agent import DrQv2\nfrom rllte.env.utils import make_rllte_env\n\nif __name__ == \"__main__\":\n    # env setup\n    device = \"cuda:0\"\n    env = make_rllte_env(env_id=CustomEnv, \n                        device=device, \n                        env_kwargs={'total_length': 499} # set env arguments\n                        )\n    eval_env = make_rllte_env(env_id=CustomEnv, \n                            device=device, \n                            env_kwargs={'total_length': 499} # set env arguments\n                            )\n    agent = DrQv2(env=env, \n                eval_env=eval_env, \n                device=device,\n                tag=\"drqv2_dmc_pixel\")\n    agent.train(num_train_steps=5000)\n

                                                "},{"location":"tutorials/custom/module/","title":"Custom Module","text":"Open in Colab View on GitHub

RLLTE is an extremely open platform that supports custom modules, including encoders, storages, policies, etc. Just write a new module that inherits from the corresponding base class, and it can be inserted into an agent directly. Suppose we want to build a new encoder entitled CustomEncoder. An example is: example.py

                                                from rllte.agent import PPO\nfrom rllte.env import make_atari_env\nfrom rllte.common.prototype import BaseEncoder\nfrom gymnasium.spaces import Space\nfrom torch import nn\nimport torch as th\n\nclass CustomEncoder(BaseEncoder):\n    \"\"\"Custom encoder.\n\n    Args:\n        observation_space (Space): The observation space of environment.\n        feature_dim (int): Number of features extracted.\n\n    Returns:\n        The new encoder instance.\n    \"\"\"\n    def __init__(self, observation_space: Space, feature_dim: int = 0) -> None:\n        super().__init__(observation_space, feature_dim)\n\n        obs_shape = observation_space.shape\n        assert len(obs_shape) == 3\n\n        self.trunk = nn.Sequential(\n            nn.Conv2d(obs_shape[0], 32, 3, stride=2), nn.ReLU(),\n            nn.Conv2d(32, 32, 3, stride=2), nn.ReLU(),\n            nn.Flatten(),\n        )\n\n        with th.no_grad():\n            sample = th.ones(size=tuple(obs_shape)).float()\n            n_flatten = self.trunk(sample.unsqueeze(0)).shape[1]\n\n        self.trunk.extend([nn.Linear(n_flatten, feature_dim), nn.ReLU()])\n\n    def forward(self, obs: th.Tensor) -> th.Tensor:\n        h = self.trunk(obs / 255.0)\n\n        return h.view(h.size()[0], -1)\n\nif __name__ == \"__main__\":\n    # env setup\n    device = \"cuda:0\"\n    env = make_atari_env(device=device)\n    eval_env = make_atari_env(device=device)\n    # create agent\n    feature_dim = 512\n    agent = PPO(env=env, \n                eval_env=eval_env, \n                device=device,\n                tag=\"ppo_atari\",\n                feature_dim=feature_dim)\n    # create a new encoder\n    encoder = CustomEncoder(observation_space=env.observation_space, \n                         feature_dim=feature_dim)\n    # set the new encoder\n    agent.set(encoder=encoder)\n    # start training\n    agent.train(num_train_steps=5000)\n
                                                Run example.py and you'll see the old MnihCnnEncoder has been replaced by CustomEncoder:
                                                [08/04/2023 03:47:24 PM] - [INFO.] - Invoking RLLTE Engine...\n[08/04/2023 03:47:24 PM] - [INFO.] - ================================================================================\n[08/04/2023 03:47:24 PM] - [INFO.] - Tag               : ppo_atari\n[08/04/2023 03:47:24 PM] - [INFO.] - Device            : NVIDIA GeForce RTX 3090\n[08/04/2023 03:47:24 PM] - [DEBUG] - Agent             : PPO\n[08/04/2023 03:47:24 PM] - [DEBUG] - Encoder           : CustomEncoder\n[08/04/2023 03:47:24 PM] - [DEBUG] - Policy            : OnPolicySharedActorCritic\n[08/04/2023 03:47:24 PM] - [DEBUG] - Storage           : VanillaRolloutStorage\n[08/04/2023 03:47:24 PM] - [DEBUG] - Distribution      : Categorical\n[08/04/2023 03:47:24 PM] - [DEBUG] - Augmentation      : False\n[08/04/2023 03:47:24 PM] - [DEBUG] - Intrinsic Reward  : False\n[08/04/2023 03:47:24 PM] - [DEBUG] - ================================================================================\n...\n
When customizing modules such as Storage and Distribution, users should consider their compatibility with specific algorithms.

                                                "},{"location":"tutorials/general/decoupling/","title":"RL Algorithm Decoupling","text":"

                                                The actual performance of an RL algorithm is affected by various factors (e.g., different network architectures and experience usage strategies), which are difficult to quantify.

                                                Huang S, Dossa R F J, Raffin A, et al. The 37 Implementation Details of Proximal Policy Optimization[J]. The ICLR Blog Track 2023, 2022.

                                                RLLTE decouples RL algorithms into minimum primitives from the perspective of exploitation and exploration and provides abundant modules for development:

                                                • Xploit: Modules that focus on exploitation in RL.
                                                  • Encoder: Modules for processing observations and extracting features;
                                                  • Policy: Modules for interaction and learning;
                                                  • Storage: Modules for storing and replaying collected experiences;
                                                • Xplore: Modules that focus on exploration in RL.
                                                  • Distribution: Modules for sampling actions;
                                                  • Augmentation: Modules for observation augmentation;
                                                  • Reward: Intrinsic reward modules for enhancing exploration.

                                                Therefore, the core of RLLTE is not designed to provide specific RL algorithms but a toolkit for producing algorithms. Developers are free to use various built-in or customized modules to build RL algorithms.
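For a concrete picture of this decoupling, the sketch below simply imports one module from each category; the Xploit and Categorical names are taken from the examples in these docs, while the augmentation class name is an assumption.

# Xploit: exploitation-oriented modules\nfrom rllte.xploit.encoder import MnihCnnEncoder\nfrom rllte.xploit.policy import OnPolicySharedActorCritic\nfrom rllte.xploit.storage import VanillaRolloutStorage\n# Xplore: exploration-oriented modules\nfrom rllte.xplore.distribution import Categorical\nfrom rllte.xplore.augmentation import RandomShift  # class name assumed\nfrom rllte.xplore.reward import RND\n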

                                                • See Fast Algorithm Development

                                                In particular, developers are allowed to replace modules of an implemented algorithm.

                                                • See Module Replacement for An Implemented Algorithm

                                                RLLTE is an extremely open framework that allows developers to try anything.

                                                "},{"location":"tutorials/general/fast/","title":"Fast Algorithm Development","text":"Open in Colab View on GitHub

                                                Developers only need three steps to implement an RL algorithm with RLLTE:

                                                Workflow

                                                1. Select an algorithm prototype;
                                                2. Select desired modules;
                                                3. Write an update function.

                                                The following example illustrates how to write an Advantage Actor-Critic (A2C) agent to solve Atari games.

                                                "},{"location":"tutorials/general/fast/#set-prototype","title":"Set prototype","text":"

                                                Firstly, we select OnPolicyAgent as the prototype

                                                from rllte.common.prototype import OnPolicyAgent\n\nclass A2C(OnPolicyAgent):\n    def __init__(self, env, tag, device, num_steps):\n        # here we only use four arguments\n        super().__init__(env=env, tag=tag, device=device, num_steps=num_steps)\n

                                                "},{"location":"tutorials/general/fast/#set-necessary-modules","title":"Set necessary modules","text":"

                                                Now we need an encoder to process observations, a learnable policy to generate actions, and a storage to store and sample experiences.

                                                from rllte.xploit.encoder import MnihCnnEncoder\nfrom rllte.xploit.policy import OnPolicySharedActorCritic\nfrom rllte.xploit.storage import VanillaRolloutStorage\nfrom rllte.xplore.distribution import Categorical\n

                                                "},{"location":"tutorials/general/fast/#set-update-function","title":"Set update function","text":"

                                                Run the .describe function of the selected policy and you will see the following output:

                                                OnPolicySharedActorCritic.describe()\n\n# Output:\n# ================================================================================\n# Name       : OnPolicySharedActorCritic\n# Structure  : self.encoder (shared by actor and critic), self.actor, self.critic\n# Forward    : obs -> self.encoder -> self.actor -> actions\n#            : obs -> self.encoder -> self.critic -> values\n#            : actions -> log_probs\n# Optimizers : self.optimizers['opt'] -> (self.encoder, self.actor, self.critic)\n# ================================================================================\n
                                                This will illustrate the structure of the policy and indicate the optimizable parts. Finally, merge these modules and write an .update function:
                                                from torch import nn\nimport torch as th\n\nclass A2C(OnPolicyAgent):\n    def __init__(self, env, tag, seed, device, num_steps) -> None:\n        super().__init__(env=env, tag=tag, seed=seed, device=device, num_steps=num_steps)\n        # create modules\n        encoder = MnihCnnEncoder(observation_space=env.observation_space, feature_dim=512)\n        policy = OnPolicySharedActorCritic(observation_space=env.observation_space,\n                                           action_space=env.action_space,\n                                           feature_dim=512,\n                                           opt_class=th.optim.Adam,\n                                           opt_kwargs=dict(lr=2.5e-4, eps=1e-5),\n                                           init_fn=\"xavier_uniform\"\n                                           )\n        storage = VanillaRolloutStorage(observation_space=env.observation_space,\n                                        action_space=env.action_space,\n                                        device=device,\n                                        storage_size=self.num_steps,\n                                        num_envs=self.num_envs,\n                                        batch_size=256\n                                        )\n        dist = Categorical()\n        # set all the modules\n        self.set(encoder=encoder, policy=policy, storage=storage, distribution=dist)\n\n    def update(self):\n        for _ in range(4):\n            for batch in self.storage.sample():\n                # evaluate the sampled actions\n                new_values, new_log_probs, entropy = self.policy.evaluate_actions(obs=batch.observations, actions=batch.actions)\n                # policy loss part\n                policy_loss = - (batch.adv_targ * new_log_probs).mean()\n                # value loss part\n                value_loss = 0.5 * (new_values.flatten() - batch.returns).pow(2).mean()\n                # update\n                self.policy.optimizers['opt'].zero_grad(set_to_none=True)\n                (value_loss * 0.5 + policy_loss - entropy * 0.01).backward()\n                nn.utils.clip_grad_norm_(self.policy.parameters(), 0.5)\n                self.policy.optimizers['opt'].step()\n

                                                "},{"location":"tutorials/general/fast/#start-training","title":"Start training","text":"

                                                Now we can start training by train.py

                                                from rllte.env import make_atari_env\nif __name__ == \"__main__\":\n    device = \"cuda\"\n    env = make_atari_env(\"AlienNoFrameskip-v4\", num_envs=8, seed=0, device=device)\n    agent = A2C(env=env, tag=\"a2c_atari\", seed=0, device=device, num_steps=128)\n    agent.train(num_train_steps=10000000)\n
                                                Run train.py and you will see the following output:
                                                [08/04/2023 02:19:06 PM] - [INFO.] - Invoking RLLTE Engine...\n[08/04/2023 02:19:06 PM] - [INFO.] - ================================================================================\n[08/04/2023 02:19:06 PM] - [INFO.] - Tag               : a2c_atari\n[08/04/2023 02:19:06 PM] - [INFO.] - Device            : NVIDIA GeForce RTX 3090\n[08/04/2023 02:19:07 PM] - [DEBUG] - Agent             : A2C\n[08/04/2023 02:19:07 PM] - [DEBUG] - Encoder           : MnihCnnEncoder\n[08/04/2023 02:19:07 PM] - [DEBUG] - Policy            : OnPolicySharedActorCritic\n[08/04/2023 02:19:07 PM] - [DEBUG] - Storage           : VanillaRolloutStorage\n[08/04/2023 02:19:07 PM] - [DEBUG] - Distribution      : Categorical\n[08/04/2023 02:19:07 PM] - [DEBUG] - Augmentation      : False\n[08/04/2023 02:19:07 PM] - [DEBUG] - Intrinsic Reward  : False\n[08/04/2023 02:19:07 PM] - [DEBUG] - ================================================================================\n[08/04/2023 02:19:09 PM] - [TRAIN] - S: 1024        | E: 8           | L: 44          | R: 99.000      | FPS: 407.637   | T: 0:00:02    \n[08/04/2023 02:19:10 PM] - [TRAIN] - S: 2048        | E: 16          | L: 50          | R: 109.000     | FPS: 594.725   | T: 0:00:03    \n[08/04/2023 02:19:11 PM] - [TRAIN] - S: 3072        | E: 24          | L: 47          | R: 96.000      | FPS: 692.433   | T: 0:00:04    \n[08/04/2023 02:19:12 PM] - [TRAIN] - S: 4096        | E: 32          | L: 36          | R: 93.000      | FPS: 755.935   | T: 0:00:05    \n[08/04/2023 02:19:13 PM] - [TRAIN] - S: 5120        | E: 40          | L: 55          | R: 99.000      | FPS: 809.577   | T: 0:00:06    \n[08/04/2023 02:19:14 PM] - [TRAIN] - S: 6144        | E: 48          | L: 46          | R: 34.000      | FPS: 847.310   | T: 0:00:07    \n[08/04/2023 02:19:15 PM] - [TRAIN] - S: 7168        | E: 56          | L: 49          | R: 43.000      | FPS: 878.628   | T: 0:00:08   \n...\n

                                                As shown in this example, only a few dozen lines of code are needed to create RL agents with RLLTE.

                                                "},{"location":"tutorials/md/cann/","title":"with CANN","text":"

                                                AscendCL provides a collection of C language APIs for use in the development of DNN inference apps on Compute Architecture for Neural Networks (CANN). These APIs are designed for model and operator loading and execution, as well as media data processing, facilitating deep learning inference computing, graphics and image preprocessing, and single-operator accelerated computing on the Ascend CANN platform.

                                                "},{"location":"tutorials/md/cann/#prerequisites","title":"Prerequisites","text":"

                                                Get the complete repository from GitHub:

                                                git clone https://github.com/RLE-Foundation/rllte\n

                                                Download and install the following necessary libraries: CANN 6.0.1

                                                "},{"location":"tutorials/md/cann/#model-preparation","title":"Model preparation","text":"

                                                Take Ascend310 for example:

                                                atc --model=model/test_model.onnx --framework=5 --output=test_model --input_format=NCHW --log=info --soc_version=Ascend310 --input_shape=\"input:1,9,84,84\"\n
                                                More details can be found in Learning Wizard.

                                                "},{"location":"tutorials/md/cann/#c-development","title":"C++ development","text":"
                                                • Include the header file #include \"acl/acl.h\"
                                                • The main workflow is
int main()\n{   \n    // 1. Define a resource initialization function for AscendCL initialization and runtime resource allocation (specifying a compute device).\n    InitResource();\n\n    // 2. Define a model loading function for loading the image classification model.\n    const char *modelPath = \"../model/test_model.om\";\n    LoadModel(modelPath);\n\n    // 3. Define a function for preparing data in memory and transferring it to the device.\n    LoadData();\n\n    // 4. Define an inference function for executing inference.\n    Inference();\n\n    // 5. Define a function for processing inference result data to print the class indexes of the top 5 confidence values of the test image.\n    PrintResult();\n\n    // 6. Define a function for unloading the image classification model.\n    UnloadModel();\n\n    // 7. Define a function for freeing the memory and destroying inference-related data to prevent memory leaks.\n    UnloadData();\n\n    // 8. Define a resource deinitialization function for AscendCL deinitialization and runtime resource deallocation (releasing a compute device).\n    DestroyResource();\n}\n
                                                "},{"location":"tutorials/md/cann/#build-and-run","title":"Build and run","text":"
cd ascend\nexport APP_SOURCE_PATH=<path_to_rllte_deployment>/ascend\nexport DDK_PATH=<path_to_ascend_toolkit>\nexport NPU_HOST_LIB=<path_to_ascend_devlib>\nchmod +x sample_build.sh\n./sample_build.sh\nchmod +x sample_run.sh\n./sample_run.sh\n
                                                "},{"location":"tutorials/md/deployment/","title":"Model Deployment","text":"

                                                Currently, rllte supports model deployment by:

                                                • NVIDIA TensorRT
                                                • HUAWEI CANN

The following content shows how to convert an rllte model into a .onnx model and deploy it on the corresponding devices.
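Before deployment, the trained policy has to be exported to ONNX. The repository provides helper scripts for this (see pth2onnx.py below); conceptually it is a single torch.onnx.export call, sketched here under the assumption that the saved policy is an ordinary torch.nn.Module taking an image observation of shape (9, 84, 84) with input name "input", as in the atc command above.

import torch as th\n\n# load a trained policy checkpoint (path and module layout are illustrative)\npolicy = th.load(\"test_model.pth\", map_location=\"cpu\")\npolicy.eval()\n\n# a dummy observation matching the policy's expected input\ndummy_obs = th.ones(1, 9, 84, 84)\n\n# export to ONNX for the TensorRT/CANN toolchains\nth.onnx.export(policy,\n               dummy_obs,\n               \"test_model.onnx\",\n               input_names=[\"input\"],\n               output_names=[\"output\"],\n               opset_version=11)\n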

                                                "},{"location":"tutorials/md/deployment/#with-nvidia-tensorrt","title":"with NVIDIA TensorRT","text":""},{"location":"tutorials/md/deployment/#prerequisites","title":"Prerequisites","text":"

                                                Get the complete repository from GitHub:

                                                git clone https://github.com/RLE-Foundation/rllte\n

                                                Download the following necessary libraries:

                                                • CUDA Toolkit Documentation v12.0
                                                • cuDNN v8.8.0 for CUDA 12.0
                                                • TensorRT 8.6.0 EA

                                                Meanwhile, install the following Python packages:

                                                • pycuda==2022.2.2
                                                • tensorrt==8.6.0
                                                • numpy==1.24.2
                                                • torch==2.0.0
                                                • onnx==1.14.0

The following two examples can be used to verify your installation:

                                                C++ Port
                                                cd deloyment/c++\nmkdir build && cd build\ncmake .. && make\n./DeployerTest ../../model/test_model.onnx\n
                                                Python Port
                                                cd deloyment/python\npython3 pth2onnx.py ../model/test_model.pth\n./trtexec --onnx=test_model.onnx --saveEngine=test_model.trt --skipInference\npython3 infer.py test_model.plan\n
                                                "},{"location":"tutorials/md/deployment/#use-in-your-c-project","title":"Use in Your C++ Project","text":"

The following code illustrates how to include our library in your project: example.cpp

// Include the header file in your cpp file.\n#include \"RLLTEDeployer.h\"\n\n// Declare an instance of Options and configure the parameters.\nOptions options;\noptions.deviceIndex = 0;  \noptions.doesSupportDynamicBatchSize = false;  \noptions.maxWorkspaceSize = 4000000000; \noptions.precision = Precision::FP16;\n\n// Declare an instance of RLLTEDeployer with the options.\nRLLTEDeployer deployer(options);\n\n// Use the build member function to convert the onnx model to the TensorRT static model (plan).\ndeployer.build(path_of_onnx_model);\n\n// Use the loadPlan member function to load the converted model. If a path is given,\n// it will search that path; otherwise it will search the current working directory.\ndeployer.loadPlan();\n\n// Use the infer member function to execute the inference process. The input is the tensor with the\n// relevant data type, and the output is a pointer with the relevant data size and data type.\n// The inference result will be moved to the output.\ndeployer.infer<float>(input, output, 1);\ndeployer.infer<float16_t>(input, output, 1);\ndeployer.infer<int8>(input, output, 1);\n\n...\n
                                                Please refer to the DeployerTest.cpp for the complete code.

                                                "},{"location":"tutorials/md/deployment/#with-cmake","title":"with CMake","text":"CMakeLists.txt
find_package(CUDA REQUIRED)\ninclude_directories(${CUDA_INCLUDE_DIRS} ${Path_of_RLLTEDeployer_h})\ntarget_link_libraries(YOUREXECUTEFILE ${PATH_OF_libRLLTEDeployer_so})\n
                                                "},{"location":"tutorials/md/deployment/#with-docker","title":"with Docker","text":"

                                                Install the NVIDIA docker via (make sure the NVIDIA driver is installed): install_docker.sh

                                                sudo apt-get install ca-certificates gnupg lsb-release\nsudo mkdir -p /etc/apt/keyrings\n\ncurl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg\necho \"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable\" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null\nsudo apt-get update\nsudo apt-get install docker-ce docker-ce-cli containerd.io docker-compose-plugin \ndistribution=$(. /etc/os-release;echo $ID$VERSION_ID)\n\ncurl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -\ncurl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list\n\nsudo apt-get update && sudo apt-get install -y nvidia-container-toolkit\nsudo systemctl restart docker\nsudo groupadd docker\nsudo gpasswd -a $USER docker\n

                                                Restart your device, and run the following command.

                                                sudo service docker restart\n

                                                Now you can run your model via: run_docker.sh

                                                docker pull jakeshihaoluo/rllte_deployment_env:0.0.1\ndocker run -it -v ${path_to_the_repo}:/rllte --gpus all jakeshihaoluo/rllte_deployment_env:0.0.1\ncd /rllte/deloyment/c++\nmkdir build && cd build\ncmake .. && make\n./DeployerTest ../../model/test_model.onnx\n

                                                "},{"location":"tutorials/md/deployment/#with-huawei-cann","title":"with HUAWEI CANN","text":"

Coming soon...

                                                "},{"location":"tutorials/md/tensorrt/","title":"with TensorRT","text":""},{"location":"tutorials/md/tensorrt/#prerequisites","title":"Prerequisites","text":"

                                                Get the complete repository from GitHub:

                                                git clone https://github.com/RLE-Foundation/rllte\n

                                                Download the following necessary libraries:

                                                • CUDA Toolkit Documentation v12.0
                                                • cuDNN v8.8.0 for CUDA 12.0
                                                • TensorRT 8.6.0 EA

                                                Meanwhile, install the following Python packages:

                                                • pycuda==2022.2.2
                                                • tensorrt==8.6.0
                                                • numpy==1.24.2
                                                • torch==2.0.0
                                                • onnx==1.14.0

The following two examples can be used to verify your installation:

                                                C++ Port
                                                cd deloyment/c++\nmkdir build && cd build\ncmake .. && make\n./DeployerTest ../../model/test_model.onnx\n
                                                Python Port
                                                cd deloyment/python\npython3 pth2onnx.py ../model/test_model.pth\n./trtexec --onnx=test_model.onnx --saveEngine=test_model.trt --skipInference\npython3 infer.py test_model.plan\n
                                                "},{"location":"tutorials/md/tensorrt/#use-in-your-c-project","title":"Use in Your C++ Project","text":"

The following code illustrates how to include our library in your project: example.cpp

// Include the header file in your cpp file.\n#include \"RLLTEDeployer.h\"\n\n// Declare an instance of Options and configure the parameters.\nOptions options;\noptions.deviceIndex = 0;  \noptions.doesSupportDynamicBatchSize = false;  \noptions.maxWorkspaceSize = 4000000000; \noptions.precision = Precision::FP16;\n\n// Declare an instance of RLLTEDeployer with the options.\nRLLTEDeployer deployer(options);\n\n// Use the build member function to convert the onnx model to the TensorRT static model (plan).\ndeployer.build(path_of_onnx_model);\n\n// Use the loadPlan member function to load the converted model. If a path is given,\n// it will search that path; otherwise it will search the current working directory.\ndeployer.loadPlan();\n\n// Use the infer member function to execute the inference process. The input is the tensor with the\n// relevant data type, and the output is a pointer with the relevant data size and data type.\n// The inference result will be moved to the output.\ndeployer.infer<float>(input, output, 1);\ndeployer.infer<float16_t>(input, output, 1);\ndeployer.infer<int8>(input, output, 1);\n\n...\n
                                                Please refer to the DeployerTest.cpp for the complete code.

                                                "},{"location":"tutorials/md/tensorrt/#with-cmake","title":"with CMake","text":"CMakeLists.txt
find_package(CUDA REQUIRED)\ninclude_directories(${CUDA_INCLUDE_DIRS} ${Path_of_RLLTEDeployer_h})\ntarget_link_libraries(YOUREXECUTEFILE ${PATH_OF_libRLLTEDeployer_so})\n
                                                "},{"location":"tutorials/md/tensorrt/#with-docker","title":"with Docker","text":"

                                                Install the NVIDIA docker via (make sure the NVIDIA driver is installed): install_docker.sh

                                                sudo apt-get install ca-certificates gnupg lsb-release\nsudo mkdir -p /etc/apt/keyrings\n\ncurl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg\necho \"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable\" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null\nsudo apt-get update\nsudo apt-get install docker-ce docker-ce-cli containerd.io docker-compose-plugin \ndistribution=$(. /etc/os-release;echo $ID$VERSION_ID)\n\ncurl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -\ncurl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list\n\nsudo apt-get update && sudo apt-get install -y nvidia-container-toolkit\nsudo systemctl restart docker\nsudo groupadd docker\nsudo gpasswd -a $USER docker\n

                                                Restart your device, and run the following command.

                                                sudo service docker restart\n

                                                Now you can run your model via: run_docker.sh

                                                docker pull jakeshihaoluo/rllte_deployment_env:0.0.1\ndocker run -it -v ${path_to_the_repo}:/rllte --gpus all jakeshihaoluo/rllte_deployment_env:0.0.1\ncd /rllte/deloyment/c++\nmkdir build && cd build\ncmake .. && make\n./DeployerTest ../../model/test_model.onnx\n

                                                "},{"location":"tutorials/me/comp/","title":"Performance Comparison of Multiple Algorithms","text":"Open in Colab View on GitHub"},{"location":"tutorials/me/comp/#download-data","title":"Download Data","text":"

                                                Suppose we want to evaluate algorithm performance on the Procgen benchmark. First, download the data from rllte-hub: example.py

                                                # load packages\nfrom rllte.evaluation import Performance, Comparison, min_max_normalize\nfrom rllte.hub.datasets import Procgen, Atari\nimport numpy as np\n# load scores\nprocgen = Procgen()\nprocgen_scores = procgen.load_scores()\nprint(procgen_scores.keys())\n# get ppo-normalized scores\nppo_norm_scores = dict()\nMIN_SCORES = np.zeros_like(procgen_scores['ppo'])\nMAX_SCORES = np.mean(procgen_scores['ppo'], axis=0)\nfor algo in procgen_scores.keys():\n    ppo_norm_scores[algo] = min_max_normalize(procgen_scores[algo],\n                                              min_scores=MIN_SCORES,\n                                              max_scores=MAX_SCORES)\n\n# Output:\n# dict_keys(['ppg', 'mixreg', 'ppo', 'idaac', 'plr', 'ucb-drac'])\n
For each algorithm, this will return an NdArray of size (10 x 16), where scores[n][m] represents the score of run n on task m.

                                                "},{"location":"tutorials/me/comp/#performance-comparison","title":"Performance Comparison","text":"

The Comparison module allows you to compare the performance of two algorithms: example.py

                                                comp = Comparison(scores_x=ppo_norm_scores['PPG'],\n                  scores_y=ppo_norm_scores['PPO'],\n                  get_ci=True)\ncomp.compute_poi()\n\n# Output:\n# (0.8153125, array([[0.779375  ], [0.85000781]]))\n
This indicates that the overall probability of improvement of PPG over PPO is 0.8153125.

                                                Available metrics:

Metric Remark .compute_poi Computes the overall probability of improvement of algorithm X over Y."},{"location":"tutorials/me/perf/","title":"Performance Evaluation of Single Algorithm","text":"Open in Colab View on GitHub

                                                RLLTE provides evaluation methods based on:

                                                Agarwal R, Schwarzer M, Castro P S, et al. Deep reinforcement learning at the edge of the statistical precipice[J]. Advances in neural information processing systems, 2021, 34: 29304-29320.

We have reconstructed and improved the code of the official repository rliable for greater convenience and efficiency.

                                                "},{"location":"tutorials/me/perf/#download-data","title":"Download Data","text":"

                                                Suppose we want to evaluate algorithm performance on the Procgen benchmark. First, download the data from rllte-hub: example.py

                                                # load packages\nfrom rllte.evaluation import Performance, Comparison, min_max_normalize\nfrom rllte.hub.datasets import Procgen, Atari\nimport numpy as np\n# load scores\nprocgen = Procgen()\nprocgen_scores = procgen.load_scores()\nprint(procgen_scores.keys())\n# get ppo-normalized scores\nppo_norm_scores = dict()\nMIN_SCORES = np.zeros_like(procgen_scores['ppo'])\nMAX_SCORES = np.mean(procgen_scores['ppo'], axis=0)\nfor algo in procgen_scores.keys():\n    ppo_norm_scores[algo] = min_max_normalize(procgen_scores[algo],\n                                              min_scores=MIN_SCORES,\n                                              max_scores=MAX_SCORES)\n\n# Output:\n# dict_keys(['ppg', 'mixreg', 'ppo', 'idaac', 'plr', 'ucb-drac'])\n
For each algorithm, this will return an NdArray of size (10 x 16), where scores[n][m] represents the score of run n on task m.

                                                "},{"location":"tutorials/me/perf/#performance-evaluation","title":"Performance Evaluation","text":"

                                                Initialize the performance evaluator: example.py

                                                perf = Performance(scores=ppo_norm_scores['PPO'], \n                   get_ci=True # get confidence intervals\n                   )\nperf.aggregate_mean()\n\n# Output:\n# Computing confidence interval for aggregate MEAN...\n# (1.0, array([[0.9737281 ], [1.02564405]]))\n
                                                Available metrics:

                                                Metric Remark .aggregate_mean Computes mean of sample mean scores per task. .aggregate_median Computes median of sample mean scores per task. .aggregate_og Computes optimality gap across all runs and tasks. .aggregate_iqm Computes the interquartile mean across runs and tasks. .create_performance_profile Computes the performance profiles."},{"location":"tutorials/me/visual/","title":"Metrics Visualization","text":"Open in Colab View on GitHub"},{"location":"tutorials/me/visual/#download-data","title":"Download Data","text":"

                                                Suppose we want to visualize algorithm performance on the Procgen benchmark. First, download the data from rllte-hub: example.py

                                                # load packages\nfrom rllte.evaluation import Performance, Comparison, min_max_normalize\nfrom rllte.hub.datasets import Procgen, Atari\nfrom rllte.evaluation import (plot_interval_estimates,\n                              plot_probability_improvement,\n                              plot_sample_efficiency_curve,\n                              plot_performance_profile)\nimport numpy as np\n# load scores\nprocgen = Procgen()\nprocgen_scores = procgen.load_scores()\nprint(procgen_scores.keys())\n# get ppo-normalized scores\nppo_norm_scores = dict()\nMIN_SCORES = np.zeros_like(procgen_scores['ppo'])\nMAX_SCORES = np.mean(procgen_scores['ppo'], axis=0)\nfor algo in procgen_scores.keys():\n    ppo_norm_scores[algo] = min_max_normalize(procgen_scores[algo],\n                                              min_scores=MIN_SCORES,\n                                              max_scores=MAX_SCORES)\n\n# Output:\n# dict_keys(['ppg', 'mixreg', 'ppo', 'idaac', 'plr', 'ucb-drac'])\n
For each algorithm, this will return an NdArray of size (10 x 16), where scores[n][m] represents the score of run n on task m.

                                                "},{"location":"tutorials/me/visual/#visualization","title":"Visualization","text":""},{"location":"tutorials/me/visual/#plot_interval_estimates","title":".plot_interval_estimates","text":"

.plot_interval_estimates can plot various performance metrics of algorithms with stratified confidence intervals. Taking Procgen as an example, we plot four reliable metrics computed by the Performance evaluator: example.py

                                                # construct a performance dict\naggregate_performance_dict = {\n    \"MEAN\": {},\n    \"MEDIAN\": {},\n    \"IQM\": {},\n    \"OG\": {}\n}\nfor algo in ppo_norm_scores.keys():\n    perf = Performance(scores=ppo_norm_scores[algo], get_ci=True)\n    aggregate_performance_dict['MEAN'][algo] = perf.aggregate_mean()\n    aggregate_performance_dict['MEDIAN'][algo] = perf.aggregate_median()\n    aggregate_performance_dict['IQM'][algo] = perf.aggregate_iqm()\n    aggregate_performance_dict['OG'][algo] = perf.aggregate_og()\n\n# plot all the four metrics of all the algorithms\nfig, axes = plot_interval_estimates(aggregate_performance_dict,\n                                    metric_names=['MEAN', 'MEDIAN', 'IQM', 'OG'],\n                                    algorithms=['PPO', 'MixReg', 'UCB-DrAC', 'PLR', 'PPG', 'IDAAC'],\n                                    xlabel=\"PPO-Normalized Score\")\nfig.savefig('./plot_interval_estimates1.png', format='png', bbox_inches='tight')\n\n# plot two metrics of all the algorithms\nfig, axes = plot_interval_estimates(aggregate_performance_dict,\n                        metric_names=['MEAN', 'MEDIAN'],\n                        algorithms=['PPO', 'MixReg', 'UCB-DrAC', 'PLR', 'PPG', 'IDAAC'],\n                        xlabel=\"PPO-Normalized Score\")\nfig.savefig('./plot_interval_estimates2.png', format='png', bbox_inches='tight')\n\n# plot two metrics of three algorithms\nfig, axes = plot_interval_estimates(aggregate_performance_dict,\n                        metric_names=['MEAN', 'MEDIAN'],\n                        algorithms=['ppg', 'mixreg', 'ppo'],\n                        xlabel=\"PPO-Normalized Score\",\n                        xlabel_y_coordinate=-0.4)\nfig.savefig('./plot_interval_estimates3.png', format='png', bbox_inches='tight')\n
                                                The output figures are:

                                                "},{"location":"tutorials/me/visual/#plot_probability_improvement","title":".plot_probability_improvement","text":"

.plot_probability_improvement plots the probability of improvement with stratified confidence intervals. An example is: example.py

                                                # construct a comparison dict\npairs = [['IDAAC', 'PPG'], ['IDAAC', 'UCB-DrAC'], ['IDAAC', 'PPO'],\n    ['PPG', 'PPO'], ['UCB-DrAC', 'PLR'], \n    ['PLR', 'MixReg'], ['UCB-DrAC', 'MixReg'],  ['MixReg', 'PPO']]\n\nprobability_of_improvement_dict = {}\nfor pair in pairs:\n    comp = Comparison(scores_x=ppo_norm_scores[pair[0]], \n                      scores_y=ppo_norm_scores[pair[1]],\n                      get_ci=True)\n    probability_of_improvement_dict['_'.join(pair)] = comp.compute_poi()\n\nfig, ax = plot_probability_improvement(poi_dict=probability_of_improvement_dict)\nfig.savefig('./plot_probability_improvement.png', format='png', bbox_inches='tight')\n
                                                The output figure is:

                                                "},{"location":"tutorials/me/visual/#plot_performance_profile","title":".plot_performance_profile","text":"

                                                .plot_performance_profile plots performance profiles with stratified confidence intervals. An example is: example.py

                                                profile_dict = dict()\nprocgen_tau = np.linspace(0.5, 3.6, 101)\n\nfor algo in ppo_norm_scores.keys():\n    perf = Performance(scores=ppo_norm_scores[algo], get_ci=True, reps=2000)\n    profile_dict[algo] = perf.create_performance_profile(tau_list=procgen_tau)\n\nfig, axes = plot_performance_profile(profile_dict, \n                         procgen_tau,\n                         figsize=(7, 5),\n                         xlabel=r'PPO-Normalized Score $(\\tau)$',\n                         )\nfig.savefig('./plot_performance_profile.png', format='png', bbox_inches='tight')\n
                                                The output figure is:

                                                "},{"location":"tutorials/me/visual/#plot_sample_efficiency_curve","title":".plot_sample_efficiency_curve","text":"

                                                .plot_sample_efficiency_curve plots an aggregate metric with CIs as a function of environment frames. An example is: example.py

                                                # get Atari games' curve data\nale_all_frames_scores_dict = Atari().load_curves()\nprint(ale_all_frames_scores_dict.keys())\nprint(ale_all_frames_scores_dict['C51'].shape)\n# Output:\n# dict_keys(['C51', 'DQN (Adam)', 'DQN (Nature)', 'Rainbow', 'IQN', 'REM', 'M-IQN', 'DreamerV2'])\n# (5, 55, 200)\n# 200 data points of 55 games over 5 random seeds\n\nframes = np.array([1, 10, 25, 50, 75, 100, 125, 150, 175, 200]) - 1\n\nsampling_dict = dict()\nfor algo in ale_all_frames_scores_dict.keys():\n    sampling_dict[algo] = [[], [], []]\n    for frame in frames:\n        perf = Performance(ale_all_frames_scores_dict[algo][:, :, frame],\n                           get_ci=True, \n                           reps=2000)\n        value, CIs = perf.aggregate_iqm()\n        sampling_dict[algo][0].append(value)\n        sampling_dict[algo][1].append(CIs[0]) # lower bound\n        sampling_dict[algo][2].append(CIs[1]) # upper bound\n\n    sampling_dict[algo][0] = np.array(sampling_dict[algo][0]).reshape(-1)\n    sampling_dict[algo][1] = np.array(sampling_dict[algo][1]).reshape(-1)\n    sampling_dict[algo][2] = np.array(sampling_dict[algo][2]).reshape(-1)\n\nalgorithms = ['C51', 'DQN (Adam)', 'DQN (Nature)', 'Rainbow', 'IQN', 'REM', 'M-IQN', 'DreamerV2']\nfig, axes = plot_sample_efficiency_curve(\n    sampling_dict,\n    frames+1, \n    figsize=(7, 4.5),\n    algorithms=algorithms,\n    xlabel=r'Number of Frames (in millions)',\n    ylabel='IQM Human Normalized Score')\nfig.savefig('./plot_sample_efficiency_curve.png', format='png', bbox_inches='tight')\n
                                                The output figure is:

                                                "},{"location":"tutorials/mt/irs/","title":"Intrinsic Reward Shaping for Enhancing Exploration","text":"Open in Colab View on GitHub

Since RLLTE decouples RL algorithms into minimum primitives from the perspective of exploitation and exploration, intrinsic reward shaping is supported by default. Because different intrinsic reward methods are computed quite differently, RLLTE adopts the following rules:

                                                1. The environments are assumed to be vectorized;
2. The compute_irs function of each intrinsic reward module has a mandatory argument samples, which is a dict with the following keys (a complete samples dict is sketched after this list):
                                                  • obs (n_steps, n_envs, *obs_shape), torch.Tensor
                                                  • actions (n_steps, n_envs, *action_shape) torch.Tensor
                                                  • rewards (n_steps, n_envs) torch.Tensor
                                                  • next_obs (n_steps, n_envs, *obs_shape) torch.Tensor
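For illustration only, a complete samples dict with all four keys could be assembled as below; the step/environment counts and shapes are placeholder assumptions, and some_reward_module stands for any intrinsic reward instance:

import torch as th

n_steps, n_envs = 128, 7                       # placeholder sizes
obs_shape, action_shape = (9, 84, 84), (1,)    # placeholder shapes

samples = {
    'obs': th.rand(n_steps, n_envs, *obs_shape),
    'actions': th.rand(n_steps, n_envs, *action_shape),
    'rewards': th.rand(n_steps, n_envs),
    'next_obs': th.rand(n_steps, n_envs, *obs_shape),
}
# intrinsic_rewards = some_reward_module.compute_irs(samples=samples)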

Take RE3 for instance: it computes the intrinsic reward for each state based on the Euclidean distance between the state and its \(k\)-nearest neighbor within a mini-batch. Thus it suffices to provide the obs data to compute the reward. The following code provides a usage example of RE3: example.py

                                                from rllte.xplore.reward import RE3\nfrom rllte.env import make_dmc_env\nimport torch as th\n\nif __name__ == '__main__':\n    num_envs = 7\n    num_steps = 128\n    # create env\n    env = make_dmc_env(env_id=\"cartpole_balance\", num_envs=num_envs)\n    print(env.observation_space, env.action_space)\n    # create RE3 instance\n    re3 = RE3(\n        observation_space=env.observation_space,\n        action_space=env.action_space\n    )\n    # compute intrinsic rewards\n    obs = th.rand(size=(num_steps, num_envs, *env.observation_space.shape))\n    intrinsic_rewards = re3.compute_irs(samples={'obs': obs})\n\n    print(intrinsic_rewards.shape, type(intrinsic_rewards))\n    print(intrinsic_rewards)\n\n# Output:\n# {'shape': [9, 84, 84]} {'shape': [1], 'type': 'Box', 'range': [-1.0, 1.0]}\n# torch.Size([128, 7]) <class 'torch.Tensor'>\n

You can also invoke the intrinsic reward module in any of the implemented algorithms directly via the .set function: example.py

                                                from rllte.agent import PPO\nfrom rllte.env import make_atari_env\nfrom rllte.xplore.reward import RE3\n\nif __name__ == \"__main__\":\n    # env setup\n    device = \"cuda:0\"\n    env = make_atari_env(device=device)\n    eval_env = make_atari_env(device=device)\n    # create agent\n    agent = PPO(env=env, \n                eval_env=eval_env, \n                device=device,\n                tag=\"ppo_atari\")\n    # create intrinsic reward\n    re3 = RE3(observation_space=env.observation_space,\n              action_space=env.action_space,\n              device=device)\n    # set the module\n    agent.set(reward=re3)\n    # start training\n    agent.train(num_train_steps=5000)\n
                                                Run example.py and you'll see the intrinsic reward module is invoked:
                                                [08/04/2023 03:54:10 PM] - [INFO.] - Invoking RLLTE Engine...\n[08/04/2023 03:54:10 PM] - [INFO.] - ================================================================================\n[08/04/2023 03:54:10 PM] - [INFO.] - Tag               : ppo_atari\n[08/04/2023 03:54:10 PM] - [INFO.] - Device            : NVIDIA GeForce RTX 3090\n[08/04/2023 03:54:11 PM] - [DEBUG] - Agent             : PPO\n[08/04/2023 03:54:11 PM] - [DEBUG] - Encoder           : MnihCnnEncoder\n[08/04/2023 03:54:11 PM] - [DEBUG] - Policy            : OnPolicySharedActorCritic\n[08/04/2023 03:54:11 PM] - [DEBUG] - Storage           : VanillaRolloutStorage\n[08/04/2023 03:54:11 PM] - [DEBUG] - Distribution      : Categorical\n[08/04/2023 03:54:11 PM] - [DEBUG] - Augmentation      : False\n[08/04/2023 03:54:11 PM] - [DEBUG] - Intrinsic Reward  : True, RE3\n[08/04/2023 03:54:11 PM] - [DEBUG] - ================================================================================\n

                                                "},{"location":"tutorials/mt/oa/","title":"Observation Augmentation for Sample Efficiency and Generalization","text":"Open in Colab View on GitHub

Observation augmentation is an effective approach to improving sample efficiency and generalization, and it is also a basic primitive of RLLTE.

                                                • Laskin M, Lee K, Stooke A, et al. Reinforcement learning with augmented data[J]. Advances in neural information processing systems, 2020, 33: 19884-19895.
                                                • Yarats D, Fergus R, Lazaric A, et al. Mastering Visual Continuous Control: Improved Data-Augmented Reinforcement Learning[C]//International Conference on Learning Representations. 2021.

RLLTE implements the augmentation modules in a PyTorch-NN style, and both image-based and state-based observations are supported. A code example is: example.py

                                                from rllte.agent import DrAC\nfrom rllte.env import make_atari_env\nfrom rllte.xplore.augmentation import RandomCrop\n\nif __name__ == \"__main__\":\n    # env setup\n    device = \"cuda:0\"\n    env = make_atari_env(device=device)\n    eval_env = make_atari_env(device=device)\n    # create agent\n    agent = DrAC(env=env, \n                eval_env=eval_env, \n                device=device,\n                tag=\"drac_atari\")\n    # create augmentation module\n    random_crop = RandomCrop()\n    # set the module\n    agent.set(augmentation=random_crop)\n    # start training\n    agent.train(num_train_steps=5000)\n
                                                Run example.py and you'll see the augmentation module is invoked:
                                                [08/04/2023 05:00:15 PM] - [INFO.] - Invoking RLLTE Engine...\n[08/04/2023 05:00:15 PM] - [INFO.] - ================================================================================\n[08/04/2023 05:00:15 PM] - [INFO.] - Tag               : drac_atari\n[08/04/2023 05:00:16 PM] - [INFO.] - Device            : NVIDIA GeForce RTX 3090\n[08/04/2023 05:00:16 PM] - [DEBUG] - Agent             : DrAC\n[08/04/2023 05:00:16 PM] - [DEBUG] - Encoder           : MnihCnnEncoder\n[08/04/2023 05:00:16 PM] - [DEBUG] - Policy            : OnPolicySharedActorCritic\n[08/04/2023 05:00:16 PM] - [DEBUG] - Storage           : VanillaRolloutStorage\n[08/04/2023 05:00:16 PM] - [DEBUG] - Distribution      : Categorical\n[08/04/2023 05:00:16 PM] - [DEBUG] - Augmentation      : True, RandomCrop\n[08/04/2023 05:00:16 PM] - [DEBUG] - Intrinsic Reward  : False\n[08/04/2023 05:00:16 PM] - [DEBUG] - ================================================================================\n...\n

                                                Compatibility of augmentation

Note that the module will only make a difference when the algorithm supports data augmentation. Please refer to https://docs.rllte.dev/api/ for compatibility details.

                                                "},{"location":"tutorials/mt/pre-training/","title":"Pre-training with Intrinsic Rewards","text":"Open in Colab View on GitHub"},{"location":"tutorials/mt/pre-training/#pre-training","title":"Pre-training","text":"

                                                Currently, RLLTE only supports online pre-training via intrinsic reward. To turn on the pre-training mode, it suffices to write a train.py like: train.py

                                                from rllte.agent import PPO\nfrom rllte.env import make_atari_env\nfrom rllte.xplore.reward import RE3\n\nif __name__ == \"__main__\":\n    # env setup\n    device = \"cuda:0\"\n    env = make_atari_env(device=device)\n    eval_env = make_atari_env(device=device)\n    # create agent and turn on pre-training mode\n    agent = PPO(env=env, \n                eval_env=eval_env, \n                device=device,\n                tag=\"ppo_atari\",\n                pretraining=True)\n    # create intrinsic reward\n    re3 = RE3(observation_space=env.observation_space,\n              action_space=env.action_space,\n              device=device)\n    # set the reward module\n    agent.set(reward=re3)\n    # start training\n    agent.train(num_train_steps=5000)\n
                                                Run train.py and you'll see the pre-training mode is on:
                                                [08/04/2023 05:05:54 PM] - [INFO.] - Invoking RLLTE Engine...\n[08/04/2023 05:05:54 PM] - [INFO.] - ================================================================================\n[08/04/2023 05:05:54 PM] - [INFO.] - Tag               : ppo_atari\n[08/04/2023 05:05:54 PM] - [INFO.] - Device            : NVIDIA GeForce RTX 3090\n[08/04/2023 05:05:54 PM] - [DEBUG] - Agent             : PPO\n[08/04/2023 05:05:54 PM] - [DEBUG] - Encoder           : MnihCnnEncoder\n[08/04/2023 05:05:54 PM] - [DEBUG] - Policy            : OnPolicySharedActorCritic\n[08/04/2023 05:05:54 PM] - [DEBUG] - Storage           : VanillaRolloutStorage\n[08/04/2023 05:05:54 PM] - [DEBUG] - Distribution      : Categorical\n[08/04/2023 05:05:54 PM] - [DEBUG] - Augmentation      : False\n[08/04/2023 05:05:54 PM] - [DEBUG] - Intrinsic Reward  : True, RE3\n[08/04/2023 05:05:54 PM] - [INFO.] - Pre-training Mode : On\n[08/04/2023 05:05:54 PM] - [DEBUG] - ================================================================================\n...\n

                                                Tip

                                                When the pre-training mode is on, a reward module must be specified!

                                                For all supported reward modules, see API Documentation.

                                                "},{"location":"tutorials/mt/pre-training/#fine-tuning","title":"Fine-tuning","text":"

Once the pre-training is finished, you can find the model parameters in the pretrained subfolder of the working directory. To fine-tune with these parameters, turn off the pre-training mode and pass the checkpoint path via the init_model_path argument of .train():

                                                train.py

                                                from rllte.agent import PPO\nfrom rllte.env import make_atari_env\n\nif __name__ == \"__main__\":\n    # env setup\n    device = \"cuda:0\"\n    env = make_atari_env(device=device)\n    eval_env = make_atari_env(device=device)\n    # create agent and turn off pre-training mode\n    agent = PPO(env=env, \n                eval_env=eval_env, \n                device=device,\n                tag=\"ppo_atari\",\n                pretraining=False)\n    # start training\n    agent.train(num_train_steps=5000,\n                init_model_path=\"/export/yuanmingqi/code/rllte/logs/ppo_atari/2023-06-05-02-42-12/pretrained/pretrained.pth\")\n
                                                Run train.py and you'll see the pre-trained model parameters are loaded:
                                                [08/04/2023 05:07:52 PM] - [INFO.] - Invoking RLLTE Engine...\n[08/04/2023 05:07:52 PM] - [INFO.] - ================================================================================\n[08/04/2023 05:07:52 PM] - [INFO.] - Tag               : ppo_atari\n[08/04/2023 05:07:52 PM] - [INFO.] - Device            : NVIDIA GeForce RTX 3090\n[08/04/2023 05:07:53 PM] - [DEBUG] - Agent             : PPO\n[08/04/2023 05:07:53 PM] - [DEBUG] - Encoder           : MnihCnnEncoder\n[08/04/2023 05:07:53 PM] - [DEBUG] - Policy            : OnPolicySharedActorCritic\n[08/04/2023 05:07:53 PM] - [DEBUG] - Storage           : VanillaRolloutStorage\n[08/04/2023 05:07:53 PM] - [DEBUG] - Distribution      : Categorical\n[08/04/2023 05:07:53 PM] - [DEBUG] - Augmentation      : False\n[08/04/2023 05:07:53 PM] - [DEBUG] - Intrinsic Reward  : False\n[08/04/2023 05:07:53 PM] - [DEBUG] - ================================================================================\n[08/04/2023 05:07:53 PM] - [INFO.] - Loading Initial Parameters from ./logs/ppo_atari/...\n...\n

                                                "},{"location":"tutorials/mt/quick_start/","title":"Quick Start","text":"Open in Colab View on GitHub

                                                RLLTE provides reliable implementations for highly-recognized RL algorithms, and users can build applications with very simple code.

                                                "},{"location":"tutorials/mt/quick_start/#on-nvidia-gpu","title":"On NVIDIA GPU","text":"

Suppose we want to use DrQ-v2 to solve a task from the DeepMind Control Suite; it suffices to write a train.py like:

                                                train.py
                                                # import `env` and `agent` module\nfrom rllte.env import make_dmc_env \nfrom rllte.agent import DrQv2\n\nif __name__ == \"__main__\":\n    device = \"cuda:0\"\n    # create env, and `eval_env` is optional\n    env = make_dmc_env(env_id=\"cartpole_balance\", device=device)\n    eval_env = make_dmc_env(env_id=\"cartpole_balance\", device=device)\n    # create agent\n    agent = DrQv2(env=env, \n                  eval_env=eval_env, \n                  device='cuda',\n                  tag=\"drqv2_dmc_pixel\")\n    # start training\n    agent.train(num_train_steps=5000, log_interval=1000)\n

                                                Run train.py and you will see the following output:

                                                Read the logs

• S: Number of environment steps. Note that S is not equal to the number of frames in visual tasks; instead, number_of_frames = number_of_steps * number_of_action_repeats (see the short sketch after this list).
                                                • E: Number of environment episodes.
                                                • L: Average episode length.
                                                • R: Average episode reward.
                                                • FPS: Training FPS.
                                                • T: Time costs.
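As a concrete illustration of the frames-versus-steps relation noted above, assuming an action repeat of 2 (the actual value depends on the task configuration):

num_train_steps = 500000
num_action_repeats = 2   # assumed value; task-dependent
number_of_frames = num_train_steps * num_action_repeats
print(number_of_frames)  # 1000000 environment frames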
                                                "},{"location":"tutorials/mt/quick_start/#on-huawei-npu","title":"On HUAWEI NPU","text":"

                                                Similarly, if we want to train an agent on HUAWEI NPU, it suffices to replace cuda with npu: train.py

                                                device = \"npu:0\"\n

                                                Compatibility of NPU

                                                Please refer to https://docs.rllte.dev/api/ for the compatibility of NPU.

                                                "},{"location":"tutorials/mt/quick_start/#load-the-trained-model","title":"Load the trained model","text":"

Once the training is finished, you can find agent.pth in the model subfolder of the specified working directory.

                                                play.py
                                                import torch as th\n\n# load the model and specify the map location\nagent = th.load(\"agent.pth\", map_location=th.device('cpu'))\nobs = th.zeros(size=(1, 9, 84, 84))\naction = agent(obs)\nprint(action)\n\n# Output: tensor([[-1.0000]], grad_fn=<TanhBackward0>)\n
                                                "},{"location":"tutorials/mt/replacement/","title":"Module Replacement for An Implemented Algorithm","text":"Open in Colab View on GitHub

RLLTE allows developers to replace the built-in modules of implemented algorithms for performance comparison and algorithm improvement.

                                                "},{"location":"tutorials/mt/replacement/#use-built-in-modules","title":"Use built-in modules","text":"

For instance, if we want to use the PPO agent to solve Atari games, it suffices to write a train.py like: train.py

                                                from rllte.agent import PPO\nfrom rllte.env import make_atari_env\n\nif __name__ == \"__main__\":\n    # env setup\n    device = \"cuda:0\"\n    env = make_atari_env(device=device)\n    eval_env = make_atari_env(device=device)\n    # create agent\n    agent = PPO(env=env, \n                eval_env=eval_env, \n                device=device,\n                tag=\"ppo_atari\")\n    # start training\n    agent.train(num_train_steps=5000)\n
                                                Run train.py and you'll see the following output:
                                                [08/04/2023 03:45:54 PM] - [INFO.] - Invoking RLLTE Engine...\n[08/04/2023 03:45:54 PM] - [INFO.] - ================================================================================\n[08/04/2023 03:45:54 PM] - [INFO.] - Tag               : ppo_atari\n[08/04/2023 03:45:54 PM] - [INFO.] - Device            : NVIDIA GeForce RTX 3090\n[08/04/2023 03:45:55 PM] - [DEBUG] - Agent             : PPO\n[08/04/2023 03:45:55 PM] - [DEBUG] - Encoder           : MnihCnnEncoder\n[08/04/2023 03:45:55 PM] - [DEBUG] - Policy            : OnPolicySharedActorCritic\n[08/04/2023 03:45:55 PM] - [DEBUG] - Storage           : VanillaRolloutStorage\n[08/04/2023 03:45:55 PM] - [DEBUG] - Distribution      : Categorical\n[08/04/2023 03:45:55 PM] - [DEBUG] - Augmentation      : False\n[08/04/2023 03:45:55 PM] - [DEBUG] - Intrinsic Reward  : False\n[08/04/2023 03:45:55 PM] - [DEBUG] - ================================================================================\n[08/04/2023 03:45:56 PM] - [EVAL.] - S: 0           | E: 0           | L: 23          | R: 24.000      | T: 0:00:02    \n[08/04/2023 03:45:57 PM] - [TRAIN] - S: 1024        | E: 8           | L: 44          | R: 99.000      | FPS: 346.187   | T: 0:00:02    \n[08/04/2023 03:45:58 PM] - [TRAIN] - S: 2048        | E: 16          | L: 58          | R: 207.000     | FPS: 514.168   | T: 0:00:03    \n[08/04/2023 03:45:59 PM] - [TRAIN] - S: 3072        | E: 24          | L: 43          | R: 70.000      | FPS: 619.411   | T: 0:00:04    \n[08/04/2023 03:46:00 PM] - [TRAIN] - S: 4096        | E: 32          | L: 43          | R: 67.000      | FPS: 695.523   | T: 0:00:05    \n[08/04/2023 03:46:00 PM] - [INFO.] - Training Accomplished!\n[08/04/2023 03:46:00 PM] - [INFO.] - Model saved at: /export/yuanmingqi/code/rllte/logs/ppo_atari/2023-08-04-03-45-54/model\n

Suppose we want to use a ResNet-based encoder; it suffices to replace the encoder module using the .set function: train.py

                                                from rllte.agent import PPO\nfrom rllte.env import make_atari_env\nfrom rllte.xploit.encoder import EspeholtResidualEncoder\n\nif __name__ == \"__main__\":\n    # env setup\n    device = \"cuda:0\"\n    env = make_atari_env(device=device)\n    eval_env = make_atari_env(device=device)\n    # create agent\n    feature_dim = 512\n    agent = PPO(env=env, \n                eval_env=eval_env, \n                device=device,\n                tag=\"ppo_atari\",\n                feature_dim=feature_dim)\n    # create a new encoder\n    encoder = EspeholtResidualEncoder(\n        observation_space=env.observation_space,\n        feature_dim=feature_dim)\n    # set the new encoder\n    agent.set(encoder=encoder)\n    # start training\n    agent.train(num_train_steps=5000)\n
                                                Run train.py and you'll see the old MnihCnnEncoder has been replaced by EspeholtResidualEncoder:
                                                [08/04/2023 03:46:38 PM] - [INFO.] - Invoking RLLTE Engine...\n[08/04/2023 03:46:38 PM] - [INFO.] - ================================================================================\n[08/04/2023 03:46:38 PM] - [INFO.] - Tag               : ppo_atari\n[08/04/2023 03:46:38 PM] - [INFO.] - Device            : NVIDIA GeForce RTX 3090\n[08/04/2023 03:46:38 PM] - [DEBUG] - Agent             : PPO\n[08/04/2023 03:46:38 PM] - [DEBUG] - Encoder           : EspeholtResidualEncoder\n[08/04/2023 03:46:38 PM] - [DEBUG] - Policy            : OnPolicySharedActorCritic\n[08/04/2023 03:46:38 PM] - [DEBUG] - Storage           : VanillaRolloutStorage\n[08/04/2023 03:46:38 PM] - [DEBUG] - Distribution      : Categorical\n[08/04/2023 03:46:38 PM] - [DEBUG] - Augmentation      : False\n[08/04/2023 03:46:38 PM] - [DEBUG] - Intrinsic Reward  : False\n[08/04/2023 03:46:38 PM] - [DEBUG] - ================================================================================\n...\n
                                                For more replaceable modules, please refer to https://docs.rllte.dev/api/.

                                                "},{"location":"tutorials/mt/replacement/#using-custom-modules","title":"Using custom modules","text":"

Developers can also perform replacement using custom modules; see Make A Custom Module for more details.

                                                "}]} \ No newline at end of file diff --git a/sitemap.xml b/sitemap.xml new file mode 100644 index 00000000..7b6de8a4 --- /dev/null +++ b/sitemap.xml @@ -0,0 +1,603 @@ + + + + https://docs.rllte.dev/ + 2024-05-14 + daily + + + https://docs.rllte.dev/README-zh-Hans/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_old/ + 2024-05-14 + daily + + + https://docs.rllte.dev/benchmarks/ + 2024-05-14 + daily + + + https://docs.rllte.dev/changelog/ + 2024-05-14 + daily + + + https://docs.rllte.dev/contributing/ + 2024-05-14 + daily + + + https://docs.rllte.dev/copilot/ + 2024-05-14 + daily + + + https://docs.rllte.dev/getting_started/ + 2024-05-14 + daily + + + https://docs.rllte.dev/hub/ + 2024-05-14 + daily + + + https://docs.rllte.dev/license/ + 2024-05-14 + daily + + + https://docs.rllte.dev/verification/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/agent/daac/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/agent/drac/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/agent/drdaac/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/agent/drqv2/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/agent/impala/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/agent/ppg/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/agent/legacy/a2c/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/agent/legacy/ddpg/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/agent/legacy/dqn/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/agent/legacy/ppo/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/agent/legacy/sac/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/agent/legacy/sacd/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/common/initialization/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/common/logger/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/common/preprocessing/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/common/timer/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/common/prototype/base_agent/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/common/prototype/base_augmentation/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/common/prototype/base_distribution/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/common/prototype/base_encoder/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/common/prototype/base_policy/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/common/prototype/base_reward/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/common/prototype/base_storage/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/common/prototype/distributed_agent/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/common/prototype/off_policy_agent/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/common/prototype/on_policy_agent/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/env/utils/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/env/atari/__init__/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/env/bullet/__init__/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/env/dmc/__init__/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/env/minigrid/__init__/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/env/procgen/__init__/ + 2024-05-14 + daily + + + 
https://docs.rllte.dev/api_docs/evaluation/comparison/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/evaluation/performance/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/evaluation/utils/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/evaluation/visualization/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/hub/atari/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/hub/dmc/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/hub/minigrid/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/hub/procgen/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/encoder/espeholt_residual_encoder/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/encoder/identity_encoder/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/encoder/mnih_cnn_encoder/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/encoder/pathak_cnn_encoder/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/encoder/raffin_combined_encoder/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/encoder/tassa_cnn_encoder/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/encoder/vanilla_mlp_encoder/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/policy/distributed_actor_learner/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/policy/off_policy_det_actor_double_critic/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/policy/off_policy_double_actor_double_critic/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/policy/off_policy_double_qnetwork/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/policy/off_policy_stoch_actor_double_critic/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/policy/on_policy_decoupled_actor_critic/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/policy/on_policy_shared_actor_critic/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/storage/dict_replay_storage/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/storage/dict_rollout_storage/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/storage/her_replay_storage/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/storage/nstep_replay_storage/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/storage/prioritized_replay_storage/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/storage/vanilla_distributed_storage/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/storage/vanilla_replay_storage/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xploit/storage/vanilla_rollout_storage/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/augmentation/gaussian_noise/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/augmentation/grayscale/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/augmentation/identity/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/augmentation/random_amplitude_scaling/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/augmentation/random_colorjitter/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/augmentation/random_convolution/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/augmentation/random_crop/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/augmentation/random_cutout/ + 2024-05-14 + 
daily + + + https://docs.rllte.dev/api_docs/xplore/augmentation/random_cutoutcolor/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/augmentation/random_flip/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/augmentation/random_rotate/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/augmentation/random_shift/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/augmentation/random_translate/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/distribution/bernoulli/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/distribution/categorical/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/distribution/diagonal_gaussian/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/distribution/multi_categorical/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/distribution/normal_noise/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/distribution/ornstein_uhlenbeck_noise/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/distribution/squashed_normal/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/distribution/truncated_normal_noise/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/reward/girm/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/reward/icm/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/reward/ngu/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/reward/pseudo_counts/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/reward/re3/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/reward/revd/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/reward/ride/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/reward/rise/ + 2024-05-14 + daily + + + https://docs.rllte.dev/api_docs/xplore/reward/rnd/ + 2024-05-14 + daily + + + https://docs.rllte.dev/tutorials/ + 2024-05-14 + daily + + + https://docs.rllte.dev/tutorials/custom/environment/ + 2024-05-14 + daily + + + https://docs.rllte.dev/tutorials/custom/module/ + 2024-05-14 + daily + + + https://docs.rllte.dev/tutorials/general/decoupling/ + 2024-05-14 + daily + + + https://docs.rllte.dev/tutorials/general/fast/ + 2024-05-14 + daily + + + https://docs.rllte.dev/tutorials/md/cann/ + 2024-05-14 + daily + + + https://docs.rllte.dev/tutorials/md/deployment/ + 2024-05-14 + daily + + + https://docs.rllte.dev/tutorials/md/tensorrt/ + 2024-05-14 + daily + + + https://docs.rllte.dev/tutorials/me/comp/ + 2024-05-14 + daily + + + https://docs.rllte.dev/tutorials/me/perf/ + 2024-05-14 + daily + + + https://docs.rllte.dev/tutorials/me/visual/ + 2024-05-14 + daily + + + https://docs.rllte.dev/tutorials/mt/irs/ + 2024-05-14 + daily + + + https://docs.rllte.dev/tutorials/mt/oa/ + 2024-05-14 + daily + + + https://docs.rllte.dev/tutorials/mt/pre-training/ + 2024-05-14 + daily + + + https://docs.rllte.dev/tutorials/mt/quick_start/ + 2024-05-14 + daily + + + https://docs.rllte.dev/tutorials/mt/replacement/ + 2024-05-14 + daily + + \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz new file mode 100644 index 00000000..37658c79 Binary files /dev/null and b/sitemap.xml.gz differ diff --git a/tutorials/custom/environment/index.html b/tutorials/custom/environment/index.html new file mode 100644 index 00000000..4a95f4b2 --- /dev/null +++ b/tutorials/custom/environment/index.html @@ -0,0 +1,4075 @@ + + + + + + + + + + + + + + 
+ + + + + + + Custom Environment - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
                                                + +
                                                + + + + + + +
                                                + + + + + + + +
                                                + +
                                                + + + + +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + + + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                + + + + + + + +

                                                Custom Environment

                                                + + + + +

                                                Environment definition

                                                +

To use custom environments in RLLTE, it suffices to follow the gymnasium interface and prepare your environment following Tutorials: Make Your Own Custom Environment. An example is: +

                                                example.py
                                                import gymnasium as gym
                                                +import numpy as np
                                                +
                                                +class CustomEnv(gym.Env):
                                                +    def __init__(self, total_length) -> None:
                                                +        super().__init__()
                                                +        self.observation_space = gym.spaces.Box(
                                                +            shape=(9, 84, 84),
                                                +            high=255.0,
                                                +            low=0.,
                                                +            dtype=np.uint8
                                                +        )
                                                +        self.action_space = gym.spaces.Box(
                                                +            shape=(7,),
                                                +            high=1.,
                                                +            low=-1.,
                                                +            dtype=np.float32
                                                +        )
                                                +        self.total_length = total_length
                                                +        self.count = 0
                                                +
                                                +    def step(self, action):
                                                +        obs = self.observation_space.sample()
                                                +        reward = np.random.rand()
                                                +        if self.count < self.total_length:
                                                +            terminated = truncated = False
                                                +        else:
                                                +            terminated = truncated = True
                                                +        info = {"discount": 0.99}
                                                +        self.count += 1
                                                +
                                                +        return obs, reward, terminated, truncated, info
                                                +
                                                +    def reset(self, seed=None, options=None):
                                                +        self.count = 0
                                                +        return self.observation_space.sample(), {"discount": 0.99}
                                                +

                                                +

                                                Use make_rllte_env

                                                +

In RLLTE, the environments are assumed to be vectorized, and the make_rllte_env function is used to wrap the environments: +

                                                example.py
                                                from rllte.env.utils import make_rllte_env
                                                +# create vectorized environments
                                                +env = make_rllte_env(env_id=CustomEnv, 
                                                +                     device=device, 
                                                +                     env_kwargs={'total_length': 499} # set env arguments
                                                +                     )
                                                +
+After that, you can use the custom environment in your application directly. +
                                                train.py
                                                from rllte.agent import DrQv2
                                                +from rllte.env.utils import make_rllte_env
                                                +
                                                +if __name__ == "__main__":
                                                +    # env setup
                                                +    device = "cuda:0"
                                                +    env = make_rllte_env(env_id=CustomEnv, 
                                                +                        device=device, 
                                                +                        env_kwargs={'total_length': 499} # set env arguments
                                                +                        )
                                                +    eval_env = make_rllte_env(env_id=CustomEnv, 
                                                +                            device=device, 
                                                +                            env_kwargs={'total_length': 499} # set env arguments
                                                +                            )
                                                +    agent = DrQv2(env=env, 
                                                +                eval_env=eval_env, 
                                                +                device=device,
                                                +                tag="drqv2_dmc_pixel")
                                                +    agent.train(num_train_steps=5000)
                                                +

                                                + + + + + + + + + + + + + +
                                                +
                                                + + + +
                                                + + + +
                                                + +
                                                + + + + +
                                                + +
                                                +
                                                +
                                                +
                                                + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tutorials/custom/module/index.html b/tutorials/custom/module/index.html new file mode 100644 index 00000000..24ec5bf6 --- /dev/null +++ b/tutorials/custom/module/index.html @@ -0,0 +1,4057 @@ + + + + + + + + + + + + + + + + + + + + + Custom Module - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
                                                + +
                                                + + + + + + +
                                                + + + + + + + +
                                                + +
                                                + + + + +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + + + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                + + + + + + + +

                                                Custom Module

                                                + + + + +

RLLTE is an extremely open platform that supports custom modules, including encoder, storage, policy, etc. Just write a new module based on the corresponding base class, and it can then be inserted into an agent directly. Suppose we want to build a new encoder entitled CustomEncoder. An example is +

                                                example.py
                                                from rllte.agent import PPO
                                                +from rllte.env import make_atari_env
                                                +from rllte.common.prototype import BaseEncoder
                                                +from gymnasium.spaces import Space
                                                +from torch import nn
                                                +import torch as th
                                                +
                                                +class CustomEncoder(BaseEncoder):
                                                +    """Custom encoder.
                                                +
                                                +    Args:
                                                +        observation_space (Space): The observation space of environment.
                                                +        feature_dim (int): Number of features extracted.
                                                +
                                                +    Returns:
                                                +        The new encoder instance.
                                                +    """
                                                +    def __init__(self, observation_space: Space, feature_dim: int = 0) -> None:
                                                +        super().__init__(observation_space, feature_dim)
                                                +
                                                +        obs_shape = observation_space.shape
                                                +        assert len(obs_shape) == 3
                                                +
                                                +        self.trunk = nn.Sequential(
                                                +            nn.Conv2d(obs_shape[0], 32, 3, stride=2), nn.ReLU(),
                                                +            nn.Conv2d(32, 32, 3, stride=2), nn.ReLU(),
                                                +            nn.Flatten(),
                                                +        )
                                                +
                                                +        with th.no_grad():
                                                +            sample = th.ones(size=tuple(obs_shape)).float()
                                                +            n_flatten = self.trunk(sample.unsqueeze(0)).shape[1]
                                                +
                                                +        self.trunk.extend([nn.Linear(n_flatten, feature_dim), nn.ReLU()])
                                                +
                                                +    def forward(self, obs: th.Tensor) -> th.Tensor:
                                                +        h = self.trunk(obs / 255.0)
                                                +
                                                +        return h.view(h.size()[0], -1)
                                                +
                                                +if __name__ == "__main__":
                                                +    # env setup
                                                +    device = "cuda:0"
                                                +    env = make_atari_env(device=device)
                                                +    eval_env = make_atari_env(device=device)
                                                +    # create agent
                                                +    feature_dim = 512
                                                +    agent = PPO(env=env, 
                                                +                eval_env=eval_env, 
                                                +                device=device,
                                                +                tag="ppo_atari",
                                                +                feature_dim=feature_dim)
                                                +    # create a new encoder
                                                +    encoder = CustomEncoder(observation_space=env.observation_space, 
                                                +                         feature_dim=feature_dim)
                                                +    # set the new encoder
                                                +    agent.set(encoder=encoder)
                                                +    # start training
                                                +    agent.train(num_train_steps=5000)
                                                +
                                                +Run example.py and you'll see the old MnihCnnEncoder has been replaced by CustomEncoder: +
                                                [08/04/2023 03:47:24 PM] - [INFO.] - Invoking RLLTE Engine...
                                                +[08/04/2023 03:47:24 PM] - [INFO.] - ================================================================================
                                                +[08/04/2023 03:47:24 PM] - [INFO.] - Tag               : ppo_atari
                                                +[08/04/2023 03:47:24 PM] - [INFO.] - Device            : NVIDIA GeForce RTX 3090
                                                +[08/04/2023 03:47:24 PM] - [DEBUG] - Agent             : PPO
                                                +[08/04/2023 03:47:24 PM] - [DEBUG] - Encoder           : CustomEncoder
                                                +[08/04/2023 03:47:24 PM] - [DEBUG] - Policy            : OnPolicySharedActorCritic
                                                +[08/04/2023 03:47:24 PM] - [DEBUG] - Storage           : VanillaRolloutStorage
                                                +[08/04/2023 03:47:24 PM] - [DEBUG] - Distribution      : Categorical
                                                +[08/04/2023 03:47:24 PM] - [DEBUG] - Augmentation      : False
                                                +[08/04/2023 03:47:24 PM] - [DEBUG] - Intrinsic Reward  : False
                                                +[08/04/2023 03:47:24 PM] - [DEBUG] - ================================================================================
                                                +...
                                                +
+As for customizing modules such as Storage and Distribution, users should consider their compatibility with specific algorithms.
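For example, a custom observation augmentation could follow the same pattern as the encoder above. The sketch below is illustrative only: it assumes that BaseAugmentation behaves like a standard torch nn.Module whose forward receives a batch of observations, and the class name, constructor argument, and clamping bounds are all hypothetical; check the API documentation for the exact base-class interface and agent compatibility.

from rllte.common.prototype import BaseAugmentation
import torch as th

class AddUniformNoise(BaseAugmentation):
    """Hypothetical augmentation that adds small uniform noise to observations."""
    def __init__(self, scale: float = 0.01) -> None:
        super().__init__()
        self.scale = scale

    def forward(self, x: th.Tensor) -> th.Tensor:
        # perturb the observation batch; the [0, 255] clamp assumes image inputs
        return (x + self.scale * th.rand_like(x)).clamp(0.0, 255.0)

# usage mirrors the RandomCrop example in the observation augmentation tutorial:
# agent.set(augmentation=AddUniformNoise(scale=0.01))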

                                                + + + + + + + + + + + + + +
                                                +
                                                + + + +
                                                + + + +
                                                + +
                                                + + + + +
                                                + +
                                                +
                                                +
                                                +
                                                + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tutorials/general/decoupling/index.html b/tutorials/general/decoupling/index.html new file mode 100644 index 00000000..0e539ffb --- /dev/null +++ b/tutorials/general/decoupling/index.html @@ -0,0 +1,4003 @@ + + + + + + + + + + + + + + + + + + + + + RL Algorithm Decoupling - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
                                                + +
                                                + + + + + + +
                                                + + + + + + + +
                                                + +
                                                + + + + +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + + + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                + + + + + + + +

                                                RL Algorithm Decoupling

                                                +

The actual performance of an RL algorithm is affected by various factors (e.g., different network architectures and experience usage strategies), which are difficult to quantify.

                                                +
                                                +

                                                Huang S, Dossa R F J, Raffin A, et al. The 37 Implementation Details of Proximal Policy Optimization[J]. The ICLR Blog Track 2023, 2022.

                                                +
                                                +

                                                RLLTE decouples RL algorithms into minimum primitives from the perspective of exploitation and exploration and provides abundant modules for development:

                                                +
                                                  +
                                                • Xploit: Modules that focus on exploitation in RL.
                                                    +
                                                  • Encoder: Modules for processing observations and extracting features;
                                                  • +
                                                  • Policy: Modules for interaction and learning;
                                                  • +
                                                  • Storage: Modules for storing and replaying collected experiences;
                                                  • +
                                                  +
                                                • +
                                                • Xplore: Modules that focus on exploration in RL.
                                                    +
                                                  • Distribution: Modules for sampling actions;
                                                  • +
                                                  • Augmentation: Modules for observation augmentation;
                                                  • +
                                                  • Reward: Intrinsic reward modules for enhancing exploration.
                                                  • +
                                                  +
                                                • +
                                                +

Therefore, the core of RLLTE is designed not to provide specific RL algorithms but to serve as a toolkit for producing them. Developers are free to combine built-in and customized modules to build their own RL algorithms.

                                                + +

                                                In particular, developers are allowed to replace modules of an implemented algorithm.
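For instance, a built-in agent can be assembled and then have one of its modules swapped out before training. The snippet below is only a minimal sketch of that idea: it assumes a built-in PPO agent exists in rllte.agent and exposes the same .set() interface used by the agent prototypes, so treat the exact names and arguments as illustrative rather than authoritative.

from rllte.agent import PPO                      # assumed built-in agent, for illustration
from rllte.env import make_atari_env
from rllte.xploit.encoder import MnihCnnEncoder

if __name__ == "__main__":
    device = "cuda"
    env = make_atari_env("AlienNoFrameskip-v4", num_envs=8, seed=0, device=device)
    agent = PPO(env=env, tag="ppo_atari", seed=0, device=device)
    # inject a specific encoder module; a custom Encoder subclass could be passed the same way
    agent.set(encoder=MnihCnnEncoder(observation_space=env.observation_space, feature_dim=512))
    agent.train(num_train_steps=10000000)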

                                                + +

                                                RLLTE is an extremely open framework that allows developers to try anything.

                                                + + + + + + + + + + + + + +
                                                +
                                                + + + +
                                                + + + +
                                                + +
                                                + + + + +
                                                + +
                                                +
                                                +
                                                +
                                                + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tutorials/general/fast/index.html b/tutorials/general/fast/index.html new file mode 100644 index 00000000..342bb8c5 --- /dev/null +++ b/tutorials/general/fast/index.html @@ -0,0 +1,4138 @@ + + + + + + + + + + + + + + + + + + + + + Fast Algorithm Development - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
                                                + +
                                                + + + + + + +
                                                + + + + + + + +
                                                + +
                                                + + + + +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + + + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                + + + + + + + +

                                                Fast Algorithm Development

                                                + + + + +

                                                Developers only need three steps to implement an RL algorithm with RLLTE:

                                                +
                                                +

                                                Workflow

                                                +
                                                  +
                                                1. Select an algorithm prototype;
                                                2. +
                                                3. Select desired modules;
                                                4. +
                                                5. Write an update function.
                                                6. +
                                                +
                                                +

                                                The following example illustrates how to write an Advantage Actor-Critic (A2C) agent to solve Atari games.

                                                +

                                                Set prototype

                                                +

                                                Firstly, we select OnPolicyAgent as the prototype +

                                                from rllte.common.prototype import OnPolicyAgent
                                                +
                                                +class A2C(OnPolicyAgent):
                                                +    def __init__(self, env, tag, device, num_steps):
                                                +        # here we only use four arguments
                                                +        super().__init__(env=env, tag=tag, device=device, num_steps=num_steps)
                                                +

                                                +

                                                Set necessary modules

                                                +

                                                Now we need an encoder to process observations, a learnable policy to generate actions, and a storage to store and sample experiences. +

                                                from rllte.xploit.encoder import MnihCnnEncoder
                                                +from rllte.xploit.policy import OnPolicySharedActorCritic
                                                +from rllte.xploit.storage import VanillaRolloutStorage
                                                +from rllte.xplore.distribution import Categorical
                                                +

                                                +

                                                Set update function

                                                +

                                                Run the .describe function of the selected policy and you will see the following output: +

                                                OnPolicySharedActorCritic.describe()
                                                +
                                                +# Output:
                                                +# ================================================================================
                                                +# Name       : OnPolicySharedActorCritic
                                                +# Structure  : self.encoder (shared by actor and critic), self.actor, self.critic
                                                +# Forward    : obs -> self.encoder -> self.actor -> actions
                                                +#            : obs -> self.encoder -> self.critic -> values
                                                +#            : actions -> log_probs
                                                +# Optimizers : self.optimizers['opt'] -> (self.encoder, self.actor, self.critic)
                                                +# ================================================================================
                                                +
                                                +This will illustrate the structure of the policy and indicate the optimizable parts. Finally, merge these modules and write an .update function: +
                                                from torch import nn
                                                +import torch as th
                                                +
                                                +class A2C(OnPolicyAgent):
                                                +    def __init__(self, env, tag, seed, device, num_steps) -> None:
                                                +        super().__init__(env=env, tag=tag, seed=seed, device=device, num_steps=num_steps)
                                                +        # create modules
                                                +        encoder = MnihCnnEncoder(observation_space=env.observation_space, feature_dim=512)
                                                +        policy = OnPolicySharedActorCritic(observation_space=env.observation_space,
                                                +                                           action_space=env.action_space,
                                                +                                           feature_dim=512,
                                                +                                           opt_class=th.optim.Adam,
                                                +                                           opt_kwargs=dict(lr=2.5e-4, eps=1e-5),
                                                +                                           init_fn="xavier_uniform"
                                                +                                           )
                                                +        storage = VanillaRolloutStorage(observation_space=env.observation_space,
                                                +                                        action_space=env.action_space,
                                                +                                        device=device,
                                                +                                        storage_size=self.num_steps,
                                                +                                        num_envs=self.num_envs,
                                                +                                        batch_size=256
                                                +                                        )
                                                +        dist = Categorical()
                                                +        # set all the modules
                                                +        self.set(encoder=encoder, policy=policy, storage=storage, distribution=dist)
                                                +
                                                +    def update(self):
                                                +        for _ in range(4):
                                                +            for batch in self.storage.sample():
                                                +                # evaluate the sampled actions
                                                +                new_values, new_log_probs, entropy = self.policy.evaluate_actions(obs=batch.observations, actions=batch.actions)
                                                +                # policy loss part
                                                +                policy_loss = - (batch.adv_targ * new_log_probs).mean()
                                                +                # value loss part
                                                +                value_loss = 0.5 * (new_values.flatten() - batch.returns).pow(2).mean()
                                                +                # update
                                                +                self.policy.optimizers['opt'].zero_grad(set_to_none=True)
                                                +                (value_loss * 0.5 + policy_loss - entropy * 0.01).backward()
                                                +                nn.utils.clip_grad_norm_(self.policy.parameters(), 0.5)
                                                +                self.policy.optimizers['opt'].step()
                                                +

                                                +

                                                Start training

                                                +

                                                Now we can start training by +

                                                train.py
                                                from rllte.env import make_atari_env
                                                +if __name__ == "__main__":
                                                +    device = "cuda"
                                                +    env = make_atari_env("AlienNoFrameskip-v4", num_envs=8, seed=0, device=device)
                                                +    agent = A2C(env=env, tag="a2c_atari", seed=0, device=device, num_steps=128)
                                                +    agent.train(num_train_steps=10000000)
                                                +
                                                +Run train.py and you will see the following output: +
                                                [08/04/2023 02:19:06 PM] - [INFO.] - Invoking RLLTE Engine...
                                                +[08/04/2023 02:19:06 PM] - [INFO.] - ================================================================================
                                                +[08/04/2023 02:19:06 PM] - [INFO.] - Tag               : a2c_atari
                                                +[08/04/2023 02:19:06 PM] - [INFO.] - Device            : NVIDIA GeForce RTX 3090
                                                +[08/04/2023 02:19:07 PM] - [DEBUG] - Agent             : A2C
                                                +[08/04/2023 02:19:07 PM] - [DEBUG] - Encoder           : MnihCnnEncoder
                                                +[08/04/2023 02:19:07 PM] - [DEBUG] - Policy            : OnPolicySharedActorCritic
                                                +[08/04/2023 02:19:07 PM] - [DEBUG] - Storage           : VanillaRolloutStorage
                                                +[08/04/2023 02:19:07 PM] - [DEBUG] - Distribution      : Categorical
                                                +[08/04/2023 02:19:07 PM] - [DEBUG] - Augmentation      : False
                                                +[08/04/2023 02:19:07 PM] - [DEBUG] - Intrinsic Reward  : False
                                                +[08/04/2023 02:19:07 PM] - [DEBUG] - ================================================================================
                                                +[08/04/2023 02:19:09 PM] - [TRAIN] - S: 1024        | E: 8           | L: 44          | R: 99.000      | FPS: 407.637   | T: 0:00:02    
                                                +[08/04/2023 02:19:10 PM] - [TRAIN] - S: 2048        | E: 16          | L: 50          | R: 109.000     | FPS: 594.725   | T: 0:00:03    
                                                +[08/04/2023 02:19:11 PM] - [TRAIN] - S: 3072        | E: 24          | L: 47          | R: 96.000      | FPS: 692.433   | T: 0:00:04    
                                                +[08/04/2023 02:19:12 PM] - [TRAIN] - S: 4096        | E: 32          | L: 36          | R: 93.000      | FPS: 755.935   | T: 0:00:05    
                                                +[08/04/2023 02:19:13 PM] - [TRAIN] - S: 5120        | E: 40          | L: 55          | R: 99.000      | FPS: 809.577   | T: 0:00:06    
                                                +[08/04/2023 02:19:14 PM] - [TRAIN] - S: 6144        | E: 48          | L: 46          | R: 34.000      | FPS: 847.310   | T: 0:00:07    
                                                +[08/04/2023 02:19:15 PM] - [TRAIN] - S: 7168        | E: 56          | L: 49          | R: 43.000      | FPS: 878.628   | T: 0:00:08   
                                                +...
                                                +

                                                +

                                                As shown in this example, only a few dozen lines of code are needed to create RL agents with RLLTE.

                                                + + + + + + + + + + + + + +
                                                +
                                                + + + +
                                                + + + +
                                                + +
                                                + + + + +
                                                + +
                                                +
                                                +
                                                +
                                                + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tutorials/index.html b/tutorials/index.html new file mode 100644 index 00000000..28b06423 --- /dev/null +++ b/tutorials/index.html @@ -0,0 +1,4177 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Tutorials - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
                                                + +
                                                + + + + + + +
                                                + + + + + + + +
                                                + +
                                                + + + + +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + + + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + + + + + +
                                                + + + +
                                                + + + +
                                                +
                                                +
                                                +
                                                + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tutorials/md/cann/index.html b/tutorials/md/cann/index.html new file mode 100644 index 00000000..f09fda54 --- /dev/null +++ b/tutorials/md/cann/index.html @@ -0,0 +1,4069 @@ + + + + + + + + + + + + + + + + + + + + + with CANN - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
                                                + +
                                                + + + + + + +
                                                + + + + + + + +
                                                + +
                                                + + + + +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + + + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                + + + + + + + +

                                                with CANN

                                                +

                                                AscendCL provides a collection of C language APIs for use in the development of DNN inference apps on Compute Architecture for Neural Networks (CANN). These APIs are designed for model and operator loading and execution, as well as media data processing, facilitating deep learning inference computing, graphics and image preprocessing, and single-operator accelerated computing on the Ascend CANN platform.

                                                +
                                                + +
                                                + +

                                                Prerequisites

                                                +

                                                Get the complete repository from GitHub: +

                                                git clone https://github.com/RLE-Foundation/rllte
                                                +

                                                +

                                                Download and install the following necessary libraries: +CANN 6.0.1

                                                +

                                                Model preparation

                                                +

                                                Take Ascend310 for example: +

                                                atc --model=model/test_model.onnx --framework=5 --output=test_model --input_format=NCHW --log=info --soc_version=Ascend310 --input_shape="input:1,9,84,84"
                                                +
                                                +More details can be found in Learning Wizard.
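Before invoking atc, it can help to sanity-check the exported ONNX graph and confirm that its input matches --input_shape. A minimal sketch with the onnx Python package (file paths follow the command above):

import onnx

model = onnx.load("model/test_model.onnx")
onnx.checker.check_model(model)   # raises if the graph is malformed

# the input must match --input_shape="input:1,9,84,84"
for inp in model.graph.input:
    dims = [d.dim_value for d in inp.type.tensor_type.shape.dim]
    print(inp.name, dims)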

                                                +

                                                C++ development

                                                +
                                                  +
                                                • Include the header file #include "acl/acl.h"
                                                • +
                                                • The main workflow is +
                                                  int main()
                                                  +{   
                                                  +    // 1. Define a resource initialization function for AscendCL initialization and runtime resource allocation (specifying a compute device).
                                                  +    InitResource();
                                                  +
                                                  +    // 2. Define a model loading function for loading the image classification model.
                                                  +    const char *modelPath = "../model/test_model.om";
                                                  +    LoadModel(modelPath);
                                                  +
+    // 3. Define a function for preparing data in memory and transferring it to the device.
+    LoadData();
                                                  +
                                                  +    // 4. Define an inference function for executing inference.
                                                  +    Inference();
                                                  +
                                                  +    // 5. Define a function for processing inference result data to print the class indexes of the top 5 confidence values of the test image.
                                                  +    PrintResult();
                                                  +
                                                  +    // 6. Define a function for unloading the image classification model.
                                                  +    UnloadModel();
                                                  +
                                                  +    // 7. Define a function for freeing the memory and destroying inference-related data to prevent memory leak.
                                                  +    UnloadData();
                                                  +
                                                  +    // 8. Define a resource deinitialization function for AscendCL deinitialization and runtime resource deallocation (releasing a compute device).
                                                  +    DestroyResource();
                                                  +}
                                                  +
                                                • +
                                                +

                                                Build and run

                                                +
                                                cd ascend
                                                +export APP_SOURCE_PATH=<path_to_rllte_deployment>/ascend
                                                +export DDK_PATH=<path_to_ascend_toolkit>
                                                +export NPU_HOST_LIB=<path_to_ascend_devlib>
                                                +chmod +x sample_build.sh
                                                +./sample_build.sh
+chmod +x sample_run.sh
                                                +./sample_run.sh
                                                +
                                                + + + + + + + + + + + + + +
                                                +
                                                + + + +
                                                + + + +
                                                + +
                                                + + + + +
                                                + +
                                                +
                                                +
                                                +
                                                + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tutorials/md/deployment/index.html b/tutorials/md/deployment/index.html new file mode 100644 index 00000000..ca90ae3c --- /dev/null +++ b/tutorials/md/deployment/index.html @@ -0,0 +1,4145 @@ + + + + + + + + + + + + + + + + + + + + + Model Deployment - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
                                                + +
                                                + + + + + + +
                                                + + + + + + + +
                                                + +
                                                + + + + +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + + + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                + + + + + + + +

                                                Model Deployment

                                                +

Currently, rllte supports model deployment with NVIDIA TensorRT and HUAWEI CANN:

                                                + +

                                                The following content shows how to convert a rllte model into a .onnx model and deploy it on the corresponding devices.
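As a rough sketch of the conversion step (not the project's official export script): assuming the trained policy has been saved as a standard PyTorch module and takes a stacked observation of shape (1, 9, 84, 84), torch.onnx.export produces the .onnx file used below. The file paths and tensor names are assumptions for illustration.

import torch as th

# load the trained policy (path and loading convention are assumptions)
policy = th.load("model/test_model.pth", map_location="cpu")
policy.eval()

dummy_obs = th.randn(1, 9, 84, 84)   # stacked 84x84 frames

th.onnx.export(
    policy,
    dummy_obs,
    "model/test_model.onnx",
    input_names=["input"],
    output_names=["action"],
    opset_version=11,
)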

                                                +

                                                with NVIDIA TensorRT

                                                +

                                                Prerequisites

                                                +

                                                Get the complete repository from GitHub: +

                                                git clone https://github.com/RLE-Foundation/rllte
                                                +

                                                +

                                                Download the following necessary libraries:

                                                + +

                                                Meanwhile, install the following Python packages:

                                                +
                                                  +
                                                • pycuda==2022.2.2
                                                • +
                                                • tensorrt==8.6.0
                                                • +
                                                • numpy==1.24.2
                                                • +
                                                • torch==2.0.0
                                                • +
                                                • onnx==1.14.0
                                                • +
                                                +

The following two examples can be used to verify your installation:

                                                +
                                                C++ Port
                                                cd deloyment/c++
                                                +mkdir build && cd build
                                                +cmake .. && make
                                                +./DeployerTest ../../model/test_model.onnx
                                                +
                                                +
                                                Python Port
                                                cd deloyment/python
                                                +python3 pth2onnx.py ../model/test_model.pth
                                                +./trtexec --onnx=test_model.onnx --saveEngine=test_model.trt --skipInference
                                                +python3 infer.py test_model.plan
                                                +
                                                +

                                                Use in Your C++ Project

                                                +

The following code illustrates how to include our library in your project: +

                                                example.cpp
                                                // Including the header file in your cpp file.
+#include "RLLTEDeployer.h"
                                                +
+// Declare an instance of Options and configure the parameters.
                                                +Options options;
                                                +options.deviceIndex = 0;  
                                                +options.doesSupportDynamicBatchSize = false;  
                                                +options.maxWorkspaceSize = 4000000000; 
                                                +options.precision = Precision::FP16;
                                                +
+// Declare an RLLTEDeployer instance with the configured options.
                                                +RLLTEDeployer deployer(options);
                                                +
                                                +// Use the build member function to convert the onnx model to the TensorRT static model (plan).
                                                +deployer.build(path_of_onnx_model);
                                                +
+// Use the loadPlan member function to load the converted model. If a path is given,
+// it will load the plan from that path; otherwise it will search the current working directory.
                                                +deployer.loadPlan();
                                                +
+// Use the infer member function to execute inference. The input is a tensor of the
+// relevant data type, and the output is a pointer with the relevant data size and type.
+// The inference result will be written to the output.
                                                +deployer.infer<float>(input, output, 1);
                                                +deployer.infer<float16_t>(input, output, 1);
                                                +deployer.infer<int8>(input, output, 1);
                                                +
                                                +...
                                                +
                                                +Please refer to the DeployerTest.cpp for the complete code.

                                                +

                                                with CMake

                                                +
                                                CMakeLists.txt
                                                find_package(CUDA REQUIRED)
+include_directories(${CUDA_INCLUDE_DIRS} ${Path_of_RLLTEDeployer_h})
+target_link_libraries(YOUREXECUTEFILE ${PATH_OF_libRLLTEDeployer_so})
                                                +
                                                +

                                                with Docker

                                                +

                                                Install the NVIDIA docker via (make sure the NVIDIA driver is installed): +

                                                install_docker.sh
                                                sudo apt-get install ca-certificates gnupg lsb-release
                                                +sudo mkdir -p /etc/apt/keyrings
                                                +
                                                +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg
                                                +echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
                                                +sudo apt-get update
                                                +sudo apt-get install docker-ce docker-ce-cli containerd.io docker-compose-plugin 
                                                +distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
                                                +
                                                +curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
                                                +curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
                                                +
                                                +sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit
                                                +sudo systemctl restart docker
                                                +sudo groupadd docker
                                                +sudo gpasswd -a $USER docker
                                                +

                                                +

                                                Restart your device, and run the following command. +

                                                sudo service docker restart
                                                +

                                                +

                                                Now you can run your model via: +

                                                run_docker.sh
                                                docker pull jakeshihaoluo/rllte_deployment_env:0.0.1
                                                +docker run -it -v ${path_to_the_repo}:/rllte --gpus all jakeshihaoluo/rllte_deployment_env:0.0.1
                                                +cd /rllte/deloyment/c++
                                                +mkdir build && cd build
                                                +cmake .. && make
                                                +./DeployerTest ../../model/test_model.onnx
                                                +

                                                +

                                                with HUAWEI CANN

                                                +

Coming soon...

                                                + + + + + + + + + + + + + +
                                                +
                                                + + + +
                                                + + + +
                                                + +
                                                + + + + +
                                                + +
                                                +
                                                +
                                                +
                                                + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tutorials/md/tensorrt/index.html b/tutorials/md/tensorrt/index.html new file mode 100644 index 00000000..9c0fd0fa --- /dev/null +++ b/tutorials/md/tensorrt/index.html @@ -0,0 +1,4112 @@ + + + + + + + + + + + + + + + + + + + + + with TensorRT - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
                                                + +
                                                + + + + + + +
                                                + + + + + + + +
                                                + +
                                                + + + + +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + + + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                + + + + + + + +

                                                with TensorRT

                                                +

                                                Prerequisites

                                                +

                                                Get the complete repository from GitHub: +

                                                git clone https://github.com/RLE-Foundation/rllte
                                                +

                                                +

                                                Download the following necessary libraries:

                                                + +

                                                Meanwhile, install the following Python packages:

                                                +
                                                  +
                                                • pycuda==2022.2.2
                                                • +
                                                • tensorrt==8.6.0
                                                • +
                                                • numpy==1.24.2
                                                • +
                                                • torch==2.0.0
                                                • +
                                                • onnx==1.14.0
                                                • +
                                                +

The following two examples can be used to verify your installation:

                                                +
                                                C++ Port
                                                cd deloyment/c++
                                                +mkdir build && cd build
                                                +cmake .. && make
                                                +./DeployerTest ../../model/test_model.onnx
                                                +
                                                +
                                                Python Port
                                                cd deloyment/python
                                                +python3 pth2onnx.py ../model/test_model.pth
                                                +./trtexec --onnx=test_model.onnx --saveEngine=test_model.trt --skipInference
                                                +python3 infer.py test_model.plan
                                                +
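For reference, the Python inference step essentially deserializes the saved engine and launches it with device buffers. The snippet below is a rough sketch of that logic (not the contents of infer.py), assuming the TensorRT 8.x binding API, a single input of shape (1, 9, 84, 84), and that binding 1 is the output:

import numpy as np
import pycuda.autoinit  # noqa: F401 -- creates a CUDA context
import pycuda.driver as cuda
import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
runtime = trt.Runtime(logger)
with open("test_model.plan", "rb") as f:
    engine = runtime.deserialize_cuda_engine(f.read())
context = engine.create_execution_context()

# allocate one host/device buffer pair per binding
host_bufs, dev_bufs = [], []
for i in range(engine.num_bindings):
    shape = tuple(engine.get_binding_shape(i))
    dtype = trt.nptype(engine.get_binding_dtype(i))
    host = np.zeros(shape, dtype=dtype)
    host_bufs.append(host)
    dev_bufs.append(cuda.mem_alloc(host.nbytes))

# feed a dummy observation through binding 0 (assumed to be the input)
host_bufs[0][:] = np.random.rand(1, 9, 84, 84).astype(host_bufs[0].dtype)
cuda.memcpy_htod(dev_bufs[0], host_bufs[0])
context.execute_v2(bindings=[int(d) for d in dev_bufs])
cuda.memcpy_dtoh(host_bufs[1], dev_bufs[1])
print("output:", host_bufs[1])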
                                                +

                                                Use in Your C++ Project

                                                +

The following code illustrates how to include our library in your project: +

                                                example.cpp
                                                // Including the header file in your cpp file.
+#include "RLLTEDeployer.h"
                                                +
+// Declare an instance of Options and configure the parameters.
                                                +Options options;
                                                +options.deviceIndex = 0;  
                                                +options.doesSupportDynamicBatchSize = false;  
                                                +options.maxWorkspaceSize = 4000000000; 
                                                +options.precision = Precision::FP16;
                                                +
+// Declare an RLLTEDeployer instance with the configured options.
                                                +RLLTEDeployer deployer(options);
                                                +
                                                +// Use the build member function to convert the onnx model to the TensorRT static model (plan).
                                                +deployer.build(path_of_onnx_model);
                                                +
+// Use the loadPlan member function to load the converted model. If a path is given,
+// it will load the plan from that path; otherwise it will search the current working directory.
                                                +deployer.loadPlan();
                                                +
+// Use the infer member function to execute inference. The input is a tensor of the
+// relevant data type, and the output is a pointer with the relevant data size and type.
+// The inference result will be written to the output.
                                                +deployer.infer<float>(input, output, 1);
                                                +deployer.infer<float16_t>(input, output, 1);
                                                +deployer.infer<int8>(input, output, 1);
                                                +
                                                +...
                                                +
                                                +Please refer to the DeployerTest.cpp for the complete code.

                                                +

                                                with CMake

                                                +
                                                CMakeLists.txt
                                                find_package(CUDA REQUIRED)
+include_directories(${CUDA_INCLUDE_DIRS} ${Path_of_RLLTEDeployer_h})
+target_link_libraries(YOUREXECUTEFILE ${PATH_OF_libRLLTEDeployer_so})
                                                +
                                                +

                                                with Docker

                                                +

                                                Install the NVIDIA docker via (make sure the NVIDIA driver is installed): +

                                                install_docker.sh
                                                sudo apt-get install ca-certificates gnupg lsb-release
                                                +sudo mkdir -p /etc/apt/keyrings
                                                +
                                                +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg
                                                +echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
                                                +sudo apt-get update
                                                +sudo apt-get install docker-ce docker-ce-cli containerd.io docker-compose-plugin 
                                                +distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
                                                +
                                                +curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
                                                +curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
                                                +
                                                +sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit
                                                +sudo systemctl restart docker
                                                +sudo groupadd docker
                                                +sudo gpasswd -a $USER docker
                                                +

                                                +

                                                Restart your device, and run the following command. +

                                                sudo service docker restart
                                                +

                                                +

                                                Now you can run your model via: +

                                                run_docker.sh
                                                docker pull jakeshihaoluo/rllte_deployment_env:0.0.1
                                                +docker run -it -v ${path_to_the_repo}:/rllte --gpus all jakeshihaoluo/rllte_deployment_env:0.0.1
                                                +cd /rllte/deloyment/c++
                                                +mkdir build && cd build
                                                +cmake .. && make
                                                +./DeployerTest ../../model/test_model.onnx
                                                +

                                                + + + + + + + + + + + + + +
                                                +
                                                + + + +
                                                + + + +
                                                + +
                                                + + + + +
                                                + +
                                                +
                                                +
                                                +
                                                + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tutorials/me/comp/index.html b/tutorials/me/comp/index.html new file mode 100644 index 00000000..68b4d42b --- /dev/null +++ b/tutorials/me/comp/index.html @@ -0,0 +1,4056 @@ + + + + + + + + + + + + + + + + + + + + + Performance Comparison of Multiple Algorithms - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
                                                + +
                                                + + + + + + +
                                                + + + + + + + +
                                                + +
                                                + + + + +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + + + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                + + + + + + + +

                                                Performance Comparison of Multiple Algorithms

                                                + + + + +

                                                Download Data

                                                +

                                                Suppose we want to evaluate algorithm performance on the Procgen benchmark. First, download the data from +rllte-hub: +

                                                example.py
                                                # load packages
                                                +from rllte.evaluation import Performance, Comparison, min_max_normalize
                                                +from rllte.hub.datasets import Procgen, Atari
                                                +import numpy as np
                                                +# load scores
                                                +procgen = Procgen()
                                                +procgen_scores = procgen.load_scores()
                                                +print(procgen_scores.keys())
                                                +# get ppo-normalized scores
                                                +ppo_norm_scores = dict()
                                                +MIN_SCORES = np.zeros_like(procgen_scores['ppo'])
                                                +MAX_SCORES = np.mean(procgen_scores['ppo'], axis=0)
                                                +for algo in procgen_scores.keys():
                                                +    ppo_norm_scores[algo] = min_max_normalize(procgen_scores[algo],
                                                +                                              min_scores=MIN_SCORES,
                                                +                                              max_scores=MAX_SCORES)
                                                +
                                                +# Output:
                                                +# dict_keys(['ppg', 'mixreg', 'ppo', 'idaac', 'plr', 'ucb-drac'])
                                                +
+For each algorithm, this will return an NdArray of size (10 x 16), where scores[n][m] represents the score on run n of task m.
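For instance, a quick per-task or per-run summary follows directly from that layout (plain NumPy, shown only to clarify the array axes):

# shape (10, 16): axis 0 indexes runs, axis 1 indexes tasks
idaac_scores = ppo_norm_scores['idaac']
per_task_mean = idaac_scores.mean(axis=0)   # one value per task, averaged over the 10 runs
per_run_mean = idaac_scores.mean(axis=1)    # one value per run, averaged over the 16 tasks
print(per_task_mean.shape, per_run_mean.shape)   # (16,), (10,)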

                                                +

                                                Performance Comparison

                                                +

                                                Comparison module allows you to compare the performance between two algorithms: +

                                                example.py
comp = Comparison(scores_x=ppo_norm_scores['ppg'],
+                  scores_y=ppo_norm_scores['ppo'],
                                                +                  get_ci=True)
                                                +comp.compute_poi()
                                                +
                                                +# Output:
                                                +# (0.8153125, array([[0.779375  ], [0.85000781]]))
                                                +
+This indicates that the overall probability of improvement of PPG over PPO is 0.8153125.

                                                +

                                                Available metrics:

                                                + + + + + + + + + + + + + +
Metric | Remark
.compute_poi | Compute the overall probability of improvement of algorithm X over Y.
                                                + + + + + + + + + + + + + +
                                                +
                                                + + + +
                                                + + + +
                                                + +
                                                + + + + +
                                                + +
                                                +
                                                +
                                                +
                                                + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tutorials/me/perf/index.html b/tutorials/me/perf/index.html new file mode 100644 index 00000000..43823e4c --- /dev/null +++ b/tutorials/me/perf/index.html @@ -0,0 +1,4077 @@ + + + + + + + + + + + + + + + + + + + + + Performance Evaluation of Single Algorithm - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

                                                Performance Evaluation of Single Algorithm

                                                + + + + +

                                                RLLTE provides evaluation methods based on:

                                                +
                                                +

                                                Agarwal R, Schwarzer M, Castro P S, et al. Deep reinforcement learning at the edge of the statistical precipice[J]. Advances in neural information processing systems, 2021, 34: 29304-29320.

                                                +
                                                +

We reconstruct and improve upon the code of the official rliable repository to provide greater convenience and efficiency.

                                                +

                                                Download Data

                                                +

                                                Suppose we want to evaluate algorithm performance on the Procgen benchmark. First, download the data from +rllte-hub: +

                                                example.py
                                                # load packages
                                                +from rllte.evaluation import Performance, Comparison, min_max_normalize
                                                +from rllte.hub.datasets import Procgen, Atari
                                                +import numpy as np
                                                +# load scores
                                                +procgen = Procgen()
                                                +procgen_scores = procgen.load_scores()
                                                +print(procgen_scores.keys())
                                                +# get ppo-normalized scores
                                                +ppo_norm_scores = dict()
                                                +MIN_SCORES = np.zeros_like(procgen_scores['ppo'])
                                                +MAX_SCORES = np.mean(procgen_scores['ppo'], axis=0)
                                                +for algo in procgen_scores.keys():
                                                +    ppo_norm_scores[algo] = min_max_normalize(procgen_scores[algo],
                                                +                                              min_scores=MIN_SCORES,
                                                +                                              max_scores=MAX_SCORES)
                                                +
                                                +# Output:
                                                +# dict_keys(['ppg', 'mixreg', 'ppo', 'idaac', 'plr', 'ucb-drac'])
                                                +
+For each algorithm, this will return an ndarray of size (10 x 16), where scores[n][m] represents the score of run n on task m.
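Continuing the snippet above, the short sketch below shows how such an array can be indexed, and what the normalization amounts to under the assumption (ours, not a statement about the library's internals) that min_max_normalize performs a standard (x - min) / (max - min) scaling:

ppg = ppo_norm_scores['ppg']   # shape (10, 16): 10 runs x 16 tasks
print(ppg.shape)               # (10, 16)
print(ppg[0, 5])               # normalized score of run 0 on task 5
print(ppg.mean(axis=0))        # per-task mean over the 10 runs

# the normalization itself is roughly:
# norm = (procgen_scores['ppg'] - MIN_SCORES) / (MAX_SCORES - MIN_SCORES)
# with MIN_SCORES = 0 and MAX_SCORES = the per-task mean PPO score,
# a value of 1.0 means "equal to the average PPO score on that task".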

                                                +

                                                Performance Evaluation

                                                +

                                                Initialize the performance evaluator: +

                                                example.py
perf = Performance(scores=ppo_norm_scores['ppo'], 
                                                +                   get_ci=True # get confidence intervals
                                                +                   )
                                                +perf.aggregate_mean()
                                                +
                                                +# Output:
                                                +# Computing confidence interval for aggregate MEAN...
                                                +# (1.0, array([[0.9737281 ], [1.02564405]]))
                                                +
                                                +Available metrics:

Metric | Remark
.aggregate_mean | Computes mean of sample mean scores per task.
.aggregate_median | Computes median of sample mean scores per task.
.aggregate_og | Computes optimality gap across all runs and tasks.
.aggregate_iqm | Computes the interquartile mean across runs and tasks.
.create_performance_profile | Computes the performance profiles.
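For reference, the sketches below give rough NumPy point estimates of two of these aggregates. They are illustrative only and may differ in detail from the evaluator, which additionally computes stratified bootstrap confidence intervals when get_ci=True:

import numpy as np

def iqm(scores):
    # interquartile mean over all runs and tasks: average the middle 50% of scores
    x = np.sort(scores.reshape(-1))
    cut = len(x) // 4
    return float(x[cut:len(x) - cut].mean())

def optimality_gap(scores, gamma=1.0):
    # average amount by which scores fall short of the target value gamma
    return float(gamma - np.minimum(scores.reshape(-1), gamma).mean())

# e.g., iqm(ppo_norm_scores['ppo']), optimality_gap(ppo_norm_scores['ppo'])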
                                                + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tutorials/me/visual/index.html b/tutorials/me/visual/index.html new file mode 100644 index 00000000..694f5d6b --- /dev/null +++ b/tutorials/me/visual/index.html @@ -0,0 +1,4208 @@ + + + + + + + + + + + + + + + + + + + + + Metrics Visualization - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

                                                Metrics Visualization

                                                + + + + +

                                                Download Data

                                                +

                                                Suppose we want to visualize algorithm performance on the Procgen benchmark. First, download the data from +rllte-hub: +

                                                example.py
                                                # load packages
                                                +from rllte.evaluation import Performance, Comparison, min_max_normalize
                                                +from rllte.hub.datasets import Procgen, Atari
                                                +from rllte.evaluation import (plot_interval_estimates,
                                                +                              plot_probability_improvement,
                                                +                              plot_sample_efficiency_curve,
                                                +                              plot_performance_profile)
                                                +import numpy as np
                                                +# load scores
                                                +procgen = Procgen()
                                                +procgen_scores = procgen.load_scores()
                                                +print(procgen_scores.keys())
                                                +# get ppo-normalized scores
                                                +ppo_norm_scores = dict()
                                                +MIN_SCORES = np.zeros_like(procgen_scores['ppo'])
                                                +MAX_SCORES = np.mean(procgen_scores['ppo'], axis=0)
                                                +for algo in procgen_scores.keys():
                                                +    ppo_norm_scores[algo] = min_max_normalize(procgen_scores[algo],
                                                +                                              min_scores=MIN_SCORES,
                                                +                                              max_scores=MAX_SCORES)
                                                +
                                                +# Output:
                                                +# dict_keys(['ppg', 'mixreg', 'ppo', 'idaac', 'plr', 'ucb-drac'])
                                                +
+For each algorithm, this will return an ndarray of size (10 x 16), where scores[n][m] represents the score of run n on task m.

                                                +

                                                Visualization

                                                +

                                                .plot_interval_estimates

                                                +

.plot_interval_estimates plots various performance metrics of algorithms with stratified confidence intervals. Taking Procgen as an example, we plot four reliable metrics computed by the Performance evaluator:

                                                example.py
                                                # construct a performance dict
                                                +aggregate_performance_dict = {
                                                +    "MEAN": {},
                                                +    "MEDIAN": {},
                                                +    "IQM": {},
                                                +    "OG": {}
                                                +}
                                                +for algo in ppo_norm_scores.keys():
                                                +    perf = Performance(scores=ppo_norm_scores[algo], get_ci=True)
                                                +    aggregate_performance_dict['MEAN'][algo] = perf.aggregate_mean()
                                                +    aggregate_performance_dict['MEDIAN'][algo] = perf.aggregate_median()
                                                +    aggregate_performance_dict['IQM'][algo] = perf.aggregate_iqm()
                                                +    aggregate_performance_dict['OG'][algo] = perf.aggregate_og()
                                                +
                                                +# plot all the four metrics of all the algorithms
                                                +fig, axes = plot_interval_estimates(aggregate_performance_dict,
                                                +                                    metric_names=['MEAN', 'MEDIAN', 'IQM', 'OG'],
                                                +                                    algorithms=['PPO', 'MixReg', 'UCB-DrAC', 'PLR', 'PPG', 'IDAAC'],
                                                +                                    xlabel="PPO-Normalized Score")
                                                +fig.savefig('./plot_interval_estimates1.png', format='png', bbox_inches='tight')
                                                +
                                                +# plot two metrics of all the algorithms
                                                +fig, axes = plot_interval_estimates(aggregate_performance_dict,
                                                +                        metric_names=['MEAN', 'MEDIAN'],
                                                +                        algorithms=['PPO', 'MixReg', 'UCB-DrAC', 'PLR', 'PPG', 'IDAAC'],
                                                +                        xlabel="PPO-Normalized Score")
                                                +fig.savefig('./plot_interval_estimates2.png', format='png', bbox_inches='tight')
                                                +
                                                +# plot two metrics of three algorithms
                                                +fig, axes = plot_interval_estimates(aggregate_performance_dict,
                                                +                        metric_names=['MEAN', 'MEDIAN'],
                                                +                        algorithms=['ppg', 'mixreg', 'ppo'],
                                                +                        xlabel="PPO-Normalized Score",
                                                +                        xlabel_y_coordinate=-0.4)
                                                +fig.savefig('./plot_interval_estimates3.png', format='png', bbox_inches='tight')
                                                +
                                                +The output figures are:

                                                +
                                                + + + +
                                                + +

                                                .plot_probability_improvement

                                                +

                                                .plot_probability_improvement plots probability of improvement with stratified confidence intervals. An example is: +

                                                example.py
                                                # construct a comparison dict
                                                +pairs = [['IDAAC', 'PPG'], ['IDAAC', 'UCB-DrAC'], ['IDAAC', 'PPO'],
                                                +    ['PPG', 'PPO'], ['UCB-DrAC', 'PLR'], 
                                                +    ['PLR', 'MixReg'], ['UCB-DrAC', 'MixReg'],  ['MixReg', 'PPO']]
                                                +
                                                +probability_of_improvement_dict = {}
                                                +for pair in pairs:
+    comp = Comparison(scores_x=ppo_norm_scores[pair[0].lower()],  # dict keys are lowercase (see the output above)
+                      scores_y=ppo_norm_scores[pair[1].lower()],
                                                +                      get_ci=True)
                                                +    probability_of_improvement_dict['_'.join(pair)] = comp.compute_poi()
                                                +
                                                +fig, ax = plot_probability_improvement(poi_dict=probability_of_improvement_dict)
                                                +fig.savefig('./plot_probability_improvement.png', format='png', bbox_inches='tight')
                                                +
                                                +The output figure is:

                                                +
                                                + +
                                                + +

                                                .plot_performance_profile

                                                +

                                                .plot_performance_profile plots performance profiles with stratified confidence intervals. An example is: +

                                                example.py
                                                profile_dict = dict()
                                                +procgen_tau = np.linspace(0.5, 3.6, 101)
                                                +
                                                +for algo in ppo_norm_scores.keys():
                                                +    perf = Performance(scores=ppo_norm_scores[algo], get_ci=True, reps=2000)
                                                +    profile_dict[algo] = perf.create_performance_profile(tau_list=procgen_tau)
                                                +
                                                +fig, axes = plot_performance_profile(profile_dict, 
                                                +                         procgen_tau,
                                                +                         figsize=(7, 5),
                                                +                         xlabel=r'PPO-Normalized Score $(\tau)$',
                                                +                         )
                                                +fig.savefig('./plot_performance_profile.png', format='png', bbox_inches='tight')
                                                +
                                                +The output figure is:

                                                +
                                                + +
                                                + +
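As a reminder of what the curves above represent: a performance profile point at threshold \(\tau\) is, roughly, the fraction of all runs and tasks whose normalized score exceeds \(\tau\). A minimal NumPy sketch of this point estimate (without the stratified confidence bands added by the library) is:

import numpy as np

def performance_profile(scores, tau_list):
    # scores: (n_runs, n_tasks); returns the fraction of scores above each threshold
    flat = scores.reshape(-1)
    return np.array([(flat > tau).mean() for tau in tau_list])

# e.g., performance_profile(ppo_norm_scores['ppg'], np.linspace(0.5, 3.6, 101))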

                                                .plot_sample_efficiency_curve

                                                +

                                                .plot_sample_efficiency_curve plots an aggregate metric with CIs as a function of environment frames. An example is: +

                                                example.py
                                                # get Atari games' curve data
                                                +ale_all_frames_scores_dict = Atari().load_curves()
                                                +print(ale_all_frames_scores_dict.keys())
                                                +print(ale_all_frames_scores_dict['C51'].shape)
                                                +# Output:
                                                +# dict_keys(['C51', 'DQN (Adam)', 'DQN (Nature)', 'Rainbow', 'IQN', 'REM', 'M-IQN', 'DreamerV2'])
                                                +# (5, 55, 200)
                                                +# 200 data points of 55 games over 5 random seeds
                                                +
                                                +frames = np.array([1, 10, 25, 50, 75, 100, 125, 150, 175, 200]) - 1
                                                +
                                                +sampling_dict = dict()
                                                +for algo in ale_all_frames_scores_dict.keys():
                                                +    sampling_dict[algo] = [[], [], []]
                                                +    for frame in frames:
                                                +        perf = Performance(ale_all_frames_scores_dict[algo][:, :, frame],
                                                +                           get_ci=True, 
                                                +                           reps=2000)
                                                +        value, CIs = perf.aggregate_iqm()
                                                +        sampling_dict[algo][0].append(value)
                                                +        sampling_dict[algo][1].append(CIs[0]) # lower bound
                                                +        sampling_dict[algo][2].append(CIs[1]) # upper bound
                                                +
                                                +    sampling_dict[algo][0] = np.array(sampling_dict[algo][0]).reshape(-1)
                                                +    sampling_dict[algo][1] = np.array(sampling_dict[algo][1]).reshape(-1)
                                                +    sampling_dict[algo][2] = np.array(sampling_dict[algo][2]).reshape(-1)
                                                +
                                                +algorithms = ['C51', 'DQN (Adam)', 'DQN (Nature)', 'Rainbow', 'IQN', 'REM', 'M-IQN', 'DreamerV2']
                                                +fig, axes = plot_sample_efficiency_curve(
                                                +    sampling_dict,
                                                +    frames+1, 
                                                +    figsize=(7, 4.5),
                                                +    algorithms=algorithms,
                                                +    xlabel=r'Number of Frames (in millions)',
                                                +    ylabel='IQM Human Normalized Score')
                                                +fig.savefig('./plot_sample_efficiency_curve.png', format='png', bbox_inches='tight')
                                                +
                                                +The output figure is:

                                                +
                                                + +
                                                + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tutorials/mt/irs/index.html b/tutorials/mt/irs/index.html new file mode 100644 index 00000000..343be690 --- /dev/null +++ b/tutorials/mt/irs/index.html @@ -0,0 +1,4057 @@ + + + + + + + + + + + + + + + + + + + + + Intrinsic Reward Shaping for Enhancing Exploration - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

                                                Intrinsic Reward Shaping for Enhancing Exploration

                                                + + + + +

Since RLLTE decouples RL algorithms into minimal primitives from the perspective of exploitation and exploration, intrinsic reward shaping is supported by default. Because different intrinsic reward methods are computed in very different ways, RLLTE adopts the following rules:

                                                +
                                                  +
1. The environments are assumed to be vectorized;
2. The compute_irs function of each intrinsic reward module has a mandatory argument samples, which is a dict like (a dummy dict with these shapes is sketched right after this list):
   • obs (n_steps, n_envs, *obs_shape), torch.Tensor
   • actions (n_steps, n_envs, *action_shape), torch.Tensor
   • rewards (n_steps, n_envs), torch.Tensor
   • next_obs (n_steps, n_envs, *obs_shape), torch.Tensor
                                                +
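As a concrete illustration of the expected layout, the dummy dict below uses hypothetical shapes for a pixel-based task; only the key names and shape conventions come from the rules above, the numbers are made up:

import torch as th

# hypothetical rollout dimensions
n_steps, n_envs = 128, 7
obs_shape, action_shape = (9, 84, 84), (1,)

samples = {
    'obs':      th.rand(n_steps, n_envs, *obs_shape),
    'actions':  th.rand(n_steps, n_envs, *action_shape),
    'rewards':  th.rand(n_steps, n_envs),
    'next_obs': th.rand(n_steps, n_envs, *obs_shape),
}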

Take RE3 for instance: it computes the intrinsic reward for each state based on the Euclidean distance between the state and its \(k\)-nearest neighbor within a mini-batch. Thus it suffices to provide the obs data to compute the reward. The following code provides a usage example of RE3:

                                                example.py
                                                from rllte.xplore.reward import RE3
                                                +from rllte.env import make_dmc_env
                                                +import torch as th
                                                +
                                                +if __name__ == '__main__':
                                                +    num_envs = 7
                                                +    num_steps = 128
                                                +    # create env
                                                +    env = make_dmc_env(env_id="cartpole_balance", num_envs=num_envs)
                                                +    print(env.observation_space, env.action_space)
                                                +    # create RE3 instance
                                                +    re3 = RE3(
                                                +        observation_space=env.observation_space,
                                                +        action_space=env.action_space
                                                +    )
                                                +    # compute intrinsic rewards
                                                +    obs = th.rand(size=(num_steps, num_envs, *env.observation_space.shape))
                                                +    intrinsic_rewards = re3.compute_irs(samples={'obs': obs})
                                                +
                                                +    print(intrinsic_rewards.shape, type(intrinsic_rewards))
                                                +    print(intrinsic_rewards)
                                                +
                                                +# Output:
                                                +# {'shape': [9, 84, 84]} {'shape': [1], 'type': 'Box', 'range': [-1.0, 1.0]}
                                                +# torch.Size([128, 7]) <class 'torch.Tensor'>
                                                +
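For intuition, the core of the RE3 reward is a \(k\)-nearest-neighbor entropy estimate over a batch of encoded observations. The simplified sketch below omits the fixed random encoder and the per-environment batching used by the real module; it only shows the distance-based computation:

import torch as th

def knn_entropy_reward(embeddings, k=3):
    # embeddings: (n_samples, dim); returns one reward per sample
    dists = th.cdist(embeddings, embeddings, p=2.0)  # pairwise Euclidean distances
    knn_dists, _ = th.kthvalue(dists, k + 1, dim=1)  # k-th neighbor (k + 1 skips the zero self-distance)
    return th.log(knn_dists + 1.0)

# toy usage with random 50-dimensional "embeddings"
print(knn_entropy_reward(th.rand(128, 50), k=3).shape)  # torch.Size([128])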

                                                +

                                                You can also invoke the intrinsic reward module in all the implemented algorithms directly by .set function: +

                                                example.py
                                                from rllte.agent import PPO
                                                +from rllte.env import make_atari_env
                                                +from rllte.xplore.reward import RE3
                                                +
                                                +if __name__ == "__main__":
                                                +    # env setup
                                                +    device = "cuda:0"
                                                +    env = make_atari_env(device=device)
                                                +    eval_env = make_atari_env(device=device)
                                                +    # create agent
                                                +    agent = PPO(env=env, 
                                                +                eval_env=eval_env, 
                                                +                device=device,
                                                +                tag="ppo_atari")
                                                +    # create intrinsic reward
                                                +    re3 = RE3(observation_space=env.observation_space,
                                                +              action_space=env.action_space,
                                                +              device=device)
                                                +    # set the module
                                                +    agent.set(reward=re3)
                                                +    # start training
                                                +    agent.train(num_train_steps=5000)
                                                +
                                                +Run example.py and you'll see the intrinsic reward module is invoked: +
                                                [08/04/2023 03:54:10 PM] - [INFO.] - Invoking RLLTE Engine...
                                                +[08/04/2023 03:54:10 PM] - [INFO.] - ================================================================================
                                                +[08/04/2023 03:54:10 PM] - [INFO.] - Tag               : ppo_atari
                                                +[08/04/2023 03:54:10 PM] - [INFO.] - Device            : NVIDIA GeForce RTX 3090
                                                +[08/04/2023 03:54:11 PM] - [DEBUG] - Agent             : PPO
                                                +[08/04/2023 03:54:11 PM] - [DEBUG] - Encoder           : MnihCnnEncoder
                                                +[08/04/2023 03:54:11 PM] - [DEBUG] - Policy            : OnPolicySharedActorCritic
                                                +[08/04/2023 03:54:11 PM] - [DEBUG] - Storage           : VanillaRolloutStorage
                                                +[08/04/2023 03:54:11 PM] - [DEBUG] - Distribution      : Categorical
                                                +[08/04/2023 03:54:11 PM] - [DEBUG] - Augmentation      : False
                                                +[08/04/2023 03:54:11 PM] - [DEBUG] - Intrinsic Reward  : True, RE3
                                                +[08/04/2023 03:54:11 PM] - [DEBUG] - ================================================================================
                                                +

                                                + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tutorials/mt/oa/index.html b/tutorials/mt/oa/index.html new file mode 100644 index 00000000..fd278023 --- /dev/null +++ b/tutorials/mt/oa/index.html @@ -0,0 +1,4029 @@ + + + + + + + + + + + + + + + + + + + + + Observation Augmentation for Sample Efficiency and Generalization - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

                                                Observation Augmentation for Sample Efficiency and Generalization

                                                + + + + +

Observation augmentation is an efficient approach to improving sample efficiency and generalization, and it is also a basic primitive of RLLTE.

                                                +
                                                +
                                                  +
                                                • Laskin M, Lee K, Stooke A, et al. Reinforcement learning with augmented data[J]. Advances in neural information processing systems, 2020, 33: 19884-19895.
                                                • +
                                                • Yarats D, Fergus R, Lazaric A, et al. Mastering Visual Continuous Control: Improved Data-Augmented Reinforcement Learning[C]//International Conference on Learning Representations. 2021.
                                                • +
                                                +
                                                +

RLLTE implements the augmentation modules in a PyTorch-NN manner, and both image-based and state-based observations are supported. A code example is:

                                                example.py
                                                from rllte.agent import DrAC
                                                +from rllte.env import make_atari_env
                                                +from rllte.xplore.augmentation import RandomCrop
                                                +
                                                +if __name__ == "__main__":
                                                +    # env setup
                                                +    device = "cuda:0"
                                                +    env = make_atari_env(device=device)
                                                +    eval_env = make_atari_env(device=device)
                                                +    # create agent
                                                +    agent = DrAC(env=env, 
                                                +                eval_env=eval_env, 
                                                +                device=device,
                                                +                tag="drac_atari")
                                                +    # create augmentation module
                                                +    random_crop = RandomCrop()
                                                +    # set the module
                                                +    agent.set(augmentation=random_crop)
                                                +    # start training
                                                +    agent.train(num_train_steps=5000)
                                                +
                                                +Run example.py and you'll see the augmentation module is invoked: +
                                                [08/04/2023 05:00:15 PM] - [INFO.] - Invoking RLLTE Engine...
                                                +[08/04/2023 05:00:15 PM] - [INFO.] - ================================================================================
                                                +[08/04/2023 05:00:15 PM] - [INFO.] - Tag               : drac_atari
                                                +[08/04/2023 05:00:16 PM] - [INFO.] - Device            : NVIDIA GeForce RTX 3090
                                                +[08/04/2023 05:00:16 PM] - [DEBUG] - Agent             : DrAC
                                                +[08/04/2023 05:00:16 PM] - [DEBUG] - Encoder           : MnihCnnEncoder
                                                +[08/04/2023 05:00:16 PM] - [DEBUG] - Policy            : OnPolicySharedActorCritic
                                                +[08/04/2023 05:00:16 PM] - [DEBUG] - Storage           : VanillaRolloutStorage
                                                +[08/04/2023 05:00:16 PM] - [DEBUG] - Distribution      : Categorical
                                                +[08/04/2023 05:00:16 PM] - [DEBUG] - Augmentation      : True, RandomCrop
                                                +[08/04/2023 05:00:16 PM] - [DEBUG] - Intrinsic Reward  : False
                                                +[08/04/2023 05:00:16 PM] - [DEBUG] - ================================================================================
                                                +...
                                                +
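Because augmentations are plain PyTorch modules, writing a custom one mostly amounts to subclassing nn.Module with a forward that maps a batch of observations to augmented observations. The sketch below is a standalone illustration; the class name and padding scheme are ours, not an RLLTE built-in:

import torch as th
from torch import nn
from torch.nn import functional as F

class MyRandomShift(nn.Module):
    # pad an image batch and randomly crop it back to its original size
    def __init__(self, pad=4):
        super().__init__()
        self.pad = pad

    def forward(self, x):
        # x: (batch, channels, height, width)
        _, _, h, w = x.shape
        x = F.pad(x, [self.pad] * 4, mode="replicate")
        top = int(th.randint(0, 2 * self.pad + 1, (1,)))
        left = int(th.randint(0, 2 * self.pad + 1, (1,)))
        return x[:, :, top:top + h, left:left + w]

print(MyRandomShift(pad=4)(th.rand(8, 9, 84, 84)).shape)  # torch.Size([8, 9, 84, 84])

Whether such a module can be passed to .set(augmentation=...) depends on the algorithm's support for data augmentation; see the compatibility note below.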

                                                +
                                                +

                                                Compatibility of augmentation

                                                +

Note that the module will only make a difference when the algorithm supports data augmentation. Please refer to https://docs.rllte.dev/api/ for the compatibility.

                                                +
                                                + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tutorials/mt/pre-training/index.html b/tutorials/mt/pre-training/index.html new file mode 100644 index 00000000..47405ddb --- /dev/null +++ b/tutorials/mt/pre-training/index.html @@ -0,0 +1,4091 @@ + + + + + + + + + + + + + + + + + + + + + Pre-training with Intrinsic Rewards - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

                                                Pre-training with Intrinsic Rewards

                                                + + + + +

                                                Pre-training

                                                +

                                                Currently, RLLTE only supports online pre-training via intrinsic reward. To turn on the pre-training mode, +it suffices to write a train.py like: +

                                                train.py
                                                from rllte.agent import PPO
                                                +from rllte.env import make_atari_env
                                                +from rllte.xplore.reward import RE3
                                                +
                                                +if __name__ == "__main__":
                                                +    # env setup
                                                +    device = "cuda:0"
                                                +    env = make_atari_env(device=device)
                                                +    eval_env = make_atari_env(device=device)
                                                +    # create agent and turn on pre-training mode
                                                +    agent = PPO(env=env, 
                                                +                eval_env=eval_env, 
                                                +                device=device,
                                                +                tag="ppo_atari",
                                                +                pretraining=True)
                                                +    # create intrinsic reward
                                                +    re3 = RE3(observation_space=env.observation_space,
                                                +              action_space=env.action_space,
                                                +              device=device)
                                                +    # set the reward module
                                                +    agent.set(reward=re3)
                                                +    # start training
                                                +    agent.train(num_train_steps=5000)
                                                +
                                                +Run train.py and you'll see the pre-training mode is on: +
                                                [08/04/2023 05:05:54 PM] - [INFO.] - Invoking RLLTE Engine...
                                                +[08/04/2023 05:05:54 PM] - [INFO.] - ================================================================================
                                                +[08/04/2023 05:05:54 PM] - [INFO.] - Tag               : ppo_atari
                                                +[08/04/2023 05:05:54 PM] - [INFO.] - Device            : NVIDIA GeForce RTX 3090
                                                +[08/04/2023 05:05:54 PM] - [DEBUG] - Agent             : PPO
                                                +[08/04/2023 05:05:54 PM] - [DEBUG] - Encoder           : MnihCnnEncoder
                                                +[08/04/2023 05:05:54 PM] - [DEBUG] - Policy            : OnPolicySharedActorCritic
                                                +[08/04/2023 05:05:54 PM] - [DEBUG] - Storage           : VanillaRolloutStorage
                                                +[08/04/2023 05:05:54 PM] - [DEBUG] - Distribution      : Categorical
                                                +[08/04/2023 05:05:54 PM] - [DEBUG] - Augmentation      : False
                                                +[08/04/2023 05:05:54 PM] - [DEBUG] - Intrinsic Reward  : True, RE3
                                                +[08/04/2023 05:05:54 PM] - [INFO.] - Pre-training Mode : On
                                                +[08/04/2023 05:05:54 PM] - [DEBUG] - ================================================================================
                                                +...
                                                +

                                                +
                                                +

                                                Tip

                                                +

                                                When the pre-training mode is on, a reward module must be specified!

                                                +
                                                +

                                                For all supported reward modules, see API Documentation.

                                                +

                                                Fine-tuning

                                                +

Once the pre-training is finished, you can find the model parameters in the pretrained subfolder of the working directory. To load them, simply turn off the pre-training mode and pass the checkpoint path via the init_model_path argument of .train():

                                                +

                                                train.py
                                                from rllte.agent import PPO
                                                +from rllte.env import make_atari_env
                                                +
                                                +if __name__ == "__main__":
                                                +    # env setup
                                                +    device = "cuda:0"
                                                +    env = make_atari_env(device=device)
                                                +    eval_env = make_atari_env(device=device)
                                                +    # create agent and turn off pre-training mode
                                                +    agent = PPO(env=env, 
                                                +                eval_env=eval_env, 
                                                +                device=device,
                                                +                tag="ppo_atari",
                                                +                pretraining=False)
                                                +    # start training
                                                +    agent.train(num_train_steps=5000,
                                                +                init_model_path="/export/yuanmingqi/code/rllte/logs/ppo_atari/2023-06-05-02-42-12/pretrained/pretrained.pth")
                                                +
                                                +Run train.py and you'll see the pre-trained model parameters are loaded: +
                                                [08/04/2023 05:07:52 PM] - [INFO.] - Invoking RLLTE Engine...
                                                +[08/04/2023 05:07:52 PM] - [INFO.] - ================================================================================
                                                +[08/04/2023 05:07:52 PM] - [INFO.] - Tag               : ppo_atari
                                                +[08/04/2023 05:07:52 PM] - [INFO.] - Device            : NVIDIA GeForce RTX 3090
                                                +[08/04/2023 05:07:53 PM] - [DEBUG] - Agent             : PPO
                                                +[08/04/2023 05:07:53 PM] - [DEBUG] - Encoder           : MnihCnnEncoder
                                                +[08/04/2023 05:07:53 PM] - [DEBUG] - Policy            : OnPolicySharedActorCritic
                                                +[08/04/2023 05:07:53 PM] - [DEBUG] - Storage           : VanillaRolloutStorage
                                                +[08/04/2023 05:07:53 PM] - [DEBUG] - Distribution      : Categorical
                                                +[08/04/2023 05:07:53 PM] - [DEBUG] - Augmentation      : False
                                                +[08/04/2023 05:07:53 PM] - [DEBUG] - Intrinsic Reward  : False
                                                +[08/04/2023 05:07:53 PM] - [DEBUG] - ================================================================================
                                                +[08/04/2023 05:07:53 PM] - [INFO.] - Loading Initial Parameters from ./logs/ppo_atari/...
                                                +...
                                                +

                                                + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tutorials/mt/quick_start/index.html b/tutorials/mt/quick_start/index.html new file mode 100644 index 00000000..abc87340 --- /dev/null +++ b/tutorials/mt/quick_start/index.html @@ -0,0 +1,4072 @@ + + + + + + + + + + + + + + + + + + + + + Quick Start - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

                                                Quick Start

                                                + + + + +

RLLTE provides reliable implementations of highly recognized RL algorithms, and users can build applications with very simple code.

                                                +

                                                On NVIDIA GPU

                                                +

Suppose we want to use DrQ-v2 to solve a task from the DeepMind Control Suite; it suffices to write a train.py like:

                                                +
                                                train.py
                                                # import `env` and `agent` module
                                                +from rllte.env import make_dmc_env 
                                                +from rllte.agent import DrQv2
                                                +
                                                +if __name__ == "__main__":
                                                +    device = "cuda:0"
                                                +    # create env, and `eval_env` is optional
                                                +    env = make_dmc_env(env_id="cartpole_balance", device=device)
                                                +    eval_env = make_dmc_env(env_id="cartpole_balance", device=device)
                                                +    # create agent
                                                +    agent = DrQv2(env=env, 
                                                +                  eval_env=eval_env, 
+                  device=device,
                                                +                  tag="drqv2_dmc_pixel")
                                                +    # start training
                                                +    agent.train(num_train_steps=5000, log_interval=1000)
                                                +
                                                +

                                                Run train.py and you will see the following output:

                                                +
                                                + +
                                                + +
                                                +

                                                Read the logs

                                                +
                                                  +
• S: Number of environment steps. Note that S isn't equal to the number of frames in visual tasks: number_of_frames = number_of_steps * number_of_action_repeats (e.g., with an action repeat of 2, S = 2500 corresponds to 5000 frames).
                                                • +
                                                • E: Number of environment episodes.
                                                • +
                                                • L: Average episode length.
                                                • +
                                                • R: Average episode reward.
                                                • +
                                                • FPS: Training FPS.
                                                • +
                                                • T: Time costs.
                                                • +
                                                +
                                                +

                                                On HUAWEI NPU

                                                +

                                                Similarly, if we want to train an agent on HUAWEI NPU, it suffices to replace cuda with npu: +

                                                train.py
                                                device = "npu:0"
                                                +
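In practice, the device string can also be chosen at runtime. The following is a minimal sketch (not part of RLLTE itself) that falls back from GPU to NPU to CPU; it assumes the optional torch_npu package registers the npu device type when installed, as in the verification snippet later in these docs:
import torch as th

def pick_device() -> str:
    # prefer an NVIDIA GPU if CUDA is available
    if th.cuda.is_available():
        return "cuda:0"
    # otherwise try a HUAWEI NPU (requires the optional `torch_npu` package)
    try:
        import torch_npu  # noqa: F401  (registers the "npu" device type in PyTorch)
        if th.npu.is_available():
            return "npu:0"
    except ImportError:
        pass
    # fall back to CPU
    return "cpu"

device = pick_device()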

                                                +
                                                +

                                                Compatibility of NPU

                                                +

Please refer to https://docs.rllte.dev/api/ for details on NPU compatibility.

                                                +
                                                +

                                                Load the trained model

                                                +

                                                Once the training is finished, you can find agent.pth in the subfolder model of the specified working directory.

                                                +
                                                play.py
                                                import torch as th
                                                +
                                                +# load the model and specify the map location
                                                +agent = th.load("agent.pth", map_location=th.device('cpu'))
+# a dummy observation with the policy's expected input shape
+# (here 9 channels = 3 stacked RGB frames of 84x84 pixels, as in the DMC pixel task above)
+obs = th.zeros(size=(1, 9, 84, 84))
                                                +action = agent(obs)
                                                +print(action)
                                                +
                                                +# Output: tensor([[-1.0000]], grad_fn=<TanhBackward0>)
                                                +
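Beyond feeding a dummy observation, the loaded policy can be rolled out in the original task. The snippet below is only a sketch: it assumes the Gymnasium-style (obs, reward, terminated, truncated, info) step interface and torch-tensor outputs that rllte's environment wrappers provide, so adapt it to your setup if it differs:
import torch as th
from rllte.env import make_dmc_env

if __name__ == "__main__":
    env = make_dmc_env(env_id="cartpole_balance", device="cpu")
    # load the trained policy saved by the agent
    agent = th.load("agent.pth", map_location=th.device("cpu"))

    obs, info = env.reset()
    episode_return = 0.0
    with th.no_grad():
        for _ in range(1000):  # upper bound on the rollout length
            action = agent(obs)
            obs, reward, terminated, truncated, info = env.step(action)
            episode_return += float(reward)
            if bool(terminated) or bool(truncated):
                break
    print(f"episode return: {episode_return:.1f}")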
                                                + + + + + + + + + + + + + +
                                                +
                                                + + + +
                                                + + + +
                                                + +
                                                + + + + +
                                                + +
                                                +
                                                +
                                                +
                                                + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tutorials/mt/replacement/index.html b/tutorials/mt/replacement/index.html new file mode 100644 index 00000000..59eb39a8 --- /dev/null +++ b/tutorials/mt/replacement/index.html @@ -0,0 +1,4090 @@ + + + + + + + + + + + + + + + + + + + + + Module Replacement for An Implemented Algorithm - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
                                                + +
                                                + + + + + + +
                                                + + + + + + + +
                                                + +
                                                + + + + +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + + + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                + + + + + + + +

                                                Module Replacement for An Implemented Algorithm

                                                + + + + +

RLLTE allows developers to replace the built-in modules of implemented algorithms, making it easy to compare performance and improve algorithms.

                                                +

                                                Use built-in modules

                                                +

For instance, if we want to use the PPO agent to solve Atari games, it suffices to write a train.py like: +

                                                train.py
                                                from rllte.agent import PPO
                                                +from rllte.env import make_atari_env
                                                +
                                                +if __name__ == "__main__":
                                                +    # env setup
                                                +    device = "cuda:0"
                                                +    env = make_atari_env(device=device)
                                                +    eval_env = make_atari_env(device=device)
                                                +    # create agent
                                                +    agent = PPO(env=env, 
                                                +                eval_env=eval_env, 
                                                +                device=device,
                                                +                tag="ppo_atari")
                                                +    # start training
                                                +    agent.train(num_train_steps=5000)
                                                +
                                                +Run train.py and you'll see the following output: +
                                                [08/04/2023 03:45:54 PM] - [INFO.] - Invoking RLLTE Engine...
                                                +[08/04/2023 03:45:54 PM] - [INFO.] - ================================================================================
                                                +[08/04/2023 03:45:54 PM] - [INFO.] - Tag               : ppo_atari
                                                +[08/04/2023 03:45:54 PM] - [INFO.] - Device            : NVIDIA GeForce RTX 3090
                                                +[08/04/2023 03:45:55 PM] - [DEBUG] - Agent             : PPO
                                                +[08/04/2023 03:45:55 PM] - [DEBUG] - Encoder           : MnihCnnEncoder
                                                +[08/04/2023 03:45:55 PM] - [DEBUG] - Policy            : OnPolicySharedActorCritic
                                                +[08/04/2023 03:45:55 PM] - [DEBUG] - Storage           : VanillaRolloutStorage
                                                +[08/04/2023 03:45:55 PM] - [DEBUG] - Distribution      : Categorical
                                                +[08/04/2023 03:45:55 PM] - [DEBUG] - Augmentation      : False
                                                +[08/04/2023 03:45:55 PM] - [DEBUG] - Intrinsic Reward  : False
                                                +[08/04/2023 03:45:55 PM] - [DEBUG] - ================================================================================
                                                +[08/04/2023 03:45:56 PM] - [EVAL.] - S: 0           | E: 0           | L: 23          | R: 24.000      | T: 0:00:02    
                                                +[08/04/2023 03:45:57 PM] - [TRAIN] - S: 1024        | E: 8           | L: 44          | R: 99.000      | FPS: 346.187   | T: 0:00:02    
                                                +[08/04/2023 03:45:58 PM] - [TRAIN] - S: 2048        | E: 16          | L: 58          | R: 207.000     | FPS: 514.168   | T: 0:00:03    
                                                +[08/04/2023 03:45:59 PM] - [TRAIN] - S: 3072        | E: 24          | L: 43          | R: 70.000      | FPS: 619.411   | T: 0:00:04    
                                                +[08/04/2023 03:46:00 PM] - [TRAIN] - S: 4096        | E: 32          | L: 43          | R: 67.000      | FPS: 695.523   | T: 0:00:05    
                                                +[08/04/2023 03:46:00 PM] - [INFO.] - Training Accomplished!
                                                +[08/04/2023 03:46:00 PM] - [INFO.] - Model saved at: /export/yuanmingqi/code/rllte/logs/ppo_atari/2023-08-04-03-45-54/model
                                                +

                                                +

Suppose we want to use a ResNet-based encoder; it suffices to replace the encoder module using the .set function: +

                                                train.py
                                                from rllte.agent import PPO
                                                +from rllte.env import make_atari_env
                                                +from rllte.xploit.encoder import EspeholtResidualEncoder
                                                +
                                                +if __name__ == "__main__":
                                                +    # env setup
                                                +    device = "cuda:0"
                                                +    env = make_atari_env(device=device)
                                                +    eval_env = make_atari_env(device=device)
                                                +    # create agent
                                                +    feature_dim = 512
                                                +    agent = PPO(env=env, 
                                                +                eval_env=eval_env, 
                                                +                device=device,
                                                +                tag="ppo_atari",
                                                +                feature_dim=feature_dim)
                                                +    # create a new encoder
                                                +    encoder = EspeholtResidualEncoder(
                                                +        observation_space=env.observation_space,
                                                +        feature_dim=feature_dim)
                                                +    # set the new encoder
                                                +    agent.set(encoder=encoder)
                                                +    # start training
                                                +    agent.train(num_train_steps=5000)
                                                +
+Run train.py and you'll see that the old MnihCnnEncoder has been replaced by EspeholtResidualEncoder: +
                                                [08/04/2023 03:46:38 PM] - [INFO.] - Invoking RLLTE Engine...
                                                +[08/04/2023 03:46:38 PM] - [INFO.] - ================================================================================
                                                +[08/04/2023 03:46:38 PM] - [INFO.] - Tag               : ppo_atari
                                                +[08/04/2023 03:46:38 PM] - [INFO.] - Device            : NVIDIA GeForce RTX 3090
                                                +[08/04/2023 03:46:38 PM] - [DEBUG] - Agent             : PPO
                                                +[08/04/2023 03:46:38 PM] - [DEBUG] - Encoder           : EspeholtResidualEncoder
                                                +[08/04/2023 03:46:38 PM] - [DEBUG] - Policy            : OnPolicySharedActorCritic
                                                +[08/04/2023 03:46:38 PM] - [DEBUG] - Storage           : VanillaRolloutStorage
                                                +[08/04/2023 03:46:38 PM] - [DEBUG] - Distribution      : Categorical
                                                +[08/04/2023 03:46:38 PM] - [DEBUG] - Augmentation      : False
                                                +[08/04/2023 03:46:38 PM] - [DEBUG] - Intrinsic Reward  : False
                                                +[08/04/2023 03:46:38 PM] - [DEBUG] - ================================================================================
                                                +...
                                                +
                                                +For more replaceable modules, please refer to https://docs.rllte.dev/api/.

                                                +

                                                Using custom modules

                                                +

Developers can also perform replacement using custom modules; see Make A Custom Module for more details.
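As a taste of what a custom module can look like, here is a minimal sketch of a custom CNN encoder. The class name TinyCnnEncoder is hypothetical, and the sketch assumes a BaseEncoder prototype is exposed under rllte.common.prototype with an (observation_space, feature_dim) constructor, mirroring the built-in encoders used above; consult Make A Custom Module for the authoritative interface:
import torch as th
from torch import nn
from rllte.common.prototype import BaseEncoder  # assumed location of the encoder prototype

class TinyCnnEncoder(BaseEncoder):
    """A small CNN encoder for image observations (illustrative only)."""

    def __init__(self, observation_space, feature_dim: int = 64) -> None:
        super().__init__(observation_space, feature_dim)
        in_channels = observation_space.shape[0]
        self.trunk = nn.Sequential(
            nn.Conv2d(in_channels, 16, kernel_size=8, stride=4), nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=4, stride=2), nn.ReLU(),
            nn.Flatten(),
        )
        # infer the flattened size from a dummy forward pass
        with th.no_grad():
            n_flatten = self.trunk(th.zeros(1, *observation_space.shape)).shape[1]
        self.linear = nn.Linear(n_flatten, feature_dim)

    def forward(self, obs: th.Tensor) -> th.Tensor:
        # scale pixel observations to [0, 1] and return `feature_dim`-dimensional features
        return self.linear(self.trunk(obs / 255.0))

Such an encoder can then be plugged into an agent exactly like the built-in one above, e.g. agent.set(encoder=TinyCnnEncoder(env.observation_space, feature_dim=64)).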

                                                + + + + + + + + + + + + + +
                                                +
                                                + + + +
                                                + + + +
                                                + +
                                                + + + + +
                                                + +
                                                +
                                                +
                                                +
                                                + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/verification/index.html b/verification/index.html new file mode 100644 index 00000000..6f024651 --- /dev/null +++ b/verification/index.html @@ -0,0 +1,4129 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Verification - CORE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
                                                + +
                                                + + + + + + +
                                                + + + + + + + +
                                                + +
                                                + + + + +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + + + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                +
                                                + + + +
                                                +
                                                + + + + + + + +

                                                Verification

                                                + +

                                                Software

                                                +

                                                To ensure that RLLTE is installed correctly, we can verify the installation by running a single training script: +

                                                python -m rllte.verification
                                                +
                                                +If successful, you will see the following output: +
                                                [08/03/2023 07:30:21 PM] - [INFO.] - Invoking RLLTE Engine...
                                                +[08/03/2023 07:30:21 PM] - [INFO.] - ================================================================================
                                                +[08/03/2023 07:30:21 PM] - [INFO.] - Tag               : verification
                                                +[08/03/2023 07:30:21 PM] - [INFO.] - Device            : CPU
                                                +[08/03/2023 07:30:21 PM] - [DEBUG] - Agent             : PPO
                                                +[08/03/2023 07:30:21 PM] - [DEBUG] - Encoder           : IdentityEncoder
                                                +[08/03/2023 07:30:21 PM] - [DEBUG] - Policy            : OnPolicySharedActorCritic
                                                +[08/03/2023 07:30:21 PM] - [DEBUG] - Storage           : VanillaRolloutStorage
                                                +[08/03/2023 07:30:21 PM] - [DEBUG] - Distribution      : Categorical
                                                +[08/03/2023 07:30:21 PM] - [DEBUG] - Augmentation      : False
                                                +[08/03/2023 07:30:21 PM] - [DEBUG] - Intrinsic Reward  : False
                                                +[08/03/2023 07:30:21 PM] - [DEBUG] - ================================================================================
                                                +[08/03/2023 07:30:22 PM] - [TRAIN] - S: 512         | E: 4           | L: 428         | R: -427.000    | FPS: 1457.513  | T: 0:00:00    
                                                +[08/03/2023 07:30:22 PM] - [TRAIN] - S: 640         | E: 5           | L: 428         | R: -427.000    | FPS: 1513.510  | T: 0:00:00    
                                                +[08/03/2023 07:30:22 PM] - [TRAIN] - S: 768         | E: 6           | L: 353         | R: -352.000    | FPS: 1551.423  | T: 0:00:00    
                                                +[08/03/2023 07:30:22 PM] - [TRAIN] - S: 896         | E: 7           | L: 353         | R: -352.000    | FPS: 1581.616  | T: 0:00:00    
                                                +[08/03/2023 07:30:22 PM] - [INFO.] - Training Accomplished!
                                                +[08/03/2023 07:30:22 PM] - [INFO.] - Model saved at: /export/yuanmingqi/code/rllte/logs/verification/2023-08-03-07-30-21/model
                                                +VERIFICATION PASSED!
                                                +

                                                +

                                                Hardware

                                                +

Additionally, to check whether your GPU driver and CUDA are enabled and accessible by PyTorch, run the following commands, which return True if the CUDA device is available: +

                                                import torch
                                                +torch.cuda.is_available()
                                                +

                                                +

                                                For HUAWEI NPU:

                                                +
                                                import torch
                                                +import torch_npu
                                                +torch.npu.is_available()
                                                +
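Note that is_available() only reports that a device is visible. As an extra sanity check, you can run a small tensor operation on the device to confirm it is actually usable (a minimal sketch, using cuda:0 as an example; swap in npu:0 after importing torch_npu for an NPU):
import torch as th

device = "cuda:0"  # or "npu:0" after `import torch_npu`
x = th.ones(2, 2, device=device)
print((x @ x).sum())  # expected: tensor(8., device='cuda:0')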
                                                + + + + + + + + + + + + + +
                                                +
                                                + + + +
                                                + + + +
                                                + + + +
                                                +
                                                +
                                                +
                                                + + + + + + + + + + + + + + + + \ No newline at end of file