[Minor] Some final adjustments for scheduling models #195

Merged · 4 commits · Jun 14, 2024
1 change: 1 addition & 0 deletions configs/experiment/scheduling/am-pomo.yaml
@@ -14,6 +14,7 @@ model:
_target_: rl4co.models.L2DAttnPolicy
env_name: ${env.name}
scaling_factor: ${scaling_factor}
normalization: "batch"
batch_size: 64
num_starts: 10
num_augment: 0
8 changes: 1 addition & 7 deletions configs/experiment/scheduling/am-ppo.yaml
@@ -43,14 +43,8 @@ model:
batch_size: 128
val_batch_size: 512
test_batch_size: 64
# Song et al use 1000 iterations over batches of 20 = 20_000
# We train 10 epochs on a set of 2000 instance = 20_000
train_data_size: 2000
mini_batch_size: 512
reward_scale: scale
optimizer_kwargs:
lr: 1e-4

env:
stepwise_reward: True
_torchrl_mode: True
stepwise_reward: True
8 changes: 5 additions & 3 deletions configs/experiment/scheduling/base.yaml
@@ -22,17 +22,19 @@ trainer:

seed: 12345678

scaling_factor: 20
scaling_factor: ${env.generator_params.max_processing_time}

model:
_target_: ???
batch_size: ???
train_data_size: 2_000
val_data_size: 1_000
test_data_size: 1_000
test_data_size: 100
optimizer_kwargs:
lr: 1e-4
lr: 2e-4
weight_decay: 1e-6
lr_scheduler: "ExponentialLR"
lr_scheduler_kwargs:
gamma: 0.95
reward_scale: scale
max_grad_norm: 1
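
Note on the `scaling_factor` change above: it is now an OmegaConf interpolation that resolves to the generator's `max_processing_time` instead of a hard-coded 20. A minimal sketch of how such an interpolation resolves (the concrete numbers are placeholders, not the repository defaults):

```python
# Minimal OmegaConf sketch of the interpolation used in base.yaml; the values
# here are illustrative, not the repository defaults.
from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {
        "env": {"generator_params": {"max_processing_time": 20}},
        "scaling_factor": "${env.generator_params.max_processing_time}",
    }
)
assert cfg.scaling_factor == 20  # resolved lazily when the key is accessed
```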
14 changes: 6 additions & 8 deletions configs/experiment/scheduling/gnn-ppo.yaml
@@ -12,24 +12,22 @@ logger:
model:
_target_: rl4co.models.L2DPPOModel
policy_kwargs:
embed_dim: 128
embed_dim: 256
num_encoder_layers: 3
scaling_factor: ${scaling_factor}
max_grad_norm: 1
ppo_epochs: 3
ppo_epochs: 2
het_emb: False
normalization: instance
test_decode_type: greedy
batch_size: 128
val_batch_size: 512
test_batch_size: 64
mini_batch_size: 512
reward_scale: scale
optimizer_kwargs:
lr: 1e-4


trainer:
max_epochs: 10


env:
stepwise_reward: True
_torchrl_mode: True
stepwise_reward: True
1 change: 1 addition & 0 deletions configs/experiment/scheduling/hgnn-pomo.yaml
@@ -18,6 +18,7 @@ model:
stepwise_encoding: False
scaling_factor: ${scaling_factor}
het_emb: True
normalization: instance
num_starts: 10
batch_size: 64
num_augment: 0
16 changes: 4 additions & 12 deletions configs/experiment/scheduling/hgnn-ppo.yaml
@@ -12,24 +12,16 @@ logger:
model:
_target_: rl4co.models.L2DPPOModel
policy_kwargs:
embed_dim: 128
embed_dim: 256
num_encoder_layers: 3
scaling_factor: ${scaling_factor}
max_grad_norm: 1
ppo_epochs: 3
ppo_epochs: 2
het_emb: True
normalization: instance
batch_size: 128
val_batch_size: 512
test_batch_size: 64
mini_batch_size: 512
reward_scale: scale
optimizer_kwargs:
lr: 1e-4

trainer:
max_epochs: 10


env:
stepwise_reward: True
_torchrl_mode: True
stepwise_reward: True
8 changes: 1 addition & 7 deletions configs/experiment/scheduling/matnet-ppo.yaml
@@ -36,13 +36,7 @@ model:
batch_size: 128
val_batch_size: 512
test_batch_size: 64
# Song et al use 1000 iterations over batches of 20 = 20_000
# We train 10 epochs on a set of 2000 instance = 20_000
mini_batch_size: 512
reward_scale: scale
optimizer_kwargs:
lr: 1e-4

env:
stepwise_reward: True
_torchrl_mode: True
stepwise_reward: True
376 changes: 283 additions & 93 deletions examples/other/2-scheduling.ipynb

Large diffs are not rendered by default.

37 changes: 28 additions & 9 deletions rl4co/envs/scheduling/fjsp/env.py
@@ -79,14 +79,32 @@ def __init__(
else:
generator = FJSPGenerator(**generator_params)
self.generator = generator
self.num_mas = generator.num_mas
self.num_jobs = generator.num_jobs
self.n_ops_max = generator.max_ops_per_job * self.num_jobs
self._num_mas = generator.num_mas
self._num_jobs = generator.num_jobs
self._n_ops_max = generator.max_ops_per_job * self.num_jobs

self.mask_no_ops = mask_no_ops
self.check_mask = check_mask
self.stepwise_reward = stepwise_reward
self._make_spec(self.generator)

@property
def num_mas(self):
return self._num_mas

@property
def num_jobs(self):
return self._num_jobs

@property
def n_ops_max(self):
return self._n_ops_max

def set_instance_params(self, td):
self._num_jobs = td["start_op_per_job"].size(1)
self._num_mas = td["proc_times"].size(1)
self._n_ops_max = td["proc_times"].size(2)

def _decode_graph_structure(self, td: TensorDict):
batch_size = td.batch_size
start_op_per_job = td["start_op_per_job"]
@@ -142,6 +160,8 @@ def _decode_graph_structure(self, td: TensorDict):
return td, n_ops_max

def _reset(self, td: TensorDict = None, batch_size=None) -> TensorDict:
self.set_instance_params(td)

td_reset = td.clone()

td_reset, n_ops_max = self._decode_graph_structure(td_reset)
@@ -333,10 +353,10 @@ def _make_step(self, td: TensorDict) -> TensorDict:
td["ops_sequence_order"] - gather_by_index(td["job_ops_adj"], selected_job, 1)
).clip(0)
# some checks
assert torch.allclose(
td["proc_times"].sum(1).gt(0).sum(1), # num ops with eligible machine
(~(td["op_scheduled"] + td["pad_mask"])).sum(1), # num unscheduled ops
)
# assert torch.allclose(
# td["proc_times"].sum(1).gt(0).sum(1), # num ops with eligible machine
# (~(td["op_scheduled"] + td["pad_mask"])).sum(1), # num unscheduled ops
# )

return td

@@ -483,7 +503,6 @@ def get_num_starts(self, td):
# NOTE in the paper they use N_s = 100
return 100

@staticmethod
def load_data(fpath, batch_size=[]):
def load_data(self, fpath, batch_size=[]):
g = FJSPFileGenerator(fpath)
return g(batch_size=batch_size)
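
The changes above turn the instance dimensions into read-only properties that `set_instance_params` refreshes from the TensorDict during `_reset`, and make `load_data` an instance method backed by `FJSPFileGenerator`. A rough usage sketch (the file path and batch size are hypothetical):

```python
# Rough usage sketch of the reworked FJSPEnv; the benchmark path and batch
# size are made up for illustration.
from rl4co.envs.scheduling.fjsp.env import FJSPEnv

env = FJSPEnv()
td = env.load_data("data/fjsp/mk01.fjs", batch_size=[1])  # hypothetical file
td = env.reset(td)  # _reset calls set_instance_params(td) before decoding the graph
print(env.num_jobs, env.num_mas, env.n_ops_max)  # now reflect the loaded instance
```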
3 changes: 2 additions & 1 deletion rl4co/envs/scheduling/fjsp/generator.py
@@ -15,7 +15,6 @@


class FJSPGenerator(Generator):

"""Data generator for the Flexible Job-Shop Scheduling Problem (FJSP).

Args:
@@ -209,6 +208,8 @@ def __init__(self, file_path: str, n_ops_max: int = None, **unused_kwargs):
self.num_mas = num_machines
self.num_jobs = num_jobs
self.max_ops_per_job = max_ops_per_job
self.n_ops_max = max_ops_per_job * num_jobs

self.start_idx = 0

def _generate(self, batch_size: List[int]) -> TensorDict:
25 changes: 3 additions & 22 deletions rl4co/models/nn/env_embeddings/init.py
@@ -407,6 +407,7 @@
mean_durations = proc_times.sum(1) / (proc_times.gt(0).sum(1) + 1e-9)
feats = [
mean_durations / self.scaling_factor,
# td["lbs"] / self.scaling_factor,
td["is_ready"],
td["num_eligible"],
td["ops_job_map"],
@@ -430,20 +431,10 @@

class FJSPInitEmbedding(JSSPInitEmbedding):
def __init__(self, embed_dim, linear_bias=False, scaling_factor: int = 100):
super().__init__(embed_dim, linear_bias, scaling_factor, num_op_feats=5)
super().__init__(embed_dim, linear_bias, scaling_factor)
self.init_ma_embed = nn.Linear(1, self.embed_dim, bias=linear_bias)
self.edge_embed = nn.Linear(1, embed_dim, bias=linear_bias)

def _op_features(self, td):
feats = [
td["lbs"] / self.scaling_factor,
td["is_ready"],
td["num_eligible"],
td["op_scheduled"],
td["ops_job_map"],
]
return torch.stack(feats, dim=-1)

def forward(self, td: TensorDict):
ops_emb = self._init_ops_embed(td)
ma_emb = self._init_machine_embed(td)
@@ -471,19 +462,9 @@
linear_bias: bool = False,
scaling_factor: int = 1000,
):
super().__init__(embed_dim, linear_bias, scaling_factor, num_op_feats=5)
super().__init__(embed_dim, linear_bias, scaling_factor)

[Codecov/patch] Added line #L465 in rl4co/models/nn/env_embeddings/init.py was not covered by tests
self.init_ma_embed = nn.Linear(1, self.embed_dim, bias=linear_bias)

def _op_features(self, td):
feats = [
td["lbs"] / self.scaling_factor,
td["is_ready"],
td["op_scheduled"],
td["num_eligible"],
td["ops_job_map"],
]
return torch.stack(feats, dim=-1)

def _init_machine_embed(self, td: TensorDict):
busy_for = (td["busy_until"] - td["time"].unsqueeze(1)) / self.scaling_factor
ma_embeddings = self.init_ma_embed(busy_for.unsqueeze(2))
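
With the `_op_features` overrides removed, `FJSPInitEmbedding` and the heterogeneous variant fall back to the operation features defined in `JSSPInitEmbedding`. A partial sketch of that shared feature stack, reconstructed only from the visible hunk (the real method may stack additional features):

```python
# Partial sketch of the shared JSSP operation features; based only on the
# lines visible in this diff, so treat it as illustrative.
import torch
from tensordict import TensorDict


def op_features(td: TensorDict, scaling_factor: float) -> torch.Tensor:
    proc_times = td["proc_times"]  # (batch, num_machines, num_ops)
    # mean processing time over the machines that can process each op
    mean_durations = proc_times.sum(1) / (proc_times.gt(0).sum(1) + 1e-9)
    feats = [
        mean_durations / scaling_factor,
        td["is_ready"],
        td["num_eligible"],
        td["ops_job_map"],
    ]
    return torch.stack(feats, dim=-1)  # (batch, num_ops, num_features)
```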
2 changes: 2 additions & 0 deletions rl4co/models/rl/common/utils.py
@@ -20,6 +20,8 @@
def __call__(self, scores: torch.Tensor):
if self.scale is None:
return scores
elif isinstance(self.scale, int):
return scores / self.scale

[Codecov/patch] Added lines #L23 - L24 in rl4co/models/rl/common/utils.py were not covered by tests
# Score scaling
self.update(scores)
tensor_to_kwargs = dict(dtype=scores.dtype, device=scores.device)
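
The new branch lets `reward_scale` be either the string `"scale"` (running statistics) or a plain integer divisor. A self-contained illustration of the intended behaviour, using a stand-in class rather than the actual scaler in rl4co:

```python
# Stand-in scaler illustrating the integer option added above; the real class
# keeps running statistics for the "scale" mode.
import torch


class FixedOrRunningScaler:
    def __init__(self, scale=None):
        self.scale = scale

    def __call__(self, scores: torch.Tensor) -> torch.Tensor:
        if self.scale is None:
            return scores
        if isinstance(self.scale, int):
            return scores / self.scale  # fixed divisor, e.g. reward_scale: 20
        # "scale": simplified stand-in for the statistics-based normalization
        return scores / (scores.std() + 1e-8)


scaler = FixedOrRunningScaler(scale=100)
print(scaler(torch.tensor([200.0, 50.0])))  # tensor([2.0000, 0.5000])
```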
21 changes: 12 additions & 9 deletions rl4co/models/rl/ppo/stepwise_ppo.py
@@ -1,13 +1,13 @@
import copy

from typing import Any
from typing import Any, Union

import torch
import torch.nn as nn
import torch.nn.functional as F

from torchrl.data.replay_buffers import (
LazyTensorStorage,
LazyMemmapStorage,
ListStorage,
SamplerWithoutReplacement,
TensorDictReplayBuffer,
@@ -23,13 +23,17 @@

def make_replay_buffer(buffer_size, batch_size, device="cpu"):
if device == "cpu":
storage = LazyTensorStorage(buffer_size, device="cpu")
storage = LazyMemmapStorage(buffer_size, device="cpu")
prefetch = 3

[Codecov/patch] Added lines #L26 - L27 in rl4co/models/rl/ppo/stepwise_ppo.py were not covered by tests
else:
storage = ListStorage(buffer_size)
prefetch = None
return TensorDictReplayBuffer(
storage=storage,
batch_size=batch_size,
sampler=SamplerWithoutReplacement(drop_last=True),
pin_memory=False,
prefetch=prefetch,
)


@@ -51,7 +55,7 @@
metrics: dict = {
"train": ["loss", "surrogate_loss", "value_loss", "entropy"],
},
reward_scale: str = None,
reward_scale: Union[str, int] = None,
**kwargs,
):
super().__init__(env, policy, metrics=metrics, batch_size=batch_size, **kwargs)
@@ -143,13 +147,12 @@
while not next_td["done"].all():
with torch.no_grad():
td = self.policy_old.act(next_td, self.env, phase="train")

assert self.env._torchrl_mode, "Use torchrl mode in stepwise PPO"
td = self.env.step(td)
next_td = td.pop("next")
# get next state
next_td = self.env.step(td)["next"]
# get reward of action
reward = self.env.get_reward(next_td, None)
reward = self.scaler(reward)

# add reward to prior state
td.set("reward", reward)
# add tensordict with action, logprobs and reward information to buffer
self.rb.extend(td)
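
For reference, a usage sketch of the buffer produced by `make_replay_buffer` after the switch to `LazyMemmapStorage`: on CPU the storage is memory-mapped to disk, and `prefetch=3` lets sampling overlap with the PPO update. Buffer and batch sizes below are illustrative, not taken from the configs.

```python
# Usage sketch for the torchrl replay buffer configured above; sizes are
# illustrative and not taken from the experiment configs.
import torch
from tensordict import TensorDict
from torchrl.data.replay_buffers import (
    LazyMemmapStorage,
    SamplerWithoutReplacement,
    TensorDictReplayBuffer,
)

rb = TensorDictReplayBuffer(
    storage=LazyMemmapStorage(100_000, device="cpu"),
    batch_size=512,
    sampler=SamplerWithoutReplacement(drop_last=True),
    pin_memory=False,
    prefetch=3,
)
rb.extend(TensorDict({"reward": torch.zeros(1024, 1)}, batch_size=[1024]))
batch = rb.sample()  # TensorDict with batch_size == 512
```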
1 change: 0 additions & 1 deletion rl4co/models/zoo/l2d/decoder.py
@@ -178,7 +178,6 @@ def __init__(
actor_hidden_dim: int = 128,
actor_hidden_layers: int = 2,
num_encoder_layers: int = 3,
num_heads: int = 8,
normalization: str = "batch",
het_emb: bool = False,
stepwise: bool = False,
5 changes: 4 additions & 1 deletion rl4co/models/zoo/l2d/policy.py
@@ -35,6 +35,7 @@ def __init__(
env_name: str = "fjsp",
het_emb: bool = True,
scaling_factor: int = 1000,
normalization: str = "batch",
init_embedding: Optional[nn.Module] = None,
stepwise_encoding: bool = False,
tanh_clipping: float = 10,
@@ -77,6 +78,7 @@ def __init__(
het_emb=het_emb,
stepwise=stepwise_encoding,
scaling_factor=scaling_factor,
normalization=normalization,
)

# Pass to constructive policy
@@ -101,6 +103,7 @@
num_heads: int = 8,
num_encoder_layers: int = 4,
scaling_factor: int = 1000,
normalization: str = "batch",
env_name: str = "fjsp",
init_embedding: Optional[nn.Module] = None,
tanh_clipping: float = 10,
@@ -122,7 +125,7 @@
embed_dim=embed_dim,
num_heads=num_heads,
num_layers=num_encoder_layers,
normalization="batch",
normalization=normalization,
feedforward_hidden=embed_dim * 2,
init_embedding=init_embedding,
)
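
With `normalization` now forwarded through both policy constructors, the experiment configs above can switch between batch and instance normalization without touching the encoder. A minimal construction sketch (only kwargs visible in this diff are assumed to exist):

```python
# Minimal construction sketch; kwargs beyond those visible in the diff are
# not assumed.
from rl4co.models import L2DAttnPolicy

policy = L2DAttnPolicy(
    env_name="fjsp",
    scaling_factor=20,
    normalization="instance",  # previously hard-coded to "batch"
)
```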