Skip to content

Commit

Permalink
Revert "Improving Hydra+DDP support (#11617)"
Browse files Browse the repository at this point in the history
This reverts commit 45ca781.
  • Loading branch information
awaelchli committed Nov 18, 2022
1 parent 609b258 commit eaed66d
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 171 deletions.
45 changes: 35 additions & 10 deletions src/pytorch_lightning/strategies/launchers/subprocess_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import pytorch_lightning as pl
from lightning_lite.plugins import ClusterEnvironment
from lightning_lite.strategies.launchers.base import _Launcher
from lightning_lite.strategies.launchers.subprocess_script import _basic_subprocess_cmd, _hydra_subprocess_cmd

_HYDRA_AVAILABLE = RequirementCache("hydra-core")

Expand Down Expand Up @@ -100,6 +99,32 @@ def _call_children_scripts(self) -> None:
# allow the user to pass the node rank
os.environ["NODE_RANK"] = str(self.cluster_environment.node_rank())
os.environ["LOCAL_RANK"] = str(self.cluster_environment.local_rank())

# Check if the current calling command looked like `python a/b/c.py` or `python -m a.b.c`
# See https://docs.python.org/3/reference/import.html#main-spec
if __main__.__spec__ is None: # pragma: no-cover
# Script called as `python a/b/c.py`
if _HYDRA_AVAILABLE:
# when user is using hydra find the absolute path
from hydra.utils import to_absolute_path

to_abs_path = to_absolute_path
else:
to_abs_path = os.path.abspath

# pull out the commands used to run the script and resolve the absolute file path
command = sys.argv
try:
full_path = to_abs_path(command[0])
except Exception:
full_path = os.path.abspath(command[0])

command[0] = full_path
# use the same python interpreter and actually running
command = [sys.executable] + command
else: # Script called as `python -m a.b.c`
command = [sys.executable, "-m", __main__.__spec__.name] + sys.argv[1:]

os.environ["WORLD_SIZE"] = f"{self.num_processes * self.num_nodes}"

for local_rank in range(1, self.num_processes):
Expand All @@ -110,18 +135,18 @@ def _call_children_scripts(self) -> None:
if os.environ.get("PL_GLOBAL_SEED") is None and "PL_GLOBAL_SEED" in env_copy:
del env_copy["PL_GLOBAL_SEED"]

hydra_in_use = False
# start process
# if hydra is available and initialized, make sure to set the cwd correctly
cwd: Optional[str] = None
if _HYDRA_AVAILABLE:
from hydra.core.hydra_config import HydraConfig
from hydra.utils import get_original_cwd

hydra_in_use = HydraConfig.initialized()

if hydra_in_use:
command = _hydra_subprocess_cmd(local_rank)
else:
command = _basic_subprocess_cmd()

subprocess.Popen(command, env=env_copy)
if HydraConfig.initialized():
cwd = get_original_cwd()
os_cwd = f'"{os.getcwd()}"'
command += [f"hydra.run.dir={os_cwd}", f"hydra.job.name=train_ddp_process_{local_rank}"]
subprocess.Popen(command, env=env_copy, cwd=cwd)

# starting all processes at once can cause issues
# with dataloaders delay between 1-10 seconds
Expand Down
161 changes: 0 additions & 161 deletions tests/tests_pytorch/strategies/launchers/test_subprocess_script.py

This file was deleted.

0 comments on commit eaed66d

Please sign in to comment.