Skip to content

Commit

Permalink
fix: empty result causes undefined variable
Browse files Browse the repository at this point in the history
  • Loading branch information
aiwantaozi authored and gitlawr committed Dec 12, 2024
1 parent 5a6e944 commit cea7999
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 33 deletions.
11 changes: 6 additions & 5 deletions gpustack/detectors/fastfetch/fastfetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,23 +222,24 @@ def _decode_gpu_devices(self, result: str) -> GPUDevicesInfo:
return devices

def _run_command(self, command, parse_output=True):
result = None
try:
result = subprocess.run(
command, capture_output=True, text=True, check=True, encoding="utf-8"
)
output = result.stdout

if result.returncode != 0:
raise Exception(f"Unexpected return code: {result.returncode}")

output = result.stdout
if output == "" or output is None:
raise Exception(f"Output is empty, return code: {result.returncode}")

except Exception as e:
raise Exception(
f"Failed to execute {command}: {e},"
f" stdout: {result.stdout}, stderr: {result.stderr}"
)
error_message = f"Failed to execute {command}: {e}"
if result:
error_message += f", stdout: {result.stdout}, stderr: {result.stderr}"
raise Exception(error_message)

if not parse_output:
return output
Expand Down
11 changes: 6 additions & 5 deletions gpustack/detectors/npu_smi/npu_smi.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,24 +185,25 @@ def decode_gpu_device_mapping(self, result: str) -> Dict[tuple[int], int]:
return mapping

def _run_command(self, command):
    """Run ``command`` and return its non-empty standard output.

    Args:
        command: Argument list handed to ``subprocess.run``.

    Returns:
        The captured stdout of the command, decoded as UTF-8 text.

    Raises:
        Exception: If the command cannot be started, exits with a non-zero
            status, or produces no output. The message carries whatever
            stdout/stderr could be captured; the original error is chained
            as ``__cause__``.
    """
    stdout = stderr = None
    try:
        result = subprocess.run(
            command, capture_output=True, text=True, check=True, encoding="utf-8"
        )
        stdout, stderr = result.stdout, result.stderr

        # check=True already raises on a non-zero exit status, so the only
        # failure left to detect here is an empty result.
        if not stdout:
            raise Exception(f"Output is empty, return code: {result.returncode}")

        return stdout
    except Exception as e:
        # With check=True the failing process never yields a CompletedProcess;
        # its captured streams are only available on the exception itself.
        if isinstance(e, subprocess.CalledProcessError):
            stdout, stderr = e.stdout, e.stderr

        error_message = f"Failed to execute {command}: {e}"
        if stdout is not None or stderr is not None:
            error_message += f", stdout: {stdout}, stderr: {stderr}"
        raise Exception(error_message) from e

def _command_gather_gpu(self):
executable_command = [
Expand Down
11 changes: 6 additions & 5 deletions gpustack/detectors/nvidia_smi/nvidia_smi.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,24 +65,25 @@ def decode_gpu_devices(self, result) -> GPUDevicesInfo: # noqa: C901
return devices

def _run_command(self, command):
    """Run ``command`` and return its non-empty standard output.

    Args:
        command: Argument list handed to ``subprocess.run``.

    Returns:
        The captured stdout of the command, decoded as UTF-8 text.

    Raises:
        Exception: If the command cannot be started, exits with a non-zero
            status, or produces no output. The message carries whatever
            stdout/stderr could be captured; the original error is chained
            as ``__cause__``.
    """
    stdout = stderr = None
    try:
        result = subprocess.run(
            command, capture_output=True, text=True, check=True, encoding="utf-8"
        )
        stdout, stderr = result.stdout, result.stderr

        # check=True already raises on a non-zero exit status, so the only
        # failure left to detect here is an empty result.
        if not stdout:
            raise Exception(f"Output is empty, return code: {result.returncode}")

        return stdout
    except Exception as e:
        # With check=True the failing process never yields a CompletedProcess;
        # its captured streams are only available on the exception itself.
        if isinstance(e, subprocess.CalledProcessError):
            stdout, stderr = e.stdout, e.stderr

        error_message = f"Failed to execute {command}: {e}"
        if stdout is not None or stderr is not None:
            error_message += f", stdout: {stdout}, stderr: {stderr}"
        raise Exception(error_message) from e

def _command_gather_gpu(self):
executable_command = [
Expand Down
46 changes: 28 additions & 18 deletions gpustack/worker/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,25 @@ def __init__(

def collect(self) -> Worker: # noqa: C901
"""Collect worker status information."""
status = WorkerStatus()

system_info = self._detector_factory.detect_system_info()
gpu_devices = self._detector_factory.detect_gpus()

status = WorkerStatus(
gpu_devices=gpu_devices,
cpu=system_info.cpu,
memory=system_info.memory,
swap=system_info.swap,
filesystem=system_info.filesystem,
os=system_info.os,
kernel=system_info.kernel,
uptime=system_info.uptime,
)
try:
system_info = self._detector_factory.detect_system_info()
status.cpu = system_info.cpu
status.memory = system_info.memory
status.swap = system_info.swap
status.filesystem = system_info.filesystem
status.os = system_info.os
status.kernel = system_info.kernel
status.uptime = system_info.uptime
except Exception as e:
logger.error(f"Failed to detect system info: {e}")

try:
gpu_devices = self._detector_factory.detect_gpus()
status.gpu_devices = gpu_devices
except Exception as e:
logger.error(f"Failed to detect GPU devices: {e}")

self._inject_unified_memory(status)
self._inject_computed_filesystem_usage(status)
Expand Down Expand Up @@ -84,7 +89,8 @@ def _inject_unified_memory(self, status: WorkerStatus):
if status.gpu_devices is not None and len(status.gpu_devices) != 0:
is_unified_memory = status.gpu_devices[0].memory.is_unified_memory

status.memory.is_unified_memory = is_unified_memory
if status.memory is not None:
status.memory.is_unified_memory = is_unified_memory

def _inject_computed_filesystem_usage(self, status: WorkerStatus):
if (
Expand Down Expand Up @@ -114,7 +120,9 @@ def _inject_computed_filesystem_usage(self, status: WorkerStatus):
except Exception as e:
logger.error(f"Failed to inject filesystem usage: {e}")

def _inject_allocated_resource(self, status: WorkerStatus) -> Allocated:
def _inject_allocated_resource( # noqa: C901
self, status: WorkerStatus
) -> Allocated:
allocated = Allocated(ram=0, vram={})
try:
model_instances = self._clientset.model_instances.list()
Expand All @@ -136,8 +144,10 @@ def _inject_allocated_resource(self, status: WorkerStatus) -> Allocated:
) + (vram.get(gpu_index) or 0)

# inject allocated resources
status.memory.allocated = allocated.ram
for ag, agv in allocated.vram.items():
status.gpu_devices[ag].memory.allocated = agv
if status.memory is not None:
status.memory.allocated = allocated.ram
if status.gpu_devices is not None:
for ag, agv in allocated.vram.items():
status.gpu_devices[ag].memory.allocated = agv
except Exception as e:
logger.error(f"Failed to inject allocated resources: {e}")

0 comments on commit cea7999

Please sign in to comment.