Skip to content

Commit

Permalink
[DevOps] update devops files.
Browse files Browse the repository at this point in the history
  • Loading branch information
fedml-alex committed Nov 24, 2023
1 parent 6112691 commit c3c4165
Show file tree
Hide file tree
Showing 9 changed files with 16 additions and 35 deletions.
7 changes: 6 additions & 1 deletion devops/dockerfile/device-image/Dockerfile-Base
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ ADD ./devops/scripts/setup-aws-cli.sh ./fedml/setup-aws-cli.sh
ADD ./devops/scripts/set-aws-credentials.sh ./fedml/set-aws-credentials.sh
ADD ./devops/scripts/docker /usr/bin/
ADD ./devops/scripts/kubectl /usr/bin/
ADD ./devops/scripts/start-redis.sh ./fedml/start-redis.sh

ADD ./devops/scripts/requirements.txt ./fedml/requirements.txt
ADD ./devops/scripts/setup-conda-env.sh ./fedml/setup-conda-env.sh
Expand All @@ -19,7 +20,11 @@ RUN chmod a+x ./fedml/setup-aws-cli.sh
RUN chmod a+x ./fedml/set-aws-credentials.sh
RUN ./fedml/setup-aws-cli.sh

RUN apt-get update
RUN curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg
RUN echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/redis.list
RUN apt-get -y update
RUN apt-get install -y redis

#RUN apt-get -y install gettext-base
#RUN apt-get -y install unar wget
#RUN apt-get -y install libquadmath0
Expand Down
6 changes: 1 addition & 5 deletions devops/dockerfile/device-image/Dockerfile-Dev
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,10 @@ RUN pip3 install -e ./
#RUN pip3 install -e '.[mxnet]'
RUN pip3 install MNN==1.1.6

RUN curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg; \
echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/redis.list; \
apt-get -y update; apt-get install -y redis

WORKDIR /fedml

ENV MODE=normal FEDML_VERSION=${VERSION} ACCOUNT_ID=0 SERVER_DEVICE_ID=0 \
FEDML_PACKAGE_NAME=package FEDML_PACKAGE_URL=s3_url \
FEDML_RUNNER_CMD=3dsad

CMD nohup redis-server &;python3 ./fedml-pip/fedml/computing/scheduler/master/server_daemon.py -t login -u ${ACCOUNT_ID} -v ${FEDML_VERSION} -r cloud_server -rc ${FEDML_RUNNER_CMD} -id ${SERVER_DEVICE_ID}; ./runner.sh
CMD bash ./start-redis.sh; python3 ./fedml-pip/fedml/computing/scheduler/master/server_daemon.py -t login -u ${ACCOUNT_ID} -v ${FEDML_VERSION} -r cloud_server -rc ${FEDML_RUNNER_CMD} -id ${SERVER_DEVICE_ID}; bash ./runner.sh
6 changes: 1 addition & 5 deletions devops/dockerfile/device-image/Dockerfile-Release
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,10 @@ RUN pip3 install -e ./
#RUN pip3 install -e '.[mxnet]'
RUN pip3 install MNN==1.1.6

RUN curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg; \
echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/redis.list; \
apt-get -y update; apt-get install -y redis

WORKDIR /fedml

ENV MODE=normal FEDML_VERSION=${VERSION} ACCOUNT_ID=0 SERVER_DEVICE_ID=0 \
FEDML_PACKAGE_NAME=package FEDML_PACKAGE_URL=s3_url \
FEDML_RUNNER_CMD=3dsad

CMD nohup redis-server &;python3 ./fedml-pip/fedml/computing/scheduler/master/server_daemon.py -t login -u ${ACCOUNT_ID} -v ${FEDML_VERSION} -r cloud_server -rc ${FEDML_RUNNER_CMD} -id ${SERVER_DEVICE_ID}; ./runner.sh
CMD bash ./start-redis.sh; python3 ./fedml-pip/fedml/computing/scheduler/master/server_daemon.py -t login -u ${ACCOUNT_ID} -v ${FEDML_VERSION} -r cloud_server -rc ${FEDML_RUNNER_CMD} -id ${SERVER_DEVICE_ID}; bash ./runner.sh
6 changes: 1 addition & 5 deletions devops/dockerfile/device-image/Dockerfile-Test
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,10 @@ RUN pip3 install -e ./
#RUN pip3 install -e '.[mxnet]'
RUN pip3 install MNN==1.1.6

RUN curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg; \
echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/redis.list; \
apt-get -y update; apt-get install -y redis

WORKDIR /fedml

ENV MODE=normal FEDML_VERSION=${VERSION} ACCOUNT_ID=0 SERVER_DEVICE_ID=0 \
FEDML_PACKAGE_NAME=package FEDML_PACKAGE_URL=s3_url \
FEDML_RUNNER_CMD=3dsad

CMD python3 ./fedml-pip/fedml/computing/scheduler/master/server_daemon.py -t login -u ${ACCOUNT_ID} -v ${FEDML_VERSION} -r cloud_server -rc ${FEDML_RUNNER_CMD} -id ${SERVER_DEVICE_ID}&& ./runner.sh;
CMD bash ./start-redis.sh; python3 ./fedml-pip/fedml/computing/scheduler/master/server_daemon.py -t login -u ${ACCOUNT_ID} -v ${FEDML_VERSION} -r cloud_server -rc ${FEDML_RUNNER_CMD} -id ${SERVER_DEVICE_ID}; bash ./runner.sh;
8 changes: 2 additions & 6 deletions devops/dockerfile/server-agent/Dockerfile-Dev
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ARG VERSION=dev
ARG IS_BUILDING_GPU_IMAGE=0
ARG BASE_IMAGE=public.ecr.aws/x6k8q1x9/fedml-device-image:base
ARG BASE_IMAGE=docker.io/fedml/fedml-device-image:base
FROM ${BASE_IMAGE}

ADD ./devops/scripts/runner.sh ./fedml/runner.sh
Expand All @@ -19,15 +19,11 @@ RUN pip3 install -e ./
#RUN pip3 install -e '.[jax]'
#RUN pip3 install -e '.[mxnet]'

RUN curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg; \
echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/redis.list; \
apt-get -y update; apt-get install -y redis

WORKDIR /fedml

ENV MODE=normal FEDML_VERSION=${VERSION} ACCOUNT_ID=0 SERVER_AGENT_ID=0 \
AWS_IAM_ACCESS_ID=0 \
AWS_IAM_ACCESS_KEY=0 \
AWS_REGION=0

CMD nohup redis-server &;./set-aws-credentials.sh ${AWS_IAM_ACCESS_ID} ${AWS_IAM_ACCESS_KEY} ${AWS_REGION};python3 ./fedml-pip/fedml/computing/scheduler/master/server_daemon.py -t login -u ${ACCOUNT_ID} -v ${FEDML_VERSION} -r cloud_agent -id ${SERVER_AGENT_ID};./runner.sh
CMD bash ./start-redis.sh; ./set-aws-credentials.sh ${AWS_IAM_ACCESS_ID} ${AWS_IAM_ACCESS_KEY} ${AWS_REGION};python3 ./fedml-pip/fedml/computing/scheduler/master/server_daemon.py -t login -u ${ACCOUNT_ID} -v ${FEDML_VERSION} -r cloud_agent -id ${SERVER_AGENT_ID};bash ./runner.sh
8 changes: 2 additions & 6 deletions devops/dockerfile/server-agent/Dockerfile-Release
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ARG VERSION=release
ARG IS_BUILDING_GPU_IMAGE=0
ARG BASE_IMAGE=public.ecr.aws/x6k8q1x9/fedml-device-image:base
ARG BASE_IMAGE=docker.io/fedml/fedml-device-image:base
FROM ${BASE_IMAGE}

ADD ./devops/scripts/runner.sh ./fedml/runner.sh
Expand All @@ -19,15 +19,11 @@ RUN pip3 install -e ./
#RUN pip3 install -e '.[jax]'
#RUN pip3 install -e '.[mxnet]'

RUN curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg; \
echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/redis.list; \
apt-get -y update; apt-get install -y redis

WORKDIR /fedml

ENV MODE=normal FEDML_VERSION=${VERSION} ACCOUNT_ID=0 SERVER_AGENT_ID=0 \
AWS_IAM_ACCESS_ID=0 \
AWS_IAM_ACCESS_KEY=0 \
AWS_REGION=0

CMD nohup redis-server &;./set-aws-credentials.sh ${AWS_IAM_ACCESS_ID} ${AWS_IAM_ACCESS_KEY} ${AWS_REGION};python3 ./fedml-pip/fedml/computing/scheduler/master/server_daemon.py -t login -u ${ACCOUNT_ID} -v ${FEDML_VERSION} -r cloud_agent -id ${SERVER_AGENT_ID};./runner.sh
CMD bash ./start-redis.sh; ./set-aws-credentials.sh ${AWS_IAM_ACCESS_ID} ${AWS_IAM_ACCESS_KEY} ${AWS_REGION};python3 ./fedml-pip/fedml/computing/scheduler/master/server_daemon.py -t login -u ${ACCOUNT_ID} -v ${FEDML_VERSION} -r cloud_agent -id ${SERVER_AGENT_ID}; bash ./runner.sh
8 changes: 2 additions & 6 deletions devops/dockerfile/server-agent/Dockerfile-Test
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ARG VERSION=test
ARG IS_BUILDING_GPU_IMAGE=0
ARG BASE_IMAGE=public.ecr.aws/x6k8q1x9/fedml-device-image:base
ARG BASE_IMAGE=docker.io/fedml/fedml-device-image:base
FROM ${BASE_IMAGE}

ADD ./devops/scripts/runner.sh ./fedml/runner.sh
Expand All @@ -19,15 +19,11 @@ RUN pip3 install -e ./
#RUN pip3 install -e '.[jax]'
#RUN pip3 install -e '.[mxnet]'

RUN curl -fsSL https://packages.redis.io/gpg | sudo gpg --dearmor -o /usr/share/keyrings/redis-archive-keyring.gpg; \
echo "deb [signed-by=/usr/share/keyrings/redis-archive-keyring.gpg] https://packages.redis.io/deb $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/redis.list; \
apt-get -y update; apt-get install -y redis

WORKDIR /fedml

ENV MODE=normal FEDML_VERSION=${VERSION} ACCOUNT_ID=0 SERVER_AGENT_ID=0 \
AWS_IAM_ACCESS_ID=0 \
AWS_IAM_ACCESS_KEY=0 \
AWS_REGION=0

CMD nohup redis-server &;./set-aws-credentials.sh ${AWS_IAM_ACCESS_ID} ${AWS_IAM_ACCESS_KEY} ${AWS_REGION};python3 ./fedml-pip/fedml/computing/scheduler/master/server_daemon.py -t login -u ${ACCOUNT_ID} -v ${FEDML_VERSION} -r cloud_agent -id ${SERVER_AGENT_ID};./runner.sh
CMD bash ./start-redis.sh; ./set-aws-credentials.sh ${AWS_IAM_ACCESS_ID} ${AWS_IAM_ACCESS_KEY} ${AWS_REGION};python3 ./fedml-pip/fedml/computing/scheduler/master/server_daemon.py -t login -u ${ACCOUNT_ID} -v ${FEDML_VERSION} -r cloud_agent -id ${SERVER_AGENT_ID}; bash ./runner.sh
1 change: 1 addition & 0 deletions devops/scripts/start-redis.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Start redis-server as a background job (trailing `&`) so this script returns
# immediately; `nohup` runs it immune to hangup signals.
nohup redis-server&
1 change: 0 additions & 1 deletion python/fedml/computing/scheduler/comm_utils/job_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,6 @@ def sync_run_process_gpu(self):
if all_run_processes_exited:
self.release_gpu_ids(job.job_id, job.edge_id)
except Exception as e:
raise e
logging.info(f"Exception when syncing run process.{traceback.format_exc()}")
pass

Expand Down

0 comments on commit c3c4165

Please sign in to comment.