diff --git a/basic_demo/cli_demo_hf.py b/basic_demo/cli_demo_hf.py
index 4777ceeb..ad8d9caf 100644
--- a/basic_demo/cli_demo_hf.py
+++ b/basic_demo/cli_demo_hf.py
@@ -10,26 +10,44 @@ import torch
 from PIL import Image
 from transformers import AutoModelForCausalLM, LlamaTokenizer
+import argparse

-MODEL_PATH = 'your path of CogAgent or CogVLM'
-TOKENIZER_PATH = 'your path of vicuna'
+parser = argparse.ArgumentParser()
+parser.add_argument("--quant", choices=[4], type=int, default=None, help='quantization bits')
+parser.add_argument("--from_pretrained", type=str, default="THUDM/cogagent-chat-hf", help='pretrained ckpt') # TODO
+parser.add_argument("--local_tokenizer", type=str, default="lmsys/vicuna-7b-v1.5", help='tokenizer path') #TODO
+parser.add_argument("--fp16", action="store_true")
+parser.add_argument("--bf16", action="store_true")
+
+args = parser.parse_args()
+MODEL_PATH = args.from_pretrained
+TOKENIZER_PATH = args.local_tokenizer
 DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

 tokenizer = LlamaTokenizer.from_pretrained(TOKENIZER_PATH)
-if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8:
-    torch_type = torch.float16
+if args.bf16:
+    torch_type = torch.bfloat16
 else:
     torch_type = torch.float16
-    warnings.warn("Your GPU does not support bfloat16 type, use fp16 instead")

 print("========Use torch type as:{} with device:{}========\n\n".format(torch_type, DEVICE))

-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_PATH,
-    torch_dtype=torch_type,
-    low_cpu_mem_usage=True,
-    trust_remote_code=True
-).to(DEVICE).eval()
+if args.quant:
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_PATH,
+        torch_dtype=torch_type,
+        low_cpu_mem_usage=True,
+        load_in_4bit=True,
+        trust_remote_code=True
+    ).eval()
+else:
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_PATH,
+        torch_dtype=torch_type,
+        low_cpu_mem_usage=True,
+        load_in_4bit=args.quant is not None,
+        trust_remote_code=True
+    ).to(DEVICE).eval()

 while True:
     image_path = input("image path >>>>> ")
diff --git a/basic_demo/cli_demo_sat.py b/basic_demo/cli_demo_sat.py
index 9efb5d63..a3a44f3d 100644
--- a/basic_demo/cli_demo_sat.py
+++ b/basic_demo/cli_demo_sat.py
@@ -23,7 +23,7 @@ def main():
     parser.add_argument("--quant", choices=[8, 4], type=int, default=None, help='quantization bits')
     parser.add_argument("--from_pretrained", type=str, default="cogagent-chat", help='pretrained ckpt') # TODO
-    parser.add_argument("--local_tokenizer", type=str, default="/share/official_pretrains/hf_home/vicuna-7b-v1.5", help='tokenizer path') #TODO
+    parser.add_argument("--local_tokenizer", type=str, default="lmsys/vicuna-7b-v1.5", help='tokenizer path') #TODO
     parser.add_argument("--fp16", action="store_true")
     parser.add_argument("--bf16", action="store_true")
     parser.add_argument("--stream_chat", action="store_true")
diff --git a/finetune_demo/evaluate_cogvlm.sh b/finetune_demo/evaluate_cogvlm.sh
index 5b5bf2ca..82bf080c 100644
--- a/finetune_demo/evaluate_cogvlm.sh
+++ b/finetune_demo/evaluate_cogvlm.sh
@@ -10,7 +10,7 @@ script_dir=$(dirname $script_path)
 main_dir=$(dirname $script_dir)
 MODEL_TYPE="cogvlm-base-490"
 VERSION="base"
-MODEL_ARGS="--from_pretrained ./checkpoints/merged_lora \
+MODEL_ARGS="--from_pretrained ./checkpoints/merged_lora_490 \
     --max_length 1288 \
     --lora_rank 10 \
     --use_lora \
@@ -52,7 +52,7 @@ gpt_options=" \

-run_cmd="${OPTIONS_NCCL} ${OPTIONS_SAT} deepspeed --master_port 16666 --hostfile ${HOST_FILE_PATH} evaluate_demo.py ${gpt_options}"
+run_cmd="${OPTIONS_NCCL} ${OPTIONS_SAT} deepspeed --master_port 16666 --hostfile ${HOST_FILE_PATH} evaluate_cogvlm_demo.py ${gpt_options}"
 echo ${run_cmd}
 eval ${run_cmd}
diff --git a/finetune_demo/finetune_cogvlm_lora.sh b/finetune_demo/finetune_cogvlm_lora.sh
index 7f1eaa44..7fad17f5 100644
--- a/finetune_demo/finetune_cogvlm_lora.sh
+++ b/finetune_demo/finetune_cogvlm_lora.sh
@@ -52,7 +52,7 @@ gpt_options=" \

-run_cmd="${OPTIONS_NCCL} ${OPTIONS_SAT} deepspeed --master_port 16666 --hostfile ${HOST_FILE_PATH} finetune_demo.py ${gpt_options}"
+run_cmd="${OPTIONS_NCCL} ${OPTIONS_SAT} deepspeed --master_port 16666 --hostfile ${HOST_FILE_PATH} finetune_cogvlm_demo.py ${gpt_options}"
 echo ${run_cmd}
 eval ${run_cmd}
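
With this change the HF demo is driven by command-line flags instead of hard-coded paths. A minimal invocation sketch, using only the flags defined in the argparse block above (the checkpoint and tokenizer IDs shown are the new defaults and can be overridden):

    # bf16 inference with the default CogAgent checkpoint and Vicuna tokenizer
    python basic_demo/cli_demo_hf.py --from_pretrained THUDM/cogagent-chat-hf --local_tokenizer lmsys/vicuna-7b-v1.5 --bf16

    # 4-bit quantized inference (4 is the only value accepted by --quant); dtype falls back to fp16
    python basic_demo/cli_demo_hf.py --quant 4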