brando90 · December 16, 2024 21:44
diff --git a/multiple_gpus_1_file.py b/multiple_gpus_1_file.py
 def main():
    """Build two vLLM engines in this process and alternate generations forever.

    Prints the interpreter path, optionally replaces ``sys.path`` with a
    hardcoded list when the host is ``skampere1``, then constructs two ``LLM``
    objects for the same model, setting ``CUDA_VISIBLE_DEVICES`` to "2" before
    the first and to "5" before the second. Afterwards it loops without end,
    sending one prompt to each engine in turn and printing the first
    completion's text. The function only stops if an exception is raised.
    """
    import os
    import socket
    import sys

    print(sys.executable)
    on_skampere = socket.gethostname() == 'skampere1'
    if on_skampere:
        print('Hardcoding the path since we are in skampere')
        sys.path = [
            '',
            '/lfs/skampere1/0/brando9/miniconda/envs/beyond_scale_2/lib/python311.zip',
            '/lfs/skampere1/0/brando9/miniconda/envs/beyond_scale_2/lib/python3.11',
            '/lfs/skampere1/0/brando9/miniconda/envs/beyond_scale_2/lib/python3.11/lib-dynload',
            '/lfs/skampere1/0/brando9/miniconda/envs/beyond_scale_2/lib/python3.11/site-packages',
            '/afs/cs.stanford.edu/u/brando9/beyond-scale-2-alignment-coeff/py_src',
            '/afs/cs.stanford.edu/u/brando9/ultimate-utils/py_src',
        ]
        print(f'{sys.path=}')

    # Imported only after the optional sys.path override above, so the
    # package is resolved against the replaced search path on skampere1.
    from vllm import LLM

    # Alternative model id the author left commented out:
    # 'deepseek-ai/deepseek-math-7b-instruct'
    model_name: str = 'gpt2'

    # NOTE(review): CUDA_VISIBLE_DEVICES is normally honoured when CUDA is
    # first initialised in a process, so re-assigning it before the second
    # engine may not place that engine on a different GPU -- confirm against
    # the vLLM version in use.
    engines = []
    for slot, device_id in enumerate(("2", "5"), start=1):
        print(f'allocating model {slot} gpu{slot}')
        os.environ["CUDA_VISIBLE_DEVICES"] = device_id
        engines.append(LLM(model=model_name))

    print('about to generate with both...')
    while True:
        # One prompt per engine, in construction order, on every pass.
        for slot, engine in enumerate(engines, start=1):
            completions = engine.generate([f"Hello from GPU {slot}"])
            print(completions[0].outputs[0].text)


 if __name__ == '__main__':
    # Script entry point: expose main() through the `fire` CLI and report the
    # wall-clock time once it returns.
    import time

    import fire

    start = time.time()
    fire.Fire(main)
    # Read the clock once so the sec/min/hr figures all describe the same
    # measurement instead of three slightly different ones.
    elapsed = time.time() - start
    # "\a" is the ASCII bell character, emitted before and after the summary.
    print(f"Done!\a Time: {elapsed:.2f} sec, {elapsed/60:.2f} min, {elapsed/3600:.2f} hr\a")
    ```
	def main():
	    """Build two vLLM engines in this process and alternate generations forever.

	    Prints the interpreter path, optionally replaces ``sys.path`` with a
	    hardcoded list when the host is ``skampere1``, then constructs two
	    ``LLM`` objects for the same model, setting ``CUDA_VISIBLE_DEVICES`` to
	    "2" before the first and to "5" before the second. Afterwards it loops
	    without end, prompting each engine in turn and printing the first
	    completion's text. The function only stops if an exception is raised.
	    """
	    import os
	    import sys
	    import socket

	    print(sys.executable)
	    if socket.gethostname() == 'skampere1':
	        print('Hardcoding the path since we are in skampere')
	        sys.path = ['', '/lfs/skampere1/0/brando9/miniconda/envs/beyond_scale_2/lib/python311.zip', '/lfs/skampere1/0/brando9/miniconda/envs/beyond_scale_2/lib/python3.11', '/lfs/skampere1/0/brando9/miniconda/envs/beyond_scale_2/lib/python3.11/lib-dynload', '/lfs/skampere1/0/brando9/miniconda/envs/beyond_scale_2/lib/python3.11/site-packages', '/afs/cs.stanford.edu/u/brando9/beyond-scale-2-alignment-coeff/py_src', '/afs/cs.stanford.edu/u/brando9/ultimate-utils/py_src']
	        print(f'{sys.path=}')

	    # Imported only after the optional sys.path override above, so the
	    # package is resolved against the replaced search path on skampere1.
	    from vllm import LLM

	    # Alternative model id the author left commented out:
	    # 'deepseek-ai/deepseek-math-7b-instruct'
	    model: str = 'gpt2'

	    # NOTE(review): CUDA_VISIBLE_DEVICES is normally honoured when CUDA is
	    # first initialised in a process, so re-assigning it before the second
	    # engine may not place that engine on a different GPU -- confirm
	    # against the vLLM version in use.
	    print('allocating model 1 gpu1')
	    os.environ["CUDA_VISIBLE_DEVICES"] = "2"
	    llm1 = LLM(model=model)

	    print('allocating model 2 gpu2')
	    os.environ["CUDA_VISIBLE_DEVICES"] = "5"
	    llm2 = LLM(model=model)

	    print('about to generate with both...')
	    while True:
	        # Alternate between the two engines on every pass.
	        prompt = "Hello from GPU 1"
	        output = llm1.generate([prompt])
	        print(output[0].outputs[0].text)
	        prompt = "Hello from GPU 2"
	        output = llm2.generate([prompt])
	        print(output[0].outputs[0].text)


	if __name__ == '__main__':
	    # Script entry point: expose main() through the `fire` CLI and report
	    # the wall-clock time once it returns.
	    import time

	    import fire

	    start = time.time()
	    fire.Fire(main)
	    # Read the clock once so the sec/min/hr figures all describe the same
	    # measurement instead of three slightly different ones.
	    elapsed = time.time() - start
	    # "\a" is the ASCII bell character, emitted before and after the summary.
	    print(f"Done!\a Time: {elapsed:.2f} sec, {elapsed/60:.2f} min, {elapsed/3600:.2f} hr\a")
	```