Created April 16, 2023 08:08
Running ModelScope text2video locally with Torch 2, GPU memory optimizations, and control over generation parameters
# Ran it with the following packages installed:
# accelerate 0.18.0
# diffusers 0.16.0.dev0
# torch 2.0.0+cu118
# torchvision 0.15.0+cu118
# transformers 4.28.1
# xformers 0.0.18
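# A hedged install sketch (not part of the original gist; pin the versions listed above
# for an exact match -- the cu118 wheels come from PyTorch's extra index, and the
# diffusers 0.16.0.dev0 build suggests an install from source):
#   pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118
#   pip install accelerate transformers xformers
#   pip install git+https://github.com/huggingface/diffusers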
import shutil

import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from diffusers.utils import export_to_video

# load the pipeline in fp16
pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16, variant="fp16")
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

# optimize for GPU memory: offload idle submodules to CPU and decode through the VAE in slices
pipe.enable_model_cpu_offload()
pipe.enable_vae_slicing()
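# Optional (not in the original script): with xformers 0.0.18 installed you can also try
# diffusers' memory-efficient attention -- sketched here as a commented-out hint, since
# torch 2.0 already provides efficient attention by default:
# pipe.enable_xformers_memory_efficient_attention()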
# generation parameters
prompt = "Alien rave party with dancing aliens and a DJ alien playing music. The aliens are wearing colorful clothes and are having a good time. The aliens are having a good time at the rave"
neg_prompt = "text, watermark, graffiti, blurry"
infer_steps = 50
num_frames = 40
seed = 42
guidance = 16
width = 256
height = 256
output_video_path = prompt.replace(" ", "_") + f'_seed{seed}_steps{infer_steps}_frames{num_frames}_guidance{guidance}_{width}x{height}.mp4'
# build a seeded generator for reproducibility (seed 0 means nondeterministic)
if seed != 0:
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    generator = torch.Generator(device).manual_seed(seed)
else:
    generator = None
video_frames = pipe(prompt,
                    num_inference_steps=infer_steps,
                    num_frames=num_frames,
                    negative_prompt=neg_prompt,
                    guidance_scale=guidance,
                    width=width,
                    height=height,
                    generator=generator).frames
# convert frames to an mp4 (export_to_video writes a temp file and returns its path)
video_path = export_to_video(video_frames)
print(f'video_path: {video_path}')
print(f'Move to output_video_path: {output_video_path}')

# move from the temp path to the descriptive output path
shutil.move(video_path, output_video_path)
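If you also want the individual frames, the pipeline output in diffusers 0.16 is a list of HxWx3 uint8 numpy arrays, so a minimal sketch like the following should work (Pillow assumed installed; the filenames are illustrative, not from the original gist):

from PIL import Image
for i, frame in enumerate(video_frames):
    Image.fromarray(frame).save(f'frame_{i:03d}.png')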