hathibelagal-dev · May 8, 2025 02:01
diff --git a/ltx-video.ipynb b/ltx-video.ipynb
 {
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "T4",
      "authorship_tag": "ABX9TyMNbc/moos/0oanb15U6unK",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/hathibelagal-dev/8a03f9e08b5ca6a64d6de2d301af7856/ltx-video.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "First enter the prompt (and negative prompt) and run all cells to generate the prompt embeds. Then restart the runtime and run all cells again to generate the video.\n",
        "\n",
        "To change the prompt, delete all the `.pt` files, edit the prompt, then restart the runtime and run all cells to generate new prompt embeds.\n",
        "\n",
        "This two-step workflow lets you stay within the limits of the Colab free tier.\n",
        "\n",
        "*-- x.com/hathibel*"
      ],
      "metadata": {
        "id": "fMPlkiv6i9_c"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from transformers import BitsAndBytesConfig\n",
        "from accelerate import Accelerator\n",
        "from transformers import T5EncoderModel, T5TokenizerFast\n",
        "from diffusers import LTXPipeline\n",
        "from diffusers.utils import export_to_video\n",
        "import torch\n",
        "import os"
      ],
      "metadata": {
        "id": "hwhNsdKBIsbD"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Phase switch: start in the \"encode the prompt\" phase. A later cell turns\n",
        "# this off when cached prompt embeddings are found on disk.\n",
        "create_encoder = True\n",
        "\n",
        "# Precision requested for every model and embedding tensor in the notebook.\n",
        "torch_dtype = torch.bfloat16"
      ],
      "metadata": {
        "id": "mTOvN16HeVfl"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# The video phase loads all four cached tensors, so only skip the encoder\n",
        "# when every one of them is present. A partial cache (for example after an\n",
        "# interrupted run) falls back to regenerating the embeddings instead of\n",
        "# failing later with a missing-file error.\n",
        "embed_files = (\"prompt_embeds.pt\", \"n_pe.pt\", \"am.pt\", \"n_am.pt\")\n",
        "if all(os.path.exists(path) for path in embed_files):\n",
        "  create_encoder = False"
      ],
      "metadata": {
        "id": "Kc1szxY88ywX"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Text that is encoded into the positive prompt embeddings.\n",
        "prompt = (\n",
        "    \"A young woman with wavy, shoulder-length light brown hair is jogging outdoors on a foggy day. She wears a cozy pink turtleneck sweater, with a serene expression and piercing blue eyes. A wooden fence and a misty, grassy field fade into the background, evoking a calm and introspective mood.\"\n",
        ")\n",
        "\n",
        "# Text that is encoded into the negative prompt embeddings.\n",
        "negative_prompt = (\n",
        "    \"worst quality, inconsistent motion, blurry, jittery, distorted\"\n",
        ")"
      ],
      "metadata": {
        "id": "1qwPdIfIdVlw"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Default to the CPU and switch to the GPU only when CUDA is usable.\n",
        "device = \"cpu\"\n",
        "if torch.cuda.is_available():\n",
        "  device = \"cuda\""
      ],
      "metadata": {
        "id": "aOe6gqpA972L"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "if create_encoder:\n",
        "  # Embedding phase only: load the T5 text encoder published in the\n",
        "  # LTX-Video repository and move it to the selected device.\n",
        "  encoder = T5EncoderModel.from_pretrained(\n",
        "      \"Lightricks/LTX-Video\",\n",
        "      subfolder=\"text_encoder\",\n",
        "      torch_dtype=torch_dtype,\n",
        "      low_cpu_mem_usage=True,\n",
        "  )\n",
        "  encoder.to(device)\n",
        "\n",
        "  # A tokenizer holds no weights, so no dtype argument is passed to it.\n",
        "  tokenizer = T5TokenizerFast.from_pretrained(\n",
        "      \"Lightricks/LTX-Video\", subfolder=\"tokenizer\",\n",
        "  )"
      ],
      "metadata": {
        "id": "1IklnJJGODrx"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "if create_encoder:\n",
        "  # Tokenize the prompt, padded/truncated to a fixed length of 128 tokens.\n",
        "  text_inputs_prompt = tokenizer(\n",
        "      prompt,\n",
        "      padding=\"max_length\",\n",
        "      max_length=128,\n",
        "      truncation=True,\n",
        "      add_special_tokens=True,\n",
        "      return_tensors=\"pt\",\n",
        "  )\n",
        "  text_input_ids = text_inputs_prompt.input_ids\n",
        "  prompt_attention_mask = text_inputs_prompt.attention_mask\n",
        "  prompt_attention_mask = prompt_attention_mask.bool().to(device)\n",
        "\n",
        "  # Same tokenizer settings for the negative prompt.\n",
        "  text_inputs_negative_prompt = tokenizer(\n",
        "      negative_prompt,\n",
        "      padding=\"max_length\",\n",
        "      max_length=128,\n",
        "      truncation=True,\n",
        "      add_special_tokens=True,\n",
        "      return_tensors=\"pt\",\n",
        "  )\n",
        "  text_input_ids_neg = text_inputs_negative_prompt.input_ids\n",
        "  prompt_attention_mask_neg = text_inputs_negative_prompt.attention_mask\n",
        "  prompt_attention_mask_neg = prompt_attention_mask_neg.bool().to(device)\n",
        "\n",
        "  # Inference only: without no_grad the encoder would record an autograd\n",
        "  # graph for both forward passes, which only wastes memory here.\n",
        "  with torch.no_grad():\n",
        "    prompt_embeds = encoder(text_input_ids.to(device))[0]\n",
        "    negative_prompt_embeds = encoder(text_input_ids_neg.to(device))[0]\n",
        "  prompt_embeds = prompt_embeds.to(dtype=torch_dtype, device=device)\n",
        "  negative_prompt_embeds = negative_prompt_embeds.to(dtype=torch_dtype, device=device)\n",
        "\n",
        "  # prompt_embeds.pt is the file whose presence switches the notebook to the\n",
        "  # video phase, so it is written last: an interrupted run can then never\n",
        "  # leave it behind without the other three files.\n",
        "  torch.save(negative_prompt_embeds, \"n_pe.pt\")\n",
        "  torch.save(prompt_attention_mask_neg, \"n_am.pt\")\n",
        "  torch.save(prompt_attention_mask, \"am.pt\")\n",
        "  torch.save(prompt_embeds, \"prompt_embeds.pt\")"
      ],
      "metadata": {
        "id": "RSibAimh280M"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "if not create_encoder:\n",
        "  # Video phase: reload the tensors cached by the embedding phase.\n",
        "  # map_location places them on the current device even if they were saved\n",
        "  # from a different one; weights_only=True restricts unpickling to plain\n",
        "  # tensor data.\n",
        "  prompt_embeds = torch.load(\n",
        "      \"prompt_embeds.pt\", map_location=device, weights_only=True\n",
        "  )\n",
        "  negative_prompt_embeds = torch.load(\n",
        "      \"n_pe.pt\", map_location=device, weights_only=True\n",
        "  )\n",
        "  prompt_attention_mask = torch.load(\n",
        "      \"am.pt\", map_location=device, weights_only=True\n",
        "  )\n",
        "  prompt_attention_mask_neg = torch.load(\n",
        "      \"n_am.pt\", map_location=device, weights_only=True\n",
        "  )"
      ],
      "metadata": {
        "id": "egM1lcDq6IZL"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "if not create_encoder:\n",
        "  # Video phase: load the LTX-Video transformer on its own so it can be\n",
        "  # handed to the pipeline constructor in the next cell.\n",
        "  from diffusers import LTXVideoTransformer3DModel\n",
        "\n",
        "  transformer = LTXVideoTransformer3DModel.from_pretrained(\n",
        "      \"Lightricks/LTX-Video\",\n",
        "      low_cpu_mem_usage=True,\n",
        "      torch_dtype=torch_dtype,\n",
        "      subfolder=\"transformer\",\n",
        "  )"
      ],
      "metadata": {
        "id": "nxXfmnI6Lo4W"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "if not create_encoder:\n",
        "  # Assemble the pipeline around the transformer loaded above. The text\n",
        "  # encoder is left out because the prompt embeddings are read from disk.\n",
        "  pipe = LTXPipeline.from_pretrained(\n",
        "      \"Lightricks/LTX-Video\",\n",
        "      transformer=transformer,\n",
        "      text_encoder=None,\n",
        "      torch_dtype=torch_dtype,\n",
        "  )\n",
        "  pipe.to(device)"
      ],
      "metadata": {
        "id": "aFYKR3ONI5yx"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Run a garbage-collection pass first, then ask PyTorch to release its\n",
        "# cached, currently unused CUDA memory before sampling starts.\n",
        "import gc\n",
        "\n",
        "gc.collect()\n",
        "torch.cuda.empty_cache()"
      ],
      "metadata": {
        "id": "Ha6suHA_SvEl"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "if create_encoder:\n",
        "  # Embedding phase: there is no pipeline in this session.\n",
        "  print(\"Generate embeds first and restart.\")\n",
        "else:\n",
        "  # Video phase: sample 49 frames in 50 steps from the cached embeddings,\n",
        "  # keep the first entry of the returned frames, and write it out at 7 fps.\n",
        "  video = pipe(\n",
        "      num_frames=49,\n",
        "      num_inference_steps=50,\n",
        "      prompt_embeds=prompt_embeds,\n",
        "      prompt_attention_mask=prompt_attention_mask,\n",
        "      negative_prompt_embeds=negative_prompt_embeds,\n",
        "      negative_prompt_attention_mask=prompt_attention_mask_neg,\n",
        "  ).frames[0]\n",
        "  export_to_video(video, \"output.mp4\", fps=7)"
      ],
      "metadata": {
        "id": "OXBMsxbcJKsk"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Just uncomment and run the following if you want a copy of the video at 24 fps."
      ],
      "metadata": {
        "id": "LkK6xPuDn7AT"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# export_to_video(video, \"outputFast.mp4\", fps=24)"
      ],
      "metadata": {
        "id": "xUF6d8YBcO4u"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
 }
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"provenance": [],
	"gpuType": "T4",
	"authorship_tag": "ABX9TyMNbc/moos/0oanb15U6unK",
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	},
	"language_info": {
	"name": "python"
	},
	"accelerator": "GPU"
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/hathibelagal-dev/8a03f9e08b5ca6a64d6de2d301af7856/ltx-video.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"First enter the prompt (and negative prompt) and run all cells to generate the prompt embeds. Then restart the runtime and run all cells again to generate the video.\n",
	"\n",
	"To change the prompt, delete all the `.pt` files, edit the prompt, then restart the runtime and run all cells to generate new prompt embeds.\n",
	"\n",
	"This two-step workflow lets you stay within the limits of the Colab free tier.\n",
	"\n",
	"-- x.com/hathibel"
	],
	"metadata": {
	"id": "fMPlkiv6i9_c"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"from transformers import BitsAndBytesConfig\n",
	"from accelerate import Accelerator\n",
	"from transformers import T5EncoderModel, T5TokenizerFast\n",
	"from diffusers import LTXPipeline\n",
	"from diffusers.utils import export_to_video\n",
	"import torch\n",
	"import os"
	],
	"metadata": {
	"id": "hwhNsdKBIsbD"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"# Phase switch: start in the \"encode the prompt\" phase. A later cell turns\n",
	"# this off when cached prompt embeddings are found on disk.\n",
	"create_encoder = True\n",
	"\n",
	"# Precision requested for every model and embedding tensor in the notebook.\n",
	"torch_dtype = torch.bfloat16"
	],
	"metadata": {
	"id": "mTOvN16HeVfl"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"# The video phase loads all four cached tensors, so only skip the encoder\n",
	"# when every one of them is present. A partial cache (for example after an\n",
	"# interrupted run) falls back to regenerating the embeddings instead of\n",
	"# failing later with a missing-file error.\n",
	"embed_files = (\"prompt_embeds.pt\", \"n_pe.pt\", \"am.pt\", \"n_am.pt\")\n",
	"if all(os.path.exists(path) for path in embed_files):\n",
	"  create_encoder = False"
	],
	"metadata": {
	"id": "Kc1szxY88ywX"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"# Text that is encoded into the positive prompt embeddings.\n",
	"prompt = (\n",
	"    \"A young woman with wavy, shoulder-length light brown hair is jogging outdoors on a foggy day. She wears a cozy pink turtleneck sweater, with a serene expression and piercing blue eyes. A wooden fence and a misty, grassy field fade into the background, evoking a calm and introspective mood.\"\n",
	")\n",
	"\n",
	"# Text that is encoded into the negative prompt embeddings.\n",
	"negative_prompt = (\n",
	"    \"worst quality, inconsistent motion, blurry, jittery, distorted\"\n",
	")"
	],
	"metadata": {
	"id": "1qwPdIfIdVlw"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"# Default to the CPU and switch to the GPU only when CUDA is usable.\n",
	"device = \"cpu\"\n",
	"if torch.cuda.is_available():\n",
	"  device = \"cuda\""
	],
	"metadata": {
	"id": "aOe6gqpA972L"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"if create_encoder:\n",
	"  # Embedding phase only: load the T5 text encoder published in the\n",
	"  # LTX-Video repository and move it to the selected device.\n",
	"  encoder = T5EncoderModel.from_pretrained(\n",
	"      \"Lightricks/LTX-Video\",\n",
	"      subfolder=\"text_encoder\",\n",
	"      torch_dtype=torch_dtype,\n",
	"      low_cpu_mem_usage=True,\n",
	"  )\n",
	"  encoder.to(device)\n",
	"\n",
	"  # A tokenizer holds no weights, so no dtype argument is passed to it.\n",
	"  tokenizer = T5TokenizerFast.from_pretrained(\n",
	"      \"Lightricks/LTX-Video\", subfolder=\"tokenizer\",\n",
	"  )"
	],
	"metadata": {
	"id": "1IklnJJGODrx"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"if create_encoder:\n",
	"  # Tokenize the prompt, padded/truncated to a fixed length of 128 tokens.\n",
	"  text_inputs_prompt = tokenizer(\n",
	"      prompt,\n",
	"      padding=\"max_length\",\n",
	"      max_length=128,\n",
	"      truncation=True,\n",
	"      add_special_tokens=True,\n",
	"      return_tensors=\"pt\",\n",
	"  )\n",
	"  text_input_ids = text_inputs_prompt.input_ids\n",
	"  prompt_attention_mask = text_inputs_prompt.attention_mask\n",
	"  prompt_attention_mask = prompt_attention_mask.bool().to(device)\n",
	"\n",
	"  # Same tokenizer settings for the negative prompt.\n",
	"  text_inputs_negative_prompt = tokenizer(\n",
	"      negative_prompt,\n",
	"      padding=\"max_length\",\n",
	"      max_length=128,\n",
	"      truncation=True,\n",
	"      add_special_tokens=True,\n",
	"      return_tensors=\"pt\",\n",
	"  )\n",
	"  text_input_ids_neg = text_inputs_negative_prompt.input_ids\n",
	"  prompt_attention_mask_neg = text_inputs_negative_prompt.attention_mask\n",
	"  prompt_attention_mask_neg = prompt_attention_mask_neg.bool().to(device)\n",
	"\n",
	"  # Inference only: without no_grad the encoder would record an autograd\n",
	"  # graph for both forward passes, which only wastes memory here.\n",
	"  with torch.no_grad():\n",
	"    prompt_embeds = encoder(text_input_ids.to(device))[0]\n",
	"    negative_prompt_embeds = encoder(text_input_ids_neg.to(device))[0]\n",
	"  prompt_embeds = prompt_embeds.to(dtype=torch_dtype, device=device)\n",
	"  negative_prompt_embeds = negative_prompt_embeds.to(dtype=torch_dtype, device=device)\n",
	"\n",
	"  # prompt_embeds.pt is the file whose presence switches the notebook to the\n",
	"  # video phase, so it is written last: an interrupted run can then never\n",
	"  # leave it behind without the other three files.\n",
	"  torch.save(negative_prompt_embeds, \"n_pe.pt\")\n",
	"  torch.save(prompt_attention_mask_neg, \"n_am.pt\")\n",
	"  torch.save(prompt_attention_mask, \"am.pt\")\n",
	"  torch.save(prompt_embeds, \"prompt_embeds.pt\")"
	],
	"metadata": {
	"id": "RSibAimh280M"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"if not create_encoder:\n",
	"  # Video phase: reload the tensors cached by the embedding phase.\n",
	"  # map_location places them on the current device even if they were saved\n",
	"  # from a different one; weights_only=True restricts unpickling to plain\n",
	"  # tensor data.\n",
	"  prompt_embeds = torch.load(\n",
	"      \"prompt_embeds.pt\", map_location=device, weights_only=True\n",
	"  )\n",
	"  negative_prompt_embeds = torch.load(\n",
	"      \"n_pe.pt\", map_location=device, weights_only=True\n",
	"  )\n",
	"  prompt_attention_mask = torch.load(\n",
	"      \"am.pt\", map_location=device, weights_only=True\n",
	"  )\n",
	"  prompt_attention_mask_neg = torch.load(\n",
	"      \"n_am.pt\", map_location=device, weights_only=True\n",
	"  )"
	],
	"metadata": {
	"id": "egM1lcDq6IZL"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"if not create_encoder:\n",
	"  # Video phase: load the LTX-Video transformer on its own so it can be\n",
	"  # handed to the pipeline constructor in the next cell.\n",
	"  from diffusers import LTXVideoTransformer3DModel\n",
	"\n",
	"  transformer = LTXVideoTransformer3DModel.from_pretrained(\n",
	"      \"Lightricks/LTX-Video\",\n",
	"      low_cpu_mem_usage=True,\n",
	"      torch_dtype=torch_dtype,\n",
	"      subfolder=\"transformer\",\n",
	"  )"
	],
	"metadata": {
	"id": "nxXfmnI6Lo4W"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"if not create_encoder:\n",
	"  # Assemble the pipeline around the transformer loaded above. The text\n",
	"  # encoder is left out because the prompt embeddings are read from disk.\n",
	"  pipe = LTXPipeline.from_pretrained(\n",
	"      \"Lightricks/LTX-Video\",\n",
	"      transformer=transformer,\n",
	"      text_encoder=None,\n",
	"      torch_dtype=torch_dtype,\n",
	"  )\n",
	"  pipe.to(device)"
	],
	"metadata": {
	"id": "aFYKR3ONI5yx"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"# Run a garbage-collection pass first, then ask PyTorch to release its\n",
	"# cached, currently unused CUDA memory before sampling starts.\n",
	"import gc\n",
	"\n",
	"gc.collect()\n",
	"torch.cuda.empty_cache()"
	],
	"metadata": {
	"id": "Ha6suHA_SvEl"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"if create_encoder:\n",
	"  # Embedding phase: there is no pipeline in this session.\n",
	"  print(\"Generate embeds first and restart.\")\n",
	"else:\n",
	"  # Video phase: sample 49 frames in 50 steps from the cached embeddings,\n",
	"  # keep the first entry of the returned frames, and write it out at 7 fps.\n",
	"  video = pipe(\n",
	"      num_frames=49,\n",
	"      num_inference_steps=50,\n",
	"      prompt_embeds=prompt_embeds,\n",
	"      prompt_attention_mask=prompt_attention_mask,\n",
	"      negative_prompt_embeds=negative_prompt_embeds,\n",
	"      negative_prompt_attention_mask=prompt_attention_mask_neg,\n",
	"  ).frames[0]\n",
	"  export_to_video(video, \"output.mp4\", fps=7)"
	],
	"metadata": {
	"id": "OXBMsxbcJKsk"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"source": [
	"Just uncomment and run the following if you want a copy of the video at 24 fps."
	],
	"metadata": {
	"id": "LkK6xPuDn7AT"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"# export_to_video(video, \"outputFast.mp4\", fps=24)"
	],
	"metadata": {
	"id": "xUF6d8YBcO4u"
	},
	"execution_count": null,
	"outputs": []
	}
	]
	}