Skip to content

Instantly share code, notes, and snippets.

@Xosrov
Created December 24, 2024 15:07
Show Gist options
  • Save Xosrov/1f250bf48528ba15fbaa70c54913a117 to your computer and use it in GitHub Desktop.
Save Xosrov/1f250bf48528ba15fbaa70c54913a117 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "liBacCy6ADqU"
},
"source": [
"# Conversion of RGB -> YPbPr with BT 709 and BT 601"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "m201_l18fxaN"
},
"outputs": [],
"source": [
"# Luma coefficients: the weights of R, G and B in Y'. Each set sums to 1.\n",
"# BT 601 (used by cv2) - uncomment these three lines to use it instead of BT 709\n",
"# Kr = 0.299\n",
"# Kg = 0.587\n",
"# Kb = 0.114\n",
"\n",
"# BT 709 (the set that is active below)\n",
"Kr = 0.2126\n",
"Kg = 0.7152\n",
"Kb = 0.0722\n",
"\n",
"import numpy as np\n",
"\n",
"# convert from RGB normalized into y'PbPr normalized\n",
"def rgb_to_ypbpr(R, G, B, kr=None, kg=None, kb=None):\n",
"    \"\"\"Convert normalized R'G'B' values into normalized Y'PbPr.\n",
"\n",
"    Args:\n",
"        R, G, B: Color components, already normalized (e.g. divided by 255).\n",
"            Scalars and arrays are both accepted; arrays are broadcast against\n",
"            each other, so whole image planes can be converted in one call.\n",
"        kr, kg, kb: Optional luma coefficients. Any coefficient left as None\n",
"            falls back to the module-level Kr / Kg / Kb value.\n",
"\n",
"    Returns:\n",
"        Tuple (Y, Pb, Pr). Each element has the broadcast shape of the inputs\n",
"        (a NumPy scalar when the inputs are scalars).\n",
"    \"\"\"\n",
"    kr = Kr if kr is None else kr\n",
"    kg = Kg if kg is None else kg\n",
"    kb = Kb if kb is None else kb\n",
"\n",
"    # Rows produce Y', Pb and Pr respectively\n",
"    transform_matrix = np.array([\n",
"        [kr, kg, kb],\n",
"        [-0.5 * kr / (1 - kb), -0.5 * kg / (1 - kb), 0.5],\n",
"        [0.5, -0.5 * kg / (1 - kr), -0.5 * kb / (1 - kr)],\n",
"    ])\n",
"\n",
"    # Stack the channels on a new leading axis and contract the matrix with it.\n",
"    # Unlike np.dot on a nested list, this also works for 2D image planes.\n",
"    rgb = np.stack(np.broadcast_arrays(R, G, B))\n",
"    Y, Pb, Pr = np.tensordot(transform_matrix, rgb, axes=1)\n",
"\n",
"    return Y, Pb, Pr\n",
"\n",
"# Named MAX_VALUE (not `max`) so the built-in max() is not shadowed for the rest of the session\n",
"MAX_VALUE = 255\n",
"R, G, B = (100 / MAX_VALUE, 255 / MAX_VALUE, 10 / MAX_VALUE)  # can be any number, but needs to be normalized\n",
"Y, Pb, Pr = rgb_to_ypbpr(R, G, B)\n",
"print(f\"Y: {Y:.2f}, Pb: {Pb:.2f}, Pr: {Pr:.2f}\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "aljSq_OQALUo"
},
"source": [
"# Splitting an Image into Its RGB Components"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "2ezIIvrSgNDZ"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import cv2\n",
"import urllib\n",
"import matplotlib.pyplot as plt\n",
"\n",
"def generate_component_images(image_path):\n",
"    \"\"\"Download an image and show it next to its blue, green and red channels.\n",
"\n",
"    Args:\n",
"        image_path: URL of the image; it is opened with urllib.request.urlopen.\n",
"\n",
"    Raises:\n",
"        ValueError: If the downloaded bytes cannot be decoded as an image.\n",
"    \"\"\"\n",
"    # A bare `import urllib` does not load the `request` submodule\n",
"    import urllib.request\n",
"\n",
"    # The context manager closes the HTTP response once the bytes are read\n",
"    with urllib.request.urlopen(image_path) as response:\n",
"        encoded = np.frombuffer(response.read(), dtype=np.uint8)\n",
"\n",
"    # IMREAD_COLOR always yields a 3-channel BGR image, so the three-way split\n",
"    # below also works for grayscale sources and sources with an alpha channel\n",
"    img = cv2.imdecode(encoded, cv2.IMREAD_COLOR) if encoded.size else None\n",
"    if img is None:\n",
"        raise ValueError(f\"Could not decode an image from {image_path!r}\")\n",
"\n",
"    b, g, r = cv2.split(img)\n",
"    zeros = np.zeros_like(b)\n",
"\n",
"    # One (title, BGR image) pair per subplot; each component image keeps a\n",
"    # single channel and zeroes the other two\n",
"    panels = [\n",
"        ('Original Image', img),\n",
"        ('Blue Component', cv2.merge([b, zeros, zeros])),\n",
"        ('Green Component', cv2.merge([zeros, g, zeros])),\n",
"        ('Red Component', cv2.merge([zeros, zeros, r])),\n",
"    ]\n",
"\n",
"    # Display images side-by-side using matplotlib\n",
"    fig, axes = plt.subplots(1, len(panels), figsize=(15, 5))\n",
"    for ax, (panel_title, bgr_image) in zip(axes, panels):\n",
"        # OpenCV stores BGR while matplotlib expects RGB\n",
"        ax.imshow(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))\n",
"        ax.set_title(panel_title)\n",
"        ax.axis('off')\n",
"    plt.show()\n",
"\n",
"# URL of the image to download (it is fetched over HTTP); replace it with the URL of your own image\n",
"image_path = 'https://www.thoughtco.com/thmb/XBEmUuNX5Mwsjl-8V6f-xbr-K2c=/750x0/filters:no_upscale():max_bytes(150000):strip_icc():format(webp)/child-holding-colorful-gum-balls-576720981-5bfeb5c646e0fb0051b6dc20.jpg'\n",
"generate_component_images(image_path)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "6SfterqZATV5"
},
"source": [
"# Splitting an Image into Its YCrCb Components"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "07qRzXiEizZj"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import cv2\n",
"import urllib\n",
"import matplotlib.pyplot as plt\n",
"\n",
"def generate_ycrcb_component_images(image_path, neutral_value=128):\n",
"    \"\"\"Download an image and show it next to its Y, Cr and Cb components.\n",
"\n",
"    Args:\n",
"        image_path: URL of the image; it is opened with urllib.request.urlopen.\n",
"        neutral_value: Value given to the channels that are NOT being shown\n",
"            when a chroma image is built. 128 is the zero point of 8-bit\n",
"            Cr/Cb, so the default isolates one chroma channel on a mid-gray\n",
"            luma. Pass 0 to get the earlier rendering, in which the other\n",
"            channels were filled with zeros.\n",
"\n",
"    Raises:\n",
"        ValueError: If the downloaded bytes cannot be decoded as an image.\n",
"    \"\"\"\n",
"    # A bare `import urllib` does not load the `request` submodule\n",
"    import urllib.request\n",
"\n",
"    # The context manager closes the HTTP response once the bytes are read\n",
"    with urllib.request.urlopen(image_path) as response:\n",
"        encoded = np.frombuffer(response.read(), dtype=np.uint8)\n",
"\n",
"    # IMREAD_COLOR always yields a 3-channel BGR image, which BGR2YCrCb requires\n",
"    img = cv2.imdecode(encoded, cv2.IMREAD_COLOR) if encoded.size else None\n",
"    if img is None:\n",
"        raise ValueError(f\"Could not decode an image from {image_path!r}\")\n",
"\n",
"    y, cr, cb = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb))\n",
"    neutral = np.full_like(y, neutral_value)\n",
"\n",
"    # Luma is shown as a grayscale picture. Each chroma image keeps one chroma\n",
"    # channel and holds the two other channels at the neutral value.\n",
"    y_img = cv2.cvtColor(y, cv2.COLOR_GRAY2BGR)\n",
"    cr_img = cv2.cvtColor(cv2.merge([neutral, cr, neutral]), cv2.COLOR_YCR_CB2BGR)\n",
"    cb_img = cv2.cvtColor(cv2.merge([neutral, neutral, cb]), cv2.COLOR_YCR_CB2BGR)\n",
"\n",
"    panels = [\n",
"        ('Original Image', img),\n",
"        ('Y Component', y_img),\n",
"        ('Cr Component', cr_img),\n",
"        ('Cb Component', cb_img),\n",
"    ]\n",
"\n",
"    # Display images side-by-side using matplotlib\n",
"    fig, axes = plt.subplots(1, len(panels), figsize=(15, 5))\n",
"    for ax, (panel_title, bgr_image) in zip(axes, panels):\n",
"        # OpenCV stores BGR while matplotlib expects RGB\n",
"        ax.imshow(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))\n",
"        ax.set_title(panel_title)\n",
"        ax.axis('off')\n",
"    plt.show()\n",
"\n",
"# URL of the image to download (it is fetched over HTTP); replace it with the URL of your own image\n",
"image_path = 'https://www.thoughtco.com/thmb/XBEmUuNX5Mwsjl-8V6f-xbr-K2c=/750x0/filters:no_upscale():max_bytes(150000):strip_icc():format(webp)/child-holding-colorful-gum-balls-576720981-5bfeb5c646e0fb0051b6dc20.jpg'\n",
"generate_ycrcb_component_images(image_path)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "-QMDkEqFAbNu"
},
"source": [
"# 1D DCT of an Example Crude Array and an Example Smooth Array"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "THC-YiYaqc5n"
},
"outputs": [],
"source": [
"import numpy as np\n",
"from scipy.fft import dct, idct\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib.animation import FuncAnimation\n",
"from IPython.display import HTML, display\n",
"\n",
"def create_dct_animation(y_data, title):\n",
"    \"\"\"Animate how a 1D signal is rebuilt from its DCT, one coefficient at a time.\n",
"\n",
"    Frame i draws the original samples next to the inverse DCT of the first\n",
"    i + 1 coefficients (the remaining ones are zero-padded). For i > 0 a cosine\n",
"    with frequency index i, scaled by coefficient i, is drawn in the background.\n",
"\n",
"    Args:\n",
"        y_data: 1D array of sample values.\n",
"        title: First line of the plot title.\n",
"\n",
"    Returns:\n",
"        A matplotlib FuncAnimation with one frame per sample. Its figure is\n",
"        closed before returning, so nothing is displayed statically.\n",
"    \"\"\"\n",
"    n_samples = len(y_data)\n",
"\n",
"    # Compute the DCT\n",
"    c_data = dct(y_data, norm='ortho')\n",
"\n",
"    # Create figure and axis\n",
"    fig, ax = plt.subplots(figsize=(12, 8))\n",
"\n",
"    # Values that do not depend on the frame are computed once, up front\n",
"    t = np.arange(1, n_samples + 1)          # bar positions, 1-based\n",
"    x_smooth = np.linspace(0, 1, 100)        # fine grid for a smooth cosine curve\n",
"    x_axis = np.linspace(1, n_samples, 100)  # the same grid stretched across the bars\n",
"    c_peak = np.max(np.abs(c_data))          # largest coefficient magnitude\n",
"\n",
"    def animate(i):\n",
"        ax.clear()\n",
"\n",
"        # Compute the approximation using i+1 terms\n",
"        y_approx = idct(c_data[:i+1], n=n_samples, norm='ortho')\n",
"\n",
"        # Plot the cosine term in the background; the DC term (i=0) is skipped\n",
"        if i > 0:\n",
"            cosine_smooth = c_data[i] * np.cos(i * np.pi * x_smooth)\n",
"            ax.plot(x_axis, cosine_smooth, color='gray', alpha=0.3, linewidth=2)\n",
"\n",
"        # Plot original and approximated data as two bars per pixel\n",
"        ax.bar(t, y_data, alpha=0.5, label='Original Pixel Value', width=0.4, align='edge')\n",
"        ax.bar(t + 0.4, y_approx, alpha=0.5, label='DCT Approximation', width=0.4, align='edge')\n",
"\n",
"        # Set labels and title\n",
"        ax.set_xlabel('Pixel')\n",
"        ax.set_ylabel('Value')\n",
"        ax.legend()\n",
"        ax.set_title(f'{title}\\nTerm {i+1} with coefficient {c_data[i]:.2f}, '\n",
"                     f'MSE: {np.mean(np.square(y_approx - y_data)):.2f}')\n",
"\n",
"        # Set y-axis limits to accommodate both the data and the cosine wave.\n",
"        # NumPy reductions are used on purpose: in a notebook session the\n",
"        # built-in names min/max may have been rebound.\n",
"        y_min = np.min([np.min(y_data), np.min(y_approx), -c_peak])\n",
"        y_max = np.max([np.max(y_data), np.max(y_approx), c_peak])\n",
"        ax.set_ylim(y_min - 10, y_max + 10)\n",
"\n",
"    # Create animation\n",
"    anim = FuncAnimation(fig, animate, frames=n_samples, interval=800, blit=False)\n",
"\n",
"    # Close the figure to prevent it from being displayed statically\n",
"    plt.close(fig)\n",
"\n",
"    return anim\n",
"\n",
"# Two 16-sample example signals: one erratic, one steadily increasing\n",
"y_crude = np.array([-127, 64, 0, 32, -64, -32, 16, 128, 5, 100, -53, 53, 12, 0, -100, 1])\n",
"y_better = np.array([-127, -120, -100, -80, -64, -32, -16, 12, 15, 45, 53, 74, 90, 100, 110, 128])\n",
"\n",
"# Build one animation per signal\n",
"ani_crude = create_dct_animation(y_crude, \"Crude Data DCT Animation\")\n",
"ani_better = create_dct_animation(y_better, \"Better Data DCT Animation\")\n",
"\n",
"# Render each animation as its own JavaScript player\n",
"for anim in (ani_crude, ani_better):\n",
"    display(HTML(anim.to_jshtml()))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "A9aSvzb5AlHr"
},
"source": [
"# 2D DCT of an Example Crude Array and an Example Smooth Array\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"background_save": true
},
"id": "fv3-uM2XCk06"
},
"outputs": [],
"source": [
"import numpy as np\n",
"from scipy.fft import dctn, idctn\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib.animation import FuncAnimation\n",
"from mpl_toolkits.mplot3d import Axes3D\n",
"from IPython.display import HTML\n",
"\n",
"def compute_2d_dct(data):\n",
"    \"\"\"Return the orthonormal 2D DCT coefficients of `data`.\"\"\"\n",
"    coefficients = dctn(data, norm='ortho')\n",
"    return coefficients\n",
"\n",
"def reconstruct_data(dct_coeffs, num_coeffs):\n",
"    \"\"\"Rebuild the data from only the first `num_coeffs` DCT coefficients.\n",
"\n",
"    Coefficients are taken in row-major order (all of row 0, then row 1, ...);\n",
"    every other coefficient is treated as zero.\n",
"\n",
"    Args:\n",
"        dct_coeffs: Array of orthonormal DCT coefficients.\n",
"        num_coeffs: How many leading coefficients to keep. Values below 0 or\n",
"            above the number of coefficients are clamped to that range.\n",
"\n",
"    Returns:\n",
"        The inverse DCT of the truncated coefficients, same shape as `dct_coeffs`.\n",
"    \"\"\"\n",
"    # Clamp so that a negative count cannot turn into a from-the-end slice\n",
"    keep = int(np.clip(num_coeffs, 0, dct_coeffs.size))\n",
"\n",
"    # Start from all zeros and copy the leading coefficients in one slice;\n",
"    # `.flat` walks both arrays in row-major order\n",
"    kept_coeffs = np.zeros_like(dct_coeffs)\n",
"    kept_coeffs.flat[:keep] = dct_coeffs.flat[:keep]\n",
"\n",
"    # Perform inverse DCT to reconstruct the data\n",
"    return idctn(kept_coeffs, norm='ortho')\n",
"\n",
"def update_plot(frame, data, dct_coeffs, ax):\n",
"    \"\"\"Redraw `ax` for one animation frame.\n",
"\n",
"    The original data is drawn as translucent gray bars, and on top of it the\n",
"    data rebuilt from the first `frame + 1` DCT coefficients. The title reports\n",
"    the coefficient at flat index `frame` and the mean squared error.\n",
"    \"\"\"\n",
"    ax.clear()\n",
"\n",
"    approximation = reconstruct_data(dct_coeffs, frame + 1)\n",
"\n",
"    # Bar anchor positions: one bar per array element, all starting at height 0\n",
"    n_rows, n_cols = data.shape[0], data.shape[1]\n",
"    x, y = np.meshgrid(np.arange(n_cols), np.arange(n_rows))\n",
"    xs, ys = x.ravel(), y.ravel()\n",
"    base = np.zeros_like(data).ravel()\n",
"    bar_width = 0.5\n",
"\n",
"    # Original data with lower opacity, then the reconstruction\n",
"    ax.bar3d(xs, ys, base, bar_width, bar_width, data.ravel(), shade=True, alpha=0.3, color='gray')\n",
"    ax.bar3d(xs, ys, base, bar_width, bar_width, approximation.ravel(), shade=True)\n",
"\n",
"    coeff_index = np.unravel_index(frame, dct_coeffs.shape)\n",
"    coeff_value = dct_coeffs[coeff_index]\n",
"    mse = np.mean(np.square(approximation - data))\n",
"\n",
"    ax.set_title(f'Frame {frame + 1}, Coeff: {coeff_value:.2f}, idx {coeff_index}, MSE: {mse:.2f}')\n",
"\n",
"def create_animation(data, dct_coeffs):\n",
"    \"\"\"Create a 3D bar animation of the progressive DCT reconstruction.\n",
"\n",
"    Args:\n",
"        data: 2D array holding the original values.\n",
"        dct_coeffs: DCT coefficients of `data`.\n",
"\n",
"    Returns:\n",
"        A matplotlib FuncAnimation with one frame per element of `data`;\n",
"        update_plot draws each frame.\n",
"    \"\"\"\n",
"    fig = plt.figure()\n",
"    ax = fig.add_subplot(111, projection='3d')\n",
"    anim = FuncAnimation(fig, update_plot, frames=data.size, fargs=(data, dct_coeffs, ax), interval=200, blit=False)\n",
"\n",
"    # Close the figure so that only the animation player is shown, not an\n",
"    # additional static copy of the figure\n",
"    plt.close(fig)\n",
"\n",
"    return anim\n",
"\n",
"# Smooth example: an 8x8 ramp whose value is 16 * (row + col), from 0 up to 224\n",
"grid_index = np.arange(8)\n",
"data_smooth = 16 * np.add.outer(grid_index, grid_index)\n",
"\n",
"# Crude example: an 8x8 checkerboard of 0 and 128, with 0 in the top-left corner\n",
"data_crude = 128 * (np.add.outer(grid_index, grid_index) % 2)\n",
"\n",
"# Create animations\n",
"animation_smooth = create_animation(data_smooth, compute_2d_dct(data_smooth))\n",
"animation_crude = create_animation(data_crude, compute_2d_dct(data_crude))\n",
"\n",
"# Display both players in a single HTML output\n",
"players_html = animation_smooth.to_jshtml() + animation_crude.to_jshtml()\n",
"display(HTML(players_html))\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "wa5GnSU5-X16"
},
"source": [
"# Generating DCT Coefficient Table from 8x8 Image"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "0xD0yrvr-dl_"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from scipy.fftpack import dct\n",
"\n",
"def draw_pixel_array(ax, pixel_values, title):\n",
"    \"\"\"Draw `pixel_values` on `ax` as a grayscale image and set the axes title.\"\"\"\n",
"    image_options = {'cmap': 'gray', 'interpolation': 'nearest'}\n",
"    ax.imshow(pixel_values, **image_options)\n",
"    ax.set_title(title)\n",
"\n",
"def draw_dct_coeffs(ax, dct_coeffs, title):\n",
"    \"\"\"Draw the DCT coefficients as a grid of numbered, bordered cells.\n",
"\n",
"    Each cell is shaded by the coefficient's magnitude, rescaled so that the\n",
"    smallest magnitude is black and the largest is white, and shows the value\n",
"    with two decimals. Row 0 is drawn at the bottom because the y-axis is not\n",
"    inverted.\n",
"\n",
"    Args:\n",
"        ax: Matplotlib axes to draw on.\n",
"        dct_coeffs: 2D array of coefficients; any size is accepted.\n",
"        title: Title for the axes.\n",
"    \"\"\"\n",
"    dct_coeffs = np.asarray(dct_coeffs)\n",
"    n_rows, n_cols = dct_coeffs.shape\n",
"\n",
"    magnitudes = np.abs(dct_coeffs)\n",
"    lowest = np.min(magnitudes)\n",
"    span = np.max(magnitudes) - lowest\n",
"    if span > 0:\n",
"        shades = (magnitudes - lowest) / span\n",
"    else:\n",
"        # All magnitudes are equal: avoid dividing by zero and use one shade\n",
"        shades = np.zeros(magnitudes.shape)\n",
"\n",
"    for i in range(n_rows):\n",
"        for j in range(n_cols):\n",
"            gray = float(shades[i, j])\n",
"            # facecolor (not color) is used so that the black edgecolor is kept;\n",
"            # `color` would set the edge and the face to the same color\n",
"            rect = plt.Rectangle((j - 0.5, i - 0.5), 1, 1,\n",
"                                 fill=True,\n",
"                                 facecolor=(gray, gray, gray),\n",
"                                 edgecolor='black',\n",
"                                 linewidth=0.5)\n",
"            ax.add_patch(rect)\n",
"            ax.text(j, i, f'{dct_coeffs[i, j]:.2f}',\n",
"                    ha=\"center\", va=\"center\", color=\"red\", size=\"x-small\")\n",
"\n",
"    # Cells are centered on integer coordinates, so the limits sit half a cell out\n",
"    ax.set_xlim(-0.5, n_cols - 0.5)\n",
"    ax.set_ylim(-0.5, n_rows - 0.5)\n",
"    ax.set_xticks(np.arange(n_cols))\n",
"    ax.set_yticks(np.arange(n_rows))\n",
"    ax.set_xticklabels([])\n",
"    ax.set_yticklabels([])\n",
"    ax.set_title(title)\n",
"\n",
"# Seed NumPy's global RNG so the noise, and every figure derived from it, is the same on each run\n",
"np.random.seed(42)\n",
"\n",
"# Define a base gradient\n",
"# Alternative (flat block): base_gradient = np.linspace(-0, 0, 64).reshape(8, 8)\n",
"base_gradient = np.linspace(-90, 60, 64).reshape(8, 8)\n",
"\n",
"# Add some random integer noise to the gradient (randint's upper bound is exclusive)\n",
"# Alternative (heavy noise): noise = np.random.randint(-127, 127, size=(8, 8))\n",
"noise = np.random.randint(-15, 15, size=(8, 8))\n",
"pixel_values = base_gradient + noise\n",
"\n",
"# Ensure pixel values stay within the range [-127, 128]\n",
"pixel_values = np.clip(pixel_values, -127, 128)\n",
"\n",
"# Calculate the 2D DCT coefficients as two 1D passes: first down the columns, then along the rows\n",
"dct_coeffs = dct(dct(pixel_values.T, norm='ortho').T, norm='ortho')\n",
"\n",
"# Display the pixel values and DCT coefficients side by side\n",
"fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))\n",
"draw_pixel_array(ax1, pixel_values, \"Pixel Values on a Grid\")\n",
"# draw_dct_coeffs puts row 0 at the bottom, so flip the rows to bring the\n",
"# low-frequency (usually largest) coefficients to the top left\n",
"draw_dct_coeffs(ax2, np.flip(dct_coeffs,0), \"DCT Coefficients\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "dbqUj2oRGlGc"
},
"source": [
"# Generating Inverse Image from DCT Coefficients and Quantization Table"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "c11G-douGtMu"
},
"outputs": [],
"source": [
"from scipy.fft import dctn, idctn\n",
"\n",
"def quantize_dct_coeffs(dct_coeffs, quantization_table):\n",
"    \"\"\"Quantize the DCT coefficients and scale them back to their original range.\n",
"\n",
"    Each coefficient is divided by its table entry, rounded to the nearest\n",
"    integer level and multiplied by the entry again, i.e. it is snapped to the\n",
"    nearest multiple of its step size. Rounding to nearest (instead of flooring)\n",
"    sends small coefficients of either sign to 0 rather than pushing small\n",
"    negative ones down to -step.\n",
"\n",
"    Args:\n",
"        dct_coeffs: Array of DCT coefficients.\n",
"        quantization_table: Step sizes, broadcastable against `dct_coeffs`.\n",
"\n",
"    Returns:\n",
"        Float array with every coefficient replaced by the nearest multiple of its step.\n",
"    \"\"\"\n",
"    levels = np.round(np.asarray(dct_coeffs) / quantization_table)\n",
"    return levels * quantization_table\n",
"\n",
"# Custom quantization table: entry (row, col) is 3 + 2 * (row + col), so the\n",
"# step size grows from 3 in the top-left corner to 31 in the bottom-right corner\n",
"frequency_index = np.arange(8)\n",
"quantization_table = 3 + 2 * np.add.outer(frequency_index, frequency_index)\n",
"\n",
"# pixel_values and dct_coeffs come from the previous code cell, which must be run first\n",
"\n",
"# Snap every coefficient to a multiple of its step size\n",
"quantized_dct_coeffs = quantize_dct_coeffs(dct_coeffs, quantization_table)\n",
"\n",
"# Rebuild the pixels from the quantized coefficients\n",
"inverse_quantized_dct = idctn(quantized_dct_coeffs, norm='ortho')\n",
"\n",
"# Show the rebuilt image next to the quantized coefficient table\n",
"fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))\n",
"draw_pixel_array(ax1, inverse_quantized_dct, \"Inverse DCT from Quantized Coefficients\")\n",
"# Rows are flipped for prettier formatting (bring bigger values to the top left)\n",
"draw_dct_coeffs(ax2, np.flipud(quantized_dct_coeffs), \"Quantized DCT Coefficients\")\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "oct7LZwNMkXl"
},
"outputs": [],
"source": [
"# Combine the plots of the two previous cells into a single 2x2 figure, stacked vertically\n",
"fig, axes = plt.subplots(2, 2, figsize=(10, 10)) # 2 rows, 2 columns\n",
"\n",
"# Top row: the original pixel values and their DCT coefficients\n",
"draw_pixel_array(axes[0, 0], pixel_values, \"Pixel Values on a Grid\")\n",
"draw_dct_coeffs(axes[0, 1], np.flip(dct_coeffs, 0), \"DCT Coefficients\")\n",
"\n",
"# Bottom row: the image rebuilt from the quantized coefficients, and those coefficients\n",
"draw_pixel_array(axes[1, 0], inverse_quantized_dct, \"Inverse DCT from Quantized Coefficients\")\n",
"draw_dct_coeffs(axes[1, 1], np.flip(quantized_dct_coeffs, 0), \"Quantized DCT Coefficients\")\n",
"\n",
"plt.tight_layout() # Adjust spacing between subplots\n",
"plt.show()"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment