Skip to content

Instantly share code, notes, and snippets.

@dpaluy
Last active March 18, 2025 03:42
Show Gist options
  • Save dpaluy/71e373952c543a9f4178cf5b06df7640 to your computer and use it in GitHub Desktop.
Save dpaluy/71e373952c543a9f4178cf5b06df7640 to your computer and use it in GitHub Desktop.
Use gemini-2.0-flash-exp for image editing

gemini-2.0-flash-exp for image editing via API

Notes and Requirements

  1. API Key: Replace 'YOUR_API_KEY' with your actual Google AI API key. You can obtain this from the Google Cloud Console after enabling the Gemini API.

  2. Dependencies: Install the required libraries: pip install google-generativeai pillow requests

  • google-generativeai: The official Python library for the Google Gemini API.
  • Pillow: For image handling.
  • requests: For HTTP requests (though the Generative AI library handles most of this internally).
  3. Input Image: Replace "elephant_image.png" with the actual path to an image file (e.g., the elephant image shown in the thread). Ensure the image format matches the mime_type (e.g., image/png or image/jpeg).

  4. Model: The code uses gemini-2.0-flash-exp, the experimental version mentioned in the thread, which supports image generation and editing.

  5. response_modalities: This is included as per the thread's discovery, ensuring the API returns an image along with text if needed.

  6. Error Handling: The code includes basic error handling to manage API issues or missing data in the response.

import base64
import io

import google.generativeai as genai
import requests
from google.generativeai import GenerativeModel
from PIL import Image

# Configure the Google Generative AI API key.
# Replace 'YOUR_API_KEY' with your actual Google AI API key. Avoid committing a
# real key to version control.
# NOTE: `genai` is bound to the package itself (`import ... as genai`); the
# package is not expected to export a member called `genai`, so importing it
# with `from google.generativeai import genai` would fail at import time.
genai.configure(api_key='YOUR_API_KEY')
def load_image_to_base64(image_path) -> str:
    """Read a file in binary mode and return its contents base64-encoded.

    Args:
        image_path: Path of the file to read; passed straight to ``open``.

    Returns:
        The file's bytes encoded with standard base64, as a ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_path, 'rb') as image_file:
        # b64encode yields ASCII-only bytes, so decoding as UTF-8 cannot fail.
        return base64.b64encode(image_file.read()).decode('utf-8')
def save_base64_to_image(base64_string, output_path):
    """Decode a base64-encoded image and write it to ``output_path`` as PNG.

    Args:
        base64_string: Base64 text (or bytes) holding an encoded image in any
            format Pillow can open.
        output_path: Destination path. The file is written in PNG format
            because the format is passed explicitly to ``save``.

    Raises:
        binascii.Error: If ``base64_string`` is not valid base64.
        OSError: If Pillow cannot identify the decoded bytes as an image or
            the destination cannot be written.
    """
    img_data = base64.b64decode(base64_string)
    # Use the image as a context manager so Pillow releases its resources
    # even if saving fails.
    with Image.open(io.BytesIO(img_data)) as img:
        img.save(output_path, 'PNG')
def _field(container, name):
    """Read ``name`` from a dict-style or attribute-style object (None if absent)."""
    if isinstance(container, dict):
        return container.get(name)
    return getattr(container, name, None)


def _guess_image_mime_type(image_path):
    """Guess an image MIME type from the file extension, defaulting to PNG."""
    import mimetypes  # local import keeps this block self-contained

    guessed, _ = mimetypes.guess_type(str(image_path))
    if guessed and guessed.startswith('image/'):
        return guessed
    return 'image/png'


def _first_inline_image(response):
    """Return the first non-empty ``inline_data.data`` payload, or None."""
    for candidate in _field(response, 'candidates') or ():
        content = _field(candidate, 'content')
        for part in _field(content, 'parts') or ():
            data = _field(_field(part, 'inline_data'), 'data')
            # Text-only parts expose no (or an empty) inline payload; skip them.
            if data:
                return data
    return None


def generate_image_with_gemini(input_image_path, prompt, output_image_path):
    """
    Use Gemini 2.0 Flash to generate or edit an image based on a prompt.

    The input image and the prompt are sent in a single request; the first
    inline image found in the response is saved to ``output_image_path``.
    Failures are reported on stdout rather than raised (best-effort script
    behaviour).

    Args:
        input_image_path (str): Path to the input image file
        prompt (str): Text prompt for image generation/editing
        output_image_path (str): Path where the output image will be saved

    Returns:
        None. Progress and errors are printed.
    """
    try:
        # Load the input image and convert to base64
        image_data = load_image_to_base64(input_image_path)
        # Initialize the Gemini 2.0 Flash experimental model
        model = GenerativeModel('gemini-2.0-flash-exp')
        # Prepare the request content with the image and text prompt
        contents = [
            {
                "parts": [
                    {
                        "inline_data": {
                            # Derived from the file extension (PNG when unknown)
                            "mime_type": _guess_image_mime_type(input_image_path),
                            "data": image_data,
                        }
                    },
                    {"text": prompt},
                ]
            }
        ]
        generation_config = {
            "temperature": 1.0,
            "top_p": 0.95,
            "top_k": 40,
            "max_output_tokens": 8192,
            # "response_mime_type": "image/png" was dropped: the Gemini API
            # reference lists only text/JSON/enum values for that field.
            # Image output is requested through response_modalities, which is
            # a generation-config field rather than a generate_content keyword.
            # NOTE(review): needs an SDK/proto version that knows this field;
            # confirm against the installed google-generativeai release.
            "response_modalities": ["image", "text"],
        }
        # generate_content takes contents positionally plus named options, so
        # the request is no longer splatted as arbitrary keyword arguments.
        response = model.generate_content(
            contents, generation_config=generation_config
        )
        # Extract the image data from the response
        payload = _first_inline_image(response)
        if payload is None:
            print("No image data found in the response.")
            return
        if isinstance(payload, (bytes, bytearray)):
            # Presumably the SDK hands back raw image bytes here; re-encode so
            # the shared base64 save helper can be used for both shapes.
            payload = base64.b64encode(payload).decode('ascii')
        save_base64_to_image(payload, output_image_path)
        print(f"Image saved to {output_image_path}")
    except Exception as e:
        # Deliberate catch-all at the script boundary: report and carry on.
        print(f"Error generating image: {str(e)}")
# Example usage: runs only when the file is executed as a script.
if __name__ == "__main__":
    # Paths to your input and output images
    input_image = "elephant_image.png"  # Replace with the path to your input image (e.g., the elephant image)
    output_image = "bulldozer_image.png"  # Where the edited image will be saved
    # Prompt to edit the image (based on the thread example)
    prompt = "change elephant to a bulldozer"
    # Generate the image; success or failure is reported on stdout
    generate_image_with_gemini(input_image, prompt, output_image)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment