import base64
import io

import google.generativeai as genai
from PIL import Image
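
# Requires the google-generativeai and pillow packages (assumption: installed
# with `pip install google-generativeai pillow`).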

# Configure the Google Generative AI API key.
# Replace 'YOUR_API_KEY' with your actual Google AI API key.
genai.configure(api_key='YOUR_API_KEY')
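# Assumption: if no key is passed here, the SDK falls back to the
# GOOGLE_API_KEY environment variable, so genai.configure() alone also works.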


def load_image_to_base64(image_path):
    """Load an image file and return its contents as a base64-encoded string."""
    with open(image_path, 'rb') as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
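
# Note: base64 is the wire format the Gemini REST API expects for inline image
# data; the Python SDK's inline_data field accepts it as well (assumption).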


def save_base64_to_image(base64_string, output_path):
    """Decode a base64-encoded image string and save it to a file as PNG."""
    img_data = base64.b64decode(base64_string)
    img = Image.open(io.BytesIO(img_data))
    img.save(output_path, 'PNG')
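
# Note: decoding through PIL normalizes the saved file to PNG even if the
# model returned a different format (e.g., JPEG).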


def generate_image_with_gemini(input_image_path, prompt, output_image_path):
    """
    Use Gemini 2.0 Flash to generate or edit an image based on a prompt.

    Args:
        input_image_path (str): Path to the input image file.
        prompt (str): Text prompt for image generation/editing.
        output_image_path (str): Path where the output image will be saved.
    """
    try:
        # Load the input image and convert it to base64.
        image_data = load_image_to_base64(input_image_path)

        # Initialize the Gemini 2.0 Flash experimental model.
        model = genai.GenerativeModel('gemini-2.0-flash-exp')

        # Build the request content: the input image followed by the text prompt.
        contents = [
            {
                "parts": [
                    {
                        "inline_data": {
                            # Adjust mime_type to match your image format
                            # (e.g., 'image/jpeg').
                            "mime_type": "image/png",
                            "data": image_data,
                        }
                    },
                    {"text": prompt},
                ]
            }
        ]

        # Generate content using the model.
        response = model.generate_content(
            contents,
            generation_config={
                "temperature": 1.0,
                "top_p": 0.95,
                "top_k": 40,
                "max_output_tokens": 8192,
                # Critical for image output, as noted in the thread. Note that
                # response_mime_type cannot request images; response_modalities
                # is the mechanism, and it needs an SDK/API version with
                # image-output support (assumption).
                "response_modalities": ["TEXT", "IMAGE"],
            },
        )

        # Extract the image data from the response. Response parts are
        # objects, not dicts, so use attribute access.
        if getattr(response, 'candidates', None):
            for candidate in response.candidates:
                if candidate.content and candidate.content.parts:
                    for part in candidate.content.parts:
                        inline = getattr(part, 'inline_data', None)
                        if inline and inline.data:
                            # The SDK returns raw bytes; REST responses carry
                            # base64 strings, so handle both.
                            if isinstance(inline.data, bytes):
                                Image.open(io.BytesIO(inline.data)).save(
                                    output_image_path, 'PNG')
                            else:
                                save_base64_to_image(inline.data,
                                                     output_image_path)
                            print(f"Image saved to {output_image_path}")
                            return

        print("No image data found in the response.")

    except Exception as e:
        print(f"Error generating image: {e}")


# Example usage
if __name__ == "__main__":
    # Paths to your input and output images.
    input_image = "elephant_image.png"    # Replace with the path to your input image (e.g., the elephant image)
    output_image = "bulldozer_image.png"  # Where the edited image will be saved

    # Prompt to edit the image (based on the thread example).
    prompt = "change elephant to a bulldozer"

    # Generate the image.
    generate_image_with_gemini(input_image, prompt, output_image)