import base64
import io

import google.generativeai as genai
from PIL import Image
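
# Requires the google-generativeai and pillow packages (assumption: installed
# with `pip install google-generativeai pillow`).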

# Configure the Google Generative AI API key.
# Replace 'YOUR_API_KEY' with your actual Google AI API key.
genai.configure(api_key='YOUR_API_KEY')
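# Assumption: if no key is passed here, the SDK falls back to the
# GOOGLE_API_KEY environment variable, so genai.configure() alone also works.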


def load_image_to_base64(image_path):
    """Load an image file and return its contents as a base64-encoded string."""
    with open(image_path, 'rb') as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
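
# Note: base64 is the wire format the Gemini REST API expects for inline image
# data; the Python SDK's inline_data field accepts it as well (assumption).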


def save_base64_to_image(base64_string, output_path):
    """Decode a base64-encoded image string and save it to a file as PNG."""
    img_data = base64.b64decode(base64_string)
    img = Image.open(io.BytesIO(img_data))
    img.save(output_path, 'PNG')
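
# Note: decoding through PIL normalizes the saved file to PNG even if the
# model returned a different format (e.g., JPEG).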


def generate_image_with_gemini(input_image_path, prompt, output_image_path):
    """
    Use Gemini 2.0 Flash to generate or edit an image based on a prompt.

    Args:
        input_image_path (str): Path to the input image file.
        prompt (str): Text prompt for image generation/editing.
        output_image_path (str): Path where the output image will be saved.
    """
    try:
        # Load the input image and convert it to base64.
        image_data = load_image_to_base64(input_image_path)

        # Initialize the Gemini 2.0 Flash experimental model.
        model = genai.GenerativeModel('gemini-2.0-flash-exp')

        # Build the request content: the input image followed by the text prompt.
        contents = [
            {
                "parts": [
                    {
                        "inline_data": {
                            # Adjust mime_type to match your image format
                            # (e.g., 'image/jpeg').
                            "mime_type": "image/png",
                            "data": image_data,
                        }
                    },
                    {"text": prompt},
                ]
            }
        ]

        # Generate content using the model.
        response = model.generate_content(
            contents,
            generation_config={
                "temperature": 1.0,
                "top_p": 0.95,
                "top_k": 40,
                "max_output_tokens": 8192,
                # Critical for image output, as noted in the thread. Note that
                # response_mime_type cannot request images; response_modalities
                # is the mechanism, and it needs an SDK/API version with
                # image-output support (assumption).
                "response_modalities": ["TEXT", "IMAGE"],
            },
        )

        # Extract the image data from the response. Response parts are
        # objects, not dicts, so use attribute access.
        if getattr(response, 'candidates', None):
            for candidate in response.candidates:
                if candidate.content and candidate.content.parts:
                    for part in candidate.content.parts:
                        inline = getattr(part, 'inline_data', None)
                        if inline and inline.data:
                            # The SDK returns raw bytes; REST responses carry
                            # base64 strings, so handle both.
                            if isinstance(inline.data, bytes):
                                Image.open(io.BytesIO(inline.data)).save(
                                    output_image_path, 'PNG')
                            else:
                                save_base64_to_image(inline.data,
                                                     output_image_path)
                            print(f"Image saved to {output_image_path}")
                            return

        print("No image data found in the response.")

    except Exception as e:
        print(f"Error generating image: {e}")


# Example usage
if __name__ == "__main__":
    # Paths to your input and output images.
    input_image = "elephant_image.png"    # Replace with the path to your input image (e.g., the elephant image)
    output_image = "bulldozer_image.png"  # Where the edited image will be saved

    # Prompt to edit the image (based on the thread example).
    prompt = "change elephant to a bulldozer"

    # Generate the image.
    generate_image_with_gemini(input_image, prompt, output_image)