ksasao · December 2, 2024 23:20
diff --git a/fujisan.py b/fujisan.py
 # Description: このスクリプトは、YouTubeの動画を再生して、その中に富士山が見えるかどうかを判定します。
 # あらかじめchromeをインストールしてください。chromedriver は webdriver_manager
 # (ChromeDriverManager().install()) が取得します。手動で入手する場合は、
 # chromeのバージョンに合ったものを以下からダウンロードできます。
 # https://googlechromelabs.github.io/chrome-for-testing/
 # 環境変数 OPENAI_API_KEY に OpenAI の APIキーを設定してください。
 from selenium import webdriver
 from selenium.webdriver.common.by import By
 from selenium.webdriver.chrome.service import Service as ChromeService
 from webdriver_manager.chrome import ChromeDriverManager
 from PIL import Image
 import time
 import io
 from datetime import datetime
 from PIL import Image 
 from PIL.ExifTags import TAGS, GPSTAGS 
 import piexif

 from openai import OpenAI
 import base64
 import requests

 import os

 # Read the OpenAI API key from the environment (None when the variable is unset).
 openai_api_key = os.environ.get("OPENAI_API_KEY")

 def encode(s):
     """Encode text as a NUL-terminated UTF-16LE sequence of byte values.

     Returns a tuple with one integer (0-255) per byte of ``s`` encoded as
     UTF-16 little-endian, followed by a two-byte zero terminator.
     """
     # Iterating a bytes object already yields ints.
     terminated = s.encode('utf-16-le') + b'\x00\x00'
     return tuple(terminated)

 def add_exif_comment(image_path, output_path, comment):
     """Save a copy of an image with ``comment`` stored in its EXIF data.

     Args:
         image_path: Path of the image to read.
         output_path: Path the annotated copy is written to. A missing
             parent directory is created.
         comment: Text written to the XPComment tag of the 0th IFD.
     """
     # Build a new EXIF block that holds only the comment.
     exif_dict = {"0th": {piexif.ImageIFD.XPComment: encode(comment)}}
     exif_bytes = piexif.dump(exif_dict)

     # os.path.dirname() returns "" for a bare file name and os.makedirs("")
     # raises FileNotFoundError, so only create a directory when there is one.
     output_dir = os.path.dirname(output_path)
     if output_dir:
         os.makedirs(output_dir, exist_ok=True)

     # The context manager closes the source file once the copy is written.
     with Image.open(image_path) as img:
         img.save(output_path, exif=exif_bytes)

    
 def check_mountain_in_image(image_path):
     """Ask the OpenAI chat model whether Mt. Fuji is visible in an image.

     Args:
         image_path: Path of the image file; its bytes are sent inline as a
             Base64 ``data:image/jpeg`` URL.

     Returns:
         The message content of the first choice in the response.
     """
     # Read the raw file bytes and Base64-encode them for the data URL.
     with open(image_path, 'rb') as image_file:
         raw_bytes = image_file.read()
     image_b64 = base64.b64encode(raw_bytes).decode('utf-8')

     client = OpenAI(api_key=openai_api_key)

     # One user message made of the (Japanese) question and the image.
     prompt_part = {"type": "text", "text": "富士山は見えますか？天候、霞の量、雲の状況を簡潔に答えて下さい。口調はくまのかわいいぬいぐるみっぽく、小学生でも親しみやすい感じでお願いします。"}
     image_part = {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}}
     user_message = {"role": "user", "content": [prompt_part, image_part]}

     response = client.chat.completions.create(
         model="gpt-4o-mini",
         messages=[user_message],
         max_tokens=300,
     )
     first_choice = response.choices[0]
     return first_choice.message.content

 # Set up the Chrome WebDriver; webdriver_manager supplies the driver binary.
 driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))

 try:
     # Open the YouTube video page.
     driver.get("https://www.youtube.com/watch?v=mY4Uem3g5Mg")
     time.sleep(5)  # wait for the page to load

     # Click the large play button to start playback.
     play_button = driver.find_element(By.CSS_SELECTOR, 'button.ytp-large-play-button')
     play_button.click()
     time.sleep(5)  # wait for the video to start playing

     # Locate the video player and read its position and size.
     video_element = driver.find_element(By.CSS_SELECTOR, 'video')
     location = video_element.location
     size = video_element.size

     # Debug: inspect the position and size.
     # print(f"Location: {location}, Size: {size}")

     # Element geometry is reported in CSS pixels, while the screenshot can be
     # larger on scaled/HiDPI displays. Scale the crop box by the page's
     # devicePixelRatio so it still lands on the player (ratio 1 is a no-op).
     pixel_ratio = driver.execute_script("return window.devicePixelRatio") or 1

     # Take a screenshot of the browser window and load it with PIL.
     screenshot = driver.get_screenshot_as_png()
     image = Image.open(io.BytesIO(screenshot))

     # Crop the region covered by the video player.
     left = int(round(location['x'] * pixel_ratio))
     top = int(round(location['y'] * pixel_ratio))
     right = int(round((location['x'] + size['width']) * pixel_ratio))
     bottom = int(round((location['y'] + size['height']) * pixel_ratio))
     video_screenshot = image.crop((left, top, right, bottom))

     # The screenshot is a PNG and may carry an alpha channel, which JPEG
     # cannot store; convert so that saving as .jpg does not fail.
     if video_screenshot.mode != "RGB":
         video_screenshot = video_screenshot.convert("RGB")

     # Debug: inspect the cropped region.
     # video_screenshot.show()

     # Save the cropped frame to a temporary file.
     temp_file = "temp.jpg"
     video_screenshot.save(temp_file)

     # Ask the LLM about the image and print its comment.
     result = check_mountain_in_image(temp_file)
     print(result)

     # Build the output file name from the current date and time.
     timestamp = datetime.now().strftime("%Y%m%d%H%M")
     file_name = f"images/{timestamp}.jpg"

     # Store the comment in the image's EXIF data and save the copy.
     add_exif_comment(temp_file, file_name, result)
     print(f"スクリーンショットを {file_name} に保存しました。")

     # Speak the result through TTSController (disabled).
     # url="http://localhost:1000/?text="+result
     # response=requests.get(url)
 finally:
     # Always close the browser, even when a step above fails.
     driver.quit()
	# Description: このスクリプトは、YouTubeの動画を再生して、その中に富士山が見えるかどうかを判定します。
	# あらかじめchromeをインストールしてください。chromedriver は webdriver_manager
	# (ChromeDriverManager().install()) が取得します。手動で入手する場合は、
	# chromeのバージョンに合ったものを以下からダウンロードできます。
	# https://googlechromelabs.github.io/chrome-for-testing/
	# 環境変数 OPENAI_API_KEY に OpenAI の APIキーを設定してください。
	from selenium import webdriver
	from selenium.webdriver.common.by import By
	from selenium.webdriver.chrome.service import Service as ChromeService
	from webdriver_manager.chrome import ChromeDriverManager
	from PIL import Image
	import time
	import io
	from datetime import datetime
	from PIL import Image
	from PIL.ExifTags import TAGS, GPSTAGS
	import piexif

	from openai import OpenAI
	import base64
	import requests

	import os

	# Read the OpenAI API key from the environment (None when the variable is unset).
	openai_api_key = os.environ.get("OPENAI_API_KEY")

	def encode(s):
	    """Encode text as a NUL-terminated UTF-16LE sequence of byte values.

	    Returns a tuple with one integer (0-255) per byte of ``s`` encoded as
	    UTF-16 little-endian, followed by a two-byte zero terminator.
	    """
	    # Iterating a bytes object already yields ints.
	    terminated = s.encode('utf-16-le') + b'\x00\x00'
	    return tuple(terminated)

	def add_exif_comment(image_path, output_path, comment):
	    """Save a copy of an image with ``comment`` stored in its EXIF data.

	    Args:
	        image_path: Path of the image to read.
	        output_path: Path the annotated copy is written to. A missing
	            parent directory is created.
	        comment: Text written to the XPComment tag of the 0th IFD.
	    """
	    # Build a new EXIF block that holds only the comment.
	    exif_dict = {"0th": {piexif.ImageIFD.XPComment: encode(comment)}}
	    exif_bytes = piexif.dump(exif_dict)

	    # os.path.dirname() returns "" for a bare file name and os.makedirs("")
	    # raises FileNotFoundError, so only create a directory when there is one.
	    output_dir = os.path.dirname(output_path)
	    if output_dir:
	        os.makedirs(output_dir, exist_ok=True)

	    # The context manager closes the source file once the copy is written.
	    with Image.open(image_path) as img:
	        img.save(output_path, exif=exif_bytes)


	def check_mountain_in_image(image_path):
	    """Ask the OpenAI chat model whether Mt. Fuji is visible in an image.

	    Args:
	        image_path: Path of the image file; its bytes are sent inline as a
	            Base64 ``data:image/jpeg`` URL.

	    Returns:
	        The message content of the first choice in the response.
	    """
	    # Read the raw file bytes and Base64-encode them for the data URL.
	    with open(image_path, 'rb') as image_file:
	        image = image_file.read()
	    encoded_image = base64.b64encode(image).decode('utf-8')

	    client = OpenAI(api_key=openai_api_key)

	    # Send one user message made of the (Japanese) question and the image.
	    response = client.chat.completions.create(
	        model="gpt-4o-mini",
	        messages=[
	            {
	                "role": "user",
	                "content": [
	                    {"type": "text", "text": "富士山は見えますか？天候、霞の量、雲の状況を簡潔に答えて下さい。口調はくまのかわいいぬいぐるみっぽく、小学生でも親しみやすい感じでお願いします。"},
	                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}},
	                ],
	            }
	        ],
	        max_tokens=300,
	    )
	    return response.choices[0].message.content

	# Set up the Chrome WebDriver; webdriver_manager supplies the driver binary.
	driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))

	try:
	    # Open the YouTube video page.
	    driver.get("https://www.youtube.com/watch?v=mY4Uem3g5Mg")
	    time.sleep(5)  # wait for the page to load

	    # Click the large play button to start playback.
	    play_button = driver.find_element(By.CSS_SELECTOR, 'button.ytp-large-play-button')
	    play_button.click()
	    time.sleep(5)  # wait for the video to start playing

	    # Locate the video player and read its position and size.
	    video_element = driver.find_element(By.CSS_SELECTOR, 'video')
	    location = video_element.location
	    size = video_element.size

	    # Debug: inspect the position and size.
	    # print(f"Location: {location}, Size: {size}")

	    # Element geometry is reported in CSS pixels, while the screenshot can be
	    # larger on scaled/HiDPI displays. Scale the crop box by the page's
	    # devicePixelRatio so it still lands on the player (ratio 1 is a no-op).
	    pixel_ratio = driver.execute_script("return window.devicePixelRatio") or 1

	    # Take a screenshot of the browser window and load it with PIL.
	    screenshot = driver.get_screenshot_as_png()
	    image = Image.open(io.BytesIO(screenshot))

	    # Crop the region covered by the video player.
	    left = int(round(location['x'] * pixel_ratio))
	    top = int(round(location['y'] * pixel_ratio))
	    right = int(round((location['x'] + size['width']) * pixel_ratio))
	    bottom = int(round((location['y'] + size['height']) * pixel_ratio))
	    video_screenshot = image.crop((left, top, right, bottom))

	    # The screenshot is a PNG and may carry an alpha channel, which JPEG
	    # cannot store; convert so that saving as .jpg does not fail.
	    if video_screenshot.mode != "RGB":
	        video_screenshot = video_screenshot.convert("RGB")

	    # Debug: inspect the cropped region.
	    # video_screenshot.show()

	    # Save the cropped frame to a temporary file.
	    temp_file = "temp.jpg"
	    video_screenshot.save(temp_file)

	    # Ask the LLM about the image and print its comment.
	    result = check_mountain_in_image(temp_file)
	    print(result)

	    # Build the output file name from the current date and time.
	    timestamp = datetime.now().strftime("%Y%m%d%H%M")
	    file_name = f"images/{timestamp}.jpg"

	    # Store the comment in the image's EXIF data and save the copy.
	    add_exif_comment(temp_file, file_name, result)
	    print(f"スクリーンショットを {file_name} に保存しました。")

	    # Speak the result through TTSController (disabled).
	    # url="http://localhost:1000/?text="+result
	    # response=requests.get(url)
	finally:
	    # Always close the browser, even when a step above fails.
	    driver.quit()