tori29umai0123 · July 16, 2025 09:45
diff --git a/DepthPop b/DepthPop
 import tempfile
 import gradio as gr
 import numpy as np
 import torch
 from PIL import Image
 import trimesh
 from huggingface_hub import hf_hub_download
 from depth_anything_v2.dpt import DepthAnythingV2
 from pygltflib import GLTF2, Node, Camera, Perspective, Scene

 # Custom stylesheet handed to gr.Blocks(css=css) below. It caps the height of
 # the elements with the listed ids; none of the components visible in this
 # file assigns these ids, so the rules may currently match nothing.
 css = """
 #img-display-container {
    max-height: 100vh;
 }
 #img-display-input {
    max-height: 80vh;
 }
 #img-display-output {
    max-height: 80vh;
 }
 #download {
    height: 62px;
 }
 """
 # Run inference on the GPU when CUDA is available, otherwise on the CPU.
 DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

 # Keyword arguments for the DepthAnythingV2 constructor, keyed by encoder id.
 model_configs = {
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
 }

 # Encoder id -> size name used in the Hugging Face repository id below.
 encoder2name = {
    'vits': 'Small',
    'vitb': 'Base',
    'vitl': 'Large',
    'vitg': 'Giant', # we are undergoing company review procedures to release our giant model checkpoint
 }

 # Encoder variant loaded at import time.
 encoder = 'vitl'
 model_name = encoder2name[encoder]
 model = DepthAnythingV2(**model_configs[encoder])

 # Download (or reuse the cached copy of) the checkpoint from the Hugging Face Hub.
 filepath = hf_hub_download(repo_id=f"depth-anything/Depth-Anything-V2-{model_name}", filename=f"depth_anything_v2_{encoder}.pth", repo_type="model")

 # The checkpoint is a downloaded pickle file. weights_only=True restricts
 # unpickling to tensors and plain containers, so a tampered file cannot run
 # arbitrary code while it is being loaded.
 state_dict = torch.load(filepath, map_location="cpu", weights_only=True)
 model.load_state_dict(state_dict)

 # Move the weights to the inference device and switch to evaluation mode.
 model = model.to(DEVICE).eval()

 # Markdown heading rendered first in the page (passed to gr.Markdown below).
 title = "# Depth-Anything-V2-DepthPop"
 # Markdown description rendered under the title. The Japanese text reads:
 # "With this tool you can turn photos and illustrations into a pop-up
 # picture-book style."
 description = """
 このツールを使用すると、写真やイラストを飛び出す絵本風にすることができます。
 """
 def predict_depth(image):
    """Run the module-level ``model`` on ``image`` and return its result.

    Args:
        image: Input forwarded unchanged to ``model.infer_image``.

    Returns:
        Whatever ``model.infer_image`` returns for ``image`` (the predicted
        depth).
    """
    depth_map = model.infer_image(image)
    return depth_map

 def embed_insider_camera(glb_path):
    """Make a GLB's materials unlit and add a perspective camera at the origin.

    The file at ``glb_path`` is loaded, modified and written back to the same
    path:

    * ``KHR_materials_unlit`` is declared in both ``extensionsUsed`` and
      ``extensionsRequired``;
    * every material becomes double-sided, receives the unlit extension and
      an emissive factor of ``[1.0, 1.0, 1.0]``;
    * a camera node named ``InsideCamNode`` with translation ``[0, 0, 0]`` is
      appended to the default scene.

    Args:
        glb_path: Path of the GLB file to modify in place.

    Returns:
        ``glb_path``, unchanged.
    """
    gltf = GLTF2().load(glb_path)
    unlit = "KHR_materials_unlit"

    # (0) Declare the unlit extension at the root of the document.
    for attr in ("extensionsUsed", "extensionsRequired"):
        declared = getattr(gltf, attr)
        if declared is None:
            declared = []
            setattr(gltf, attr, declared)
        if unlit not in declared:
            declared.append(unlit)

    # (1) Make the existing materials unlit, double-sided and emissive.
    for mat in gltf.materials or []:
        mat.doubleSided = True
        # Unlit: shading does not depend on light sources.
        if mat.extensions is None:
            mat.extensions = {}
        mat.extensions[unlit] = {}
        # Full emissive so the surface is always displayed bright.
        mat.emissiveFactor = [1.0, 1.0, 1.0]

    # (2) Add the camera and a node that references it.
    cam = Camera(
        name="InsideCam",
        type="perspective",
        perspective=Perspective(yfov=np.pi/3, znear=0.01, zfar=1000.0)
    )
    gltf.cameras.append(cam)
    cam_idx = len(gltf.cameras) - 1

    node = Node(name="InsideCamNode", camera=cam_idx, translation=[0, 0, 0])
    gltf.nodes.append(node)
    node_idx = len(gltf.nodes) - 1

    if gltf.scene is None:
        # No default scene is selected. Create one only when the file has no
        # scenes at all; otherwise adopt the first existing scene instead of
        # throwing the existing scenes (and their node lists) away.
        if not gltf.scenes:
            gltf.scenes = [Scene(nodes=[])]
        gltf.scene = 0
    scene = gltf.scenes[gltf.scene]
    if scene.nodes is None:
        scene.nodes = []
    scene.nodes.append(node_idx)

    gltf.save(glb_path)
    return glb_path


 def generate_sphere_from_panorama(color_img, resolution, depth_strength=0.5, sphere_radius=1.0, segments=128):
    """Build a depth-displaced, textured sphere GLB from a panorama image.

    Depth is predicted with the module-level ``model``. An inward-facing UV
    sphere is created, each vertex is moved along its radial direction
    according to the depth sampled at its texture coordinate, the image is
    applied as a texture, and the mesh is exported to a temporary ``.glb``
    file that is then post-processed by ``embed_insider_camera``.

    Args:
        color_img: Image array indexed as ``[row, column, channel]``. The
            channel axis is reversed before inference (presumably RGB -> BGR
            for the model; confirm against ``infer_image``).
        resolution: Height in pixels of the resized colour and depth maps;
            the width follows the input aspect ratio.
        depth_strength: Multiplier for the displacement amplitude.
        sphere_radius: Radius of the base sphere.
        segments: Used for both entries of ``count`` passed to
            ``trimesh.creation.uv_sphere``.

    Returns:
        Path of the generated ``.glb`` file. The file is not deleted here.

    Raises:
        ValueError: If ``color_img`` is ``None``.
    """
    if color_img is None:
        raise ValueError("color_img is None: a panorama image is required")

    # 1. Depth prediction on the channel-reversed image.
    depth = model.infer_image(color_img[:, :, ::-1])

    # Resize colour and depth to `resolution` rows, keeping the aspect ratio.
    h, w = color_img.shape[:2]
    new_h = int(resolution)  # PIL needs integer sizes; callers may pass a float
    new_w = max(1, int(w * (new_h / h)))  # never let the width collapse to 0
    color_r = np.array(Image.fromarray(color_img).resize((new_w, new_h), Image.LANCZOS))
    depth_r = np.array(Image.fromarray(depth).resize((new_w, new_h), Image.LANCZOS))

    # 2. UV sphere, with face winding flipped so it is seen from the inside.
    mesh = trimesh.creation.uv_sphere(radius=sphere_radius, count=[segments, segments])
    mesh.invert()

    # 3. Per-vertex texture coordinates from spherical angles.
    verts = mesh.vertices  # (N, 3)
    x, y, z = verts[:, 0], verts[:, 1], verts[:, 2]
    lon = np.arctan2(-x, z)  # [-pi, pi]
    # Clip before arcsin: rounding can push |y / r| marginally above 1,
    # which would produce NaN coordinates.
    lat = np.arcsin(np.clip(y / sphere_radius, -1.0, 1.0))  # [-pi/2, pi/2]
    u = lon / (2 * np.pi) + 0.5
    v = lat / np.pi + 0.5
    # Pixel coordinates; image rows run top-down, hence 1 - v.
    px = np.clip((u * (new_w - 1)).astype(int), 0, new_w - 1)
    py = np.clip(((1 - v) * (new_h - 1)).astype(int), 0, new_h - 1)
    depth_samples = depth_r[py, px]  # (N,)

    # 4. Standardise depth around its median and turn it into a radial offset.
    depth_median = np.median(depth_samples)
    depth_std = np.std(depth_samples)
    if depth_std > 0:
        normalized_depth = (depth_samples - depth_median) / depth_std
    else:
        # Constant depth: no displacement at all.
        normalized_depth = np.zeros_like(depth_samples)

    # Cap the offset at 10% of the radius so the basic sphere shape survives.
    max_displacement = sphere_radius * 0.1
    displacement_scale = max_displacement * depth_strength * 0.1
    displacement = np.clip(normalized_depth * displacement_scale,
                           -max_displacement, max_displacement)

    # Move each vertex along its radial direction; the subtraction pulls
    # positive displacements towards the centre of the sphere.
    directions = verts / np.linalg.norm(verts, axis=1)[:, None]
    mesh.vertices = verts - directions * displacement[:, None]

    # 5. Apply the resized image as the texture.
    mesh.visual = trimesh.visual.texture.TextureVisuals(
        uv=np.column_stack((u, v)),
        image=Image.fromarray(color_r)
    )

    # 6. Export, then embed the camera node.
    # mktemp() is deprecated: the name it returns can be claimed by another
    # process before the file is written. NamedTemporaryFile creates the file
    # atomically; delete=False keeps it on disk for the caller, and leaving
    # the `with` block closes the handle before the exporter reopens the path.
    with tempfile.NamedTemporaryFile(suffix='.glb', delete=False) as tmp:
        out = tmp.name
    mesh.export(out)
    return embed_insider_camera(out)

 with gr.Blocks(css=css) as demo:
    # Page header: title, description and section heading.
    gr.Markdown(title)
    gr.Markdown(description)
    gr.Markdown("### パノラマ→球体変形 & 3D表示")

    # Panorama input, delivered to the callback as a NumPy array.
    with gr.Row():
        input_image = gr.Image(type='numpy', label="パノラマ入力")

    # Controls: map height in pixels (512-2048) and relief strength (5.0-20.0).
    with gr.Row():
        resolution = gr.Slider(minimum=512, maximum=2048, value=1024, step=1, label="マップ解像度")
        strength = gr.Slider(minimum=5.0, maximum=20.0, value=10.0, step=0.1, label="凸凹強度")
    submit = gr.Button("3D球体を生成")

    # 3D viewer for the result; the camera radius is 0 so the view starts at
    # the centre of the sphere (alpha and beta are left unset).
    output_3d = gr.Model3D(
        label="変形球体",
        clear_color=[0, 0, 0, 0],
        camera_position=(None, None, 0),
    )

    # The slider values map to the `resolution` and `depth_strength` arguments.
    submit.click(
        fn=generate_sphere_from_panorama,
        inputs=[input_image, resolution, strength],
        outputs=output_3d,
    )

 if __name__ == '__main__':
    # Enable the request queue and start the app without a public share link.
    demo.queue().launch(share=False)
	import tempfile
	import gradio as gr
	import numpy as np
	import torch
	from PIL import Image
	import trimesh
	from huggingface_hub import hf_hub_download
	from depth_anything_v2.dpt import DepthAnythingV2
	from pygltflib import GLTF2, Node, Camera, Perspective, Scene

	# Custom stylesheet handed to gr.Blocks(css=css) below. It caps the height of
	# the elements with the listed ids; none of the components visible in this
	# file assigns these ids, so the rules may currently match nothing.
	css = """
	#img-display-container {
	max-height: 100vh;
	}
	#img-display-input {
	max-height: 80vh;
	}
	#img-display-output {
	max-height: 80vh;
	}
	#download {
	height: 62px;
	}
	"""
	# Run inference on the GPU when CUDA is available, otherwise on the CPU.
	DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

	# Keyword arguments for the DepthAnythingV2 constructor, keyed by encoder id.
	model_configs = {
	    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
	    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
	    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
	    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
	}

	# Encoder id -> size name used in the Hugging Face repository id below.
	encoder2name = {
	    'vits': 'Small',
	    'vitb': 'Base',
	    'vitl': 'Large',
	    'vitg': 'Giant', # we are undergoing company review procedures to release our giant model checkpoint
	}

	# Encoder variant loaded at import time.
	encoder = 'vitl'
	model_name = encoder2name[encoder]
	model = DepthAnythingV2(**model_configs[encoder])

	# Download (or reuse the cached copy of) the checkpoint from the Hugging Face Hub.
	filepath = hf_hub_download(repo_id=f"depth-anything/Depth-Anything-V2-{model_name}", filename=f"depth_anything_v2_{encoder}.pth", repo_type="model")

	# The checkpoint is a downloaded pickle file. weights_only=True restricts
	# unpickling to tensors and plain containers, so a tampered file cannot run
	# arbitrary code while it is being loaded.
	state_dict = torch.load(filepath, map_location="cpu", weights_only=True)
	model.load_state_dict(state_dict)

	# Move the weights to the inference device and switch to evaluation mode.
	model = model.to(DEVICE).eval()

	# Markdown heading rendered first in the page (passed to gr.Markdown below).
	title = "# Depth-Anything-V2-DepthPop"
	# Markdown description rendered under the title. The Japanese text reads:
	# "With this tool you can turn photos and illustrations into a pop-up
	# picture-book style."
	description = """
	このツールを使用すると、写真やイラストを飛び出す絵本風にすることができます。
	"""
	def predict_depth(image):
	    """Run the module-level ``model`` on ``image`` and return its result.

	    Args:
	        image: Input forwarded unchanged to ``model.infer_image``.

	    Returns:
	        Whatever ``model.infer_image`` returns for ``image`` (the predicted
	        depth).
	    """
	    return model.infer_image(image)

	def embed_insider_camera(glb_path):
	    """Make a GLB's materials unlit and add a perspective camera at the origin.

	    The file at ``glb_path`` is loaded, modified and written back to the same
	    path:

	    * ``KHR_materials_unlit`` is declared in both ``extensionsUsed`` and
	      ``extensionsRequired``;
	    * every material becomes double-sided, receives the unlit extension and
	      an emissive factor of ``[1.0, 1.0, 1.0]``;
	    * a camera node named ``InsideCamNode`` with translation ``[0, 0, 0]`` is
	      appended to the default scene.

	    Args:
	        glb_path: Path of the GLB file to modify in place.

	    Returns:
	        ``glb_path``, unchanged.
	    """
	    gltf = GLTF2().load(glb_path)
	    unlit = "KHR_materials_unlit"

	    # (0) Declare the unlit extension at the root of the document.
	    for attr in ("extensionsUsed", "extensionsRequired"):
	        declared = getattr(gltf, attr)
	        if declared is None:
	            declared = []
	            setattr(gltf, attr, declared)
	        if unlit not in declared:
	            declared.append(unlit)

	    # (1) Make the existing materials unlit, double-sided and emissive.
	    for mat in gltf.materials or []:
	        mat.doubleSided = True
	        # Unlit: shading does not depend on light sources.
	        if mat.extensions is None:
	            mat.extensions = {}
	        mat.extensions[unlit] = {}
	        # Full emissive so the surface is always displayed bright.
	        mat.emissiveFactor = [1.0, 1.0, 1.0]

	    # (2) Add the camera and a node that references it.
	    cam = Camera(
	        name="InsideCam",
	        type="perspective",
	        perspective=Perspective(yfov=np.pi/3, znear=0.01, zfar=1000.0)
	    )
	    gltf.cameras.append(cam)
	    cam_idx = len(gltf.cameras) - 1

	    node = Node(name="InsideCamNode", camera=cam_idx, translation=[0, 0, 0])
	    gltf.nodes.append(node)
	    node_idx = len(gltf.nodes) - 1

	    if gltf.scene is None:
	        # No default scene is selected. Create one only when the file has no
	        # scenes at all; otherwise adopt the first existing scene instead of
	        # throwing the existing scenes (and their node lists) away.
	        if not gltf.scenes:
	            gltf.scenes = [Scene(nodes=[])]
	        gltf.scene = 0
	    scene = gltf.scenes[gltf.scene]
	    if scene.nodes is None:
	        scene.nodes = []
	    scene.nodes.append(node_idx)

	    gltf.save(glb_path)
	    return glb_path


	def generate_sphere_from_panorama(color_img, resolution, depth_strength=0.5, sphere_radius=1.0, segments=128):
	    """Build a depth-displaced, textured sphere GLB from a panorama image.

	    Depth is predicted with the module-level ``model``. An inward-facing UV
	    sphere is created, each vertex is moved along its radial direction
	    according to the depth sampled at its texture coordinate, the image is
	    applied as a texture, and the mesh is exported to a temporary ``.glb``
	    file that is then post-processed by ``embed_insider_camera``.

	    Args:
	        color_img: Image array indexed as ``[row, column, channel]``. The
	            channel axis is reversed before inference (presumably RGB -> BGR
	            for the model; confirm against ``infer_image``).
	        resolution: Height in pixels of the resized colour and depth maps;
	            the width follows the input aspect ratio.
	        depth_strength: Multiplier for the displacement amplitude.
	        sphere_radius: Radius of the base sphere.
	        segments: Used for both entries of ``count`` passed to
	            ``trimesh.creation.uv_sphere``.

	    Returns:
	        Path of the generated ``.glb`` file. The file is not deleted here.

	    Raises:
	        ValueError: If ``color_img`` is ``None``.
	    """
	    if color_img is None:
	        raise ValueError("color_img is None: a panorama image is required")

	    # 1. Depth prediction on the channel-reversed image.
	    depth = model.infer_image(color_img[:, :, ::-1])

	    # Resize colour and depth to `resolution` rows, keeping the aspect ratio.
	    h, w = color_img.shape[:2]
	    new_h = int(resolution)  # PIL needs integer sizes; callers may pass a float
	    new_w = max(1, int(w * (new_h / h)))  # never let the width collapse to 0
	    color_r = np.array(Image.fromarray(color_img).resize((new_w, new_h), Image.LANCZOS))
	    depth_r = np.array(Image.fromarray(depth).resize((new_w, new_h), Image.LANCZOS))

	    # 2. UV sphere, with face winding flipped so it is seen from the inside.
	    mesh = trimesh.creation.uv_sphere(radius=sphere_radius, count=[segments, segments])
	    mesh.invert()

	    # 3. Per-vertex texture coordinates from spherical angles.
	    verts = mesh.vertices  # (N, 3)
	    x, y, z = verts[:, 0], verts[:, 1], verts[:, 2]
	    lon = np.arctan2(-x, z)  # [-pi, pi]
	    # Clip before arcsin: rounding can push |y / r| marginally above 1,
	    # which would produce NaN coordinates.
	    lat = np.arcsin(np.clip(y / sphere_radius, -1.0, 1.0))  # [-pi/2, pi/2]
	    u = lon / (2 * np.pi) + 0.5
	    v = lat / np.pi + 0.5
	    # Pixel coordinates; image rows run top-down, hence 1 - v.
	    px = np.clip((u * (new_w - 1)).astype(int), 0, new_w - 1)
	    py = np.clip(((1 - v) * (new_h - 1)).astype(int), 0, new_h - 1)
	    depth_samples = depth_r[py, px]  # (N,)

	    # 4. Standardise depth around its median and turn it into a radial offset.
	    depth_median = np.median(depth_samples)
	    depth_std = np.std(depth_samples)
	    if depth_std > 0:
	        normalized_depth = (depth_samples - depth_median) / depth_std
	    else:
	        # Constant depth: no displacement at all.
	        normalized_depth = np.zeros_like(depth_samples)

	    # Cap the offset at 10% of the radius so the basic sphere shape survives.
	    max_displacement = sphere_radius * 0.1
	    displacement_scale = max_displacement * depth_strength * 0.1
	    displacement = np.clip(normalized_depth * displacement_scale,
	                           -max_displacement, max_displacement)

	    # Move each vertex along its radial direction; the subtraction pulls
	    # positive displacements towards the centre of the sphere.
	    directions = verts / np.linalg.norm(verts, axis=1)[:, None]
	    mesh.vertices = verts - directions * displacement[:, None]

	    # 5. Apply the resized image as the texture.
	    mesh.visual = trimesh.visual.texture.TextureVisuals(
	        uv=np.column_stack((u, v)),
	        image=Image.fromarray(color_r)
	    )

	    # 6. Export, then embed the camera node.
	    # mktemp() is deprecated: the name it returns can be claimed by another
	    # process before the file is written. NamedTemporaryFile creates the file
	    # atomically; delete=False keeps it on disk for the caller, and leaving
	    # the `with` block closes the handle before the exporter reopens the path.
	    with tempfile.NamedTemporaryFile(suffix='.glb', delete=False) as tmp:
	        out = tmp.name
	    mesh.export(out)
	    return embed_insider_camera(out)

	with gr.Blocks(css=css) as demo:
	    # Page header: title, description and section heading.
	    gr.Markdown(title)
	    gr.Markdown(description)
	    gr.Markdown("### パノラマ→球体変形 & 3D表示")

	    # Panorama input, delivered to the callback as a NumPy array.
	    with gr.Row():
	        input_image = gr.Image(label="パノラマ入力", type='numpy')

	    # Controls: map height in pixels (512-2048) and relief strength (5.0-20.0).
	    with gr.Row():
	        resolution = gr.Slider(512, 2048, value=1024, step=1, label="マップ解像度")
	        strength = gr.Slider(5.0, 20.0, value=10.0, step=0.1, label="凸凹強度")
	    submit = gr.Button("3D球体を生成")

	    # 3D viewer for the result; the camera radius is 0 so the view starts at
	    # the centre of the sphere (alpha and beta are left unset).
	    output_3d = gr.Model3D(
	        label="変形球体",
	        clear_color=[0,0,0,0],
	        camera_position=(None, None, 0)
	    )

	    # The slider values map to the `resolution` and `depth_strength` arguments.
	    submit.click(fn=generate_sphere_from_panorama,
	                 inputs=[input_image, resolution, strength],
	                 outputs=output_3d)

	if __name__ == '__main__':
	    # Enable the request queue and start the app without a public share link.
	    demo.queue().launch(share=False)