Skip to content

Instantly share code, notes, and snippets.

@tori29umai0123
Created July 16, 2025 09:45
Show Gist options
  • Save tori29umai0123/c164a0ed7fedb04b51f8166141fbaf7a to your computer and use it in GitHub Desktop.
Save tori29umai0123/c164a0ed7fedb04b51f8166141fbaf7a to your computer and use it in GitHub Desktop.
import tempfile
import gradio as gr
import numpy as np
import torch
from PIL import Image
import trimesh
from huggingface_hub import hf_hub_download
from depth_anything_v2.dpt import DepthAnythingV2
from pygltflib import GLTF2, Node, Camera, Perspective, Scene
# Custom stylesheet handed to gr.Blocks(css=...) when the UI is built.
# It caps the height of the elements with ids "img-display-container",
# "img-display-input" and "img-display-output", and fixes the height of the
# element with id "download".
# NOTE(review): no component visible in this file sets these element ids --
# presumably carried over from another demo; confirm before removing.
css = """
#img-display-container {
max-height: 100vh;
}
#img-display-input {
max-height: 80vh;
}
#img-display-output {
max-height: 80vh;
}
#download {
height: 62px;
}
"""
# Run inference on the GPU when CUDA is available, otherwise on the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Keyword arguments for the DepthAnythingV2 constructor, keyed by encoder name.
model_configs = {
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]},
}

# Maps an encoder key to the size name used in the Hugging Face repo id below.
encoder2name = {
    'vits': 'Small',
    'vitb': 'Base',
    'vitl': 'Large',
    'vitg': 'Giant',  # we are undergoing company review procedures to release our giant model checkpoint
}

# Encoder variant used by this app.
encoder = 'vitl'
model_name = encoder2name[encoder]
model = DepthAnythingV2(**model_configs[encoder])

# Fetch the checkpoint file from the Hugging Face Hub.
filepath = hf_hub_download(
    repo_id=f"depth-anything/Depth-Anything-V2-{model_name}",
    filename=f"depth_anything_v2_{encoder}.pth",
    repo_type="model",
)

# torch.load unpickles the file. Because the checkpoint is downloaded,
# weights_only=True restricts unpickling to tensors and plain containers
# instead of allowing arbitrary pickled objects to be executed.
state_dict = torch.load(filepath, map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)

# Move the weights to the selected device and switch to inference mode.
model = model.to(DEVICE).eval()
# Markdown heading rendered at the top of the UI via gr.Markdown(title).
title = "# Depth-Anything-V2-DepthPop"
# Markdown blurb rendered under the title via gr.Markdown(description).
# English gloss: "With this tool you can turn photos and illustrations into
# a pop-up picture-book style."
description = """
このツールを使用すると、写真やイラストを飛び出す絵本風にすることができます。
"""
def predict_depth(image):
    """Return the depth prediction of the module-level model for ``image``.

    Thin wrapper that forwards ``image`` unchanged to ``model.infer_image``.
    """
    depth_map = model.infer_image(image)
    return depth_map
def embed_insider_camera(glb_path):
    """Make every material unlit and add a camera node to a GLB file, in place.

    The file at ``glb_path`` is loaded, modified and saved back to the same
    path:

    * ``KHR_materials_unlit`` is declared in both ``extensionsUsed`` and
      ``extensionsRequired``.
    * Every material becomes double sided, gets the unlit extension and a
      white emissive factor.
    * A perspective camera named ``InsideCam`` is added on a node located at
      the origin, and that node is appended to the active scene.

    Args:
        glb_path: Path of the GLB file to rewrite.

    Returns:
        ``glb_path``, unchanged.
    """
    unlit = "KHR_materials_unlit"
    gltf = GLTF2().load(glb_path)

    # (0) Declare the unlit extension at the document root.
    if gltf.extensionsUsed is None:
        gltf.extensionsUsed = []
    if unlit not in gltf.extensionsUsed:
        gltf.extensionsUsed.append(unlit)
    if gltf.extensionsRequired is None:
        gltf.extensionsRequired = []
    if unlit not in gltf.extensionsRequired:
        gltf.extensionsRequired.append(unlit)

    # (1) Make existing materials unlit + double sided + emissive.
    for mat in gltf.materials or []:
        mat.doubleSided = True
        # Unlit: shading does not depend on scene lights.
        if mat.extensions is None:
            mat.extensions = {}
        mat.extensions[unlit] = {}
        # Raise the emissive factor so the surface always shows up bright.
        mat.emissiveFactor = [1.0, 1.0, 1.0]

    # (2) Add the camera and a node that carries it.
    cam = Camera(
        name="InsideCam",
        type="perspective",
        # pi/3 rad = 60 degree vertical field of view.
        perspective=Perspective(yfov=np.pi / 3, znear=0.01, zfar=1000.0),
    )
    gltf.cameras.append(cam)
    cam_idx = len(gltf.cameras) - 1

    node = Node(name="InsideCamNode", camera=cam_idx, translation=[0, 0, 0])
    gltf.nodes.append(node)
    node_idx = len(gltf.nodes) - 1

    # Attach the camera node to the active scene. When no default scene is
    # set, reuse the first existing scene instead of replacing the scene
    # list, so nodes already referenced by existing scenes are not dropped.
    if gltf.scene is None:
        if not gltf.scenes:
            gltf.scenes = [Scene(nodes=[])]
        gltf.scene = 0
    active_scene = gltf.scenes[gltf.scene]
    if active_scene.nodes is None:
        active_scene.nodes = []
    active_scene.nodes.append(node_idx)

    gltf.save(glb_path)
    return glb_path
def generate_sphere_from_panorama(color_img, resolution, depth_strength=0.5, sphere_radius=1.0, segments=128):
    """Build a depth-displaced, inward-facing textured sphere and export it as GLB.

    Depth is predicted for the panorama with the module-level ``model``, a UV
    sphere is created and inverted so its faces point inward, each vertex is
    moved along its radial direction according to the depth sampled at its
    longitude/latitude, the panorama is applied as texture, and the result is
    written to a temporary ``.glb`` file that is then post-processed by
    ``embed_insider_camera``.

    Args:
        color_img: Image array indexed as ``[row, col, channel]``
            (assumed to be an RGB uint8 array as delivered by
            ``gr.Image(type='numpy')`` -- TODO confirm).
        resolution: Height in pixels of the resized colour/depth maps; the
            width is scaled to keep the aspect ratio. Cast to ``int``.
        depth_strength: Multiplier for the displacement amount.
        sphere_radius: Radius of the base sphere.
        segments: Segment count passed to ``trimesh.creation.uv_sphere`` for
            both directions.

    Returns:
        Path of the generated ``.glb`` file.

    Raises:
        gr.Error: If ``color_img`` is ``None`` (no image supplied).
    """
    if color_img is None:
        # Surfaced in the UI instead of an opaque TypeError further down.
        raise gr.Error("パノラマ画像を入力してください。")

    # 1. Depth prediction; the channel axis is reversed before inference
    #    (presumably RGB -> BGR for the model -- verify against infer_image).
    depth = model.infer_image(color_img[:, :, ::-1])

    # Resize colour and depth maps to the requested height. The slider value
    # may arrive as a float, and PIL needs integer sizes.
    h, w = color_img.shape[:2]
    new_h = int(resolution)
    new_w = max(1, int(w * (new_h / h)))
    color_r = np.array(Image.fromarray(color_img).resize((new_w, new_h), Image.LANCZOS))
    depth_r = np.array(Image.fromarray(depth).resize((new_w, new_h), Image.LANCZOS))

    # 2. Create the UV sphere mesh.
    mesh = trimesh.creation.uv_sphere(radius=sphere_radius, count=[segments, segments])
    # Flip the face orientation so the sphere is viewed from the inside.
    mesh.invert()

    # 3. Per-vertex UV coordinates from spherical coordinates.
    verts = mesh.vertices  # (N, 3)
    x, y, z = verts[:, 0], verts[:, 1], verts[:, 2]
    lon = np.arctan2(-x, z)  # [-pi, pi]
    # Clip guards against |y / r| drifting marginally above 1 through
    # floating-point error, which would make arcsin return NaN.
    lat = np.arcsin(np.clip(y / sphere_radius, -1.0, 1.0))  # [-pi/2, pi/2]
    u = lon / (2 * np.pi) + 0.5
    v = lat / np.pi + 0.5

    # Pixel coordinates in the resized maps (image rows run top to bottom,
    # hence 1 - v).
    px = np.clip((u * (new_w - 1)).astype(int), 0, new_w - 1)
    py = np.clip(((1 - v) * (new_h - 1)).astype(int), 0, new_h - 1)

    # Sample depth at every vertex.
    depth_samples = depth_r[py, px]  # (N,)

    # 4. Normalise depth and derive the vertex offset: displacement is
    #    relative to the median, measured in standard deviations.
    depth_median = np.median(depth_samples)
    depth_std = np.std(depth_samples)
    if depth_std > 0:
        normalized_depth = (depth_samples - depth_median) / depth_std
    else:
        # Constant depth: no displacement at all.
        normalized_depth = np.zeros_like(depth_samples)

    # Limit the displacement so the basic sphere shape is preserved.
    max_displacement = sphere_radius * 0.1  # at most 10% of the radius
    displacement_scale = max_displacement * depth_strength * 0.1  # strength scaling
    # Clip to prevent extreme deformation.
    displacement = np.clip(
        normalized_depth * displacement_scale,
        -max_displacement,
        max_displacement,
    )

    # Offset vertices along their radial direction; the sign is negated so a
    # positive displacement moves the vertex inward.
    directions = verts / np.linalg.norm(verts, axis=1)[:, None]
    mesh.vertices = verts - directions * displacement[:, None]

    # 5. Apply the panorama as texture.
    mesh.visual = trimesh.visual.texture.TextureVisuals(
        uv=np.column_stack((u, v)),
        image=Image.fromarray(color_r),
    )

    # 6. Export to a temporary GLB. NamedTemporaryFile creates the file
    #    atomically (unlike the deprecated, race-prone tempfile.mktemp);
    #    delete=False keeps it on disk for the caller after the handle closes.
    with tempfile.NamedTemporaryFile(suffix='.glb', delete=False) as tmp:
        out = tmp.name
    mesh.export(out)

    # Embed the camera node and unlit materials.
    out = embed_insider_camera(out)
    return out
# Gradio UI: panorama input, two sliders, a generate button and a 3D viewer.
with gr.Blocks(css=css) as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    gr.Markdown("### パノラマ→球体変形 & 3D表示")

    with gr.Row():
        input_image = gr.Image(type='numpy', label="パノラマ入力")

    with gr.Row():
        # Height of the resized colour/depth maps, 512-2048 px.
        resolution = gr.Slider(
            minimum=512,
            maximum=2048,
            value=1024,
            step=1,
            label="マップ解像度",
        )
        # Displacement strength, range 5.0-20.0.
        strength = gr.Slider(
            minimum=5.0,
            maximum=20.0,
            value=10.0,
            step=0.1,
            label="凸凹強度",
        )

    submit = gr.Button("3D球体を生成")

    # Place the viewer camera at the centre of the sphere (radius=0).
    output_3d = gr.Model3D(
        label="変形球体",
        clear_color=[0, 0, 0, 0],
        camera_position=(None, None, 0),  # (alpha=None, beta=None, radius=0)
    )

    # Clicking the button runs the generator and shows the resulting GLB.
    submit.click(
        fn=generate_sphere_from_panorama,
        inputs=[input_image, resolution, strength],
        outputs=output_3d,
    )

if __name__ == '__main__':
    demo.queue().launch(share=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment