Created
July 16, 2025 09:45
-
-
Save tori29umai0123/c164a0ed7fedb04b51f8166141fbaf7a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tempfile | |
import gradio as gr | |
import numpy as np | |
import torch | |
from PIL import Image | |
import trimesh | |
from huggingface_hub import hf_hub_download | |
from depth_anything_v2.dpt import DepthAnythingV2 | |
from pygltflib import GLTF2, Node, Camera, Perspective, Scene | |
# Custom CSS passed to gr.Blocks below.
# NOTE(review): no component in the visible UI sets a matching elem_id, so
# these id selectors may currently match nothing — confirm.
css = """
#img-display-container {
max-height: 100vh;
}
#img-display-input {
max-height: 80vh;
}
#img-display-output {
max-height: 80vh;
}
#download {
height: 62px;
}
"""

# Use the GPU when torch reports one, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Constructor keyword arguments for DepthAnythingV2, keyed by encoder id.
model_configs = {
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]},
}

# Encoder id -> size name used in the Hugging Face repository id.
encoder2name = {
    'vits': 'Small',
    'vitb': 'Base',
    'vitl': 'Large',
    'vitg': 'Giant',  # we are undergoing company review procedures to release our giant model checkpoint
}

encoder = 'vitl'
model_name = encoder2name[encoder]
model = DepthAnythingV2(**model_configs[encoder])
filepath = hf_hub_download(
    repo_id=f"depth-anything/Depth-Anything-V2-{model_name}",
    filename=f"depth_anything_v2_{encoder}.pth",
    repo_type="model",
)
# The checkpoint is a downloaded pickle file. weights_only=True restricts
# unpickling to tensors and plain containers, so a tampered file cannot run
# arbitrary code while it is being loaded.
state_dict = torch.load(filepath, map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)
model = model.to(DEVICE).eval()

# Markdown shown at the top of the page.
title = "# Depth-Anything-V2-DepthPop"
# Japanese UI text; roughly: "With this tool you can turn photos and
# illustrations into a pop-up picture-book style."
description = """
このツールを使用すると、写真やイラストを飛び出す絵本風にすることができます。
"""
def predict_depth(image):
    """Return the module-level depth model's prediction for ``image``.

    Thin wrapper: ``image`` is forwarded unchanged to ``model.infer_image``
    and whatever that call returns is handed back to the caller.
    """
    depth_map = model.infer_image(image)
    return depth_map
def embed_insider_camera(glb_path):
    """Rewrite the GLB at ``glb_path`` in place with unlit materials and a camera.

    The file is loaded with pygltflib, then:

    * ``KHR_materials_unlit`` is declared in both ``extensionsUsed`` and
      ``extensionsRequired``;
    * every material is made double-sided, tagged with the unlit extension
      and given an emissive factor of ``[1.0, 1.0, 1.0]``;
    * a perspective camera named ``InsideCam`` is added on a new node at the
      origin, and that node is attached to the active scene.

    Args:
        glb_path: Path of the GLB file to modify; it is overwritten.

    Returns:
        ``glb_path``, unchanged, for convenient chaining.
    """
    gltf = GLTF2().load(glb_path)

    # (0) Declare the unlit extension at the asset root.
    unlit = "KHR_materials_unlit"
    for field in ("extensionsUsed", "extensionsRequired"):
        declared = getattr(gltf, field)
        if declared is None:
            declared = []
            setattr(gltf, field, declared)
        if unlit not in declared:
            declared.append(unlit)

    # (1) Make the existing materials unlit, double-sided and emissive.
    for mat in gltf.materials or []:
        mat.doubleSided = True
        # Unlit: shading does not depend on scene lights.
        if mat.extensions is None:
            mat.extensions = {}
        mat.extensions[unlit] = {}
        # Also raise emissive so the surface is always displayed bright.
        mat.emissiveFactor = [1.0, 1.0, 1.0]

    # (2) Add the camera and a node that carries it.
    cam = Camera(
        name="InsideCam",
        type="perspective",
        perspective=Perspective(yfov=np.pi / 3, znear=0.01, zfar=1000.0),
    )
    gltf.cameras.append(cam)
    cam_idx = len(gltf.cameras) - 1

    node = Node(name="InsideCamNode", camera=cam_idx, translation=[0, 0, 0])
    gltf.nodes.append(node)
    node_idx = len(gltf.nodes) - 1

    # Create a scene only when the file has none. Replacing the whole scene
    # list just because no default scene is selected would drop the scene
    # that references the mesh nodes.
    if not gltf.scenes:
        gltf.scenes = [Scene(nodes=[])]
    if gltf.scene is None:
        gltf.scene = 0

    active_scene = gltf.scenes[gltf.scene]
    if active_scene.nodes is None:
        active_scene.nodes = []
    active_scene.nodes.append(node_idx)

    gltf.save(glb_path)
    return glb_path
def generate_sphere_from_panorama(color_img, resolution, depth_strength=0.5, sphere_radius=1.0, segments=128):
    """Build a depth-displaced, textured sphere GLB from a panorama image.

    A depth map is predicted for the image, a UV sphere is created and
    inverted so it is viewed from the inside, each vertex is moved along its
    radial direction according to the depth sampled at its longitude/latitude,
    the panorama is applied as a texture, and the result is exported to a
    temporary ``.glb`` file which then gets a camera embedded.

    Args:
        color_img: Image array indexed as ``[row, column, channel]``.
            Assumed to be the RGB uint8 array delivered by
            ``gr.Image(type='numpy')`` — confirm against the caller.
        resolution: Height in pixels of the resized colour and depth maps;
            the width follows the input aspect ratio.
        depth_strength: Displacement multiplier. The per-vertex offset is
            ``standardised_depth * sphere_radius * depth_strength * 0.01``,
            clipped to +/-10% of ``sphere_radius``.
        sphere_radius: Radius of the base sphere.
        segments: Segment count passed to ``trimesh.creation.uv_sphere`` for
            both directions.

    Returns:
        Path of the generated GLB file.

    Raises:
        gr.Error: If ``color_img`` is ``None`` (no image was supplied).
    """
    if color_img is None:
        # Message: "Please provide a panorama image."
        raise gr.Error("パノラマ画像を入力してください。")

    # 1. Depth prediction. The channel axis is reversed before inference
    #    (presumably RGB -> BGR, as infer_image expects — TODO confirm).
    depth = model.infer_image(color_img[:, :, ::-1])

    # Resize colour and depth to the requested map height. The slider value
    # is coerced to int because PIL requires an integer size.
    h, w = color_img.shape[:2]
    new_h = int(resolution)
    new_w = max(1, int(w * (new_h / h)))
    color_r = np.array(Image.fromarray(color_img).resize((new_w, new_h), Image.LANCZOS))
    depth_r = np.array(Image.fromarray(depth).resize((new_w, new_h), Image.LANCZOS))

    # 2. Create the UV sphere and flip its faces so it renders from inside.
    mesh = trimesh.creation.uv_sphere(radius=sphere_radius, count=[segments, segments])
    mesh.invert()

    # 3. Per-vertex spherical coordinates -> UV in [0, 1].
    verts = mesh.vertices  # (N, 3)
    x, y, z = verts[:, 0], verts[:, 1], verts[:, 2]
    lon = np.arctan2(-x, z)  # [-pi, pi]
    # Clip guards against |y / r| exceeding 1 by rounding error, which would
    # make arcsin return NaN.
    lat = np.arcsin(np.clip(y / sphere_radius, -1.0, 1.0))  # [-pi/2, pi/2]
    u = lon / (2 * np.pi) + 0.5
    v = lat / np.pi + 0.5

    # Pixel coordinates in the resized maps (row 0 corresponds to v == 1).
    px = np.clip((u * (new_w - 1)).astype(int), 0, new_w - 1)
    py = np.clip(((1 - v) * (new_h - 1)).astype(int), 0, new_h - 1)
    depth_samples = depth_r[py, px]  # (N,)

    # 4. Standardise depth around its median so displacement is relative.
    depth_median = np.median(depth_samples)
    depth_std = np.std(depth_samples)
    if depth_std > 0:
        normalized_depth = (depth_samples - depth_median) / depth_std
    else:
        normalized_depth = np.zeros_like(depth_samples)

    # Limit the displacement so the basic sphere shape is preserved.
    max_displacement = sphere_radius * 0.1  # at most 10% of the radius
    displacement_scale = max_displacement * depth_strength * 0.1
    displacement = np.clip(
        normalized_depth * displacement_scale,
        -max_displacement,
        max_displacement,
    )

    # Move each vertex along its radial direction; positive displacement
    # moves the vertex toward the sphere centre.
    directions = verts / np.linalg.norm(verts, axis=1)[:, None]
    mesh.vertices = verts - directions * displacement[:, None]

    # 5. Apply the panorama as the texture.
    mesh.visual = trimesh.visual.texture.TextureVisuals(
        uv=np.column_stack((u, v)),
        image=Image.fromarray(color_r),
    )

    # 6. Export to a temporary file. NamedTemporaryFile creates the file
    #    atomically (unlike the deprecated, race-prone tempfile.mktemp);
    #    delete=False keeps it on disk after the handle is closed.
    with tempfile.NamedTemporaryFile(suffix='.glb', delete=False) as tmp:
        out = tmp.name
    mesh.export(out)

    # Embed the camera into the exported file.
    return embed_insider_camera(out)
# Gradio UI: panorama in, displaced textured sphere (GLB) out.
# NOTE(review): the nesting below was reconstructed from a copy of the file
# that had lost its indentation — confirm which components belong in each Row.
with gr.Blocks(css=css) as demo:
    # Page header: title, description, then the section heading.
    for markdown_text in (title, description, "### パノラマ→球体変形 & 3D表示"):
        gr.Markdown(markdown_text)

    with gr.Row():
        input_image = gr.Image(type='numpy', label="パノラマ入力")

    with gr.Row():
        resolution = gr.Slider(
            minimum=512,
            maximum=2048,
            value=1024,
            step=1,
            label="マップ解像度",
        )
        # Displacement strength, selectable from 5.0 to 20.0.
        strength = gr.Slider(
            minimum=5.0,
            maximum=20.0,
            value=10.0,
            step=0.1,
            label="凸凹強度",
        )

    submit = gr.Button("3D球体を生成")

    # Place the viewer camera at the centre of the sphere (radius 0);
    # alpha and beta are left as None.
    output_3d = gr.Model3D(
        camera_position=(None, None, 0),
        clear_color=[0, 0, 0, 0],
        label="変形球体",
    )

    # Slider values are passed positionally as (resolution, depth_strength).
    submit.click(
        fn=generate_sphere_from_panorama,
        inputs=[input_image, resolution, strength],
        outputs=output_3d,
    )

if __name__ == '__main__':
    queued_app = demo.queue()
    queued_app.launch(share=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment