import gradio as gr
import numpy as np
import json
import os
import tempfile
from typing import Optional, Tuple, Dict, Any
import time
from pathlib import Path


# Mock implementation for demonstration.
# In a real deployment, this would integrate with the actual HY-WorldPlay model.
class HYWorldPlayDemo:
    def __init__(self):
        self.model_loaded = False
        self.generation_history = []

    def load_model(self, model_type: str = "bidirectional"):
        """Mock model loading."""
        time.sleep(2)  # Simulate loading time
        self.model_loaded = True
        return f"✅ {model_type.capitalize()} model loaded successfully!"
    def generate_video(self,
                       prompt: str,
                       image_path: Optional[str] = None,
                       resolution: str = "480p",
                       aspect_ratio: str = "16:9",
                       num_frames: int = 125,
                       seed: int = 1,
                       model_type: str = "bidirectional",
                       action_type: str = "forward") -> Tuple[str, Dict]:
        """
        Mock video generation (a generator).

        Yields (progress_message, status_dict) pairs while "generating",
        then a final (video_path, metadata) pair.
        """
        if not self.model_loaded:
            raise gr.Error("Please load the model first!")

        # Simulate generation time
        progress_steps = [
            "Initializing generation pipeline...",
            "Processing prompt and image...",
            "Generating video frames...",
            "Applying temporal consistency...",
            "Rendering final video..."
        ]
        for step in progress_steps:
            yield step, {"status": "processing", "step": step}
            time.sleep(1)

        # Create a mock video file (in a real implementation, this would be actual video generation)
        temp_dir = tempfile.mkdtemp()
        video_path = os.path.join(temp_dir, "generated_video.mp4")

        # Create a simple placeholder text file to simulate a video
        with open(video_path, 'w') as f:
            f.write(f"Generated video for prompt: {prompt}\n")
            f.write(f"Resolution: {resolution}\n")
            f.write(f"Aspect Ratio: {aspect_ratio}\n")
            f.write(f"Frames: {num_frames}\n")
            f.write(f"Model: {model_type}\n")
            f.write(f"Action: {action_type}\n")

        # Save generation metadata
        metadata = {
            "prompt": prompt,
            "resolution": resolution,
            "aspect_ratio": aspect_ratio,
            "num_frames": num_frames,
            "seed": seed,
            "model_type": model_type,
            "action_type": action_type,
            "generation_time": time.strftime("%Y-%m-%d %H:%M:%S"),
            "status": "completed"
        }
        self.generation_history.append(metadata)

        yield video_path, metadata
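
# NOTE: generate_video is a generator, so Gradio streams every yielded value to
# the bound output components as it arrives; that is what drives the live
# progress display in the UI below.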
# Initialize demo class
demo_instance = HYWorldPlayDemo()
def load_model_wrapper(model_type):
    """Wrapper for model loading with status updates."""
    return demo_instance.load_model(model_type)


def generate_video_wrapper(*args):
    """Map generate_video's (value, metadata) yields onto the three UI outputs."""
    for value, meta in demo_instance.generate_video(*args):
        if isinstance(meta, dict) and meta.get("status") == "completed":
            # Final yield: reveal the (mock) video and the metadata panel
            yield f'<div class="generation-progress">✅ Generation completed</div>', gr.Video(value=value, visible=True), gr.JSON(value=meta, visible=True)
        else:
            # Intermediate yields only update the progress line
            yield f'<div class="generation-progress">⏳ {value}</div>', gr.Video(visible=False), gr.JSON(visible=False)
def create_pose_json(trajectory_type: str, num_keyframes: int = 32) -> str:
    """Create a mock pose JSON file for a camera trajectory."""
    poses = []
    if trajectory_type == "forward":
        for i in range(num_keyframes):
            poses.append({
                "frame": i,
                "position": [i * 0.1, 0, 0],
                "rotation": [0, 0, 0],
                "fov": 60
            })
    elif trajectory_type == "circular":
        for i in range(num_keyframes):
            angle = (i / num_keyframes) * 2 * np.pi
            poses.append({
                "frame": i,
                "position": [np.cos(angle) * 2, 0, np.sin(angle) * 2],
                "rotation": [0, np.degrees(angle), 0],
                "fov": 60
            })
    elif trajectory_type == "zoom":
        for i in range(num_keyframes):
            zoom = 1 + (i / num_keyframes) * 2
            poses.append({
                "frame": i,
                "position": [0, 0, 0],
                "rotation": [0, 0, 0],
                "fov": 60 / zoom
            })
    # Any other trajectory_type (e.g. "custom") falls through with an empty pose list.

    temp_dir = tempfile.mkdtemp()
    json_path = os.path.join(temp_dir, "pose_trajectory.json")
    with open(json_path, 'w') as f:
        json.dump({"poses": poses}, f, indent=2)
    return json_path
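
# NOTE (assumption): the {"poses": [{"frame", "position", "rotation", "fov"}]}
# layout above is only a demo placeholder; the real HY-WorldPlay pipeline may
# expect a different camera-pose schema.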
def get_generation_history():
    """Return generation history as formatted text."""
    if not demo_instance.generation_history:
        return "No generations yet."
    history_text = ""
    for i, gen in enumerate(demo_instance.generation_history[-5:], 1):
        history_text += f"**Generation {i}**\n"
        history_text += f"- Prompt: {gen['prompt'][:50]}...\n"
        history_text += f"- Model: {gen['model_type']}\n"
        history_text += f"- Frames: {gen['num_frames']}\n"
        history_text += f"- Time: {gen['generation_time']}\n\n"
    return history_text
# Custom CSS for enhanced UI
custom_css = """
.main-container {
    max-width: 1400px;
    margin: 0 auto;
}
.model-card {
    border: 2px solid #e5e7eb;
    border-radius: 12px;
    padding: 20px;
    margin: 10px 0;
    transition: all 0.3s ease;
}
.model-card:hover {
    border-color: #3b82f6;
    box-shadow: 0 4px 12px rgba(59, 130, 246, 0.1);
}
.status-indicator {
    display: inline-block;
    width: 12px;
    height: 12px;
    border-radius: 50%;
    margin-right: 8px;
}
.status-ready { background-color: #10b981; }
.status-loading { background-color: #f59e0b; }
.status-error { background-color: #ef4444; }
.feature-highlight {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 20px;
    border-radius: 12px;
    margin: 20px 0;
}
.generation-progress {
    font-family: 'Courier New', monospace;
    background: #1f2937;
    color: #10b981;
    padding: 15px;
    border-radius: 8px;
    margin: 10px 0;
}
"""
with gr.Blocks(
    css=custom_css,
    title="HY-WorldPlay: Interactive World Modeling",
    # The theme must be set here on gr.Blocks; launch() does not take a `theme` argument.
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="indigo",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter"),
        text_size="lg",
        spacing_size="lg",
        radius_size="md"
    )
) as demo:
    # Header
    gr.HTML("""
    <div style="text-align: center; margin-bottom: 30px;">
        <h1 style="font-size: 2.5em; margin-bottom: 10px;">🎮 HY-WorldPlay</h1>
        <p style="font-size: 1.2em; color: #6b7280;">Real-Time Interactive World Modeling with Geometric Consistency</p>
        <p style="margin-top: 10px;">
            <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #3b82f6; text-decoration: none;">
                Built with anycoder
            </a>
        </p>
    </div>
    """)

    # Feature highlights
    with gr.Row():
        with gr.Column():
            gr.HTML("""
            <div class="feature-highlight">
                <h3>🚀 Key Features</h3>
                <ul style="margin: 10px 0;">
                    <li>Real-time video generation at 24 FPS</li>
                    <li>Long-term geometric consistency</li>
                    <li>Dual Action Representation for control</li>
                    <li>Reconstituted Context Memory</li>
                    <li>WorldCompass RL post-training</li>
                    <li>Context Forcing distillation</li>
                </ul>
            </div>
            """)
    # Main interface tabs
    with gr.Tabs() as main_tabs:
        # Tab 1: Video Generation
        with gr.TabItem("🎬 Video Generation", id="gen_tab"):
            with gr.Row():
                with gr.Column(scale=2):
                    gr.Markdown("### Configuration")

                    # Model selection
                    model_type = gr.Radio(
                        choices=["bidirectional", "autoregressive", "autoregressive_distilled"],
                        value="bidirectional",
                        label="Model Type",
                        info="Choose the model variant for generation"
                    )
                    load_model_btn = gr.Button("🔄 Load Model", variant="primary", size="lg")
                    model_status = gr.HTML('<div><span class="status-indicator status-error"></span>Model not loaded</div>')

                    # Input controls
                    with gr.Accordion("📝 Input Settings", open=True):
                        prompt_input = gr.Textbox(
                            label="Prompt",
                            placeholder="Describe the world you want to generate...",
                            lines=3,
                            value="A peaceful landscape with a stone bridge spanning a calm body of water, surrounded by lush green trees and a traditional pavilion."
                        )
                        image_input = gr.Image(
                            label="Input Image (Optional, guides the generation)",
                            type="filepath",
                            sources=["upload", "clipboard"]
                            # Note: gr.Image does not accept an `info` kwarg, so the
                            # upload hint is folded into the label above.
                        )
| with gr.Accordion("⚙️ Generation Settings", open=False): | |
| with gr.Row(): | |
| resolution = gr.Dropdown( | |
| choices=["480p", "720p", "1080p"], | |
| value="480p", | |
| label="Resolution" | |
| ) | |
| aspect_ratio = gr.Dropdown( | |
| choices=["16:9", "9:16", "1:1", "4:3"], | |
| value="16:9", | |
| label="Aspect Ratio" | |
| ) | |
| with gr.Row(): | |
| num_frames = gr.Slider( | |
| minimum=16, | |
| maximum=250, | |
| value=125, | |
| step=1, | |
| label="Number of Frames" | |
| ) | |
| seed = gr.Number( | |
| value=1, | |
| label="Seed", | |
| precision=0 | |
| ) | |
| # Camera trajectory | |
| with gr.Accordion("🎥 Camera Trajectory", open=False): | |
| trajectory_type = gr.Radio( | |
| choices=["forward", "circular", "zoom", "custom"], | |
| value="forward", | |
| label="Trajectory Type" | |
| ) | |
| create_pose_btn = gr.Button("Generate Trajectory JSON") | |
| pose_status = gr.Textbox(label="Trajectory Status", interactive=False) | |
| # Generation button | |
| generate_btn = gr.Button("🚀 Generate Video", variant="primary", size="lg", visible=False) | |
| with gr.Column(scale=1): | |
| gr.Markdown("### Output & Progress") | |
| # Progress display | |
| progress_display = gr.HTML('<div class="generation-progress">Ready to generate...</div>') | |
| # Output video | |
| video_output = gr.Video( | |
| label="Generated Video", | |
| visible=False | |
| ) | |
| # Metadata | |
| metadata_output = gr.JSON( | |
| label="Generation Metadata", | |
| visible=False | |
| ) | |
| # Tab 2: Model Comparison | |
| with gr.TabItem("📊 Model Comparison", id="compare_tab"): | |
| gr.Markdown("### Performance Comparison") | |
| # Performance metrics table | |
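            # Legend: ↑ means higher is better, ↓ means lower is better; R_dist and
            # T_dist are read here as rotation / translation errors of the generated
            # camera trajectory (inferred from the column names).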
            gr.Dataframe(
                headers=["Model", "Real-time", "PSNR ↑", "SSIM ↑", "LPIPS ↓", "R_dist ↓", "T_dist ↓"],
                datatype=["str", "str", "number", "number", "number", "number", "number"],
                value=[
                    ["CameraCtrl", "❌", 17.93, 0.569, 0.298, 0.037, 0.341],
                    ["SEVA", "❌", 19.84, 0.598, 0.313, 0.047, 0.223],
                    ["ViewCrafter", "❌", 19.91, 0.617, 0.327, 0.029, 0.543],
                    ["Gen3C", "❌", 21.68, 0.635, 0.278, 0.024, 0.477],
                    ["VMem", "❌", 19.97, 0.587, 0.316, 0.048, 0.219],
                    ["Matrix-Game-2.0", "✅", 17.26, 0.505, 0.383, 0.287, 0.843],
                    ["GameCraft", "❌", 21.05, 0.639, 0.341, 0.151, 0.617],
                    ["Ours (w/o Context Forcing)", "❌", 21.27, 0.669, 0.261, 0.033, 0.157],
                    ["Ours (full)", "✅", 21.92, 0.702, 0.247, 0.031, 0.121]
                ],
                label="Quantitative Evaluation Results",
                interactive=False
            )
            with gr.Row():
                with gr.Column():
                    gr.Markdown("""
                    ### 🔬 Key Improvements
                    - **Real-time Performance**: Achieves 24 FPS streaming generation
                    - **Superior Consistency**: Best-in-class long-term geometric consistency
                    - **Memory Efficiency**: Reconstituted Context Memory prevents error drift
                    - **Action Control**: Precise keyboard and mouse input response
                    """)
                with gr.Column():
                    gr.Markdown("""
                    ### 📈 Technical Innovations
                    - **Dual Action Representation**: Robust action control
                    - **Context Forcing**: Memory-aware model distillation
                    - **WorldCompass**: RL-based post-training
                    - **Temporal Reframing**: Long-past frame accessibility
                    """)
        # Tab 3: Examples
        with gr.TabItem("🎨 Examples", id="examples_tab"):
            gr.Markdown("### Sample Generations")

            with gr.Row():
                with gr.Column():
                    gr.Markdown("""
                    #### Example 1: Bridge Scene
                    **Prompt**: A paved pathway leads towards a stone arch bridge spanning a calm body of water...
                    **Action**: Forward movement
                    **Frames**: 125
                    """)
                    gr.Image("https://via.placeholder.com/400x225/3b82f6/ffffff?text=Bridge+Scene+Example", label="Example 1")
                with gr.Column():
                    gr.Markdown("""
                    #### Example 2: Forest Path
                    **Prompt**: A winding path through an enchanted forest with ancient trees...
                    **Action**: Circular trajectory
                    **Frames**: 125
                    """)
                    gr.Image("https://via.placeholder.com/400x225/10b981/ffffff?text=Forest+Path+Example", label="Example 2")

            with gr.Row():
                with gr.Column():
                    gr.Markdown("""
                    #### Example 3: Urban Scene
                    **Prompt**: A futuristic cityscape with flying vehicles and neon lights...
                    **Action**: Zoom in
                    **Frames**: 125
                    """)
                    gr.Image("https://via.placeholder.com/400x225/8b5cf6/ffffff?text=Urban+Scene+Example", label="Example 3")
                with gr.Column():
                    gr.Markdown("""
                    #### Example 4: Interior Scene
                    **Prompt**: A cozy library with bookshelves and warm lighting...
                    **Action**: Custom trajectory
                    **Frames**: 125
                    """)
                    gr.Image("https://via.placeholder.com/400x225/f59e0b/ffffff?text=Interior+Scene+Example", label="Example 4")

        # Tab 4: History
        with gr.TabItem("📜 History", id="history_tab"):
            gr.Markdown("### Generation History")
            history_display = gr.Markdown(get_generation_history())
            refresh_history_btn = gr.Button("🔄 Refresh History")
    # Footer
    gr.HTML("""
    <div style="text-align: center; margin-top: 40px; padding: 20px; border-top: 1px solid #e5e7eb;">
        <p style="color: #6b7280;">
            HY-WorldPlay: A Systematic Framework for Interactive World Modeling<br>
            <a href="https://arxiv.org/abs/2512.14614" target="_blank" style="color: #3b82f6;">Paper</a> |
            <a href="https://github.com/Tencent/HY-WorldPlay" target="_blank" style="color: #3b82f6;">GitHub</a> |
            <a href="https://huggingface.co/tencent/HY-WorldPlay" target="_blank" style="color: #3b82f6;">Model Card</a>
        </p>
    </div>
    """)
    # Event handlers
    def update_model_status(is_loaded, model_type):
        if is_loaded:
            return f'<div><span class="status-indicator status-ready"></span>{model_type.capitalize()} model loaded</div>', gr.Button(visible=True)
        else:
            return f'<div><span class="status-indicator status-loading"></span>Loading {model_type} model...</div>', gr.Button(visible=False)
    load_model_btn.click(
        fn=lambda x: load_model_wrapper(x),
        inputs=[model_type],
        outputs=[model_status]
    ).then(
        fn=lambda x: update_model_status(True, x),
        inputs=[model_type],
        outputs=[model_status, generate_btn]
    )
    def update_progress(progress_text, show_video=False):
        if "completed" in progress_text.lower():
            return f'<div class="generation-progress">✅ {progress_text}</div>', gr.Video(visible=True), gr.JSON(visible=True)
        else:
            return f'<div class="generation-progress">⏳ {progress_text}</div>', gr.Video(visible=False), gr.JSON(visible=False)
    generate_btn.click(
        fn=generate_video_wrapper,
        inputs=[
            prompt_input,
            image_input,
            resolution,
            aspect_ratio,
            num_frames,
            seed,
            model_type,
            trajectory_type
        ],
        outputs=[progress_display, video_output, metadata_output]
    )
    create_pose_btn.click(
        fn=create_pose_json,
        inputs=[trajectory_type],
        outputs=[pose_status]
    ).then(
        fn=lambda x: f"✅ Trajectory JSON created for {x} motion",
        inputs=[trajectory_type],
        outputs=[pose_status]
    )
    refresh_history_btn.click(
        fn=get_generation_history,
        outputs=[history_display]
    )
# Launch the app.
# Note: gr.Blocks.launch() accepts neither `theme` nor `footer_links`; the theme
# is configured on gr.Blocks above, and the paper / GitHub / model-card links are
# already rendered in the header and footer HTML.
demo.launch()
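
# To try this demo locally (assuming the file is saved as app.py):
#   pip install gradio numpy
#   python app.py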