import gradio as gr import numpy as np import json import os import tempfile from typing import Optional, Tuple, Dict, Any import time from pathlib import Path # Mock implementation for demonstration # In a real deployment, this would integrate with the actual HY-WorldPlay model class HYWorldPlayDemo: def __init__(self): self.model_loaded = False self.generation_history = [] def load_model(self, model_type: str = "bidirectional"): """Mock model loading""" time.sleep(2) # Simulate loading time self.model_loaded = True return f"✅ {model_type.capitalize()} model loaded successfully!" def generate_video(self, prompt: str, image_path: Optional[str] = None, resolution: str = "480p", aspect_ratio: str = "16:9", num_frames: int = 125, seed: int = 1, model_type: str = "bidirectional", action_type: str = "forward") -> Tuple[str, Dict]: """ Mock video generation Returns: (video_path, metadata) """ if not self.model_loaded: raise gr.Error("Please load the model first!") # Simulate generation time progress_steps = [ "Initializing generation pipeline...", "Processing prompt and image...", "Generating video frames...", "Applying temporal consistency...", "Rendering final video..." ] for step in progress_steps: yield step, {"status": "processing", "step": step} time.sleep(1) # Create a mock video file (in real implementation, this would be actual video generation) temp_dir = tempfile.mkdtemp() video_path = os.path.join(temp_dir, "generated_video.mp4") # Create a simple placeholder text file to simulate video with open(video_path, 'w') as f: f.write(f"Generated video for prompt: {prompt}\n") f.write(f"Resolution: {resolution}\n") f.write(f"Aspect Ratio: {aspect_ratio}\n") f.write(f"Frames: {num_frames}\n") f.write(f"Model: {model_type}\n") f.write(f"Action: {action_type}\n") # Save generation metadata metadata = { "prompt": prompt, "resolution": resolution, "aspect_ratio": aspect_ratio, "num_frames": num_frames, "seed": seed, "model_type": model_type, "action_type": action_type, "generation_time": time.strftime("%Y-%m-%d %H:%M:%S"), "status": "completed" } self.generation_history.append(metadata) yield video_path, metadata # Initialize demo class demo_instance = HYWorldPlayDemo() def load_model_wrapper(model_type): """Wrapper for model loading with status updates""" return demo_instance.load_model(model_type) def generate_video_wrapper(*args): """Wrapper for video generation with progress tracking""" for result in demo_instance.generate_video(*args): yield result def create_pose_json(trajectory_type: str, num_keyframes: int = 32) -> str: """Create a mock pose JSON file for camera trajectory""" poses = [] if trajectory_type == "forward": for i in range(num_keyframes): poses.append({ "frame": i, "position": [i * 0.1, 0, 0], "rotation": [0, 0, 0], "fov": 60 }) elif trajectory_type == "circular": for i in range(num_keyframes): angle = (i / num_keyframes) * 2 * np.pi poses.append({ "frame": i, "position": [np.cos(angle) * 2, 0, np.sin(angle) * 2], "rotation": [0, np.degrees(angle), 0], "fov": 60 }) elif trajectory_type == "zoom": for i in range(num_keyframes): zoom = 1 + (i / num_keyframes) * 2 poses.append({ "frame": i, "position": [0, 0, 0], "rotation": [0, 0, 0], "fov": 60 / zoom }) temp_dir = tempfile.mkdtemp() json_path = os.path.join(temp_dir, "pose_trajectory.json") with open(json_path, 'w') as f: json.dump({"poses": poses}, f, indent=2) return json_path def get_generation_history(): """Return generation history as formatted text""" if not demo_instance.generation_history: return "No generations yet." history_text = "" for i, gen in enumerate(demo_instance.generation_history[-5:], 1): history_text += f"**Generation {i}**\n" history_text += f"- Prompt: {gen['prompt'][:50]}...\n" history_text += f"- Model: {gen['model_type']}\n" history_text += f"- Frames: {gen['num_frames']}\n" history_text += f"- Time: {gen['generation_time']}\n\n" return history_text # Custom CSS for enhanced UI custom_css = """ .main-container { max-width: 1400px; margin: 0 auto; } .model-card { border: 2px solid #e5e7eb; border-radius: 12px; padding: 20px; margin: 10px 0; transition: all 0.3s ease; } .model-card:hover { border-color: #3b82f6; box-shadow: 0 4px 12px rgba(59, 130, 246, 0.1); } .status-indicator { display: inline-block; width: 12px; height: 12px; border-radius: 50%; margin-right: 8px; } .status-ready { background-color: #10b981; } .status-loading { background-color: #f59e0b; } .status-error { background-color: #ef4444; } .feature-highlight { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 12px; margin: 20px 0; } .generation-progress { font-family: 'Courier New', monospace; background: #1f2937; color: #10b981; padding: 15px; border-radius: 8px; margin: 10px 0; } """ with gr.Blocks(css=custom_css, title="HY-WorldPlay: Interactive World Modeling") as demo: # Header gr.HTML("""

🎮 HY-WorldPlay

Real-Time Interactive World Modeling with Geometric Consistency

Built with anycoder

""") # Feature highlights with gr.Row(): with gr.Column(): gr.HTML("""

🚀 Key Features

""") # Main interface tabs with gr.Tabs() as main_tabs: # Tab 1: Video Generation with gr.TabItem("🎬 Video Generation", id="gen_tab"): with gr.Row(): with gr.Column(scale=2): gr.Markdown("### Configuration") # Model selection model_type = gr.Radio( choices=["bidirectional", "autoregressive", "autoregressive_distilled"], value="bidirectional", label="Model Type", info="Choose the model variant for generation" ) load_model_btn = gr.Button("🔄 Load Model", variant="primary", size="lg") model_status = gr.HTML('
Model not loaded
') # Input controls with gr.Accordion("📝 Input Settings", open=True): prompt_input = gr.Textbox( label="Prompt", placeholder="Describe the world you want to generate...", lines=3, value="A peaceful landscape with a stone bridge spanning a calm body of water, surrounded by lush green trees and a traditional pavilion." ) image_input = gr.Image( label="Input Image (Optional)", type="filepath", sources=["upload", "clipboard"], info="Upload an image to guide the generation" ) with gr.Accordion("⚙️ Generation Settings", open=False): with gr.Row(): resolution = gr.Dropdown( choices=["480p", "720p", "1080p"], value="480p", label="Resolution" ) aspect_ratio = gr.Dropdown( choices=["16:9", "9:16", "1:1", "4:3"], value="16:9", label="Aspect Ratio" ) with gr.Row(): num_frames = gr.Slider( minimum=16, maximum=250, value=125, step=1, label="Number of Frames" ) seed = gr.Number( value=1, label="Seed", precision=0 ) # Camera trajectory with gr.Accordion("🎥 Camera Trajectory", open=False): trajectory_type = gr.Radio( choices=["forward", "circular", "zoom", "custom"], value="forward", label="Trajectory Type" ) create_pose_btn = gr.Button("Generate Trajectory JSON") pose_status = gr.Textbox(label="Trajectory Status", interactive=False) # Generation button generate_btn = gr.Button("🚀 Generate Video", variant="primary", size="lg", visible=False) with gr.Column(scale=1): gr.Markdown("### Output & Progress") # Progress display progress_display = gr.HTML('
Ready to generate...
') # Output video video_output = gr.Video( label="Generated Video", visible=False ) # Metadata metadata_output = gr.JSON( label="Generation Metadata", visible=False ) # Tab 2: Model Comparison with gr.TabItem("📊 Model Comparison", id="compare_tab"): gr.Markdown("### Performance Comparison") # Performance metrics table gr.DataFrame( headers=["Model", "Real-time", "PSNR ↑", "SSIM ↑", "LPIPS ↓", "R_dist ↓", "T_dist ↓"], datatype=["str", "str", "number", "number", "number", "number", "number"], value=[ ["CameraCtrl", "❌", 17.93, 0.569, 0.298, 0.037, 0.341], ["SEVA", "❌", 19.84, 0.598, 0.313, 0.047, 0.223], ["ViewCrafter", "❌", 19.91, 0.617, 0.327, 0.029, 0.543], ["Gen3C", "❌", 21.68, 0.635, 0.278, 0.024, 0.477], ["VMem", "❌", 19.97, 0.587, 0.316, 0.048, 0.219], ["Matrix-Game-2.0", "✅", 17.26, 0.505, 0.383, 0.287, 0.843], ["GameCraft", "❌", 21.05, 0.639, 0.341, 0.151, 0.617], ["Ours (w/o Context Forcing)", "❌", 21.27, 0.669, 0.261, 0.033, 0.157], ["Ours (full)", "✅", 21.92, 0.702, 0.247, 0.031, 0.121] ], label="Quantitative Evaluation Results", interactive=False ) with gr.Row(): with gr.Column(): gr.Markdown(""" ### 🔬 Key Improvements - **Real-time Performance**: Achieves 24 FPS streaming generation - **Superior Consistency**: Best-in-class long-term geometric consistency - **Memory Efficiency**: Reconstituted Context Memory prevents error drift - **Action Control**: Precise keyboard and mouse input response """) with gr.Column(): gr.Markdown(""" ### 📈 Technical Innovations - **Dual Action Representation**: Robust action control - **Context Forcing**: Memory-aware model distillation - **WorldCompass**: RL-based post-training - **Temporal Reframing**: Long-past frame accessibility """) # Tab 3: Examples with gr.TabItem("🎨 Examples", id="examples_tab"): gr.Markdown("### Sample Generations") with gr.Row(): with gr.Column(): gr.Markdown(""" #### Example 1: Bridge Scene **Prompt**: A paved pathway leads towards a stone arch bridge spanning a calm body of water... **Action**: Forward movement **Frames**: 125 """) gr.Image("https://via.placeholder.com/400x225/3b82f6/ffffff?text=Bridge+Scene+Example", label="Example 1") with gr.Column(): gr.Markdown(""" #### Example 2: Forest Path **Prompt**: A winding path through an enchanted forest with ancient trees... **Action**: Circular trajectory **Frames**: 125 """) gr.Image("https://via.placeholder.com/400x225/10b981/ffffff?text=Forest+Path+Example", label="Example 2") with gr.Row(): with gr.Column(): gr.Markdown(""" #### Example 3: Urban Scene **Prompt**: A futuristic cityscape with flying vehicles and neon lights... **Action**: Zoom in **Frames**: 125 """) gr.Image("https://via.placeholder.com/400x225/8b5cf6/ffffff?text=Urban+Scene+Example", label="Example 3") with gr.Column(): gr.Markdown(""" #### Example 4: Interior Scene **Prompt**: A cozy library with bookshelves and warm lighting... **Action**: Custom trajectory **Frames**: 125 """) gr.Image("https://via.placeholder.com/400x225/f59e0b/ffffff?text=Interior+Scene+Example", label="Example 4") # Tab 4: History with gr.TabItem("📜 History", id="history_tab"): gr.Markdown("### Generation History") history_display = gr.Markdown(get_generation_history()) refresh_history_btn = gr.Button("🔄 Refresh History") # Footer gr.HTML("""

HY-WorldPlay: A Systematic Framework for Interactive World Modeling
Paper | GitHub | Model Card

""") # Event handlers def update_model_status(is_loaded, model_type): if is_loaded: return f'
{model_type.capitalize()} model loaded
', gr.Button(visible=True) else: return f'
Loading {model_type} model...
', gr.Button(visible=False) load_model_btn.click( fn=lambda x: load_model_wrapper(x), inputs=[model_type], outputs=[model_status] ).then( fn=lambda x: update_model_status(True, x), inputs=[model_type], outputs=[model_status, generate_btn] ) def update_progress(progress_text, show_video=False): if "completed" in progress_text.lower(): return f'
✅ {progress_text}
', gr.Video(visible=True), gr.JSON(visible=True) else: return f'
⏳ {progress_text}
', gr.Video(visible=False), gr.JSON(visible=False) generate_btn.click( fn=generate_video_wrapper, inputs=[ prompt_input, image_input, resolution, aspect_ratio, num_frames, seed, model_type, trajectory_type ], outputs=[progress_display, video_output, metadata_output] ) create_pose_btn.click( fn=create_pose_json, inputs=[trajectory_type], outputs=[pose_status] ).then( fn=lambda x: f"✅ Trajectory JSON created for {x} motion", inputs=[trajectory_type], outputs=[pose_status] ) refresh_history_btn.click( fn=get_generation_history, outputs=[history_display] ) # Launch the app demo.launch( theme=gr.themes.Soft( primary_hue="blue", secondary_hue="indigo", neutral_hue="slate", font=gr.themes.GoogleFont("Inter"), text_size="lg", spacing_size="lg", radius_size="md" ), footer_links=[ {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"}, {"label": "Paper", "url": "https://arxiv.org/abs/2512.14614"}, {"label": "GitHub", "url": "https://github.com/Tencent/HY-WorldPlay"}, {"label": "Model Card", "url": "https://huggingface.co/tencent/HY-WorldPlay"} ] )