import gradio as gr
import numpy as np
import json
import os
import tempfile
from typing import Optional, Tuple, Dict, Any
import time
from pathlib import Path
# Mock implementation for demonstration
# In a real deployment, this would integrate with the actual HY-WorldPlay model
class HYWorldPlayDemo:
    """Mock backend for the HY-WorldPlay Gradio demo.

    Stands in for the real HY-WorldPlay model: it simulates load and
    generation latency with sleeps and writes a placeholder "video" file,
    so the UI can be exercised end-to-end without model weights or a GPU.
    """

    def __init__(self):
        # Set to True once load_model() has completed.
        self.model_loaded = False
        # Metadata dicts of completed generations, oldest first.
        self.generation_history = []

    def load_model(self, model_type: str = "bidirectional") -> str:
        """Pretend to load a model variant and return a status message."""
        time.sleep(2)  # Simulate loading time
        self.model_loaded = True
        return f"✅ {model_type.capitalize()} model loaded successfully!"

    def generate_video(self,
                       prompt: str,
                       image_path: Optional[str] = None,
                       resolution: str = "480p",
                       aspect_ratio: str = "16:9",
                       num_frames: int = 125,
                       seed: int = 1,
                       model_type: str = "bidirectional",
                       action_type: str = "forward"):
        """Mock streaming video generation.

        Yields 3-tuples of (progress_text, video_path_or_None, metadata),
        matching the three Gradio outputs wired to this generator
        (progress display, video component, metadata JSON): one tuple per
        progress step, then a final tuple carrying the placeholder video
        path and the full generation metadata.

        Raises:
            gr.Error: if load_model() has not been called yet.
        """
        if not self.model_loaded:
            raise gr.Error("Please load the model first!")

        # Simulate the generation pipeline step by step.
        progress_steps = [
            "Initializing generation pipeline...",
            "Processing prompt and image...",
            "Generating video frames...",
            "Applying temporal consistency...",
            "Rendering final video...",
        ]
        # FIX: the click handler routes this generator to three output
        # components; the original yielded 2-tuples, which Gradio cannot
        # map onto three outputs.  Yield None for the video until ready.
        for step in progress_steps:
            yield step, None, {"status": "processing", "step": step}
            time.sleep(1)

        # Create a mock video file (a real implementation would render video).
        # NOTE(review): the mkdtemp directory is never cleaned up -- fine for
        # a short-lived demo, but a long-running deployment should remove it.
        temp_dir = tempfile.mkdtemp()
        video_path = os.path.join(temp_dir, "generated_video.mp4")
        # utf-8 explicitly: the prompt may contain non-ASCII characters and
        # the platform default encoding is not guaranteed to handle them.
        with open(video_path, 'w', encoding='utf-8') as f:
            f.write(f"Generated video for prompt: {prompt}\n")
            f.write(f"Resolution: {resolution}\n")
            f.write(f"Aspect Ratio: {aspect_ratio}\n")
            f.write(f"Frames: {num_frames}\n")
            f.write(f"Model: {model_type}\n")
            f.write(f"Action: {action_type}\n")

        # Record metadata for the history tab.
        metadata = {
            "prompt": prompt,
            "resolution": resolution,
            "aspect_ratio": aspect_ratio,
            "num_frames": num_frames,
            "seed": seed,
            "model_type": model_type,
            "action_type": action_type,
            "generation_time": time.strftime("%Y-%m-%d %H:%M:%S"),
            "status": "completed",
        }
        self.generation_history.append(metadata)
        # "completed" in the text is what downstream progress parsing keys on.
        yield "Generation completed!", video_path, metadata
# Initialize demo class
# Single shared backend instance; the wrapper functions below and the
# Gradio event handlers all operate on this one object.
demo_instance = HYWorldPlayDemo()
def load_model_wrapper(model_type):
    """Delegate model loading to the shared demo instance.

    Returns the backend's human-readable status message unchanged.
    """
    status_message = demo_instance.load_model(model_type)
    return status_message
def generate_video_wrapper(*args):
    """Stream generation progress from the shared demo instance.

    Thin pass-through so Gradio's event system sees a module-level
    generator.  `yield from` replaces the original manual for/yield loop
    (identical behavior, idiomatic delegation).
    """
    yield from demo_instance.generate_video(*args)
def create_pose_json(trajectory_type: str, num_keyframes: int = 32) -> str:
    """Write a mock camera-trajectory JSON file and return its path.

    Supported trajectory types are "forward", "circular" and "zoom"; any
    other value (e.g. the UI's "custom" option) produces an empty pose list.
    The file is written to a fresh temporary directory.
    """
    frames = range(num_keyframes)
    if trajectory_type == "forward":
        # Constant-speed dolly along +x, fixed orientation and FOV.
        poses = [
            {"frame": k, "position": [k * 0.1, 0, 0], "rotation": [0, 0, 0], "fov": 60}
            for k in frames
        ]
    elif trajectory_type == "circular":
        # Orbit of radius 2 in the xz-plane, yaw tracking the orbit angle.
        poses = []
        for k in frames:
            theta = (k / num_keyframes) * 2 * np.pi
            poses.append({
                "frame": k,
                "position": [np.cos(theta) * 2, 0, np.sin(theta) * 2],
                "rotation": [0, np.degrees(theta), 0],
                "fov": 60,
            })
    elif trajectory_type == "zoom":
        # Stationary camera; FOV narrows from 60 towards 20 over the clip.
        poses = [
            {
                "frame": k,
                "position": [0, 0, 0],
                "rotation": [0, 0, 0],
                "fov": 60 / (1 + (k / num_keyframes) * 2),
            }
            for k in frames
        ]
    else:
        poses = []

    out_path = os.path.join(tempfile.mkdtemp(), "pose_trajectory.json")
    with open(out_path, 'w') as fh:
        json.dump({"poses": poses}, fh, indent=2)
    return out_path
def get_generation_history():
    """Return the five most recent generations as Markdown text.

    Reads demo_instance.generation_history; returns a placeholder string
    when nothing has been generated yet.
    """
    history = demo_instance.generation_history
    if not history:
        return "No generations yet."
    # Build via list + join instead of repeated string concatenation
    # (the original += loop is the quadratic-concat anti-pattern).
    parts = []
    for i, gen in enumerate(history[-5:], 1):
        parts.append(f"**Generation {i}**\n")
        parts.append(f"- Prompt: {gen['prompt'][:50]}...\n")
        parts.append(f"- Model: {gen['model_type']}\n")
        parts.append(f"- Frames: {gen['num_frames']}\n")
        parts.append(f"- Time: {gen['generation_time']}\n\n")
    return "".join(parts)
# Custom CSS for enhanced UI.
# Classes referenced by gr.HTML components elsewhere in this file:
#   .status-indicator + .status-{ready,loading,error} -- colored status dots
#   .generation-progress -- terminal-style progress panel
#   .feature-highlight   -- gradient banner around the feature list
#   .model-card / .main-container -- general layout polish
custom_css = """
.main-container {
max-width: 1400px;
margin: 0 auto;
}
.model-card {
border: 2px solid #e5e7eb;
border-radius: 12px;
padding: 20px;
margin: 10px 0;
transition: all 0.3s ease;
}
.model-card:hover {
border-color: #3b82f6;
box-shadow: 0 4px 12px rgba(59, 130, 246, 0.1);
}
.status-indicator {
display: inline-block;
width: 12px;
height: 12px;
border-radius: 50%;
margin-right: 8px;
}
.status-ready { background-color: #10b981; }
.status-loading { background-color: #f59e0b; }
.status-error { background-color: #ef4444; }
.feature-highlight {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 20px;
border-radius: 12px;
margin: 20px 0;
}
.generation-progress {
font-family: 'Courier New', monospace;
background: #1f2937;
color: #10b981;
padding: 15px;
border-radius: 8px;
margin: 10px 0;
}
"""
# Build the Gradio UI.
# FIX: the theme belongs on the gr.Blocks constructor; Blocks.launch() does
# not accept a `theme` keyword (the original passed it to launch()).
with gr.Blocks(
    css=custom_css,
    title="HY-WorldPlay: Interactive World Modeling",
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="indigo",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter"),
        text_size="lg",
        spacing_size="lg",
        radius_size="md",
    ),
) as demo:
    # Header.
    # NOTE(review): the original HTML markup appears to have been stripped by
    # extraction; minimal tags are restored around the same text -- confirm
    # against the deployed Space.
    gr.HTML("""
<div style="text-align: center;">
<h1>🎮 HY-WorldPlay</h1>
<p>Real-Time Interactive World Modeling with Geometric Consistency</p>
<p>Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder">anycoder</a></p>
</div>
""")

    # Feature highlights (styled by .feature-highlight in custom_css).
    with gr.Row():
        with gr.Column():
            gr.HTML("""
<div class="feature-highlight">
<h3>🚀 Key Features</h3>
<ul>
<li>Real-time video generation at 24 FPS</li>
<li>Long-term geometric consistency</li>
<li>Dual Action Representation for control</li>
<li>Reconstituted Context Memory</li>
<li>WorldCompass RL post-training</li>
<li>Context Forcing distillation</li>
</ul>
</div>
""")

    # Main interface tabs
    with gr.Tabs() as main_tabs:
        # Tab 1: Video Generation
        with gr.TabItem("🎬 Video Generation", id="gen_tab"):
            with gr.Row():
                with gr.Column(scale=2):
                    gr.Markdown("### Configuration")

                    # Model selection
                    model_type = gr.Radio(
                        choices=["bidirectional", "autoregressive", "autoregressive_distilled"],
                        value="bidirectional",
                        label="Model Type",
                        info="Choose the model variant for generation",
                    )
                    load_model_btn = gr.Button("🔄 Load Model", variant="primary", size="lg")
                    # FIX: the original single-quoted literal spanned two lines
                    # (unterminated string); restored as one valid HTML string.
                    model_status = gr.HTML(
                        '<span class="status-indicator status-error"></span>Model not loaded'
                    )

                    # Input controls
                    with gr.Accordion("📝 Input Settings", open=True):
                        prompt_input = gr.Textbox(
                            label="Prompt",
                            placeholder="Describe the world you want to generate...",
                            lines=3,
                            value="A peaceful landscape with a stone bridge spanning a calm body of water, surrounded by lush green trees and a traditional pavilion.",
                        )
                        # FIX: gr.Image is not a form component and does not
                        # accept an `info` kwarg ("Upload an image to guide the
                        # generation"); passing it raises a TypeError.
                        image_input = gr.Image(
                            label="Input Image (Optional)",
                            type="filepath",
                            sources=["upload", "clipboard"],
                        )

                    with gr.Accordion("⚙️ Generation Settings", open=False):
                        with gr.Row():
                            resolution = gr.Dropdown(
                                choices=["480p", "720p", "1080p"],
                                value="480p",
                                label="Resolution",
                            )
                            aspect_ratio = gr.Dropdown(
                                choices=["16:9", "9:16", "1:1", "4:3"],
                                value="16:9",
                                label="Aspect Ratio",
                            )
                        with gr.Row():
                            num_frames = gr.Slider(
                                minimum=16,
                                maximum=250,
                                value=125,
                                step=1,
                                label="Number of Frames",
                            )
                            seed = gr.Number(
                                value=1,
                                label="Seed",
                                precision=0,
                            )

                    # Camera trajectory
                    with gr.Accordion("🎥 Camera Trajectory", open=False):
                        trajectory_type = gr.Radio(
                            choices=["forward", "circular", "zoom", "custom"],
                            value="forward",
                            label="Trajectory Type",
                        )
                        create_pose_btn = gr.Button("Generate Trajectory JSON")
                        pose_status = gr.Textbox(label="Trajectory Status", interactive=False)

                    # Generation button; hidden until a model is loaded
                    # (revealed by update_model_status below).
                    generate_btn = gr.Button("🚀 Generate Video", variant="primary", size="lg", visible=False)

                with gr.Column(scale=1):
                    gr.Markdown("### Output & Progress")

                    # Progress display (FIX: restored one-line string literal).
                    progress_display = gr.HTML(
                        '<div class="generation-progress">Ready to generate...</div>'
                    )

                    # Output video
                    video_output = gr.Video(
                        label="Generated Video",
                        visible=False,
                    )

                    # Metadata
                    metadata_output = gr.JSON(
                        label="Generation Metadata",
                        visible=False,
                    )

        # Tab 2: Model Comparison
        with gr.TabItem("📊 Model Comparison", id="compare_tab"):
            gr.Markdown("### Performance Comparison")

            # Static benchmark table; figures are display-only.
            gr.DataFrame(
                headers=["Model", "Real-time", "PSNR ↑", "SSIM ↑", "LPIPS ↓", "R_dist ↓", "T_dist ↓"],
                datatype=["str", "str", "number", "number", "number", "number", "number"],
                value=[
                    ["CameraCtrl", "❌", 17.93, 0.569, 0.298, 0.037, 0.341],
                    ["SEVA", "❌", 19.84, 0.598, 0.313, 0.047, 0.223],
                    ["ViewCrafter", "❌", 19.91, 0.617, 0.327, 0.029, 0.543],
                    ["Gen3C", "❌", 21.68, 0.635, 0.278, 0.024, 0.477],
                    ["VMem", "❌", 19.97, 0.587, 0.316, 0.048, 0.219],
                    ["Matrix-Game-2.0", "✅", 17.26, 0.505, 0.383, 0.287, 0.843],
                    ["GameCraft", "❌", 21.05, 0.639, 0.341, 0.151, 0.617],
                    ["Ours (w/o Context Forcing)", "❌", 21.27, 0.669, 0.261, 0.033, 0.157],
                    ["Ours (full)", "✅", 21.92, 0.702, 0.247, 0.031, 0.121],
                ],
                label="Quantitative Evaluation Results",
                interactive=False,
            )

            with gr.Row():
                with gr.Column():
                    gr.Markdown("""
### 🔬 Key Improvements
- **Real-time Performance**: Achieves 24 FPS streaming generation
- **Superior Consistency**: Best-in-class long-term geometric consistency
- **Memory Efficiency**: Reconstituted Context Memory prevents error drift
- **Action Control**: Precise keyboard and mouse input response
""")
                with gr.Column():
                    gr.Markdown("""
### 📈 Technical Innovations
- **Dual Action Representation**: Robust action control
- **Context Forcing**: Memory-aware model distillation
- **WorldCompass**: RL-based post-training
- **Temporal Reframing**: Long-past frame accessibility
""")

        # Tab 3: Examples (static placeholder imagery).
        with gr.TabItem("🎨 Examples", id="examples_tab"):
            gr.Markdown("### Sample Generations")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("""
#### Example 1: Bridge Scene
**Prompt**: A paved pathway leads towards a stone arch bridge spanning a calm body of water...
**Action**: Forward movement
**Frames**: 125
""")
                    gr.Image("https://via.placeholder.com/400x225/3b82f6/ffffff?text=Bridge+Scene+Example", label="Example 1")
                with gr.Column():
                    gr.Markdown("""
#### Example 2: Forest Path
**Prompt**: A winding path through an enchanted forest with ancient trees...
**Action**: Circular trajectory
**Frames**: 125
""")
                    gr.Image("https://via.placeholder.com/400x225/10b981/ffffff?text=Forest+Path+Example", label="Example 2")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("""
#### Example 3: Urban Scene
**Prompt**: A futuristic cityscape with flying vehicles and neon lights...
**Action**: Zoom in
**Frames**: 125
""")
                    gr.Image("https://via.placeholder.com/400x225/8b5cf6/ffffff?text=Urban+Scene+Example", label="Example 3")
                with gr.Column():
                    gr.Markdown("""
#### Example 4: Interior Scene
**Prompt**: A cozy library with bookshelves and warm lighting...
**Action**: Custom trajectory
**Frames**: 125
""")
                    gr.Image("https://via.placeholder.com/400x225/f59e0b/ffffff?text=Interior+Scene+Example", label="Example 4")

        # Tab 4: History
        with gr.TabItem("📜 History", id="history_tab"):
            gr.Markdown("### Generation History")
            history_display = gr.Markdown(get_generation_history())
            refresh_history_btn = gr.Button("🔄 Refresh History")

    # Footer (original markup was empty/stripped).
    gr.HTML("""
""")

    # ----- Event handlers ---------------------------------------------------

    def update_model_status(is_loaded, model_type):
        """Return (status HTML, generate-button update) for the model state.

        FIX: the original return literals were unterminated strings; they are
        restored using the status-indicator classes defined in custom_css.
        """
        if is_loaded:
            return (
                f'<span class="status-indicator status-ready"></span>'
                f'{model_type.capitalize()} model loaded',
                gr.Button(visible=True),
            )
        return (
            f'<span class="status-indicator status-loading"></span>'
            f'Loading {model_type} model...',
            gr.Button(visible=False),
        )

    # Load the model, then flip the status line and reveal the generate button.
    load_model_btn.click(
        fn=load_model_wrapper,
        inputs=[model_type],
        outputs=[model_status],
    ).then(
        fn=lambda mt: update_model_status(True, mt),
        inputs=[model_type],
        outputs=[model_status, generate_btn],
    )

    def update_progress(progress_text, show_video=False):
        """Map progress text to (progress HTML, video update, JSON update).

        NOTE(review): defined but not wired to any event below -- kept for
        parity with the original; confirm whether it should gate visibility.
        """
        if "completed" in progress_text.lower():
            return (
                f'<div class="generation-progress">✅ {progress_text}</div>',
                gr.Video(visible=True),
                gr.JSON(visible=True),
            )
        return (
            f'<div class="generation-progress">⏳ {progress_text}</div>',
            gr.Video(visible=False),
            gr.JSON(visible=False),
        )

    # Stream generation progress into the three output components.
    # trajectory_type doubles as the backend's action_type argument.
    generate_btn.click(
        fn=generate_video_wrapper,
        inputs=[
            prompt_input,
            image_input,
            resolution,
            aspect_ratio,
            num_frames,
            seed,
            model_type,
            trajectory_type,
        ],
        outputs=[progress_display, video_output, metadata_output],
    )

    # Write the trajectory JSON, then replace the raw path with a friendly note.
    create_pose_btn.click(
        fn=create_pose_json,
        inputs=[trajectory_type],
        outputs=[pose_status],
    ).then(
        fn=lambda x: f"✅ Trajectory JSON created for {x} motion",
        inputs=[trajectory_type],
        outputs=[pose_status],
    )

    refresh_history_btn.click(
        fn=get_generation_history,
        outputs=[history_display],
    )
# Launch the app.
# FIX: Blocks.launch() accepts neither `theme` nor `footer_links` keyword
# arguments -- a theme is a gr.Blocks(...) constructor argument, and gradio
# has no footer_links API -- so the original call raised a TypeError.
# The intended links are preserved here for reference:
#   Built with anycoder: https://huggingface.co/spaces/akhaliq/anycoder
#   Paper:               https://arxiv.org/abs/2512.14614
#   GitHub:              https://github.com/Tencent/HY-WorldPlay
#   Model Card:          https://huggingface.co/tencent/HY-WorldPlay
if __name__ == "__main__":
    demo.launch()