import gradio as gr
import numpy as np
import json
import os
import tempfile
from typing import Optional
import time
# Mock implementation for demonstration.
# In a real deployment, this would integrate with the actual HY-WorldPlay model.
class HYWorldPlayDemo:
    def __init__(self):
        self.model_loaded = False
        self.generation_history = []

    def load_model(self, model_type: str = "bidirectional") -> str:
        """Mock model loading."""
        time.sleep(2)  # Simulate loading time
        self.model_loaded = True
        return f"✅ {model_type.capitalize()} model loaded successfully!"
    def generate_video(self,
                       prompt: str,
                       image_path: Optional[str] = None,
                       resolution: str = "480p",
                       aspect_ratio: str = "16:9",
                       num_frames: int = 125,
                       seed: int = 1,
                       model_type: str = "bidirectional",
                       action_type: str = "forward"):
        """
        Mock video generation (a generator, not a plain function).

        Yields one triple per step, matching the three wired output
        components: (progress_html, video_update, metadata_update).
        """
        if not self.model_loaded:
            raise gr.Error("Please load the model first!")

        # Simulate the stages of the generation pipeline.
        progress_steps = [
            "Initializing generation pipeline...",
            "Processing prompt and image...",
            "Generating video frames...",
            "Applying temporal consistency...",
            "Rendering final video..."
        ]
        for step in progress_steps:
            # Video and metadata stay hidden while generation is in progress.
            yield (f'<div class="generation-progress">⏳ {step}</div>',
                   gr.update(visible=False),
                   gr.update(visible=False))
            time.sleep(1)

        # Create a mock video file. In a real implementation this would be
        # actual video generation; the placeholder text file written below
        # will not play in gr.Video.
        temp_dir = tempfile.mkdtemp()
        video_path = os.path.join(temp_dir, "generated_video.mp4")
        with open(video_path, 'w') as f:
            f.write(f"Generated video for prompt: {prompt}\n")
            f.write(f"Resolution: {resolution}\n")
            f.write(f"Aspect Ratio: {aspect_ratio}\n")
            f.write(f"Frames: {num_frames}\n")
            f.write(f"Model: {model_type}\n")
            f.write(f"Action: {action_type}\n")

        # Record generation metadata for the history tab.
        metadata = {
            "prompt": prompt,
            "resolution": resolution,
            "aspect_ratio": aspect_ratio,
            "num_frames": num_frames,
            "seed": seed,
            "model_type": model_type,
            "action_type": action_type,
            "generation_time": time.strftime("%Y-%m-%d %H:%M:%S"),
            "status": "completed"
        }
        self.generation_history.append(metadata)

        yield ('<div class="generation-progress">✅ Generation completed</div>',
               gr.update(value=video_path, visible=True),
               gr.update(value=metadata, visible=True))
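

# The class above is a mock. Below is a minimal, hedged sketch of the adapter a
# real deployment might drop in instead. HY-WorldPlay's Python API is not
# documented in this file, so every name here is a hypothetical placeholder
# (the repo id matches the model card linked in the footer), not the model's
# actual interface.
class HYWorldPlayReal:
    """Hypothetical same-interface adapter backed by real weights (sketch only)."""

    def __init__(self, repo_id: str = "tencent/HY-WorldPlay"):
        self.repo_id = repo_id
        self.model_loaded = False
        self.generation_history = []

    def load_model(self, model_type: str = "bidirectional") -> str:
        # huggingface_hub.snapshot_download is a real API and would fetch the
        # checkpoint; the runtime that consumes it is the assumed part.
        from huggingface_hub import snapshot_download
        local_dir = snapshot_download(self.repo_id)
        raise NotImplementedError(
            f"Wire the HY-WorldPlay runtime to the weights in {local_dir}."
        )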
# Initialize the shared demo instance.
demo_instance = HYWorldPlayDemo()


def load_model_wrapper(model_type):
    """Wrapper for model loading with status updates."""
    return demo_instance.load_model(model_type)


def generate_video_wrapper(*args):
    """Wrapper for video generation with streaming progress updates."""
    yield from demo_instance.generate_video(*args)
def create_pose_json(trajectory_type: str, num_keyframes: int = 32) -> str:
    """Create a mock pose JSON file describing a camera trajectory."""
    poses = []
    if trajectory_type == "forward":
        # Constant heading, stepping 0.1 units along x per keyframe.
        for i in range(num_keyframes):
            poses.append({
                "frame": i,
                "position": [i * 0.1, 0, 0],
                "rotation": [0, 0, 0],
                "fov": 60
            })
    elif trajectory_type == "circular":
        # Orbit of radius 2 in the xz-plane, with yaw tracking the orbit angle.
        # Numpy scalars are cast to float so json.dump can serialize them.
        for i in range(num_keyframes):
            angle = (i / num_keyframes) * 2 * np.pi
            poses.append({
                "frame": i,
                "position": [float(np.cos(angle) * 2), 0, float(np.sin(angle) * 2)],
                "rotation": [0, float(np.degrees(angle)), 0],
                "fov": 60
            })
    elif trajectory_type == "zoom":
        # Fixed position; a narrowing field of view simulates a zoom-in.
        for i in range(num_keyframes):
            zoom = 1 + (i / num_keyframes) * 2
            poses.append({
                "frame": i,
                "position": [0, 0, 0],
                "rotation": [0, 0, 0],
                "fov": 60 / zoom
            })
    # "custom" (or any unrecognized type) falls through with an empty pose
    # list; the user is expected to supply their own trajectory JSON.

    temp_dir = tempfile.mkdtemp()
    json_path = os.path.join(temp_dir, "pose_trajectory.json")
    with open(json_path, 'w') as f:
        json.dump({"poses": poses}, f, indent=2)
    return json_path
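

# For reference, the file emitted above has this shape (first "circular"
# keyframe shown; values follow directly from the math in create_pose_json):
#
# {
#   "poses": [
#     {"frame": 0, "position": [2.0, 0, 0.0], "rotation": [0, 0.0, 0], "fov": 60},
#     ...
#   ]
# }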
def get_generation_history() -> str:
    """Return the five most recent generations as formatted Markdown."""
    if not demo_instance.generation_history:
        return "No generations yet."
    history_text = ""
    for i, gen in enumerate(demo_instance.generation_history[-5:], 1):
        history_text += f"**Generation {i}**\n"
        history_text += f"- Prompt: {gen['prompt'][:50]}...\n"
        history_text += f"- Model: {gen['model_type']}\n"
        history_text += f"- Frames: {gen['num_frames']}\n"
        history_text += f"- Time: {gen['generation_time']}\n\n"
    return history_text
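

# Sample of what get_generation_history() returns after one run (values here
# are illustrative; the format comes from the f-strings above):
#
# **Generation 1**
# - Prompt: A peaceful landscape with a stone bridge spanni...
# - Model: bidirectional
# - Frames: 125
# - Time: 2025-01-01 12:00:00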
# Custom CSS for enhanced UI
custom_css = """
.main-container {
    max-width: 1400px;
    margin: 0 auto;
}
.model-card {
    border: 2px solid #e5e7eb;
    border-radius: 12px;
    padding: 20px;
    margin: 10px 0;
    transition: all 0.3s ease;
}
.model-card:hover {
    border-color: #3b82f6;
    box-shadow: 0 4px 12px rgba(59, 130, 246, 0.1);
}
.status-indicator {
    display: inline-block;
    width: 12px;
    height: 12px;
    border-radius: 50%;
    margin-right: 8px;
}
.status-ready { background-color: #10b981; }
.status-loading { background-color: #f59e0b; }
.status-error { background-color: #ef4444; }
.feature-highlight {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 20px;
    border-radius: 12px;
    margin: 20px 0;
}
.generation-progress {
    font-family: 'Courier New', monospace;
    background: #1f2937;
    color: #10b981;
    padding: 15px;
    border-radius: 8px;
    margin: 10px 0;
}
"""
# Themes are passed to gr.Blocks, not to launch().
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="indigo",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    text_size=gr.themes.sizes.text_lg,
    spacing_size=gr.themes.sizes.spacing_lg,
    radius_size=gr.themes.sizes.radius_md
)

with gr.Blocks(css=custom_css, theme=theme, title="HY-WorldPlay: Interactive World Modeling") as demo:
    # Header
    gr.HTML("""
    <div style="text-align: center; margin-bottom: 30px;">
        <h1 style="font-size: 2.5em; margin-bottom: 10px;">🎮 HY-WorldPlay</h1>
        <p style="font-size: 1.2em; color: #6b7280;">Real-Time Interactive World Modeling with Geometric Consistency</p>
        <p style="margin-top: 10px;">
            <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #3b82f6; text-decoration: none;">
                Built with anycoder
            </a>
        </p>
    </div>
    """)
    # Feature highlights
    with gr.Row():
        with gr.Column():
            gr.HTML("""
            <div class="feature-highlight">
                <h3>🚀 Key Features</h3>
                <ul style="margin: 10px 0;">
                    <li>Real-time video generation at 24 FPS</li>
                    <li>Long-term geometric consistency</li>
                    <li>Dual Action Representation for control</li>
                    <li>Reconstituted Context Memory</li>
                    <li>WorldCompass RL post-training</li>
                    <li>Context Forcing distillation</li>
                </ul>
            </div>
            """)
    # Main interface tabs
    with gr.Tabs():
        # Tab 1: Video Generation
        with gr.TabItem("🎬 Video Generation", id="gen_tab"):
            with gr.Row():
                with gr.Column(scale=2):
                    gr.Markdown("### Configuration")

                    # Model selection
                    model_type = gr.Radio(
                        choices=["bidirectional", "autoregressive", "autoregressive_distilled"],
                        value="bidirectional",
                        label="Model Type",
                        info="Choose the model variant for generation"
                    )
                    load_model_btn = gr.Button("🔄 Load Model", variant="primary", size="lg")
                    model_status = gr.HTML('<div><span class="status-indicator status-error"></span>Model not loaded</div>')

                    # Input controls
                    with gr.Accordion("📝 Input Settings", open=True):
                        prompt_input = gr.Textbox(
                            label="Prompt",
                            placeholder="Describe the world you want to generate...",
                            lines=3,
                            value="A peaceful landscape with a stone bridge spanning a calm body of water, surrounded by lush green trees and a traditional pavilion."
                        )
                        # gr.Image does not accept an `info` argument, so the
                        # upload hint lives in the label instead.
                        image_input = gr.Image(
                            label="Input Image (optional, guides the generation)",
                            type="filepath",
                            sources=["upload", "clipboard"]
                        )

                    with gr.Accordion("⚙️ Generation Settings", open=False):
                        with gr.Row():
                            resolution = gr.Dropdown(
                                choices=["480p", "720p", "1080p"],
                                value="480p",
                                label="Resolution"
                            )
                            aspect_ratio = gr.Dropdown(
                                choices=["16:9", "9:16", "1:1", "4:3"],
                                value="16:9",
                                label="Aspect Ratio"
                            )
                        with gr.Row():
                            num_frames = gr.Slider(
                                minimum=16,
                                maximum=250,
                                value=125,
                                step=1,
                                label="Number of Frames"
                            )
                            seed = gr.Number(
                                value=1,
                                label="Seed",
                                precision=0
                            )

                    # Camera trajectory
                    with gr.Accordion("🎥 Camera Trajectory", open=False):
                        trajectory_type = gr.Radio(
                            choices=["forward", "circular", "zoom", "custom"],
                            value="forward",
                            label="Trajectory Type"
                        )
                        create_pose_btn = gr.Button("Generate Trajectory JSON")
                        pose_status = gr.Textbox(label="Trajectory Status", interactive=False)

                    # Generation button (hidden until a model is loaded)
                    generate_btn = gr.Button("🚀 Generate Video", variant="primary", size="lg", visible=False)

                with gr.Column(scale=1):
                    gr.Markdown("### Output & Progress")

                    # Progress display
                    progress_display = gr.HTML('<div class="generation-progress">Ready to generate...</div>')

                    # Output video (revealed once generation completes)
                    video_output = gr.Video(
                        label="Generated Video",
                        visible=False
                    )

                    # Metadata (revealed once generation completes)
                    metadata_output = gr.JSON(
                        label="Generation Metadata",
                        visible=False
                    )
        # Tab 2: Model Comparison
        with gr.TabItem("📊 Model Comparison", id="compare_tab"):
            gr.Markdown("### Performance Comparison")

            # Performance metrics table
            gr.DataFrame(
                headers=["Model", "Real-time", "PSNR ↑", "SSIM ↑", "LPIPS ↓", "R_dist ↓", "T_dist ↓"],
                datatype=["str", "str", "number", "number", "number", "number", "number"],
                value=[
                    ["CameraCtrl", "❌", 17.93, 0.569, 0.298, 0.037, 0.341],
                    ["SEVA", "❌", 19.84, 0.598, 0.313, 0.047, 0.223],
                    ["ViewCrafter", "❌", 19.91, 0.617, 0.327, 0.029, 0.543],
                    ["Gen3C", "❌", 21.68, 0.635, 0.278, 0.024, 0.477],
                    ["VMem", "❌", 19.97, 0.587, 0.316, 0.048, 0.219],
                    ["Matrix-Game-2.0", "✅", 17.26, 0.505, 0.383, 0.287, 0.843],
                    ["GameCraft", "❌", 21.05, 0.639, 0.341, 0.151, 0.617],
                    ["Ours (w/o Context Forcing)", "❌", 21.27, 0.669, 0.261, 0.033, 0.157],
                    ["Ours (full)", "✅", 21.92, 0.702, 0.247, 0.031, 0.121]
                ],
                label="Quantitative Evaluation Results",
                interactive=False
            )

            with gr.Row():
                with gr.Column():
                    gr.Markdown("""
                    ### 🔬 Key Improvements
                    - **Real-time Performance**: Achieves 24 FPS streaming generation
                    - **Superior Consistency**: Best-in-class long-term geometric consistency
                    - **Memory Efficiency**: Reconstituted Context Memory prevents error drift
                    - **Action Control**: Precise keyboard and mouse input response
                    """)
                with gr.Column():
                    gr.Markdown("""
                    ### 📈 Technical Innovations
                    - **Dual Action Representation**: Robust action control
                    - **Context Forcing**: Memory-aware model distillation
                    - **WorldCompass**: RL-based post-training
                    - **Temporal Reframing**: Long-past frame accessibility
                    """)
        # Tab 3: Examples
        with gr.TabItem("🎨 Examples", id="examples_tab"):
            gr.Markdown("### Sample Generations")

            with gr.Row():
                with gr.Column():
                    gr.Markdown("""
                    #### Example 1: Bridge Scene
                    **Prompt**: A paved pathway leads towards a stone arch bridge spanning a calm body of water...
                    **Action**: Forward movement
                    **Frames**: 125
                    """)
                    gr.Image("https://via.placeholder.com/400x225/3b82f6/ffffff?text=Bridge+Scene+Example", label="Example 1")
                with gr.Column():
                    gr.Markdown("""
                    #### Example 2: Forest Path
                    **Prompt**: A winding path through an enchanted forest with ancient trees...
                    **Action**: Circular trajectory
                    **Frames**: 125
                    """)
                    gr.Image("https://via.placeholder.com/400x225/10b981/ffffff?text=Forest+Path+Example", label="Example 2")

            with gr.Row():
                with gr.Column():
                    gr.Markdown("""
                    #### Example 3: Urban Scene
                    **Prompt**: A futuristic cityscape with flying vehicles and neon lights...
                    **Action**: Zoom in
                    **Frames**: 125
                    """)
                    gr.Image("https://via.placeholder.com/400x225/8b5cf6/ffffff?text=Urban+Scene+Example", label="Example 3")
                with gr.Column():
                    gr.Markdown("""
                    #### Example 4: Interior Scene
                    **Prompt**: A cozy library with bookshelves and warm lighting...
                    **Action**: Custom trajectory
                    **Frames**: 125
                    """)
                    gr.Image("https://via.placeholder.com/400x225/f59e0b/ffffff?text=Interior+Scene+Example", label="Example 4")
        # Tab 4: History
        with gr.TabItem("📜 History", id="history_tab"):
            gr.Markdown("### Generation History")
            history_display = gr.Markdown(get_generation_history())
            refresh_history_btn = gr.Button("🔄 Refresh History")
    # Footer
    gr.HTML("""
    <div style="text-align: center; margin-top: 40px; padding: 20px; border-top: 1px solid #e5e7eb;">
        <p style="color: #6b7280;">
            HY-WorldPlay: A Systematic Framework for Interactive World Modeling<br>
            <a href="https://arxiv.org/abs/2512.14614" target="_blank" style="color: #3b82f6;">Paper</a> |
            <a href="https://github.com/Tencent/HY-WorldPlay" target="_blank" style="color: #3b82f6;">GitHub</a> |
            <a href="https://huggingface.co/tencent/HY-WorldPlay" target="_blank" style="color: #3b82f6;">Model Card</a>
        </p>
    </div>
    """)
    # Event handlers
    def update_model_status(is_loaded, model_type):
        """Render the status indicator and toggle the generate button."""
        if is_loaded:
            return (f'<div><span class="status-indicator status-ready"></span>{model_type.capitalize()} model loaded</div>',
                    gr.update(visible=True))
        return (f'<div><span class="status-indicator status-loading"></span>Loading {model_type} model...</div>',
                gr.update(visible=False))

    load_model_btn.click(
        fn=load_model_wrapper,
        inputs=[model_type],
        outputs=[model_status]
    ).then(
        fn=lambda x: update_model_status(True, x),
        inputs=[model_type],
        outputs=[model_status, generate_btn]
    )

    # The generator yields (progress_html, video_update, metadata_update)
    # triples, so progress streams into the HTML component while the video
    # and metadata stay hidden until the final yield.
    generate_btn.click(
        fn=generate_video_wrapper,
        inputs=[
            prompt_input,
            image_input,
            resolution,
            aspect_ratio,
            num_frames,
            seed,
            model_type,
            trajectory_type
        ],
        outputs=[progress_display, video_output, metadata_output]
    )

    def create_pose_wrapper(trajectory_type):
        """Build the trajectory JSON and report where it was written."""
        json_path = create_pose_json(trajectory_type)
        return f"✅ Trajectory JSON created for {trajectory_type} motion: {json_path}"

    create_pose_btn.click(
        fn=create_pose_wrapper,
        inputs=[trajectory_type],
        outputs=[pose_status]
    )

    refresh_history_btn.click(
        fn=get_generation_history,
        outputs=[history_display]
    )
# Launch the app. The theme is configured on gr.Blocks above; launch() accepts
# neither a `theme` nor a `footer_links` argument, and the footer links are
# already rendered in the footer HTML.
if __name__ == "__main__":
    demo.launch()