import gradio as gr
import numpy as np
import json
import os
import tempfile
from typing import Optional
import time
# Mock implementation for demonstration.
# In a real deployment, this would integrate with the actual HY-WorldPlay model.
class HYWorldPlayDemo:
    def __init__(self):
        self.model_loaded = False
        self.generation_history = []

    def load_model(self, model_type: str = "bidirectional") -> str:
        """Mock model loading."""
        time.sleep(2)  # Simulate loading time
        self.model_loaded = True
        return f"✅ {model_type.capitalize()} model loaded successfully!"
    def generate_video(self,
                       prompt: str,
                       image_path: Optional[str] = None,
                       resolution: str = "480p",
                       aspect_ratio: str = "16:9",
                       num_frames: int = 125,
                       seed: int = 1,
                       model_type: str = "bidirectional",
                       action_type: str = "forward"):
        """
        Mock video generation (a generator, not a plain function).

        Yields one triple per step, matching the three wired output
        components: (progress_html, video_update, metadata_update).
        """
        if not self.model_loaded:
            raise gr.Error("Please load the model first!")

        # Simulate the stages of the generation pipeline.
        progress_steps = [
            "Initializing generation pipeline...",
            "Processing prompt and image...",
            "Generating video frames...",
            "Applying temporal consistency...",
            "Rendering final video..."
        ]
        for step in progress_steps:
            # Video and metadata stay hidden while generation is in progress.
            yield (f'<div class="generation-progress">⏳ {step}</div>',
                   gr.update(visible=False),
                   gr.update(visible=False))
            time.sleep(1)

        # Create a mock video file. In a real implementation this would be
        # actual video generation; the placeholder text file written below
        # will not play in gr.Video.
        temp_dir = tempfile.mkdtemp()
        video_path = os.path.join(temp_dir, "generated_video.mp4")
        with open(video_path, 'w') as f:
            f.write(f"Generated video for prompt: {prompt}\n")
            f.write(f"Resolution: {resolution}\n")
            f.write(f"Aspect Ratio: {aspect_ratio}\n")
            f.write(f"Frames: {num_frames}\n")
            f.write(f"Model: {model_type}\n")
            f.write(f"Action: {action_type}\n")

        # Record generation metadata for the history tab.
        metadata = {
            "prompt": prompt,
            "resolution": resolution,
            "aspect_ratio": aspect_ratio,
            "num_frames": num_frames,
            "seed": seed,
            "model_type": model_type,
            "action_type": action_type,
            "generation_time": time.strftime("%Y-%m-%d %H:%M:%S"),
            "status": "completed"
        }
        self.generation_history.append(metadata)

        yield ('<div class="generation-progress">✅ Generation completed</div>',
               gr.update(value=video_path, visible=True),
               gr.update(value=metadata, visible=True))
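

# The class above is a mock. Below is a minimal, hedged sketch of the adapter a
# real deployment might drop in instead. HY-WorldPlay's Python API is not
# documented in this file, so every name here is a hypothetical placeholder
# (the repo id matches the model card linked in the footer), not the model's
# actual interface.
class HYWorldPlayReal:
    """Hypothetical same-interface adapter backed by real weights (sketch only)."""

    def __init__(self, repo_id: str = "tencent/HY-WorldPlay"):
        self.repo_id = repo_id
        self.model_loaded = False
        self.generation_history = []

    def load_model(self, model_type: str = "bidirectional") -> str:
        # huggingface_hub.snapshot_download is a real API and would fetch the
        # checkpoint; the runtime that consumes it is the assumed part.
        from huggingface_hub import snapshot_download
        local_dir = snapshot_download(self.repo_id)
        raise NotImplementedError(
            f"Wire the HY-WorldPlay runtime to the weights in {local_dir}."
        )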
# Initialize the shared demo instance.
demo_instance = HYWorldPlayDemo()


def load_model_wrapper(model_type):
    """Wrapper for model loading with status updates."""
    return demo_instance.load_model(model_type)


def generate_video_wrapper(*args):
    """Wrapper for video generation with streaming progress updates."""
    yield from demo_instance.generate_video(*args)
def create_pose_json(trajectory_type: str, num_keyframes: int = 32) -> str:
    """Create a mock pose JSON file describing a camera trajectory."""
    poses = []
    if trajectory_type == "forward":
        # Constant heading, stepping 0.1 units along x per keyframe.
        for i in range(num_keyframes):
            poses.append({
                "frame": i,
                "position": [i * 0.1, 0, 0],
                "rotation": [0, 0, 0],
                "fov": 60
            })
    elif trajectory_type == "circular":
        # Orbit of radius 2 in the xz-plane, with yaw tracking the orbit angle.
        # Numpy scalars are cast to float so json.dump can serialize them.
        for i in range(num_keyframes):
            angle = (i / num_keyframes) * 2 * np.pi
            poses.append({
                "frame": i,
                "position": [float(np.cos(angle) * 2), 0, float(np.sin(angle) * 2)],
                "rotation": [0, float(np.degrees(angle)), 0],
                "fov": 60
            })
    elif trajectory_type == "zoom":
        # Fixed position; a narrowing field of view simulates a zoom-in.
        for i in range(num_keyframes):
            zoom = 1 + (i / num_keyframes) * 2
            poses.append({
                "frame": i,
                "position": [0, 0, 0],
                "rotation": [0, 0, 0],
                "fov": 60 / zoom
            })
    # "custom" (or any unrecognized type) falls through with an empty pose
    # list; the user is expected to supply their own trajectory JSON.

    temp_dir = tempfile.mkdtemp()
    json_path = os.path.join(temp_dir, "pose_trajectory.json")
    with open(json_path, 'w') as f:
        json.dump({"poses": poses}, f, indent=2)
    return json_path
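

# For reference, the file emitted above has this shape (first "circular"
# keyframe shown; values follow directly from the math in create_pose_json):
#
# {
#   "poses": [
#     {"frame": 0, "position": [2.0, 0, 0.0], "rotation": [0, 0.0, 0], "fov": 60},
#     ...
#   ]
# }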
def get_generation_history() -> str:
    """Return the five most recent generations as formatted Markdown."""
    if not demo_instance.generation_history:
        return "No generations yet."
    history_text = ""
    for i, gen in enumerate(demo_instance.generation_history[-5:], 1):
        history_text += f"**Generation {i}**\n"
        history_text += f"- Prompt: {gen['prompt'][:50]}...\n"
        history_text += f"- Model: {gen['model_type']}\n"
        history_text += f"- Frames: {gen['num_frames']}\n"
        history_text += f"- Time: {gen['generation_time']}\n\n"
    return history_text
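

# Sample of what get_generation_history() returns after one run (values here
# are illustrative; the format comes from the f-strings above):
#
# **Generation 1**
# - Prompt: A peaceful landscape with a stone bridge spanni...
# - Model: bidirectional
# - Frames: 125
# - Time: 2025-01-01 12:00:00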
# Custom CSS for enhanced UI
custom_css = """
.main-container {
    max-width: 1400px;
    margin: 0 auto;
}
.model-card {
    border: 2px solid #e5e7eb;
    border-radius: 12px;
    padding: 20px;
    margin: 10px 0;
    transition: all 0.3s ease;
}
.model-card:hover {
    border-color: #3b82f6;
    box-shadow: 0 4px 12px rgba(59, 130, 246, 0.1);
}
.status-indicator {
    display: inline-block;
    width: 12px;
    height: 12px;
    border-radius: 50%;
    margin-right: 8px;
}
.status-ready { background-color: #10b981; }
.status-loading { background-color: #f59e0b; }
.status-error { background-color: #ef4444; }
.feature-highlight {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 20px;
    border-radius: 12px;
    margin: 20px 0;
}
.generation-progress {
    font-family: 'Courier New', monospace;
    background: #1f2937;
    color: #10b981;
    padding: 15px;
    border-radius: 8px;
    margin: 10px 0;
}
"""
# Themes are passed to gr.Blocks, not to launch().
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="indigo",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    text_size=gr.themes.sizes.text_lg,
    spacing_size=gr.themes.sizes.spacing_lg,
    radius_size=gr.themes.sizes.radius_md
)

with gr.Blocks(css=custom_css, theme=theme, title="HY-WorldPlay: Interactive World Modeling") as demo:
    # Header
    gr.HTML("""
    <div style="text-align: center; margin-bottom: 30px;">
        <h1 style="font-size: 2.5em; margin-bottom: 10px;">🎮 HY-WorldPlay</h1>
        <p style="font-size: 1.2em; color: #6b7280;">Real-Time Interactive World Modeling with Geometric Consistency</p>
        <p style="margin-top: 10px;">
            <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #3b82f6; text-decoration: none;">
                Built with anycoder
            </a>
        </p>
    </div>
    """)
    # Feature highlights
    with gr.Row():
        with gr.Column():
            gr.HTML("""
            <div class="feature-highlight">
                <h3>🚀 Key Features</h3>
                <ul style="margin: 10px 0;">
                    <li>Real-time video generation at 24 FPS</li>
                    <li>Long-term geometric consistency</li>
                    <li>Dual Action Representation for control</li>
                    <li>Reconstituted Context Memory</li>
                    <li>WorldCompass RL post-training</li>
                    <li>Context Forcing distillation</li>
                </ul>
            </div>
            """)
    # Main interface tabs
    with gr.Tabs():
        # Tab 1: Video Generation
        with gr.TabItem("🎬 Video Generation", id="gen_tab"):
            with gr.Row():
                with gr.Column(scale=2):
                    gr.Markdown("### Configuration")

                    # Model selection
                    model_type = gr.Radio(
                        choices=["bidirectional", "autoregressive", "autoregressive_distilled"],
                        value="bidirectional",
                        label="Model Type",
                        info="Choose the model variant for generation"
                    )
                    load_model_btn = gr.Button("🔄 Load Model", variant="primary", size="lg")
                    model_status = gr.HTML('<div><span class="status-indicator status-error"></span>Model not loaded</div>')

                    # Input controls
                    with gr.Accordion("📝 Input Settings", open=True):
                        prompt_input = gr.Textbox(
                            label="Prompt",
                            placeholder="Describe the world you want to generate...",
                            lines=3,
                            value="A peaceful landscape with a stone bridge spanning a calm body of water, surrounded by lush green trees and a traditional pavilion."
                        )
                        # gr.Image does not accept an `info` argument, so the
                        # upload hint lives in the label instead.
                        image_input = gr.Image(
                            label="Input Image (optional, guides the generation)",
                            type="filepath",
                            sources=["upload", "clipboard"]
                        )

                    with gr.Accordion("⚙️ Generation Settings", open=False):
                        with gr.Row():
                            resolution = gr.Dropdown(
                                choices=["480p", "720p", "1080p"],
                                value="480p",
                                label="Resolution"
                            )
                            aspect_ratio = gr.Dropdown(
                                choices=["16:9", "9:16", "1:1", "4:3"],
                                value="16:9",
                                label="Aspect Ratio"
                            )
                        with gr.Row():
                            num_frames = gr.Slider(
                                minimum=16,
                                maximum=250,
                                value=125,
                                step=1,
                                label="Number of Frames"
                            )
                            seed = gr.Number(
                                value=1,
                                label="Seed",
                                precision=0
                            )

                    # Camera trajectory
                    with gr.Accordion("🎥 Camera Trajectory", open=False):
                        trajectory_type = gr.Radio(
                            choices=["forward", "circular", "zoom", "custom"],
                            value="forward",
                            label="Trajectory Type"
                        )
                        create_pose_btn = gr.Button("Generate Trajectory JSON")
                        pose_status = gr.Textbox(label="Trajectory Status", interactive=False)

                    # Generation button (hidden until a model is loaded)
                    generate_btn = gr.Button("🚀 Generate Video", variant="primary", size="lg", visible=False)

                with gr.Column(scale=1):
                    gr.Markdown("### Output & Progress")

                    # Progress display
                    progress_display = gr.HTML('<div class="generation-progress">Ready to generate...</div>')

                    # Output video (revealed once generation completes)
                    video_output = gr.Video(
                        label="Generated Video",
                        visible=False
                    )

                    # Metadata (revealed once generation completes)
                    metadata_output = gr.JSON(
                        label="Generation Metadata",
                        visible=False
                    )
        # Tab 2: Model Comparison
        with gr.TabItem("📊 Model Comparison", id="compare_tab"):
            gr.Markdown("### Performance Comparison")

            # Performance metrics table
            gr.DataFrame(
                headers=["Model", "Real-time", "PSNR ↑", "SSIM ↑", "LPIPS ↓", "R_dist ↓", "T_dist ↓"],
                datatype=["str", "str", "number", "number", "number", "number", "number"],
                value=[
                    ["CameraCtrl", "❌", 17.93, 0.569, 0.298, 0.037, 0.341],
                    ["SEVA", "❌", 19.84, 0.598, 0.313, 0.047, 0.223],
                    ["ViewCrafter", "❌", 19.91, 0.617, 0.327, 0.029, 0.543],
                    ["Gen3C", "❌", 21.68, 0.635, 0.278, 0.024, 0.477],
                    ["VMem", "❌", 19.97, 0.587, 0.316, 0.048, 0.219],
                    ["Matrix-Game-2.0", "✅", 17.26, 0.505, 0.383, 0.287, 0.843],
                    ["GameCraft", "❌", 21.05, 0.639, 0.341, 0.151, 0.617],
                    ["Ours (w/o Context Forcing)", "❌", 21.27, 0.669, 0.261, 0.033, 0.157],
                    ["Ours (full)", "✅", 21.92, 0.702, 0.247, 0.031, 0.121]
                ],
                label="Quantitative Evaluation Results",
                interactive=False
            )

            with gr.Row():
                with gr.Column():
                    gr.Markdown("""
                    ### 🔬 Key Improvements
                    - **Real-time Performance**: Achieves 24 FPS streaming generation
                    - **Superior Consistency**: Best-in-class long-term geometric consistency
                    - **Memory Efficiency**: Reconstituted Context Memory prevents error drift
                    - **Action Control**: Precise keyboard and mouse input response
                    """)
                with gr.Column():
                    gr.Markdown("""
                    ### 📈 Technical Innovations
                    - **Dual Action Representation**: Robust action control
                    - **Context Forcing**: Memory-aware model distillation
                    - **WorldCompass**: RL-based post-training
                    - **Temporal Reframing**: Long-past frame accessibility
                    """)
        # Tab 3: Examples
        with gr.TabItem("🎨 Examples", id="examples_tab"):
            gr.Markdown("### Sample Generations")

            with gr.Row():
                with gr.Column():
                    gr.Markdown("""
                    #### Example 1: Bridge Scene
                    **Prompt**: A paved pathway leads towards a stone arch bridge spanning a calm body of water...
                    **Action**: Forward movement
                    **Frames**: 125
                    """)
                    gr.Image("https://via.placeholder.com/400x225/3b82f6/ffffff?text=Bridge+Scene+Example", label="Example 1")
                with gr.Column():
                    gr.Markdown("""
                    #### Example 2: Forest Path
                    **Prompt**: A winding path through an enchanted forest with ancient trees...
                    **Action**: Circular trajectory
                    **Frames**: 125
                    """)
                    gr.Image("https://via.placeholder.com/400x225/10b981/ffffff?text=Forest+Path+Example", label="Example 2")

            with gr.Row():
                with gr.Column():
                    gr.Markdown("""
                    #### Example 3: Urban Scene
                    **Prompt**: A futuristic cityscape with flying vehicles and neon lights...
                    **Action**: Zoom in
                    **Frames**: 125
                    """)
                    gr.Image("https://via.placeholder.com/400x225/8b5cf6/ffffff?text=Urban+Scene+Example", label="Example 3")
                with gr.Column():
                    gr.Markdown("""
                    #### Example 4: Interior Scene
                    **Prompt**: A cozy library with bookshelves and warm lighting...
                    **Action**: Custom trajectory
                    **Frames**: 125
                    """)
                    gr.Image("https://via.placeholder.com/400x225/f59e0b/ffffff?text=Interior+Scene+Example", label="Example 4")
        # Tab 4: History
        with gr.TabItem("📜 History", id="history_tab"):
            gr.Markdown("### Generation History")
            history_display = gr.Markdown(get_generation_history())
            refresh_history_btn = gr.Button("🔄 Refresh History")
    # Footer
    gr.HTML("""
    <div style="text-align: center; margin-top: 40px; padding: 20px; border-top: 1px solid #e5e7eb;">
        <p style="color: #6b7280;">
            HY-WorldPlay: A Systematic Framework for Interactive World Modeling<br>
            <a href="https://arxiv.org/abs/2512.14614" target="_blank" style="color: #3b82f6;">Paper</a> |
            <a href="https://github.com/Tencent/HY-WorldPlay" target="_blank" style="color: #3b82f6;">GitHub</a> |
            <a href="https://huggingface.co/tencent/HY-WorldPlay" target="_blank" style="color: #3b82f6;">Model Card</a>
        </p>
    </div>
    """)
    # Event handlers
    def update_model_status(is_loaded, model_type):
        """Render the status indicator and toggle the generate button."""
        if is_loaded:
            return (f'<div><span class="status-indicator status-ready"></span>{model_type.capitalize()} model loaded</div>',
                    gr.update(visible=True))
        return (f'<div><span class="status-indicator status-loading"></span>Loading {model_type} model...</div>',
                gr.update(visible=False))

    load_model_btn.click(
        fn=load_model_wrapper,
        inputs=[model_type],
        outputs=[model_status]
    ).then(
        fn=lambda x: update_model_status(True, x),
        inputs=[model_type],
        outputs=[model_status, generate_btn]
    )

    # The generator yields (progress_html, video_update, metadata_update)
    # triples, so progress streams into the HTML component while the video
    # and metadata stay hidden until the final yield.
    generate_btn.click(
        fn=generate_video_wrapper,
        inputs=[
            prompt_input,
            image_input,
            resolution,
            aspect_ratio,
            num_frames,
            seed,
            model_type,
            trajectory_type
        ],
        outputs=[progress_display, video_output, metadata_output]
    )

    def create_pose_wrapper(trajectory_type):
        """Build the trajectory JSON and report where it was written."""
        json_path = create_pose_json(trajectory_type)
        return f"✅ Trajectory JSON created for {trajectory_type} motion: {json_path}"

    create_pose_btn.click(
        fn=create_pose_wrapper,
        inputs=[trajectory_type],
        outputs=[pose_status]
    )

    refresh_history_btn.click(
        fn=get_generation_history,
        outputs=[history_display]
    )
# Launch the app. The theme is configured on gr.Blocks above; launch() accepts
# neither a `theme` nor a `footer_links` argument, and the footer links are
# already rendered in the footer HTML.
if __name__ == "__main__":
    demo.launch()