gagndeep committed
Commit 60887b6 · verified · 1 Parent(s): 7916ffc

Upload folder using huggingface_hub
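For reference, a commit like this is normally produced with the `huggingface_hub` client. A minimal sketch follows; the repo id and local folder path are illustrative assumptions, not taken from this commit:

```python
# Hypothetical reproduction of an "upload folder" commit; repo_id and
# folder_path are assumptions for illustration only.
from huggingface_hub import HfApi

api = HfApi()  # uses a cached login or the HF_TOKEN environment variable
api.upload_folder(
    folder_path="./hy-worldplay-demo",     # local folder with app.py and requirements.txt
    repo_id="gagndeep/hy-worldplay-demo",  # assumed Space id
    repo_type="space",
    commit_message="Upload folder using huggingface_hub",
)
```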

Files changed (2)
  1. app.py +495 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,495 @@
+ import gradio as gr
+ import numpy as np
+ import json
+ import os
+ import tempfile
+ from typing import Optional, Tuple, Dict, Any
+ import time
+ from pathlib import Path
+
+ # Mock implementation for demonstration
+ # In a real deployment, this would integrate with the actual HY-WorldPlay model
+ class HYWorldPlayDemo:
+     def __init__(self):
+         self.model_loaded = False
+         self.generation_history = []
+
+     def load_model(self, model_type: str = "bidirectional"):
+         """Mock model loading"""
+         time.sleep(2)  # Simulate loading time
+         self.model_loaded = True
+         return f"✅ {model_type.capitalize()} model loaded successfully!"
+
+     def generate_video(self,
+                        prompt: str,
+                        image_path: Optional[str] = None,
+                        resolution: str = "480p",
+                        aspect_ratio: str = "16:9",
+                        num_frames: int = 125,
+                        seed: int = 1,
+                        model_type: str = "bidirectional",
+                        action_type: str = "forward"):
+         """
+         Mock video generation (a generator).
+         Yields (progress_message, metadata) while running, then
+         (video_path, metadata) once generation is complete.
+         """
+         if not self.model_loaded:
+             raise gr.Error("Please load the model first!")
+
+         # Simulate generation time
+         progress_steps = [
+             "Initializing generation pipeline...",
+             "Processing prompt and image...",
+             "Generating video frames...",
+             "Applying temporal consistency...",
+             "Rendering final video..."
+         ]
+
+         for step in progress_steps:
+             yield step, {"status": "processing", "step": step}
+             time.sleep(1)
+
+         # Create a mock video file (in a real implementation, this would be actual video generation)
+         temp_dir = tempfile.mkdtemp()
+         video_path = os.path.join(temp_dir, "generated_video.mp4")
+
+         # Write a simple placeholder file to simulate the rendered video
+         with open(video_path, 'w') as f:
+             f.write(f"Generated video for prompt: {prompt}\n")
+             f.write(f"Resolution: {resolution}\n")
+             f.write(f"Aspect Ratio: {aspect_ratio}\n")
+             f.write(f"Frames: {num_frames}\n")
+             f.write(f"Model: {model_type}\n")
+             f.write(f"Action: {action_type}\n")
+
+         # Save generation metadata
+         metadata = {
+             "prompt": prompt,
+             "resolution": resolution,
+             "aspect_ratio": aspect_ratio,
+             "num_frames": num_frames,
+             "seed": seed,
+             "model_type": model_type,
+             "action_type": action_type,
+             "generation_time": time.strftime("%Y-%m-%d %H:%M:%S"),
+             "status": "completed"
+         }
+
+         self.generation_history.append(metadata)
+
+         yield video_path, metadata
+
+ # Initialize demo class
+ demo_instance = HYWorldPlayDemo()
+
+ def load_model_wrapper(model_type):
+     """Wrapper for model loading with status updates"""
+     return demo_instance.load_model(model_type)
+
+ def generate_video_wrapper(*args):
+     """Adapt the (value, metadata) pairs yielded by generate_video to the
+     three UI outputs: progress HTML, video component, and metadata JSON."""
+     for value, meta in demo_instance.generate_video(*args):
+         if meta.get("status") == "completed":
+             # Final yield: value is the video path, meta is the full metadata
+             yield ('<div class="generation-progress">✅ Generation completed</div>',
+                    gr.Video(value=value, visible=True),
+                    gr.JSON(value=meta, visible=True))
+         else:
+             # Intermediate yields: value is a progress message
+             yield (f'<div class="generation-progress">⏳ {value}</div>',
+                    gr.Video(visible=False),
+                    gr.JSON(visible=False))
+
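+ # Note: Gradio treats generator functions such as the two above as streaming
+ # event handlers; each yield pushes an update to the bound output components.
+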
+ def create_pose_json(trajectory_type: str, num_keyframes: int = 32) -> str:
+     """Create a mock pose JSON file for camera trajectory"""
+     poses = []
+
+     if trajectory_type == "forward":
+         for i in range(num_keyframes):
+             poses.append({
+                 "frame": i,
+                 "position": [i * 0.1, 0, 0],
+                 "rotation": [0, 0, 0],
+                 "fov": 60
+             })
+     elif trajectory_type == "circular":
+         for i in range(num_keyframes):
+             angle = (i / num_keyframes) * 2 * np.pi
+             poses.append({
+                 "frame": i,
+                 "position": [np.cos(angle) * 2, 0, np.sin(angle) * 2],
+                 "rotation": [0, np.degrees(angle), 0],
+                 "fov": 60
+             })
+     elif trajectory_type == "zoom":
+         for i in range(num_keyframes):
+             zoom = 1 + (i / num_keyframes) * 2
+             poses.append({
+                 "frame": i,
+                 "position": [0, 0, 0],
+                 "rotation": [0, 0, 0],
+                 "fov": 60 / zoom
+             })
+
+     temp_dir = tempfile.mkdtemp()
+     json_path = os.path.join(temp_dir, "pose_trajectory.json")
+
+     with open(json_path, 'w') as f:
+         json.dump({"poses": poses}, f, indent=2)
+
+     return json_path
+
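+ # Example pose entry produced for the "forward" trajectory (frame 0):
+ #   {"frame": 0, "position": [0.0, 0, 0], "rotation": [0, 0, 0], "fov": 60}
+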
+ def get_generation_history():
+     """Return generation history as formatted text"""
+     if not demo_instance.generation_history:
+         return "No generations yet."
+
+     history_text = ""
+     for i, gen in enumerate(demo_instance.generation_history[-5:], 1):
+         history_text += f"**Generation {i}**\n"
+         history_text += f"- Prompt: {gen['prompt'][:50]}...\n"
+         history_text += f"- Model: {gen['model_type']}\n"
+         history_text += f"- Frames: {gen['num_frames']}\n"
+         history_text += f"- Time: {gen['generation_time']}\n\n"
+
+     return history_text
+
+ # Custom CSS for enhanced UI
+ custom_css = """
+ .main-container {
+     max-width: 1400px;
+     margin: 0 auto;
+ }
+
+ .model-card {
+     border: 2px solid #e5e7eb;
+     border-radius: 12px;
+     padding: 20px;
+     margin: 10px 0;
+     transition: all 0.3s ease;
+ }
+
+ .model-card:hover {
+     border-color: #3b82f6;
+     box-shadow: 0 4px 12px rgba(59, 130, 246, 0.1);
+ }
+
+ .status-indicator {
+     display: inline-block;
+     width: 12px;
+     height: 12px;
+     border-radius: 50%;
+     margin-right: 8px;
+ }
+
+ .status-ready { background-color: #10b981; }
+ .status-loading { background-color: #f59e0b; }
+ .status-error { background-color: #ef4444; }
+
+ .feature-highlight {
+     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+     color: white;
+     padding: 20px;
+     border-radius: 12px;
+     margin: 20px 0;
+ }
+
+ .generation-progress {
+     font-family: 'Courier New', monospace;
+     background: #1f2937;
+     color: #10b981;
+     padding: 15px;
+     border-radius: 8px;
+     margin: 10px 0;
+ }
+ """
+
+ with gr.Blocks(
+     css=custom_css,
+     title="HY-WorldPlay: Interactive World Modeling",
+     theme=gr.themes.Soft(
+         primary_hue="blue",
+         secondary_hue="indigo",
+         neutral_hue="slate",
+         font=gr.themes.GoogleFont("Inter"),
+         text_size="lg",
+         spacing_size="lg",
+         radius_size="md"
+     )
+ ) as demo:
+     # Header
+     gr.HTML("""
+     <div style="text-align: center; margin-bottom: 30px;">
+         <h1 style="font-size: 2.5em; margin-bottom: 10px;">🎮 HY-WorldPlay</h1>
+         <p style="font-size: 1.2em; color: #6b7280;">Real-Time Interactive World Modeling with Geometric Consistency</p>
+         <p style="margin-top: 10px;">
+             <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #3b82f6; text-decoration: none;">
+                 Built with anycoder
+             </a>
+         </p>
+     </div>
+     """)
+
+     # Feature highlights
+     with gr.Row():
+         with gr.Column():
+             gr.HTML("""
+             <div class="feature-highlight">
+                 <h3>🚀 Key Features</h3>
+                 <ul style="margin: 10px 0;">
+                     <li>Real-time video generation at 24 FPS</li>
+                     <li>Long-term geometric consistency</li>
+                     <li>Dual Action Representation for control</li>
+                     <li>Reconstituted Context Memory</li>
+                     <li>WorldCompass RL post-training</li>
+                     <li>Context Forcing distillation</li>
+                 </ul>
+             </div>
+             """)
+
+     # Main interface tabs
+     with gr.Tabs() as main_tabs:
+         # Tab 1: Video Generation
+         with gr.TabItem("🎬 Video Generation", id="gen_tab"):
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     gr.Markdown("### Configuration")
+
+                     # Model selection
+                     model_type = gr.Radio(
+                         choices=["bidirectional", "autoregressive", "autoregressive_distilled"],
+                         value="bidirectional",
+                         label="Model Type",
+                         info="Choose the model variant for generation"
+                     )
+
+                     load_model_btn = gr.Button("🔄 Load Model", variant="primary", size="lg")
+                     model_status = gr.HTML('<div><span class="status-indicator status-error"></span>Model not loaded</div>')
+
+                     # Input controls
+                     with gr.Accordion("📝 Input Settings", open=True):
+                         prompt_input = gr.Textbox(
+                             label="Prompt",
+                             placeholder="Describe the world you want to generate...",
+                             lines=3,
+                             value="A peaceful landscape with a stone bridge spanning a calm body of water, surrounded by lush green trees and a traditional pavilion."
+                         )
+
+                         image_input = gr.Image(
+                             # gr.Image has no `info` kwarg; keep the guidance in the label
+                             label="Input Image (optional, guides the generation)",
+                             type="filepath",
+                             sources=["upload", "clipboard"]
+                         )
+
+ with gr.Accordion("⚙️ Generation Settings", open=False):
265
+ with gr.Row():
266
+ resolution = gr.Dropdown(
267
+ choices=["480p", "720p", "1080p"],
268
+ value="480p",
269
+ label="Resolution"
270
+ )
271
+ aspect_ratio = gr.Dropdown(
272
+ choices=["16:9", "9:16", "1:1", "4:3"],
273
+ value="16:9",
274
+ label="Aspect Ratio"
275
+ )
276
+
277
+ with gr.Row():
278
+ num_frames = gr.Slider(
279
+ minimum=16,
280
+ maximum=250,
281
+ value=125,
282
+ step=1,
283
+ label="Number of Frames"
284
+ )
285
+ seed = gr.Number(
286
+ value=1,
287
+ label="Seed",
288
+ precision=0
289
+ )
290
+
291
+ # Camera trajectory
292
+ with gr.Accordion("🎥 Camera Trajectory", open=False):
293
+ trajectory_type = gr.Radio(
294
+ choices=["forward", "circular", "zoom", "custom"],
295
+ value="forward",
296
+ label="Trajectory Type"
297
+ )
298
+ create_pose_btn = gr.Button("Generate Trajectory JSON")
299
+ pose_status = gr.Textbox(label="Trajectory Status", interactive=False)
300
+
301
+ # Generation button
302
+ generate_btn = gr.Button("🚀 Generate Video", variant="primary", size="lg", visible=False)
303
+
304
+ with gr.Column(scale=1):
305
+ gr.Markdown("### Output & Progress")
306
+
307
+ # Progress display
308
+ progress_display = gr.HTML('<div class="generation-progress">Ready to generate...</div>')
309
+
310
+ # Output video
311
+ video_output = gr.Video(
312
+ label="Generated Video",
313
+ visible=False
314
+ )
315
+
316
+ # Metadata
317
+ metadata_output = gr.JSON(
318
+ label="Generation Metadata",
319
+ visible=False
320
+ )
321
+
322
+         # Tab 2: Model Comparison
+         with gr.TabItem("📊 Model Comparison", id="compare_tab"):
+             gr.Markdown("### Performance Comparison")
+
+             # Performance metrics table
+             gr.DataFrame(
+                 headers=["Model", "Real-time", "PSNR ↑", "SSIM ↑", "LPIPS ↓", "R_dist ↓", "T_dist ↓"],
+                 datatype=["str", "str", "number", "number", "number", "number", "number"],
+                 value=[
+                     ["CameraCtrl", "❌", 17.93, 0.569, 0.298, 0.037, 0.341],
+                     ["SEVA", "❌", 19.84, 0.598, 0.313, 0.047, 0.223],
+                     ["ViewCrafter", "❌", 19.91, 0.617, 0.327, 0.029, 0.543],
+                     ["Gen3C", "❌", 21.68, 0.635, 0.278, 0.024, 0.477],
+                     ["VMem", "❌", 19.97, 0.587, 0.316, 0.048, 0.219],
+                     ["Matrix-Game-2.0", "✅", 17.26, 0.505, 0.383, 0.287, 0.843],
+                     ["GameCraft", "❌", 21.05, 0.639, 0.341, 0.151, 0.617],
+                     ["Ours (w/o Context Forcing)", "❌", 21.27, 0.669, 0.261, 0.033, 0.157],
+                     ["Ours (full)", "✅", 21.92, 0.702, 0.247, 0.031, 0.121]
+                 ],
+                 label="Quantitative Evaluation Results",
+                 interactive=False
+             )
+
+             with gr.Row():
+                 with gr.Column():
+                     gr.Markdown("""
+                     ### 🔬 Key Improvements
+                     - **Real-time Performance**: Achieves 24 FPS streaming generation
+                     - **Superior Consistency**: Best-in-class long-term geometric consistency
+                     - **Memory Efficiency**: Reconstituted Context Memory prevents error drift
+                     - **Action Control**: Precise keyboard and mouse input response
+                     """)
+
+                 with gr.Column():
+                     gr.Markdown("""
+                     ### 📈 Technical Innovations
+                     - **Dual Action Representation**: Robust action control
+                     - **Context Forcing**: Memory-aware model distillation
+                     - **WorldCompass**: RL-based post-training
+                     - **Temporal Reframing**: Long-past frame accessibility
+                     """)
+
+         # Tab 3: Examples
+         with gr.TabItem("🎨 Examples", id="examples_tab"):
+             gr.Markdown("### Sample Generations")
+
+             with gr.Row():
+                 with gr.Column():
+                     gr.Markdown("""
+                     #### Example 1: Bridge Scene
+                     **Prompt**: A paved pathway leads towards a stone arch bridge spanning a calm body of water...
+                     **Action**: Forward movement
+                     **Frames**: 125
+                     """)
+                     gr.Image("https://via.placeholder.com/400x225/3b82f6/ffffff?text=Bridge+Scene+Example", label="Example 1")
+
+                 with gr.Column():
+                     gr.Markdown("""
+                     #### Example 2: Forest Path
+                     **Prompt**: A winding path through an enchanted forest with ancient trees...
+                     **Action**: Circular trajectory
+                     **Frames**: 125
+                     """)
+                     gr.Image("https://via.placeholder.com/400x225/10b981/ffffff?text=Forest+Path+Example", label="Example 2")
+
+             with gr.Row():
+                 with gr.Column():
+                     gr.Markdown("""
+                     #### Example 3: Urban Scene
+                     **Prompt**: A futuristic cityscape with flying vehicles and neon lights...
+                     **Action**: Zoom in
+                     **Frames**: 125
+                     """)
+                     gr.Image("https://via.placeholder.com/400x225/8b5cf6/ffffff?text=Urban+Scene+Example", label="Example 3")
+
+                 with gr.Column():
+                     gr.Markdown("""
+                     #### Example 4: Interior Scene
+                     **Prompt**: A cozy library with bookshelves and warm lighting...
+                     **Action**: Custom trajectory
+                     **Frames**: 125
+                     """)
+                     gr.Image("https://via.placeholder.com/400x225/f59e0b/ffffff?text=Interior+Scene+Example", label="Example 4")
+
+         # Tab 4: History
+         with gr.TabItem("📜 History", id="history_tab"):
+             gr.Markdown("### Generation History")
+
+             history_display = gr.Markdown(get_generation_history())
+             refresh_history_btn = gr.Button("🔄 Refresh History")
+
+     # Footer
+     gr.HTML("""
+     <div style="text-align: center; margin-top: 40px; padding: 20px; border-top: 1px solid #e5e7eb;">
+         <p style="color: #6b7280;">
+             HY-WorldPlay: A Systematic Framework for Interactive World Modeling<br>
+             <a href="https://arxiv.org/abs/2512.14614" target="_blank" style="color: #3b82f6;">Paper</a> |
+             <a href="https://github.com/Tencent/HY-WorldPlay" target="_blank" style="color: #3b82f6;">GitHub</a> |
+             <a href="https://huggingface.co/tencent/HY-WorldPlay" target="_blank" style="color: #3b82f6;">Model Card</a>
+         </p>
+     </div>
+     """)
+
+     # Event handlers (progress and visibility updates for generation are
+     # handled inside generate_video_wrapper above)
+     def update_model_status(is_loaded, model_type):
+         if is_loaded:
+             return f'<div><span class="status-indicator status-ready"></span>{model_type.capitalize()} model loaded</div>', gr.Button(visible=True)
+         else:
+             return f'<div><span class="status-indicator status-loading"></span>Loading {model_type} model...</div>', gr.Button(visible=False)
+
+     load_model_btn.click(
+         fn=load_model_wrapper,
+         inputs=[model_type],
+         outputs=[model_status]
+     ).then(
+         fn=lambda x: update_model_status(True, x),
+         inputs=[model_type],
+         outputs=[model_status, generate_btn]
+     )
+
+     generate_btn.click(
+         fn=generate_video_wrapper,
+         inputs=[
+             prompt_input,
+             image_input,
+             resolution,
+             aspect_ratio,
+             num_frames,
+             seed,
+             model_type,
+             trajectory_type
+         ],
+         outputs=[progress_display, video_output, metadata_output]
+     )
+
+     create_pose_btn.click(
+         fn=create_pose_json,
+         inputs=[trajectory_type],
+         outputs=[pose_status]
+     ).then(
+         fn=lambda x: f"✅ Trajectory JSON created for {x} motion",
+         inputs=[trajectory_type],
+         outputs=[pose_status]
+     )
+
+     refresh_history_btn.click(
+         fn=get_generation_history,
+         outputs=[history_display]
+     )
+
+ # Launch the app (the theme is configured on gr.Blocks above)
+ demo.launch()
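+ # On Hugging Face Spaces the app is served on port 7860 by default.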
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ numpy
+ gradio>=6.0
+ requests
+ Pillow
+ pandas