gagndeep committed
Commit 60887b6 · verified · 1 Parent(s): 7916ffc

Upload folder using huggingface_hub
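For reference, a commit like this is normally produced with the `huggingface_hub` client. A minimal sketch follows; the repo id and local folder path are illustrative assumptions, not taken from this commit:

```python
# Hypothetical reproduction of an "upload folder" commit; repo_id and
# folder_path are assumptions for illustration only.
from huggingface_hub import HfApi

api = HfApi()  # uses a cached login or the HF_TOKEN environment variable
api.upload_folder(
    folder_path="./hy-worldplay-demo",     # local folder with app.py and requirements.txt
    repo_id="gagndeep/hy-worldplay-demo",  # assumed Space id
    repo_type="space",
    commit_message="Upload folder using huggingface_hub",
)
```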

Files changed (2)
  1. app.py +495 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,495 @@
+ import gradio as gr
+ import numpy as np
+ import json
+ import os
+ import tempfile
+ from typing import Optional, Tuple, Dict, Any
+ import time
+ from pathlib import Path
+
+ # Mock implementation for demonstration
+ # In a real deployment, this would integrate with the actual HY-WorldPlay model
+ class HYWorldPlayDemo:
+     def __init__(self):
+         self.model_loaded = False
+         self.generation_history = []
+
+     def load_model(self, model_type: str = "bidirectional"):
+         """Mock model loading"""
+         time.sleep(2)  # Simulate loading time
+         self.model_loaded = True
+         return f"✅ {model_type.capitalize()} model loaded successfully!"
+
+     def generate_video(self,
+                        prompt: str,
+                        image_path: Optional[str] = None,
+                        resolution: str = "480p",
+                        aspect_ratio: str = "16:9",
+                        num_frames: int = 125,
+                        seed: int = 1,
+                        model_type: str = "bidirectional",
+                        action_type: str = "forward"):
+         """
+         Mock video generation (a generator).
+         Yields (progress_message, metadata) while running, then
+         (video_path, metadata) once generation is complete.
+         """
+         if not self.model_loaded:
+             raise gr.Error("Please load the model first!")
+
+         # Simulate generation time
+         progress_steps = [
+             "Initializing generation pipeline...",
+             "Processing prompt and image...",
+             "Generating video frames...",
+             "Applying temporal consistency...",
+             "Rendering final video..."
+         ]
+
+         for step in progress_steps:
+             yield step, {"status": "processing", "step": step}
+             time.sleep(1)
+
+         # Create a mock video file (in a real implementation, this would be actual video generation)
+         temp_dir = tempfile.mkdtemp()
+         video_path = os.path.join(temp_dir, "generated_video.mp4")
+
+         # Write a simple placeholder file to simulate the rendered video
+         with open(video_path, 'w') as f:
+             f.write(f"Generated video for prompt: {prompt}\n")
+             f.write(f"Resolution: {resolution}\n")
+             f.write(f"Aspect Ratio: {aspect_ratio}\n")
+             f.write(f"Frames: {num_frames}\n")
+             f.write(f"Model: {model_type}\n")
+             f.write(f"Action: {action_type}\n")
+
+         # Save generation metadata
+         metadata = {
+             "prompt": prompt,
+             "resolution": resolution,
+             "aspect_ratio": aspect_ratio,
+             "num_frames": num_frames,
+             "seed": seed,
+             "model_type": model_type,
+             "action_type": action_type,
+             "generation_time": time.strftime("%Y-%m-%d %H:%M:%S"),
+             "status": "completed"
+         }
+
+         self.generation_history.append(metadata)
+
+         yield video_path, metadata
+
+ # Initialize demo class
+ demo_instance = HYWorldPlayDemo()
+
+ def load_model_wrapper(model_type):
+     """Wrapper for model loading with status updates"""
+     return demo_instance.load_model(model_type)
+
+ def generate_video_wrapper(*args):
+     """Adapt the (value, metadata) pairs yielded by generate_video to the
+     three UI outputs: progress HTML, video component, and metadata JSON."""
+     for value, meta in demo_instance.generate_video(*args):
+         if meta.get("status") == "completed":
+             # Final yield: value is the video path, meta is the full metadata
+             yield ('<div class="generation-progress">✅ Generation completed</div>',
+                    gr.Video(value=value, visible=True),
+                    gr.JSON(value=meta, visible=True))
+         else:
+             # Intermediate yields: value is a progress message
+             yield (f'<div class="generation-progress">⏳ {value}</div>',
+                    gr.Video(visible=False),
+                    gr.JSON(visible=False))
+
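+ # Note: Gradio treats generator functions such as the two above as streaming
+ # event handlers; each yield pushes an update to the bound output components.
+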
+ def create_pose_json(trajectory_type: str, num_keyframes: int = 32) -> str:
+     """Create a mock pose JSON file for camera trajectory"""
+     poses = []
+
+     if trajectory_type == "forward":
+         for i in range(num_keyframes):
+             poses.append({
+                 "frame": i,
+                 "position": [i * 0.1, 0, 0],
+                 "rotation": [0, 0, 0],
+                 "fov": 60
+             })
+     elif trajectory_type == "circular":
+         for i in range(num_keyframes):
+             angle = (i / num_keyframes) * 2 * np.pi
+             poses.append({
+                 "frame": i,
+                 "position": [np.cos(angle) * 2, 0, np.sin(angle) * 2],
+                 "rotation": [0, np.degrees(angle), 0],
+                 "fov": 60
+             })
+     elif trajectory_type == "zoom":
+         for i in range(num_keyframes):
+             zoom = 1 + (i / num_keyframes) * 2
+             poses.append({
+                 "frame": i,
+                 "position": [0, 0, 0],
+                 "rotation": [0, 0, 0],
+                 "fov": 60 / zoom
+             })
+
+     temp_dir = tempfile.mkdtemp()
+     json_path = os.path.join(temp_dir, "pose_trajectory.json")
+
+     with open(json_path, 'w') as f:
+         json.dump({"poses": poses}, f, indent=2)
+
+     return json_path
+
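+ # Example pose entry produced for the "forward" trajectory (frame 0):
+ #   {"frame": 0, "position": [0.0, 0, 0], "rotation": [0, 0, 0], "fov": 60}
+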
+ def get_generation_history():
+     """Return generation history as formatted text"""
+     if not demo_instance.generation_history:
+         return "No generations yet."
+
+     history_text = ""
+     for i, gen in enumerate(demo_instance.generation_history[-5:], 1):
+         history_text += f"**Generation {i}**\n"
+         history_text += f"- Prompt: {gen['prompt'][:50]}...\n"
+         history_text += f"- Model: {gen['model_type']}\n"
+         history_text += f"- Frames: {gen['num_frames']}\n"
+         history_text += f"- Time: {gen['generation_time']}\n\n"
+
+     return history_text
+
+ # Custom CSS for enhanced UI
+ custom_css = """
+ .main-container {
+     max-width: 1400px;
+     margin: 0 auto;
+ }
+
+ .model-card {
+     border: 2px solid #e5e7eb;
+     border-radius: 12px;
+     padding: 20px;
+     margin: 10px 0;
+     transition: all 0.3s ease;
+ }
+
+ .model-card:hover {
+     border-color: #3b82f6;
+     box-shadow: 0 4px 12px rgba(59, 130, 246, 0.1);
+ }
+
+ .status-indicator {
+     display: inline-block;
+     width: 12px;
+     height: 12px;
+     border-radius: 50%;
+     margin-right: 8px;
+ }
+
+ .status-ready { background-color: #10b981; }
+ .status-loading { background-color: #f59e0b; }
+ .status-error { background-color: #ef4444; }
+
+ .feature-highlight {
+     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+     color: white;
+     padding: 20px;
+     border-radius: 12px;
+     margin: 20px 0;
+ }
+
+ .generation-progress {
+     font-family: 'Courier New', monospace;
+     background: #1f2937;
+     color: #10b981;
+     padding: 15px;
+     border-radius: 8px;
+     margin: 10px 0;
+ }
+ """
+
+ with gr.Blocks(
+     css=custom_css,
+     title="HY-WorldPlay: Interactive World Modeling",
+     theme=gr.themes.Soft(
+         primary_hue="blue",
+         secondary_hue="indigo",
+         neutral_hue="slate",
+         font=gr.themes.GoogleFont("Inter"),
+         text_size="lg",
+         spacing_size="lg",
+         radius_size="md"
+     )
+ ) as demo:
+     # Header
+     gr.HTML("""
+     <div style="text-align: center; margin-bottom: 30px;">
+         <h1 style="font-size: 2.5em; margin-bottom: 10px;">🎮 HY-WorldPlay</h1>
+         <p style="font-size: 1.2em; color: #6b7280;">Real-Time Interactive World Modeling with Geometric Consistency</p>
+         <p style="margin-top: 10px;">
+             <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #3b82f6; text-decoration: none;">
+                 Built with anycoder
+             </a>
+         </p>
+     </div>
+     """)
+
+     # Feature highlights
+     with gr.Row():
+         with gr.Column():
+             gr.HTML("""
+             <div class="feature-highlight">
+                 <h3>🚀 Key Features</h3>
+                 <ul style="margin: 10px 0;">
+                     <li>Real-time video generation at 24 FPS</li>
+                     <li>Long-term geometric consistency</li>
+                     <li>Dual Action Representation for control</li>
+                     <li>Reconstituted Context Memory</li>
+                     <li>WorldCompass RL post-training</li>
+                     <li>Context Forcing distillation</li>
+                 </ul>
+             </div>
+             """)
+
+     # Main interface tabs
+     with gr.Tabs() as main_tabs:
+         # Tab 1: Video Generation
+         with gr.TabItem("🎬 Video Generation", id="gen_tab"):
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     gr.Markdown("### Configuration")
+
+                     # Model selection
+                     model_type = gr.Radio(
+                         choices=["bidirectional", "autoregressive", "autoregressive_distilled"],
+                         value="bidirectional",
+                         label="Model Type",
+                         info="Choose the model variant for generation"
+                     )
+
+                     load_model_btn = gr.Button("🔄 Load Model", variant="primary", size="lg")
+                     model_status = gr.HTML('<div><span class="status-indicator status-error"></span>Model not loaded</div>')
+
+                     # Input controls
+                     with gr.Accordion("📝 Input Settings", open=True):
+                         prompt_input = gr.Textbox(
+                             label="Prompt",
+                             placeholder="Describe the world you want to generate...",
+                             lines=3,
+                             value="A peaceful landscape with a stone bridge spanning a calm body of water, surrounded by lush green trees and a traditional pavilion."
+                         )
+
+                         image_input = gr.Image(
+                             # gr.Image has no `info` kwarg; keep the guidance in the label
+                             label="Input Image (optional, guides the generation)",
+                             type="filepath",
+                             sources=["upload", "clipboard"]
+                         )
+
+ with gr.Accordion("⚙️ Generation Settings", open=False):
265
+ with gr.Row():
266
+ resolution = gr.Dropdown(
267
+ choices=["480p", "720p", "1080p"],
268
+ value="480p",
269
+ label="Resolution"
270
+ )
271
+ aspect_ratio = gr.Dropdown(
272
+ choices=["16:9", "9:16", "1:1", "4:3"],
273
+ value="16:9",
274
+ label="Aspect Ratio"
275
+ )
276
+
277
+ with gr.Row():
278
+ num_frames = gr.Slider(
279
+ minimum=16,
280
+ maximum=250,
281
+ value=125,
282
+ step=1,
283
+ label="Number of Frames"
284
+ )
285
+ seed = gr.Number(
286
+ value=1,
287
+ label="Seed",
288
+ precision=0
289
+ )
290
+
291
+ # Camera trajectory
292
+ with gr.Accordion("🎥 Camera Trajectory", open=False):
293
+ trajectory_type = gr.Radio(
294
+ choices=["forward", "circular", "zoom", "custom"],
295
+ value="forward",
296
+ label="Trajectory Type"
297
+ )
298
+ create_pose_btn = gr.Button("Generate Trajectory JSON")
299
+ pose_status = gr.Textbox(label="Trajectory Status", interactive=False)
300
+
301
+ # Generation button
302
+ generate_btn = gr.Button("🚀 Generate Video", variant="primary", size="lg", visible=False)
303
+
304
+ with gr.Column(scale=1):
305
+ gr.Markdown("### Output & Progress")
306
+
307
+ # Progress display
308
+ progress_display = gr.HTML('<div class="generation-progress">Ready to generate...</div>')
309
+
310
+ # Output video
311
+ video_output = gr.Video(
312
+ label="Generated Video",
313
+ visible=False
314
+ )
315
+
316
+ # Metadata
317
+ metadata_output = gr.JSON(
318
+ label="Generation Metadata",
319
+ visible=False
320
+ )
321
+
322
+         # Tab 2: Model Comparison
+         with gr.TabItem("📊 Model Comparison", id="compare_tab"):
+             gr.Markdown("### Performance Comparison")
+
+             # Performance metrics table
+             gr.DataFrame(
+                 headers=["Model", "Real-time", "PSNR ↑", "SSIM ↑", "LPIPS ↓", "R_dist ↓", "T_dist ↓"],
+                 datatype=["str", "str", "number", "number", "number", "number", "number"],
+                 value=[
+                     ["CameraCtrl", "❌", 17.93, 0.569, 0.298, 0.037, 0.341],
+                     ["SEVA", "❌", 19.84, 0.598, 0.313, 0.047, 0.223],
+                     ["ViewCrafter", "❌", 19.91, 0.617, 0.327, 0.029, 0.543],
+                     ["Gen3C", "❌", 21.68, 0.635, 0.278, 0.024, 0.477],
+                     ["VMem", "❌", 19.97, 0.587, 0.316, 0.048, 0.219],
+                     ["Matrix-Game-2.0", "✅", 17.26, 0.505, 0.383, 0.287, 0.843],
+                     ["GameCraft", "❌", 21.05, 0.639, 0.341, 0.151, 0.617],
+                     ["Ours (w/o Context Forcing)", "❌", 21.27, 0.669, 0.261, 0.033, 0.157],
+                     ["Ours (full)", "✅", 21.92, 0.702, 0.247, 0.031, 0.121]
+                 ],
+                 label="Quantitative Evaluation Results",
+                 interactive=False
+             )
+
+             with gr.Row():
+                 with gr.Column():
+                     gr.Markdown("""
+                     ### 🔬 Key Improvements
+                     - **Real-time Performance**: Achieves 24 FPS streaming generation
+                     - **Superior Consistency**: Best-in-class long-term geometric consistency
+                     - **Memory Efficiency**: Reconstituted Context Memory prevents error drift
+                     - **Action Control**: Precise keyboard and mouse input response
+                     """)
+
+                 with gr.Column():
+                     gr.Markdown("""
+                     ### 📈 Technical Innovations
+                     - **Dual Action Representation**: Robust action control
+                     - **Context Forcing**: Memory-aware model distillation
+                     - **WorldCompass**: RL-based post-training
+                     - **Temporal Reframing**: Long-past frame accessibility
+                     """)
+
+         # Tab 3: Examples
+         with gr.TabItem("🎨 Examples", id="examples_tab"):
+             gr.Markdown("### Sample Generations")
+
+             with gr.Row():
+                 with gr.Column():
+                     gr.Markdown("""
+                     #### Example 1: Bridge Scene
+                     **Prompt**: A paved pathway leads towards a stone arch bridge spanning a calm body of water...
+                     **Action**: Forward movement
+                     **Frames**: 125
+                     """)
+                     gr.Image("https://via.placeholder.com/400x225/3b82f6/ffffff?text=Bridge+Scene+Example", label="Example 1")
+
+                 with gr.Column():
+                     gr.Markdown("""
+                     #### Example 2: Forest Path
+                     **Prompt**: A winding path through an enchanted forest with ancient trees...
+                     **Action**: Circular trajectory
+                     **Frames**: 125
+                     """)
+                     gr.Image("https://via.placeholder.com/400x225/10b981/ffffff?text=Forest+Path+Example", label="Example 2")
+
+             with gr.Row():
+                 with gr.Column():
+                     gr.Markdown("""
+                     #### Example 3: Urban Scene
+                     **Prompt**: A futuristic cityscape with flying vehicles and neon lights...
+                     **Action**: Zoom in
+                     **Frames**: 125
+                     """)
+                     gr.Image("https://via.placeholder.com/400x225/8b5cf6/ffffff?text=Urban+Scene+Example", label="Example 3")
+
+                 with gr.Column():
+                     gr.Markdown("""
+                     #### Example 4: Interior Scene
+                     **Prompt**: A cozy library with bookshelves and warm lighting...
+                     **Action**: Custom trajectory
+                     **Frames**: 125
+                     """)
+                     gr.Image("https://via.placeholder.com/400x225/f59e0b/ffffff?text=Interior+Scene+Example", label="Example 4")
+
+         # Tab 4: History
+         with gr.TabItem("📜 History", id="history_tab"):
+             gr.Markdown("### Generation History")
+
+             history_display = gr.Markdown(get_generation_history())
+             refresh_history_btn = gr.Button("🔄 Refresh History")
+
+     # Footer
+     gr.HTML("""
+     <div style="text-align: center; margin-top: 40px; padding: 20px; border-top: 1px solid #e5e7eb;">
+         <p style="color: #6b7280;">
+             HY-WorldPlay: A Systematic Framework for Interactive World Modeling<br>
+             <a href="https://arxiv.org/abs/2512.14614" target="_blank" style="color: #3b82f6;">Paper</a> |
+             <a href="https://github.com/Tencent/HY-WorldPlay" target="_blank" style="color: #3b82f6;">GitHub</a> |
+             <a href="https://huggingface.co/tencent/HY-WorldPlay" target="_blank" style="color: #3b82f6;">Model Card</a>
+         </p>
+     </div>
+     """)
+
+     # Event handlers (progress and visibility updates for generation are
+     # handled inside generate_video_wrapper above)
+     def update_model_status(is_loaded, model_type):
+         if is_loaded:
+             return f'<div><span class="status-indicator status-ready"></span>{model_type.capitalize()} model loaded</div>', gr.Button(visible=True)
+         else:
+             return f'<div><span class="status-indicator status-loading"></span>Loading {model_type} model...</div>', gr.Button(visible=False)
+
+     load_model_btn.click(
+         fn=load_model_wrapper,
+         inputs=[model_type],
+         outputs=[model_status]
+     ).then(
+         fn=lambda x: update_model_status(True, x),
+         inputs=[model_type],
+         outputs=[model_status, generate_btn]
+     )
+
+     generate_btn.click(
+         fn=generate_video_wrapper,
+         inputs=[
+             prompt_input,
+             image_input,
+             resolution,
+             aspect_ratio,
+             num_frames,
+             seed,
+             model_type,
+             trajectory_type
+         ],
+         outputs=[progress_display, video_output, metadata_output]
+     )
+
+     create_pose_btn.click(
+         fn=create_pose_json,
+         inputs=[trajectory_type],
+         outputs=[pose_status]
+     ).then(
+         fn=lambda x: f"✅ Trajectory JSON created for {x} motion",
+         inputs=[trajectory_type],
+         outputs=[pose_status]
+     )
+
+     refresh_history_btn.click(
+         fn=get_generation_history,
+         outputs=[history_display]
+     )
+
+ # Launch the app (the theme is configured on gr.Blocks above)
+ demo.launch()
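+ # On Hugging Face Spaces the app is served on port 7860 by default.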
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ numpy
+ gradio>=6.0
+ requests
+ Pillow
+ pandas