// Source: Hugging Face Hub file view; uploaded by akhaliq (HF Staff).
// Commit def3655 (verified): "Upload index.js with huggingface_hub"
import {
AutoProcessor,
AutoModelForImageTextToText,
load_image,
TextStreamer,
} from "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]";
/**
 * Page controller for live video captioning.
 *
 * The user picks a video file, the app plays it in a <video> element, and
 * while it plays the current frame is periodically captured and sent to a
 * vision-language model. Each generated caption is streamed into an overlay
 * and appended to a timestamped history that can be copied or downloaded.
 *
 * The class expects the DOM ids listed in initializeElements() to exist.
 */
class VideoCaptionApp {
  /** Sets up state, looks up DOM nodes, wires events and probes WebGPU. */
  constructor() {
    this.videoFile = null;
    // Object URL currently assigned to the player; tracked so it can be revoked.
    this.videoURL = null;
    this.model = null;
    this.processor = null;
    // In-flight model load, shared by concurrent initializeModel() callers.
    this.modelLoadPromise = null;
    // True while a caption is being generated (prevents overlapping runs).
    this.isProcessing = false;
    this.captionInterval = null;
    this.isLiveCaptioning = false;
    this.captionHistory = [];
    this.initializeElements();
    this.attachEventListeners();
    // Fire-and-forget: the probe only adjusts the device <select>.
    this.checkWebGPUSupport().catch((error) => {
      console.error('WebGPU detection failed:', error);
    });
  }

  /** Caches references to every DOM element the app touches. */
  initializeElements() {
    this.elements = {
      videoPlayer: document.getElementById('videoPlayer'),
      videoInput: document.getElementById('videoInput'),
      uploadArea: document.getElementById('uploadArea'),
      processBtn: document.getElementById('processBtn'),
      deviceSelect: document.getElementById('deviceSelect'),
      results: document.getElementById('results'),
      frameCaptions: document.getElementById('frameCaptions'),
      controls: document.getElementById('controls'),
      controlsContent: document.getElementById('controlsContent'),
      controlsToggle: document.getElementById('controlsToggle'),
      copyBtn: document.getElementById('copyBtn'),
      downloadBtn: document.getElementById('downloadBtn'),
      changeVideoBtn: document.getElementById('changeVideoBtn'),
      liveCaption: document.getElementById('liveCaption'),
      captionText: document.getElementById('captionText'),
    };
  }

  /** Wires upload, drag-and-drop, playback and toolbar events. */
  attachEventListeners() {
    const {
      uploadArea,
      videoInput,
      processBtn,
      videoPlayer,
      copyBtn,
      downloadBtn,
      changeVideoBtn,
      controlsToggle,
    } = this.elements;

    // --- Upload area: click to browse, or drag a video file onto it. ---
    uploadArea.addEventListener('click', () => {
      if (!this.isProcessing) {
        videoInput.click();
      }
    });
    uploadArea.addEventListener('dragover', (e) => {
      // preventDefault is required for the element to accept a drop.
      e.preventDefault();
      if (!this.isProcessing) {
        uploadArea.classList.add('drag-over');
      }
    });
    uploadArea.addEventListener('dragleave', () => {
      uploadArea.classList.remove('drag-over');
    });
    uploadArea.addEventListener('drop', (e) => {
      e.preventDefault();
      uploadArea.classList.remove('drag-over');
      if (this.isProcessing || e.dataTransfer.files.length === 0) {
        return;
      }
      const file = e.dataTransfer.files[0];
      if (file.type.startsWith('video/')) {
        this.handleVideoUpload(file);
      }
    });
    videoInput.addEventListener('change', (e) => {
      if (e.target.files.length > 0) {
        this.handleVideoUpload(e.target.files[0]);
      }
    });

    // --- Start/stop button toggles live captioning. ---
    processBtn.addEventListener('click', () => {
      if (!this.videoFile) {
        return;
      }
      if (this.isLiveCaptioning) {
        this.stopLiveCaptions();
      } else {
        this.startLiveCaptions();
      }
    });

    // --- Keep the caption timer in step with playback. ---
    videoPlayer.addEventListener('ended', () => {
      if (this.isLiveCaptioning) {
        this.stopLiveCaptions();
      }
    });
    videoPlayer.addEventListener('pause', () => {
      // Pausing suspends the timer but keeps the captioning session alive.
      if (this.isLiveCaptioning && this.captionInterval) {
        clearInterval(this.captionInterval);
        this.captionInterval = null;
      }
    });
    videoPlayer.addEventListener('play', () => {
      if (this.isLiveCaptioning && !this.captionInterval) {
        this.startCaptionInterval();
      }
    });

    // --- Toolbar. ---
    copyBtn.addEventListener('click', () => {
      this.copyResults();
    });
    downloadBtn.addEventListener('click', () => {
      this.downloadResults();
    });
    changeVideoBtn.addEventListener('click', () => {
      this.changeVideo();
    });
    controlsToggle.addEventListener('click', () => {
      this.toggleControls();
    });
  }

  /**
   * Selects the default inference device.
   *
   * `navigator.gpu` existing does not guarantee a usable GPU, so an adapter
   * is actually requested. When none is available the "webgpu" option is
   * disabled (if present) and the select falls back to "cpu".
   *
   * @returns {Promise<void>}
   */
  async checkWebGPUSupport() {
    const { deviceSelect } = this.elements;
    let hasWebGPU = false;
    if (navigator.gpu) {
      try {
        const adapter = await navigator.gpu.requestAdapter();
        hasWebGPU = Boolean(adapter);
      } catch (error) {
        console.warn('WebGPU adapter request failed:', error);
      }
    }
    if (hasWebGPU) {
      // Default to WebGPU if available
      deviceSelect.value = 'webgpu';
      return;
    }
    const webgpuOption = deviceSelect.querySelector('option[value="webgpu"]');
    if (webgpuOption) {
      webgpuOption.disabled = true;
    }
    deviceSelect.value = 'cpu';
  }

  /** Revokes the object URL backing the player, if there is one. */
  releaseVideoURL() {
    if (this.videoURL) {
      URL.revokeObjectURL(this.videoURL);
      this.videoURL = null;
    }
  }

  /**
   * Loads a newly chosen video into the player and resets caption state.
   *
   * @param {File} file - The video file selected or dropped by the user.
   */
  handleVideoUpload(file) {
    // Free the previous video's object URL so repeated uploads do not leak.
    this.releaseVideoURL();
    this.videoFile = file;
    this.videoURL = URL.createObjectURL(file);
    this.elements.videoPlayer.src = this.videoURL;
    this.elements.uploadArea.style.display = 'none';
    this.elements.controls.style.display = 'block';
    this.elements.results.style.display = 'none';
    this.elements.liveCaption.style.display = 'none';
    // Keep the rendered list in sync with the (now empty) history.
    this.captionHistory = [];
    this.elements.frameCaptions.textContent = '';
  }

  /**
   * Captures the frame currently shown by the player as a JPEG blob.
   *
   * The frame is downscaled to fit within 640x360 while preserving the
   * video's aspect ratio; it is never upscaled.
   *
   * @returns {Promise<{blob: Blob, timestamp: number}>} The encoded frame and
   *   the playback position (seconds) at the moment it was drawn.
   * @throws {Error} If the video has no decoded dimensions yet or the canvas
   *   could not be encoded.
   */
  async captureCurrentFrame() {
    const maxWidth = 640;
    const maxHeight = 360;
    const video = this.elements.videoPlayer;
    const sourceWidth = video.videoWidth;
    const sourceHeight = video.videoHeight;
    if (!sourceWidth || !sourceHeight) {
      throw new Error('Video frame is not available yet.');
    }

    // One scale factor for both axes keeps the picture undistorted.
    const scale = Math.min(1, maxWidth / sourceWidth, maxHeight / sourceHeight);
    const canvas = document.createElement('canvas');
    canvas.width = Math.max(1, Math.round(sourceWidth * scale));
    canvas.height = Math.max(1, Math.round(sourceHeight * scale));

    const ctx = canvas.getContext('2d');
    // Read the timestamp at draw time so it matches the captured pixels
    // rather than wherever playback is once encoding finishes.
    const timestamp = video.currentTime;
    ctx.drawImage(video, 0, 0, canvas.width, canvas.height);

    const blob = await new Promise((resolve) => {
      canvas.toBlob(resolve, 'image/jpeg', 0.5);
    });
    if (!blob) {
      throw new Error('Failed to encode video frame.');
    }
    return { blob, timestamp };
  }

  /**
   * Ensures the processor and model are loaded.
   *
   * Concurrent callers share one in-flight load instead of each starting
   * their own. A failed load can be retried by calling again.
   *
   * @returns {Promise<void>}
   * @throws Whatever the underlying load throws.
   */
  async initializeModel() {
    if (!this.modelLoadPromise) {
      if (this.model) return;
      this.modelLoadPromise = this.loadModel();
    }
    try {
      await this.modelLoadPromise;
    } finally {
      this.modelLoadPromise = null;
    }
  }

  /**
   * Downloads the processor and model, then runs a warm-up generation.
   * Progress is reported through updateStatus().
   *
   * @returns {Promise<void>}
   */
  async loadModel() {
    const device = this.elements.deviceSelect.value;
    const model_id = "onnx-community/FastVLM-0.5B-ONNX";
    this.updateStatus('Loading AI model...');
    try {
      this.processor = await AutoProcessor.from_pretrained(model_id);
      this.updateStatus('Initializing model...');
      const modelOptions = {
        dtype: {
          embed_tokens: "fp16",
          vision_encoder: "q4",
          decoder_model_merged: "q4",
        },
      };
      // Only request WebGPU explicitly; otherwise the library default is used.
      if (device === 'webgpu') {
        modelOptions.device = 'webgpu';
      }
      this.model = await AutoModelForImageTextToText.from_pretrained(model_id, modelOptions);
      // Pre-warm the model with a dummy run
      this.updateStatus('Warming up model...');
      await this.warmupModel();
      this.updateStatus('Model ready');
    } catch (error) {
      console.error('Model initialization error:', error);
      throw error;
    }
  }

  /**
   * Shows a dimmed status message in the live-caption overlay.
   *
   * @param {string} status - Text to display.
   */
  updateStatus(status) {
    this.elements.captionText.textContent = status;
    this.elements.captionText.style.opacity = '0.6';
  }

  /**
   * Turns an image blob plus a question into model inputs.
   *
   * @param {Blob} blob - Encoded image.
   * @param {string} question - Text placed after the `<image>` placeholder.
   * @returns {Promise<object>} Whatever the processor returns for the pair.
   */
  async prepareInputs(blob, question) {
    const frameUrl = URL.createObjectURL(blob);
    try {
      const image = await load_image(frameUrl);
      const messages = [
        {
          role: "user",
          content: `<image>${question}`,
        },
      ];
      const prompt = this.processor.apply_chat_template(messages, {
        add_generation_prompt: true,
      });
      return await this.processor(image, prompt, {
        add_special_tokens: false,
      });
    } finally {
      // Always release the temporary URL, including when loading fails.
      URL.revokeObjectURL(frameUrl);
    }
  }

  /**
   * Runs one short generation on a black 224x224 image so that the first
   * real caption does not pay one-time start-up costs. Best-effort: failures
   * are logged and do not prevent the model from being used.
   *
   * @returns {Promise<void>}
   */
  async warmupModel() {
    try {
      // Create a small dummy image
      const canvas = document.createElement('canvas');
      canvas.width = 224;
      canvas.height = 224;
      const ctx = canvas.getContext('2d');
      ctx.fillStyle = 'black';
      ctx.fillRect(0, 0, 224, 224);
      const blob = await new Promise((resolve) => {
        canvas.toBlob(resolve, 'image/jpeg', 0.5);
      });
      const inputs = await this.prepareInputs(blob, 'Describe what you see in one sentence.');
      // Run a quick generation to warm up the model
      await this.model.generate({
        ...inputs,
        max_new_tokens: 5,
        do_sample: false,
      });
    } catch (error) {
      console.error('Warmup error:', error);
    }
  }

  /**
   * Switches the UI into captioning mode, loads the model if needed, starts
   * playback and begins periodic caption generation.
   *
   * @returns {Promise<void>}
   */
  async startLiveCaptions() {
    this.isLiveCaptioning = true;
    this.elements.processBtn.classList.add('loading');
    this.elements.processBtn.querySelector('.btn-text').textContent = 'Stop Captions';
    this.elements.controls.classList.add('collapsed');
    this.elements.liveCaption.style.display = 'block';
    this.elements.results.style.display = 'block';
    try {
      await this.initializeModel();
      // The user may have pressed stop (or swapped the video) while the model
      // was loading; in that case do not start playback behind their back.
      if (!this.isLiveCaptioning) {
        return;
      }
      // Start playing the video. play() returns a promise that rejects when
      // playback is blocked or interrupted; handle it so it is not reported
      // as an unhandled rejection.
      const playback = this.elements.videoPlayer.play();
      if (playback && typeof playback.catch === 'function') {
        playback.catch((error) => {
          console.warn('Video playback could not start:', error);
        });
      }
      // Start the caption interval
      this.startCaptionInterval();
    } catch (error) {
      console.error('Error starting live captions:', error);
      this.stopLiveCaptions();
      alert('Failed to start live captions. Please try again.');
    }
  }

  /**
   * Generates a caption immediately and then schedules one every 500 ms
   * while the video is playing. Any previously running timer is replaced so
   * timers never stack.
   */
  startCaptionInterval() {
    if (this.captionInterval) {
      clearInterval(this.captionInterval);
      this.captionInterval = null;
    }
    // Use very short intervals for real-time feel
    const intervalSeconds = 0.5;
    // Generate initial caption (generateLiveCaption handles its own errors).
    this.generateLiveCaption();
    // Set up interval for continuous captions. Ticks that arrive while a
    // caption is still being generated are dropped by generateLiveCaption.
    this.captionInterval = setInterval(() => {
      const player = this.elements.videoPlayer;
      if (!player.paused && !player.ended) {
        this.generateLiveCaption();
      }
    }, intervalSeconds * 1000);
  }

  /**
   * Captions the current frame: streams the text into the overlay, then
   * records it in the history and the on-page list.
   *
   * Does nothing if another caption is still being generated or the player
   * has no frame to show yet. Errors are logged, not thrown.
   *
   * @returns {Promise<void>}
   */
  async generateLiveCaption() {
    if (this.isProcessing) return;
    // readyState 2 is HAVE_CURRENT_DATA: below that there is no frame to draw.
    if (this.elements.videoPlayer.readyState < 2) return;
    this.isProcessing = true;
    // Remember which video this caption belongs to (see the check below).
    const sourceFile = this.videoFile;
    try {
      const frame = await this.captureCurrentFrame();
      const inputs = await this.prepareInputs(frame.blob, "What's happening?");

      // Clear previous caption before streaming the new one.
      let captionText = '';
      this.elements.captionText.style.opacity = '1';
      this.elements.captionText.textContent = '';
      const streamer = new TextStreamer(this.processor.tokenizer, {
        skip_prompt: true,
        skip_special_tokens: true,
        callback_function: (text) => {
          captionText += text;
          // Stream the text to the live caption
          this.elements.captionText.textContent = captionText;
        },
      });

      // Greedy decoding (do_sample: false); no sampling temperature is passed
      // because it has no effect on the greedy choice.
      await this.model.generate({
        ...inputs,
        max_new_tokens: 15, // Keep at 15 for fast generation
        do_sample: false,
        streamer,
        repetition_penalty: 1.0,
      });

      // If the video was swapped while generating, this caption describes
      // the old video and must not land in the new video's history.
      if (this.videoFile !== sourceFile) {
        return;
      }

      // Add to history
      const captionData = {
        timestamp: frame.timestamp,
        caption: captionText.trim(),
      };
      this.captionHistory.push(captionData);
      this.displayFrameCaption(captionData);
    } catch (error) {
      console.error('Error generating caption:', error);
    } finally {
      this.isProcessing = false;
    }
  }

  /** Ends the captioning session: stops the timer, pauses playback, restores the UI. */
  stopLiveCaptions() {
    this.isLiveCaptioning = false;
    this.elements.processBtn.classList.remove('loading');
    this.elements.processBtn.querySelector('.btn-text').textContent = 'Start Live Captions';
    if (this.captionInterval) {
      clearInterval(this.captionInterval);
      this.captionInterval = null;
    }
    this.elements.videoPlayer.pause();
    this.elements.liveCaption.style.display = 'none';
    this.elements.controls.classList.remove('collapsed');
  }

  /** Collapses or expands the controls panel. */
  toggleControls() {
    this.elements.controls.classList.toggle('collapsed');
  }

  /**
   * Prepends one caption entry to the on-page list, keeping at most 20.
   *
   * The entry is built from DOM nodes with `textContent` instead of an HTML
   * string, so model output containing `<`, `>` or `&` is shown literally and
   * can never be interpreted as markup.
   *
   * @param {{timestamp: number, caption: string}} captionData
   */
  displayFrameCaption(captionData) {
    const maxVisibleCaptions = 20;
    const list = this.elements.frameCaptions;

    const timeLabel = document.createElement('span');
    timeLabel.className = 'frame-time';
    timeLabel.textContent = this.formatTime(captionData.timestamp);

    const header = document.createElement('div');
    header.className = 'frame-header';
    header.appendChild(timeLabel);

    const text = document.createElement('p');
    text.className = 'frame-text';
    text.textContent = captionData.caption;

    const captionElement = document.createElement('div');
    captionElement.className = 'frame-caption-item';
    captionElement.appendChild(header);
    captionElement.appendChild(text);

    // Newest first.
    list.insertBefore(captionElement, list.firstChild);
    // Keep only last 20 captions in view for rapid updates
    while (list.children.length > maxVisibleCaptions) {
      list.removeChild(list.lastChild);
    }
  }

  /**
   * Formats a playback position as `m:ss`.
   *
   * @param {number} seconds - Position in seconds; non-finite or negative
   *   values are treated as 0.
   * @returns {string} For example `"1:05"`. Minutes are not wrapped into
   *   hours, so 3600 becomes `"60:00"`.
   */
  formatTime(seconds) {
    const totalSeconds = Number.isFinite(seconds) && seconds > 0 ? Math.floor(seconds) : 0;
    const mins = Math.floor(totalSeconds / 60);
    const secs = totalSeconds % 60;
    return `${mins}:${secs.toString().padStart(2, '0')}`;
  }

  /**
   * Renders the caption history as plain text, one `[m:ss] caption` per line.
   *
   * @returns {string}
   */
  formatCaptionHistory() {
    return this.captionHistory
      .map((entry) => `[${this.formatTime(entry.timestamp)}] ${entry.caption}`)
      .join('\n');
  }

  /**
   * Copies the caption history to the clipboard and briefly marks the button.
   * Failures are logged.
   *
   * @returns {Promise<void>}
   */
  async copyResults() {
    const captions = this.formatCaptionHistory();
    try {
      await navigator.clipboard.writeText(captions);
      this.elements.copyBtn.classList.add('copied');
      setTimeout(() => {
        this.elements.copyBtn.classList.remove('copied');
      }, 2000);
    } catch (err) {
      console.error('Failed to copy:', err);
    }
  }

  /** Downloads the caption history as a timestamp-named .txt file. */
  downloadResults() {
    const captions = this.formatCaptionHistory();
    const blob = new Blob([captions], { type: 'text/plain' });
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    // ISO timestamp cut to seconds, with ':' replaced because it is not
    // allowed in file names on some platforms.
    a.download = `captions_${new Date().toISOString().slice(0, 19).replace(/:/g, '-')}.txt`;
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);
    URL.revokeObjectURL(url);
    this.elements.downloadBtn.classList.add('copied');
    setTimeout(() => {
      this.elements.downloadBtn.classList.remove('copied');
    }, 2000);
  }

  /** Discards the current video and its captions, then reopens the file picker. */
  changeVideo() {
    // Stop any ongoing captioning
    if (this.isLiveCaptioning) {
      this.stopLiveCaptions();
    }
    const { videoPlayer, videoInput } = this.elements;
    // Reset the video source. Removing the attribute and calling load()
    // unloads the media; assigning an empty string would instead make the
    // player try to load an invalid source.
    videoPlayer.pause();
    videoPlayer.removeAttribute('src');
    videoPlayer.load();
    this.releaseVideoURL();
    this.videoFile = null;
    // Show upload area, hide controls
    this.elements.uploadArea.style.display = 'block';
    this.elements.controls.style.display = 'none';
    this.elements.results.style.display = 'none';
    this.elements.liveCaption.style.display = 'none';
    // Clear caption history
    this.captionHistory = [];
    this.elements.frameCaptions.textContent = '';
    // Clear the previous selection so picking the same file again still
    // fires a `change` event, then trigger the file input.
    videoInput.value = '';
    videoInput.click();
  }
}
/**
 * Creates the app once the document has been parsed.
 *
 * If this module is evaluated after DOMContentLoaded has already fired (for
 * example when it is loaded asynchronously), a listener for that event would
 * never run, so the ready state is checked first.
 */
function startVideoCaptionApp() {
  new VideoCaptionApp();
}

if (document.readyState === 'loading') {
  document.addEventListener('DOMContentLoaded', startVideoCaptionApp, { once: true });
} else {
  startVideoCaptionApp();
}