import gradio as gr import fitz import tempfile import zipfile from pathlib import Path import re def sanitize_filename(text: str) -> str: text = re.sub(r"[^\w\s-]", "", text) return re.sub(r"\s+", "_", text).strip("_") def split_pdf(pdf_file): doc = fitz.open(pdf_file.name) toc = doc.get_toc() if not toc: return None, "❌ No bookmarks found in this PDF." chapters = [item for item in toc if item[0] == 1] if not chapters: return None, "❌ No level-1 chapters found." temp_dir = Path(tempfile.mkdtemp()) zip_path = temp_dir / "chapters.zip" with zipfile.ZipFile(zip_path, "w") as zf: for i, (_, title, page) in enumerate(chapters): start_page = page - 1 end_page = ( chapters[i + 1][2] - 2 if i + 1 < len(chapters) else doc.page_count - 1 ) if start_page > end_page: continue new_doc = fitz.open() new_doc.insert_pdf(doc, from_page=start_page, to_page=end_page) name = f"Chapter_{i+1:02d}_{sanitize_filename(title)}.pdf" output = temp_dir / name new_doc.save(output) new_doc.close() zf.write(output, arcname=name) doc.close() return str(zip_path), "✅ Chapters extracted successfully!" with gr.Blocks(title="📚 Smart PDF Chapter Splitter") as demo: gr.Markdown("## 📚 Smart PDF Chapter Splitter") gr.Markdown( "Upload a PDF with bookmarks and get clean chapter files — fast and deterministic." ) pdf_input = gr.File(label="📖 Upload PDF", file_types=[".pdf"]) output_zip = gr.File(label="📦 Download Chapters (ZIP)") status = gr.Markdown() split_btn = gr.Button("✂️ Split PDF") split_btn.click( fn=split_pdf, inputs=pdf_input, outputs=[output_zip, status], ) demo.launch()