Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import fitz | |
| import tempfile | |
| import zipfile | |
| from pathlib import Path | |
| import re | |
def sanitize_filename(text: str) -> str:
    """Turn an arbitrary title into a string usable as part of a file name.

    Every character that is not a word character, whitespace, or a hyphen
    is dropped; each run of whitespace then becomes a single underscore;
    finally, leading and trailing underscores are stripped.
    """
    # Step 1: discard everything except word characters, whitespace and "-".
    kept = re.sub(r"[^\w\s-]", "", text)
    # Step 2: collapse each whitespace run into one underscore.
    underscored = re.sub(r"\s+", "_", kept)
    # Step 3: trim underscores from both ends (including ones that were
    # already present in the input, since "_" is a word character).
    return underscored.strip("_")
def split_pdf(pdf_file):
    """Split a bookmarked PDF into one PDF per level-1 bookmark and zip them.

    Each level-1 table-of-contents entry starts a chapter that runs up to the
    page before the next level-1 entry (or to the last page of the document
    for the final chapter). Every chapter is written as
    ``Chapter_NN_<sanitized title>.pdf`` into a single ``chapters.zip``
    inside a freshly created temporary directory.

    Args:
        pdf_file: The uploaded file. Either an object exposing the file
            path as ``.name``, or the path itself as ``str`` / ``Path``.
            ``None`` (nothing uploaded) is tolerated.

    Returns:
        A ``(zip_path, status_message)`` tuple. ``zip_path`` is the path of
        the generated archive as a string, or ``None`` when nothing could be
        extracted.
    """
    if pdf_file is None:
        # The button was pressed without a file; report it instead of
        # failing on the attribute access below.
        return None, "Please upload a PDF first."

    # Accept a bare path as well as an upload wrapper carrying it in `.name`.
    pdf_path = pdf_file if isinstance(pdf_file, (str, Path)) else pdf_file.name

    doc = fitz.open(pdf_path)
    try:
        toc = doc.get_toc()
        if not toc:
            return None, "β No bookmarks found in this PDF."

        # Keep level-1 entries only. Entries whose page number is below 1
        # are dropped as well: they would yield a negative start page here
        # and a negative end page for the preceding chapter.
        # NOTE(review): presumably such entries are bookmarks without a
        # target page in this document — confirm against the PyMuPDF docs.
        chapters = [item for item in toc if item[0] == 1 and item[2] >= 1]
        if not chapters:
            return None, "β No level-1 chapters found."

        temp_dir = Path(tempfile.mkdtemp())
        zip_path = temp_dir / "chapters.zip"
        last_page = doc.page_count - 1

        with zipfile.ZipFile(zip_path, "w") as zf:
            for i, (_, title, page) in enumerate(chapters):
                # TOC pages are 1-based; insert_pdf takes 0-based indices.
                start_page = page - 1
                if i + 1 < len(chapters):
                    # 0-based index of the page just before the next chapter.
                    end_page = chapters[i + 1][2] - 2
                else:
                    end_page = last_page

                # Empty range, e.g. two chapters starting on the same page.
                if start_page > end_page:
                    continue

                name = f"Chapter_{i+1:02d}_{sanitize_filename(title)}.pdf"
                output = temp_dir / name

                new_doc = fitz.open()
                try:
                    new_doc.insert_pdf(doc, from_page=start_page, to_page=end_page)
                    new_doc.save(output)
                finally:
                    new_doc.close()

                zf.write(output, arcname=name)
                # The chapter now lives in the archive; drop the loose copy
                # so the temp directory holds only the ZIP.
                output.unlink()
    finally:
        # Release the source document on every path, including the early
        # returns above and any exception raised while splitting.
        doc.close()

    return str(zip_path), "β Chapters extracted successfully!"
# Gradio front end: one upload field, one download field, a status line and
# a button that runs split_pdf on the uploaded file.
with gr.Blocks(title="π Smart PDF Chapter Splitter") as demo:
    # Page heading and a one-line description.
    gr.Markdown(value="## π Smart PDF Chapter Splitter")
    gr.Markdown(
        value="Upload a PDF with bookmarks and get clean chapter files β fast and deterministic."
    )

    # Input: the PDF to split (upload restricted to .pdf files).
    pdf_input = gr.File(file_types=[".pdf"], label="π Upload PDF")
    # Outputs: the resulting archive and a status message.
    output_zip = gr.File(label="π¦ Download Chapters (ZIP)")
    status = gr.Markdown()

    split_btn = gr.Button(value="βοΈ Split PDF")
    # split_pdf returns (zip_path, message); they are routed to output_zip
    # and status in that order.
    split_btn.click(split_pdf, pdf_input, [output_zip, status])

# Start the app as soon as the module is executed.
demo.launch()