# AhmedBou's picture
# Create app.py
# 5392a31 verified
import gradio as gr
import fitz
import tempfile
import zipfile
from pathlib import Path
import re
def sanitize_filename(text: str) -> str:
    """Turn arbitrary text into a token usable as part of a file name.

    Every character that is not a word character, whitespace, or a hyphen
    is removed; each run of whitespace is then collapsed into a single
    underscore, and leading/trailing underscores are trimmed.
    """
    without_punctuation = re.sub(r"[^\w\s-]", "", text)
    underscored = re.sub(r"\s+", "_", without_punctuation)
    return underscored.strip("_")
def split_pdf(pdf_file):
    """Split a bookmarked PDF into one PDF per level-1 bookmark and zip them.

    Each level-1 table-of-contents entry starts a chapter that runs up to
    the page before the next level-1 entry (or to the end of the document
    for the last one). Chapters are written as
    ``Chapter_NN_<sanitized title>.pdf`` into ``chapters.zip`` inside a
    freshly created temporary directory.

    Args:
        pdf_file: The uploaded file. Either an object exposing the file path
            as ``.name`` or a plain path string; ``None`` when nothing was
            uploaded.

    Returns:
        A ``(zip_path, status_message)`` tuple. ``zip_path`` is the archive
        path as a string, or ``None`` when no archive was produced; the
        message is Markdown text for the status area.
    """
    if pdf_file is None:
        # Clicking the button without an upload must not crash the handler.
        return None, "❌ Please upload a PDF first."

    # Use ``.name`` when present, otherwise treat the value itself as a path.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    doc = fitz.open(pdf_path)
    try:
        toc = doc.get_toc()
        if not toc:
            return None, "❌ No bookmarks found in this PDF."

        # TOC entries are indexed as [level, title, page]; keep level 1 only.
        chapters = [entry for entry in toc if entry[0] == 1]
        if not chapters:
            return None, "❌ No level-1 chapters found."

        temp_dir = Path(tempfile.mkdtemp())
        zip_path = temp_dir / "chapters.zip"
        last_page = doc.page_count - 1
        written = 0

        with zipfile.ZipFile(zip_path, "w") as zf:
            for i, (_, title, page) in enumerate(chapters):
                # Bookmark pages are 1-based; insert_pdf takes 0-based indices.
                start_page = page - 1
                if i + 1 < len(chapters):
                    # Stop on the page just before the next chapter begins.
                    end_page = chapters[i + 1][2] - 2
                else:
                    end_page = last_page

                # Skip bookmarks without a usable target page (negative
                # start) and empty ranges (two chapters on the same page).
                if start_page < 0 or start_page > end_page:
                    continue

                name = f"Chapter_{i+1:02d}_{sanitize_filename(title)}.pdf"
                output = temp_dir / name

                new_doc = fitz.open()
                try:
                    new_doc.insert_pdf(doc, from_page=start_page, to_page=end_page)
                    new_doc.save(output)
                finally:
                    # Release the chapter document even if copying/saving fails.
                    new_doc.close()

                zf.write(output, arcname=name)
                # The archive now holds the data; drop the intermediate file.
                output.unlink()
                written += 1
    finally:
        # Always release the source document, including on early returns.
        doc.close()

    if not written:
        return None, "❌ No chapters could be extracted."
    return str(zip_path), "✅ Chapters extracted successfully!"
# Gradio UI: one PDF upload, a button that runs split_pdf, and two outputs
# (the downloadable ZIP and a Markdown status line).
with gr.Blocks(title="📚 Smart PDF Chapter Splitter") as demo:
    gr.Markdown("## 📚 Smart PDF Chapter Splitter")
    gr.Markdown(
        "Upload a PDF with bookmarks and get clean chapter files — fast and deterministic."
    )
    pdf_input = gr.File(label="📖 Upload PDF", file_types=[".pdf"])
    output_zip = gr.File(label="📦 Download Chapters (ZIP)")
    status = gr.Markdown()
    split_btn = gr.Button("✂️ Split PDF")
    # split_pdf returns (zip_path, status_message), matching the outputs order.
    split_btn.click(
        fn=split_pdf,
        inputs=pdf_input,
        outputs=[output_zip, status],
    )

demo.launch()