| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108 |
- import json
- import sys
- from pypdf import PdfReader, PdfWriter
- from pypdf.annotations import FreeText
- # Fills a PDF by adding text annotations defined in `fields.json`. See forms.md.
- def transform_coordinates(bbox, image_width, image_height, pdf_width, pdf_height):
- """Transform bounding box from image coordinates to PDF coordinates"""
- # Image coordinates: origin at top-left, y increases downward
- # PDF coordinates: origin at bottom-left, y increases upward
- x_scale = pdf_width / image_width
- y_scale = pdf_height / image_height
-
- left = bbox[0] * x_scale
- right = bbox[2] * x_scale
-
- # Flip Y coordinates for PDF
- top = pdf_height - (bbox[1] * y_scale)
- bottom = pdf_height - (bbox[3] * y_scale)
-
- return left, bottom, right, top
- def fill_pdf_form(input_pdf_path, fields_json_path, output_pdf_path):
- """Fill the PDF form with data from fields.json"""
-
- # `fields.json` format described in forms.md.
- with open(fields_json_path, "r") as f:
- fields_data = json.load(f)
-
- # Open the PDF
- reader = PdfReader(input_pdf_path)
- writer = PdfWriter()
-
- # Copy all pages to writer
- writer.append(reader)
-
- # Get PDF dimensions for each page
- pdf_dimensions = {}
- for i, page in enumerate(reader.pages):
- mediabox = page.mediabox
- pdf_dimensions[i + 1] = [mediabox.width, mediabox.height]
-
- # Process each form field
- annotations = []
- for field in fields_data["form_fields"]:
- page_num = field["page_number"]
-
- # Get page dimensions and transform coordinates.
- page_info = next(p for p in fields_data["pages"] if p["page_number"] == page_num)
- image_width = page_info["image_width"]
- image_height = page_info["image_height"]
- pdf_width, pdf_height = pdf_dimensions[page_num]
-
- transformed_entry_box = transform_coordinates(
- field["entry_bounding_box"],
- image_width, image_height,
- pdf_width, pdf_height
- )
-
- # Skip empty fields
- if "entry_text" not in field or "text" not in field["entry_text"]:
- continue
- entry_text = field["entry_text"]
- text = entry_text["text"]
- if not text:
- continue
-
- font_name = entry_text.get("font", "Arial")
- font_size = str(entry_text.get("font_size", 14)) + "pt"
- font_color = entry_text.get("font_color", "000000")
- # Font size/color seems to not work reliably across viewers:
- # https://github.com/py-pdf/pypdf/issues/2084
- annotation = FreeText(
- text=text,
- rect=transformed_entry_box,
- font=font_name,
- font_size=font_size,
- font_color=font_color,
- border_color=None,
- background_color=None,
- )
- annotations.append(annotation)
- # page_number is 0-based for pypdf
- writer.add_annotation(page_number=page_num - 1, annotation=annotation)
-
- # Save the filled PDF
- with open(output_pdf_path, "wb") as output:
- writer.write(output)
-
- print(f"Successfully filled PDF form and saved to {output_pdf_path}")
- print(f"Added {len(annotations)} text annotations")
- if __name__ == "__main__":
- if len(sys.argv) != 4:
- print("Usage: fill_pdf_form_with_annotations.py [input pdf] [fields.json] [output pdf]")
- sys.exit(1)
- input_pdf = sys.argv[1]
- fields_json = sys.argv[2]
- output_pdf = sys.argv[3]
-
- fill_pdf_form(input_pdf, fields_json, output_pdf)
|