convert_pdf_to_images.py 1.1 KB

1234567891011121314151617181920212223242526272829303132333435
  1. import os
  2. import sys
  3. from pdf2image import convert_from_path
  4. # Converts each page of a PDF to a PNG image.
  def convert(pdf_path, output_dir, max_dim=1000):
      """Render each page of a PDF to a PNG file inside `output_dir`.

      Pages are rasterized via `convert_from_path` at 200 dpi. Any page whose
      width or height exceeds `max_dim` is scaled down, preserving its aspect
      ratio, so that both dimensions fit within `max_dim`. Files are written
      as `page_1.png`, `page_2.png`, ... and a progress line is printed for
      every page, followed by a final summary line.

      Args:
          pdf_path: Path of the PDF file to convert.
          output_dir: Directory that receives the PNG files; it is created
              (including missing parents) when it does not exist yet.
          max_dim: Largest allowed width/height, in pixels, of a saved image.

      Raises:
          ValueError: If `max_dim` is not a positive number.
      """
      # Fail loudly on a nonsensical limit; with the 1-pixel clamp below it
      # would otherwise silently produce 1x1 images.
      if max_dim <= 0:
          raise ValueError(f"max_dim must be positive, got {max_dim!r}")

      # Create the destination before rendering so a missing directory does
      # not waste the (potentially slow) rasterization step.
      os.makedirs(output_dir, exist_ok=True)

      images = convert_from_path(pdf_path, dpi=200)
      for page_number, image in enumerate(images, start=1):
          # Scale image if needed to keep width/height under `max_dim`
          width, height = image.size
          if width > max_dim or height > max_dim:
              scale_factor = min(max_dim / width, max_dim / height)
              # int() truncates, so a very elongated page could otherwise end
              # up with a zero-sized dimension; keep at least one pixel.
              new_width = max(1, int(width * scale_factor))
              new_height = max(1, int(height * scale_factor))
              image = image.resize((new_width, new_height))

          image_path = os.path.join(output_dir, f"page_{page_number}.png")
          image.save(image_path)
          print(f"Saved page {page_number} as {image_path} (size: {image.size})")

      print(f"Converted {len(images)} pages to PNG images")
  # Script entry point: requires exactly two command-line arguments, the
  # input PDF path followed by the output directory.
  if __name__ == "__main__":
      # Count only the user-supplied arguments (everything after the script name).
      if len(sys.argv[1:]) != 2:
          print("Usage: convert_pdf_to_images.py [input pdf] [output directory]")
          sys.exit(1)

      pdf_path, output_directory = sys.argv[1:]
      convert(pdf_path, output_directory)