thumbnail.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450
  1. #!/usr/bin/env python3
  2. """
  3. Create thumbnail grids from PowerPoint presentation slides.
  4. Creates a grid layout of slide thumbnails with configurable columns (max 6).
  5. Each grid contains up to cols×(cols+1) images. For presentations with more
  6. slides, multiple numbered grid files are created automatically.
  7. The program outputs the names of all files created.
  8. Output:
  9. - Single grid: {prefix}.jpg (if slides fit in one grid)
  10. - Multiple grids: {prefix}-1.jpg, {prefix}-2.jpg, etc.
  11. Grid limits by column count:
  12. - 3 cols: max 12 slides per grid (3×4)
  13. - 4 cols: max 20 slides per grid (4×5)
  14. - 5 cols: max 30 slides per grid (5×6) [default]
  15. - 6 cols: max 42 slides per grid (6×7)
  16. Usage:
  17. python thumbnail.py input.pptx [output_prefix] [--cols N] [--outline-placeholders]
  18. Examples:
  19. python thumbnail.py presentation.pptx
  20. # Creates: thumbnails.jpg (using default prefix)
  21. # Outputs:
  22. # Created 1 grid(s):
  23. # - thumbnails.jpg
  24. python thumbnail.py large-deck.pptx grid --cols 4
  25. # Creates: grid-1.jpg, grid-2.jpg, grid-3.jpg
  26. # Outputs:
  27. # Created 3 grid(s):
  28. # - grid-1.jpg
  29. # - grid-2.jpg
  30. # - grid-3.jpg
  31. python thumbnail.py template.pptx analysis --outline-placeholders
  32. # Creates thumbnail grids with red outlines around text placeholders
  33. """
  34. import argparse
  35. import subprocess
  36. import sys
  37. import tempfile
  38. from pathlib import Path
  39. from inventory import extract_text_inventory
  40. from PIL import Image, ImageDraw, ImageFont
  41. from pptx import Presentation
  42. # Constants
  43. THUMBNAIL_WIDTH = 300 # Fixed thumbnail width in pixels
  44. CONVERSION_DPI = 100 # DPI for PDF to image conversion
  45. MAX_COLS = 6 # Maximum number of columns
  46. DEFAULT_COLS = 5 # Default number of columns
  47. JPEG_QUALITY = 95 # JPEG compression quality
  48. # Grid layout constants
  49. GRID_PADDING = 20 # Padding between thumbnails
  50. BORDER_WIDTH = 2 # Border width around thumbnails
  51. FONT_SIZE_RATIO = 0.12 # Font size as fraction of thumbnail width
  52. LABEL_PADDING_RATIO = 0.4 # Label padding as fraction of font size
  53. def main():
  54. parser = argparse.ArgumentParser(
  55. description="Create thumbnail grids from PowerPoint slides."
  56. )
  57. parser.add_argument("input", help="Input PowerPoint file (.pptx)")
  58. parser.add_argument(
  59. "output_prefix",
  60. nargs="?",
  61. default="thumbnails",
  62. help="Output prefix for image files (default: thumbnails, will create prefix.jpg or prefix-N.jpg)",
  63. )
  64. parser.add_argument(
  65. "--cols",
  66. type=int,
  67. default=DEFAULT_COLS,
  68. help=f"Number of columns (default: {DEFAULT_COLS}, max: {MAX_COLS})",
  69. )
  70. parser.add_argument(
  71. "--outline-placeholders",
  72. action="store_true",
  73. help="Outline text placeholders with a colored border",
  74. )
  75. args = parser.parse_args()
  76. # Validate columns
  77. cols = min(args.cols, MAX_COLS)
  78. if args.cols > MAX_COLS:
  79. print(f"Warning: Columns limited to {MAX_COLS} (requested {args.cols})")
  80. # Validate input
  81. input_path = Path(args.input)
  82. if not input_path.exists() or input_path.suffix.lower() != ".pptx":
  83. print(f"Error: Invalid PowerPoint file: {args.input}")
  84. sys.exit(1)
  85. # Construct output path (always JPG)
  86. output_path = Path(f"{args.output_prefix}.jpg")
  87. print(f"Processing: {args.input}")
  88. try:
  89. with tempfile.TemporaryDirectory() as temp_dir:
  90. # Get placeholder regions if outlining is enabled
  91. placeholder_regions = None
  92. slide_dimensions = None
  93. if args.outline_placeholders:
  94. print("Extracting placeholder regions...")
  95. placeholder_regions, slide_dimensions = get_placeholder_regions(
  96. input_path
  97. )
  98. if placeholder_regions:
  99. print(f"Found placeholders on {len(placeholder_regions)} slides")
  100. # Convert slides to images
  101. slide_images = convert_to_images(input_path, Path(temp_dir), CONVERSION_DPI)
  102. if not slide_images:
  103. print("Error: No slides found")
  104. sys.exit(1)
  105. print(f"Found {len(slide_images)} slides")
  106. # Create grids (max cols×(cols+1) images per grid)
  107. grid_files = create_grids(
  108. slide_images,
  109. cols,
  110. THUMBNAIL_WIDTH,
  111. output_path,
  112. placeholder_regions,
  113. slide_dimensions,
  114. )
  115. # Print saved files
  116. print(f"Created {len(grid_files)} grid(s):")
  117. for grid_file in grid_files:
  118. print(f" - {grid_file}")
  119. except Exception as e:
  120. print(f"Error: {e}")
  121. sys.exit(1)
  122. def create_hidden_slide_placeholder(size):
  123. """Create placeholder image for hidden slides."""
  124. img = Image.new("RGB", size, color="#F0F0F0")
  125. draw = ImageDraw.Draw(img)
  126. line_width = max(5, min(size) // 100)
  127. draw.line([(0, 0), size], fill="#CCCCCC", width=line_width)
  128. draw.line([(size[0], 0), (0, size[1])], fill="#CCCCCC", width=line_width)
  129. return img
  130. def get_placeholder_regions(pptx_path):
  131. """Extract ALL text regions from the presentation.
  132. Returns a tuple of (placeholder_regions, slide_dimensions).
  133. text_regions is a dict mapping slide indices to lists of text regions.
  134. Each region is a dict with 'left', 'top', 'width', 'height' in inches.
  135. slide_dimensions is a tuple of (width_inches, height_inches).
  136. """
  137. prs = Presentation(str(pptx_path))
  138. inventory = extract_text_inventory(pptx_path, prs)
  139. placeholder_regions = {}
  140. # Get actual slide dimensions in inches (EMU to inches conversion)
  141. slide_width_inches = (prs.slide_width or 9144000) / 914400.0
  142. slide_height_inches = (prs.slide_height or 5143500) / 914400.0
  143. for slide_key, shapes in inventory.items():
  144. # Extract slide index from "slide-N" format
  145. slide_idx = int(slide_key.split("-")[1])
  146. regions = []
  147. for shape_key, shape_data in shapes.items():
  148. # The inventory only contains shapes with text, so all shapes should be highlighted
  149. regions.append(
  150. {
  151. "left": shape_data.left,
  152. "top": shape_data.top,
  153. "width": shape_data.width,
  154. "height": shape_data.height,
  155. }
  156. )
  157. if regions:
  158. placeholder_regions[slide_idx] = regions
  159. return placeholder_regions, (slide_width_inches, slide_height_inches)
  160. def convert_to_images(pptx_path, temp_dir, dpi):
  161. """Convert PowerPoint to images via PDF, handling hidden slides."""
  162. # Detect hidden slides
  163. print("Analyzing presentation...")
  164. prs = Presentation(str(pptx_path))
  165. total_slides = len(prs.slides)
  166. # Find hidden slides (1-based indexing for display)
  167. hidden_slides = {
  168. idx + 1
  169. for idx, slide in enumerate(prs.slides)
  170. if slide.element.get("show") == "0"
  171. }
  172. print(f"Total slides: {total_slides}")
  173. if hidden_slides:
  174. print(f"Hidden slides: {sorted(hidden_slides)}")
  175. pdf_path = temp_dir / f"{pptx_path.stem}.pdf"
  176. # Convert to PDF
  177. print("Converting to PDF...")
  178. result = subprocess.run(
  179. [
  180. "soffice",
  181. "--headless",
  182. "--convert-to",
  183. "pdf",
  184. "--outdir",
  185. str(temp_dir),
  186. str(pptx_path),
  187. ],
  188. capture_output=True,
  189. text=True,
  190. )
  191. if result.returncode != 0 or not pdf_path.exists():
  192. raise RuntimeError("PDF conversion failed")
  193. # Convert PDF to images
  194. print(f"Converting to images at {dpi} DPI...")
  195. result = subprocess.run(
  196. ["pdftoppm", "-jpeg", "-r", str(dpi), str(pdf_path), str(temp_dir / "slide")],
  197. capture_output=True,
  198. text=True,
  199. )
  200. if result.returncode != 0:
  201. raise RuntimeError("Image conversion failed")
  202. visible_images = sorted(temp_dir.glob("slide-*.jpg"))
  203. # Create full list with placeholders for hidden slides
  204. all_images = []
  205. visible_idx = 0
  206. # Get placeholder dimensions from first visible slide
  207. if visible_images:
  208. with Image.open(visible_images[0]) as img:
  209. placeholder_size = img.size
  210. else:
  211. placeholder_size = (1920, 1080)
  212. for slide_num in range(1, total_slides + 1):
  213. if slide_num in hidden_slides:
  214. # Create placeholder image for hidden slide
  215. placeholder_path = temp_dir / f"hidden-{slide_num:03d}.jpg"
  216. placeholder_img = create_hidden_slide_placeholder(placeholder_size)
  217. placeholder_img.save(placeholder_path, "JPEG")
  218. all_images.append(placeholder_path)
  219. else:
  220. # Use the actual visible slide image
  221. if visible_idx < len(visible_images):
  222. all_images.append(visible_images[visible_idx])
  223. visible_idx += 1
  224. return all_images
  225. def create_grids(
  226. image_paths,
  227. cols,
  228. width,
  229. output_path,
  230. placeholder_regions=None,
  231. slide_dimensions=None,
  232. ):
  233. """Create multiple thumbnail grids from slide images, max cols×(cols+1) images per grid."""
  234. # Maximum images per grid is cols × (cols + 1) for better proportions
  235. max_images_per_grid = cols * (cols + 1)
  236. grid_files = []
  237. print(
  238. f"Creating grids with {cols} columns (max {max_images_per_grid} images per grid)"
  239. )
  240. # Split images into chunks
  241. for chunk_idx, start_idx in enumerate(
  242. range(0, len(image_paths), max_images_per_grid)
  243. ):
  244. end_idx = min(start_idx + max_images_per_grid, len(image_paths))
  245. chunk_images = image_paths[start_idx:end_idx]
  246. # Create grid for this chunk
  247. grid = create_grid(
  248. chunk_images, cols, width, start_idx, placeholder_regions, slide_dimensions
  249. )
  250. # Generate output filename
  251. if len(image_paths) <= max_images_per_grid:
  252. # Single grid - use base filename without suffix
  253. grid_filename = output_path
  254. else:
  255. # Multiple grids - insert index before extension with dash
  256. stem = output_path.stem
  257. suffix = output_path.suffix
  258. grid_filename = output_path.parent / f"{stem}-{chunk_idx + 1}{suffix}"
  259. # Save grid
  260. grid_filename.parent.mkdir(parents=True, exist_ok=True)
  261. grid.save(str(grid_filename), quality=JPEG_QUALITY)
  262. grid_files.append(str(grid_filename))
  263. return grid_files
  264. def create_grid(
  265. image_paths,
  266. cols,
  267. width,
  268. start_slide_num=0,
  269. placeholder_regions=None,
  270. slide_dimensions=None,
  271. ):
  272. """Create thumbnail grid from slide images with optional placeholder outlining."""
  273. font_size = int(width * FONT_SIZE_RATIO)
  274. label_padding = int(font_size * LABEL_PADDING_RATIO)
  275. # Get dimensions
  276. with Image.open(image_paths[0]) as img:
  277. aspect = img.height / img.width
  278. height = int(width * aspect)
  279. # Calculate grid size
  280. rows = (len(image_paths) + cols - 1) // cols
  281. grid_w = cols * width + (cols + 1) * GRID_PADDING
  282. grid_h = rows * (height + font_size + label_padding * 2) + (rows + 1) * GRID_PADDING
  283. # Create grid
  284. grid = Image.new("RGB", (grid_w, grid_h), "white")
  285. draw = ImageDraw.Draw(grid)
  286. # Load font with size based on thumbnail width
  287. try:
  288. # Use Pillow's default font with size
  289. font = ImageFont.load_default(size=font_size)
  290. except Exception:
  291. # Fall back to basic default font if size parameter not supported
  292. font = ImageFont.load_default()
  293. # Place thumbnails
  294. for i, img_path in enumerate(image_paths):
  295. row, col = i // cols, i % cols
  296. x = col * width + (col + 1) * GRID_PADDING
  297. y_base = (
  298. row * (height + font_size + label_padding * 2) + (row + 1) * GRID_PADDING
  299. )
  300. # Add label with actual slide number
  301. label = f"{start_slide_num + i}"
  302. bbox = draw.textbbox((0, 0), label, font=font)
  303. text_w = bbox[2] - bbox[0]
  304. draw.text(
  305. (x + (width - text_w) // 2, y_base + label_padding),
  306. label,
  307. fill="black",
  308. font=font,
  309. )
  310. # Add thumbnail below label with proportional spacing
  311. y_thumbnail = y_base + label_padding + font_size + label_padding
  312. with Image.open(img_path) as img:
  313. # Get original dimensions before thumbnail
  314. orig_w, orig_h = img.size
  315. # Apply placeholder outlines if enabled
  316. if placeholder_regions and (start_slide_num + i) in placeholder_regions:
  317. # Convert to RGBA for transparency support
  318. if img.mode != "RGBA":
  319. img = img.convert("RGBA")
  320. # Get the regions for this slide
  321. regions = placeholder_regions[start_slide_num + i]
  322. # Calculate scale factors using actual slide dimensions
  323. if slide_dimensions:
  324. slide_width_inches, slide_height_inches = slide_dimensions
  325. else:
  326. # Fallback: estimate from image size at CONVERSION_DPI
  327. slide_width_inches = orig_w / CONVERSION_DPI
  328. slide_height_inches = orig_h / CONVERSION_DPI
  329. x_scale = orig_w / slide_width_inches
  330. y_scale = orig_h / slide_height_inches
  331. # Create a highlight overlay
  332. overlay = Image.new("RGBA", img.size, (255, 255, 255, 0))
  333. overlay_draw = ImageDraw.Draw(overlay)
  334. # Highlight each placeholder region
  335. for region in regions:
  336. # Convert from inches to pixels in the original image
  337. px_left = int(region["left"] * x_scale)
  338. px_top = int(region["top"] * y_scale)
  339. px_width = int(region["width"] * x_scale)
  340. px_height = int(region["height"] * y_scale)
  341. # Draw highlight outline with red color and thick stroke
  342. # Using a bright red outline instead of fill
  343. stroke_width = max(
  344. 5, min(orig_w, orig_h) // 150
  345. ) # Thicker proportional stroke width
  346. overlay_draw.rectangle(
  347. [(px_left, px_top), (px_left + px_width, px_top + px_height)],
  348. outline=(255, 0, 0, 255), # Bright red, fully opaque
  349. width=stroke_width,
  350. )
  351. # Composite the overlay onto the image using alpha blending
  352. img = Image.alpha_composite(img, overlay)
  353. # Convert back to RGB for JPEG saving
  354. img = img.convert("RGB")
  355. img.thumbnail((width, height), Image.Resampling.LANCZOS)
  356. w, h = img.size
  357. tx = x + (width - w) // 2
  358. ty = y_thumbnail + (height - h) // 2
  359. grid.paste(img, (tx, ty))
  360. # Add border
  361. if BORDER_WIDTH > 0:
  362. draw.rectangle(
  363. [
  364. (tx - BORDER_WIDTH, ty - BORDER_WIDTH),
  365. (tx + w + BORDER_WIDTH - 1, ty + h + BORDER_WIDTH - 1),
  366. ],
  367. outline="gray",
  368. width=BORDER_WIDTH,
  369. )
  370. return grid
  371. if __name__ == "__main__":
  372. main()