| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450 |
#!/usr/bin/env python3
"""
Create thumbnail grids from PowerPoint presentation slides.

Creates a grid layout of slide thumbnails with configurable columns (max 6).
Each grid contains up to cols×(cols+1) images. For presentations with more
slides, multiple numbered grid files are created automatically.

The program outputs the names of all files created.

Output:
- Single grid: {prefix}.jpg (if slides fit in one grid)
- Multiple grids: {prefix}-1.jpg, {prefix}-2.jpg, etc.

Grid limits by column count:
- 3 cols: max 12 slides per grid (3×4)
- 4 cols: max 20 slides per grid (4×5)
- 5 cols: max 30 slides per grid (5×6) [default]
- 6 cols: max 42 slides per grid (6×7)

Usage:
    python thumbnail.py input.pptx [output_prefix] [--cols N] [--outline-placeholders]

Examples:
    python thumbnail.py presentation.pptx
    # Creates: thumbnails.jpg (using default prefix)
    # Outputs:
    #   Created 1 grid(s):
    #     - thumbnails.jpg

    python thumbnail.py large-deck.pptx grid --cols 4
    # Creates: grid-1.jpg, grid-2.jpg, grid-3.jpg
    # Outputs:
    #   Created 3 grid(s):
    #     - grid-1.jpg
    #     - grid-2.jpg
    #     - grid-3.jpg

    python thumbnail.py template.pptx analysis --outline-placeholders
    # Creates thumbnail grids with red outlines around text placeholders
"""
- import argparse
- import subprocess
- import sys
- import tempfile
- from pathlib import Path
- from inventory import extract_text_inventory
- from PIL import Image, ImageDraw, ImageFont
- from pptx import Presentation
# --- Rendering settings ------------------------------------------------------
# Width, in pixels, that every thumbnail is scaled to fit.
THUMBNAIL_WIDTH = 300
# Resolution (dots per inch) used when rasterizing the PDF pages.
CONVERSION_DPI = 100
# Upper bound on the number of grid columns.
MAX_COLS = 6
# Column count used when --cols is not supplied.
DEFAULT_COLS = 5
# Quality setting passed when the grid JPEGs are saved.
JPEG_QUALITY = 95

# --- Grid layout settings ----------------------------------------------------
# Gap, in pixels, between neighbouring thumbnails and around the grid edge.
GRID_PADDING = 20
# Thickness, in pixels, of the border drawn around each thumbnail.
BORDER_WIDTH = 2
# Label font size expressed as a fraction of the thumbnail width.
FONT_SIZE_RATIO = 0.12
# Space above and below each label, as a fraction of the font size.
LABEL_PADDING_RATIO = 0.4
def main():
    """Parse the command line and write the thumbnail grid file(s).

    Reads the input path, the optional output prefix, ``--cols`` and
    ``--outline-placeholders`` from ``sys.argv``, renders the slides inside a
    temporary directory, assembles the grids and prints the name of every
    file created.

    Exits with status 1 when the column count is below 1, when the input is
    not an existing ``.pptx`` file, when no slide images are produced, or
    when any processing step raises an exception.
    """
    parser = argparse.ArgumentParser(
        description="Create thumbnail grids from PowerPoint slides."
    )
    parser.add_argument("input", help="Input PowerPoint file (.pptx)")
    parser.add_argument(
        "output_prefix",
        nargs="?",
        default="thumbnails",
        help="Output prefix for image files (default: thumbnails, will create prefix.jpg or prefix-N.jpg)",
    )
    parser.add_argument(
        "--cols",
        type=int,
        default=DEFAULT_COLS,
        help=f"Number of columns (default: {DEFAULT_COLS}, max: {MAX_COLS})",
    )
    parser.add_argument(
        "--outline-placeholders",
        action="store_true",
        help="Outline text placeholders with a colored border",
    )
    args = parser.parse_args()

    # Validate columns. A count below 1 cannot form a grid and would otherwise
    # surface later as a cryptic range()/layout error, so reject it up front.
    if args.cols < 1:
        print(f"Error: Columns must be at least 1 (requested {args.cols})")
        sys.exit(1)
    cols = min(args.cols, MAX_COLS)
    if args.cols > MAX_COLS:
        print(f"Warning: Columns limited to {MAX_COLS} (requested {args.cols})")

    # Validate input. is_file() also rejects a directory that happens to be
    # named "*.pptx", which exists() would let through.
    input_path = Path(args.input)
    if not input_path.is_file() or input_path.suffix.lower() != ".pptx":
        print(f"Error: Invalid PowerPoint file: {args.input}")
        sys.exit(1)

    # Construct output path (always JPG)
    output_path = Path(f"{args.output_prefix}.jpg")
    print(f"Processing: {args.input}")

    try:
        # Intermediate PDF/JPEG files live here and are removed on exit.
        with tempfile.TemporaryDirectory() as temp_dir:
            # Get placeholder regions if outlining is enabled
            placeholder_regions = None
            slide_dimensions = None
            if args.outline_placeholders:
                print("Extracting placeholder regions...")
                placeholder_regions, slide_dimensions = get_placeholder_regions(
                    input_path
                )
                if placeholder_regions:
                    print(f"Found placeholders on {len(placeholder_regions)} slides")

            # Convert slides to images
            slide_images = convert_to_images(input_path, Path(temp_dir), CONVERSION_DPI)
            if not slide_images:
                print("Error: No slides found")
                # SystemExit is not an Exception subclass, so it is not
                # swallowed by the handler below.
                sys.exit(1)
            print(f"Found {len(slide_images)} slides")

            # Create grids (max cols×(cols+1) images per grid)
            grid_files = create_grids(
                slide_images,
                cols,
                THUMBNAIL_WIDTH,
                output_path,
                placeholder_regions,
                slide_dimensions,
            )

            # Print saved files
            print(f"Created {len(grid_files)} grid(s):")
            for grid_file in grid_files:
                print(f"  - {grid_file}")
    except Exception as e:
        # Top-level boundary: report any failure and exit non-zero.
        print(f"Error: {e}")
        sys.exit(1)
def create_hidden_slide_placeholder(size):
    """Build a light-grey image crossed by two diagonals to mark a hidden slide.

    Args:
        size: ``(width, height)`` of the image to create, in pixels.

    Returns:
        A new RGB image of the requested size.
    """
    background = "#F0F0F0"
    stroke_color = "#CCCCCC"
    width, height = size
    # The stroke scales with the shorter side but never drops below 5 px.
    stroke = max(5, min(width, height) // 100)

    placeholder = Image.new("RGB", size, color=background)
    pen = ImageDraw.Draw(placeholder)
    diagonals = (
        [(0, 0), (width, height)],  # top-left to bottom-right
        [(width, 0), (0, height)],  # top-right to bottom-left
    )
    for endpoints in diagonals:
        pen.line(endpoints, fill=stroke_color, width=stroke)
    return placeholder
def get_placeholder_regions(pptx_path):
    """Collect the bounding box of every inventoried text shape, per slide.

    Args:
        pptx_path: Path to the ``.pptx`` file to inspect.

    Returns:
        A tuple ``(placeholder_regions, slide_dimensions)``:

        * ``placeholder_regions`` maps the integer ``N`` parsed from each
          ``"slide-N"`` inventory key to a list of region dicts with the keys
          ``'left'``, ``'top'``, ``'width'`` and ``'height'``. The values are
          copied unchanged from the inventory and are expected to be in
          inches. Slides with no regions are omitted.
        * ``slide_dimensions`` is ``(width_inches, height_inches)``.
    """
    presentation = Presentation(str(pptx_path))
    inventory = extract_text_inventory(pptx_path, presentation)

    # Slide size is stored in EMU; 914400 EMU make one inch. When the file
    # declares no size, fall back to 9144000 x 5143500 EMU.
    emu_per_inch = 914400.0
    dimensions = (
        (presentation.slide_width or 9144000) / emu_per_inch,
        (presentation.slide_height or 5143500) / emu_per_inch,
    )

    regions_by_slide = {}
    for slide_key, shapes in inventory.items():
        # Keys look like "slide-N"; keep only the numeric part.
        slide_number = int(slide_key.split("-")[1])
        # The inventory lists only shapes that carry text, so each one is kept.
        boxes = [
            {
                "left": shape.left,
                "top": shape.top,
                "width": shape.width,
                "height": shape.height,
            }
            for shape in shapes.values()
        ]
        if boxes:
            regions_by_slide[slide_number] = boxes
    return regions_by_slide, dimensions
def _describe_failure(summary, result):
    """Append the tool's stderr output, when it produced any, to *summary*."""
    detail = (result.stderr or "").strip()
    return f"{summary}: {detail}" if detail else summary


def convert_to_images(pptx_path, temp_dir, dpi):
    """Render every slide of a presentation to a JPEG via an intermediate PDF.

    The presentation is converted to PDF with ``soffice`` and the PDF pages
    are rasterized with ``pdftoppm``. Slides flagged as hidden get a generated
    placeholder image instead, so the returned list has one entry per slide.
    The exported PDF is assumed to contain only the visible slides, in order;
    a warning is printed when the page count does not match that assumption.

    Args:
        pptx_path: ``Path`` of the ``.pptx`` file to convert.
        temp_dir: ``Path`` of an existing directory for intermediate files.
        dpi: Resolution passed to ``pdftoppm``.

    Returns:
        A list of image ``Path`` objects in slide order. If fewer pages were
        rendered than there are visible slides, the trailing visible slides
        are left out.

    Raises:
        RuntimeError: If the PDF or image conversion fails. The message
            includes the tool's stderr output when there is any.
    """
    # Detect hidden slides
    print("Analyzing presentation...")
    prs = Presentation(str(pptx_path))
    total_slides = len(prs.slides)

    # Find hidden slides (1-based indexing for display)
    hidden_slides = {
        idx + 1
        for idx, slide in enumerate(prs.slides)
        if slide.element.get("show") == "0"
    }
    print(f"Total slides: {total_slides}")
    if hidden_slides:
        print(f"Hidden slides: {sorted(hidden_slides)}")

    pdf_path = temp_dir / f"{pptx_path.stem}.pdf"

    # Convert to PDF
    print("Converting to PDF...")
    result = subprocess.run(
        [
            "soffice",
            "--headless",
            "--convert-to",
            "pdf",
            "--outdir",
            str(temp_dir),
            str(pptx_path),
        ],
        capture_output=True,
        text=True,
    )
    # The exit status alone is not trusted: the PDF must actually exist.
    if result.returncode != 0 or not pdf_path.exists():
        raise RuntimeError(_describe_failure("PDF conversion failed", result))

    # Convert PDF to images
    print(f"Converting to images at {dpi} DPI...")
    result = subprocess.run(
        ["pdftoppm", "-jpeg", "-r", str(dpi), str(pdf_path), str(temp_dir / "slide")],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        raise RuntimeError(_describe_failure("Image conversion failed", result))

    visible_images = sorted(temp_dir.glob("slide-*.jpg"))

    # A mismatch means thumbnails would be dropped or paired with the wrong
    # slide number; keep the best-effort result but make the problem visible.
    expected_visible = total_slides - len(hidden_slides)
    if len(visible_images) != expected_visible:
        print(
            f"Warning: expected {expected_visible} visible slide image(s) "
            f"but found {len(visible_images)}; thumbnails may be missing or mislabeled"
        )

    # Get placeholder dimensions from first visible slide
    if visible_images:
        with Image.open(visible_images[0]) as img:
            placeholder_size = img.size
    else:
        placeholder_size = (1920, 1080)

    # Create full list with placeholders for hidden slides
    all_images = []
    visible_idx = 0
    for slide_num in range(1, total_slides + 1):
        if slide_num in hidden_slides:
            # Create placeholder image for hidden slide
            placeholder_path = temp_dir / f"hidden-{slide_num:03d}.jpg"
            placeholder_img = create_hidden_slide_placeholder(placeholder_size)
            placeholder_img.save(placeholder_path, "JPEG")
            all_images.append(placeholder_path)
        elif visible_idx < len(visible_images):
            # Use the next rendered page for this visible slide
            all_images.append(visible_images[visible_idx])
            visible_idx += 1
    return all_images
def create_grids(
    image_paths,
    cols,
    width,
    output_path,
    placeholder_regions=None,
    slide_dimensions=None,
):
    """Split the slide images into chunks and save one grid JPEG per chunk.

    Each grid holds at most ``cols * (cols + 1)`` images. When everything
    fits in one grid the file is written to ``output_path`` itself; otherwise
    the files are named ``{stem}-1{suffix}``, ``{stem}-2{suffix}``, ... next
    to ``output_path``. Missing parent directories are created.

    Args:
        image_paths: Slide image paths in slide order.
        cols: Number of columns per grid; must be at least 1.
        width: Thumbnail width in pixels.
        output_path: ``Path`` of the single-grid output file.
        placeholder_regions: Optional regions forwarded to ``create_grid``.
        slide_dimensions: Optional slide size forwarded to ``create_grid``.

    Returns:
        The saved file names as strings, in grid order (empty when
        ``image_paths`` is empty).

    Raises:
        ValueError: If ``cols`` is less than 1.
    """
    # Fail with a clear message instead of a cryptic range() step error
    # (cols of 0 or -1) or a nonsensical layout (cols below -1).
    if cols < 1:
        raise ValueError(f"cols must be at least 1 (got {cols})")

    # Maximum images per grid is cols × (cols + 1) for better proportions
    max_images_per_grid = cols * (cols + 1)
    print(
        f"Creating grids with {cols} columns (max {max_images_per_grid} images per grid)"
    )

    # Whether a numeric suffix is needed does not change between chunks.
    single_grid = len(image_paths) <= max_images_per_grid

    grid_files = []
    for chunk_idx, start_idx in enumerate(
        range(0, len(image_paths), max_images_per_grid)
    ):
        # Slicing clamps at the end of the list, so no explicit min() is needed.
        chunk_images = image_paths[start_idx : start_idx + max_images_per_grid]

        # start_idx is forwarded so labels continue across grids.
        grid = create_grid(
            chunk_images, cols, width, start_idx, placeholder_regions, slide_dimensions
        )

        if single_grid:
            # Single grid - use base filename without suffix
            grid_filename = output_path
        else:
            # Multiple grids - insert 1-based index before extension with dash
            grid_filename = (
                output_path.parent
                / f"{output_path.stem}-{chunk_idx + 1}{output_path.suffix}"
            )

        # Save grid
        grid_filename.parent.mkdir(parents=True, exist_ok=True)
        grid.save(str(grid_filename), quality=JPEG_QUALITY)
        grid_files.append(str(grid_filename))
    return grid_files
def _draw_region_outlines(img, regions, slide_dimensions):
    """Return a new RGB image: *img* with a red rectangle around each region."""
    orig_w, orig_h = img.size

    # The transparent overlay is alpha-composited, which needs RGBA input.
    if img.mode != "RGBA":
        img = img.convert("RGBA")

    if slide_dimensions:
        slide_w_in, slide_h_in = slide_dimensions
    else:
        # Fallback: estimate the slide size from the image at CONVERSION_DPI
        slide_w_in = orig_w / CONVERSION_DPI
        slide_h_in = orig_h / CONVERSION_DPI
    px_per_inch_x = orig_w / slide_w_in
    px_per_inch_y = orig_h / slide_h_in

    # Proportional stroke with a 5 px floor; identical for every region.
    stroke = max(5, min(orig_w, orig_h) // 150)

    overlay = Image.new("RGBA", img.size, (255, 255, 255, 0))
    pen = ImageDraw.Draw(overlay)
    for region in regions:
        # Convert from slide units to pixels in the full-size image.
        left = int(region["left"] * px_per_inch_x)
        top = int(region["top"] * px_per_inch_y)
        right = left + int(region["width"] * px_per_inch_x)
        bottom = top + int(region["height"] * px_per_inch_y)
        pen.rectangle(
            [(left, top), (right, bottom)],
            outline=(255, 0, 0, 255),  # bright red, fully opaque
            width=stroke,
        )

    # Back to RGB because the grid is saved as JPEG.
    return Image.alpha_composite(img, overlay).convert("RGB")


def create_grid(
    image_paths,
    cols,
    width,
    start_slide_num=0,
    placeholder_regions=None,
    slide_dimensions=None,
):
    """Compose one labelled thumbnail grid from a chunk of slide images.

    Each cell holds a centered label, ``start_slide_num`` plus the image's
    position in ``image_paths``, above a bordered thumbnail. When
    ``placeholder_regions`` has an entry for that same number, its regions
    are outlined in red on the full-size image before it is scaled down.

    Args:
        image_paths: Non-empty sequence of slide image paths for this grid.
        cols: Number of columns.
        width: Thumbnail width in pixels; the height follows the aspect
            ratio of the first image.
        start_slide_num: Number given to the first image's label.
        placeholder_regions: Optional dict mapping slide number to a list of
            region dicts (``left``, ``top``, ``width``, ``height``).
        slide_dimensions: Optional ``(width_inches, height_inches)`` used to
            scale regions to pixels.

    Returns:
        The assembled grid as an RGB image.
    """
    font_size = int(width * FONT_SIZE_RATIO)
    label_padding = int(font_size * LABEL_PADDING_RATIO)
    # Vertical space taken by a label including the padding above and below.
    label_band = font_size + label_padding * 2

    # Every cell uses the aspect ratio of the first image.
    with Image.open(image_paths[0]) as first:
        height = int(width * (first.height / first.width))

    cell_height = height + label_band
    rows = (len(image_paths) + cols - 1) // cols
    canvas_size = (
        cols * width + (cols + 1) * GRID_PADDING,
        rows * cell_height + (rows + 1) * GRID_PADDING,
    )
    grid = Image.new("RGB", canvas_size, "white")
    draw = ImageDraw.Draw(grid)

    try:
        # Pillow's default font at the requested size
        font = ImageFont.load_default(size=font_size)
    except Exception:
        # Fall back to the basic default font if the size parameter is unsupported
        font = ImageFont.load_default()

    for position, img_path in enumerate(image_paths):
        row, col = divmod(position, cols)
        slide_number = start_slide_num + position
        cell_x = col * width + (col + 1) * GRID_PADDING
        cell_y = row * cell_height + (row + 1) * GRID_PADDING

        # Label, horizontally centered over the thumbnail
        label = f"{slide_number}"
        text_box = draw.textbbox((0, 0), label, font=font)
        text_width = text_box[2] - text_box[0]
        draw.text(
            (cell_x + (width - text_width) // 2, cell_y + label_padding),
            label,
            fill="black",
            font=font,
        )

        # The thumbnail sits directly below the label band.
        thumb_area_y = cell_y + label_band
        with Image.open(img_path) as img:
            if placeholder_regions and slide_number in placeholder_regions:
                img = _draw_region_outlines(
                    img, placeholder_regions[slide_number], slide_dimensions
                )

            # Scale in place, then center inside the cell.
            img.thumbnail((width, height), Image.Resampling.LANCZOS)
            thumb_w, thumb_h = img.size
            paste_x = cell_x + (width - thumb_w) // 2
            paste_y = thumb_area_y + (height - thumb_h) // 2
            grid.paste(img, (paste_x, paste_y))

            if BORDER_WIDTH > 0:
                draw.rectangle(
                    [
                        (paste_x - BORDER_WIDTH, paste_y - BORDER_WIDTH),
                        (
                            paste_x + thumb_w + BORDER_WIDTH - 1,
                            paste_y + thumb_h + BORDER_WIDTH - 1,
                        ),
                    ],
                    outline="gray",
                    width=BORDER_WIDTH,
                )
    return grid
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
|