| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385 |
- #!/usr/bin/env python3
- """Apply text replacements to PowerPoint presentation.
- Usage:
- python replace.py <input.pptx> <replacements.json> <output.pptx>
- The replacements JSON should have the structure output by inventory.py.
- ALL text shapes identified by inventory.py will have their text cleared
- unless "paragraphs" is specified in the replacements for that shape.
- """
- import json
- import sys
- from pathlib import Path
- from typing import Any, Dict, List
- from inventory import InventoryData, extract_text_inventory
- from pptx import Presentation
- from pptx.dml.color import RGBColor
- from pptx.enum.dml import MSO_THEME_COLOR
- from pptx.enum.text import PP_ALIGN
- from pptx.oxml.xmlchemy import OxmlElement
- from pptx.util import Pt
def clear_paragraph_bullets(paragraph):
    """Strip existing bullet markup from a paragraph's properties element.

    Every direct child of the paragraph's ``pPr`` element whose tag ends in
    ``buChar``, ``buNone``, ``buAutoNum`` or ``buFont`` is removed; all other
    children are left in place.

    Args:
        paragraph: Paragraph object exposing ``_element.get_or_add_pPr()``.

    Returns:
        The paragraph's ``pPr`` element (created if it was missing), so the
        caller can keep editing it.
    """
    bullet_suffixes = ("buChar", "buNone", "buAutoNum", "buFont")
    props = paragraph._element.get_or_add_pPr()
    # Collect first, remove second: never mutate the element while walking it.
    stale = [node for node in props if node.tag.endswith(bullet_suffixes)]
    for node in stale:
        props.remove(node)
    return props
def apply_paragraph_properties(paragraph, para_data: Dict[str, Any]):
    """Write text, bullet, alignment, spacing and font settings to a paragraph.

    Args:
        paragraph: Paragraph to update in place.
        para_data: Description of the paragraph. Keys read here are ``text``
            (default ``""``), ``bullet`` (default ``False``), ``level``
            (default ``0``, bullets only), ``font_size`` (default ``18.0``
            for the indent calculation), ``alignment`` (``"LEFT"``,
            ``"CENTER"``, ``"RIGHT"`` or ``"JUSTIFY"``; any other value is
            ignored), ``space_before``, ``space_after`` and ``line_spacing``
            (each wrapped in ``Pt``). The same dict is then handed to
            ``apply_font_properties`` for the run-level settings.
    """
    # Drop any previous bullet markup; the returned pPr is edited directly.
    props = clear_paragraph_bullets(paragraph)
    alignment_given = "alignment" in para_data

    # NOTE(review): buNone is inserted at index 0 and buChar is appended last
    # regardless of what else pPr already holds -- confirm consumers accept
    # that child order.
    if not para_data.get("bullet", False):
        # Plain text: no indentation and an explicit "no bullet" marker.
        props.attrib["marL"] = "0"
        props.attrib["indent"] = "0"
        no_bullet = OxmlElement("a:buNone")
        props.insert(0, no_bullet)
    else:
        depth = para_data.get("level", 0)
        paragraph.level = depth

        # Indentation scales with the font size: one 1.6x step per level
        # (level 0 gets one step) and a hanging indent of -0.8x.
        # 12700 converts points to EMU.
        size_pt = para_data.get("font_size", 18.0)
        left_margin_emu = int((size_pt * (1.6 + depth * 1.6)) * 12700)
        hanging_emu = int(-size_pt * 0.8 * 12700)
        props.attrib["marL"] = str(left_margin_emu)
        props.attrib["indent"] = str(hanging_emu)

        marker = OxmlElement("a:buChar")
        marker.set("char", "•")
        props.append(marker)

        # Bullets are left-aligned unless the caller asked for something else.
        if not alignment_given:
            paragraph.alignment = PP_ALIGN.LEFT

    if alignment_given:
        chosen = {
            "LEFT": PP_ALIGN.LEFT,
            "CENTER": PP_ALIGN.CENTER,
            "RIGHT": PP_ALIGN.RIGHT,
            "JUSTIFY": PP_ALIGN.JUSTIFY,
        }.get(para_data["alignment"])
        if chosen is not None:
            paragraph.alignment = chosen

    # Spacing keys share their names with the paragraph attributes they set.
    for spacing_key in ("space_before", "space_after", "line_spacing"):
        if spacing_key in para_data:
            setattr(paragraph, spacing_key, Pt(para_data[spacing_key]))

    # Reuse the first run when there is one, otherwise create it.
    current_runs = paragraph.runs
    target_run = current_runs[0] if current_runs else paragraph.add_run()
    target_run.text = para_data.get("text", "")

    apply_font_properties(target_run, para_data)
def apply_font_properties(run, para_data: Dict[str, Any]):
    """Apply the font settings present in ``para_data`` to a text run.

    Only keys that are present are applied; everything else on the run's font
    is left untouched.

    Args:
        run: Text run whose ``font`` is updated in place.
        para_data: Paragraph description. Keys read here are ``bold``,
            ``italic``, ``underline``, ``font_size`` (wrapped in ``Pt``),
            ``font_name``, ``color`` (exactly six hex digits, leading ``#``
            optional) and ``theme_color`` (name of an ``MSO_THEME_COLOR``
            member, e.g. ``"DARK_1"`` or ``"ACCENT_1"``).

    An RGB ``color`` takes precedence over ``theme_color``. A malformed
    ``color`` is reported with a warning instead of raising or being dropped
    silently, and ``theme_color`` is then used as the fallback when given.
    An unknown ``theme_color`` name is likewise reported with a warning.
    """
    font = run.font

    if "bold" in para_data:
        font.bold = para_data["bold"]
    if "italic" in para_data:
        font.italic = para_data["italic"]
    if "underline" in para_data:
        font.underline = para_data["underline"]
    if "font_size" in para_data:
        font.size = Pt(para_data["font_size"])
    if "font_name" in para_data:
        font.name = para_data["font_name"]

    # Color: prefer RGB, fall back to theme_color.
    rgb_applied = False
    if "color" in para_data:
        raw_color = para_data["color"]
        color_hex = raw_color.lstrip("#") if isinstance(raw_color, str) else ""
        # Validate up front so bad input yields a warning rather than a
        # ValueError out of int(..., 16) or a silently skipped color.
        is_valid = len(color_hex) == 6 and all(
            ch in "0123456789abcdefABCDEF" for ch in color_hex
        )
        if is_valid:
            r = int(color_hex[0:2], 16)
            g = int(color_hex[2:4], 16)
            b = int(color_hex[4:6], 16)
            # font.color is only touched when a color is actually written.
            font.color.rgb = RGBColor(r, g, b)
            rgb_applied = True
        else:
            print(
                f" WARNING: Invalid color value '{raw_color}' "
                "(expected 6 hex digits, e.g. 'FF0000')"
            )

    if not rgb_applied and "theme_color" in para_data:
        # Look the theme color up by member name (e.g. "DARK_1", "ACCENT_1").
        theme_name = para_data["theme_color"]
        try:
            font.color.theme_color = getattr(MSO_THEME_COLOR, theme_name)
        except AttributeError:
            print(f" WARNING: Unknown theme color name '{theme_name}'")
def detect_frame_overflow(inventory: InventoryData) -> Dict[str, Dict[str, float]]:
    """Collect the shapes whose text runs past the bottom of their frame.

    Args:
        inventory: Mapping of slide key -> shape key -> shape data, where each
            shape data object exposes ``frame_overflow_bottom``.

    Returns:
        Nested dict ``slide_key -> shape_key -> frame_overflow_bottom``
        (overflow in inches). A shape appears only when its
        ``frame_overflow_bottom`` is not ``None``, and a slide appears only
        when at least one of its shapes does.
    """
    overflowing: Dict[str, Dict[str, float]] = {}
    for slide_id, shapes in inventory.items():
        for shape_id, info in shapes.items():
            amount = info.frame_overflow_bottom
            if amount is None:
                continue
            # Create the per-slide dict lazily so clean slides stay absent.
            overflowing.setdefault(slide_id, {})[shape_id] = amount
    return overflowing
def validate_replacements(inventory: InventoryData, replacements: Dict) -> List[str]:
    """Check that every slide and shape named in ``replacements`` is known.

    Top-level keys that do not start with ``"slide-"`` are ignored. For each
    shape key missing from the inventory, the error message also lists the
    inventory shapes on that slide that have no replacement entry, each with
    a preview of up to 50 characters of its first paragraph, to help spot
    the intended key.

    Args:
        inventory: Mapping of slide key -> shape key -> shape data, where each
            shape data object exposes ``paragraphs``.
        replacements: Parsed replacement JSON, slide key -> shape key -> data.

    Returns:
        List of error messages; empty when everything matches.
    """

    def label(shape_id, shape):
        # Shape key, plus a short preview of its first paragraph if it has text.
        paras = shape.paragraphs
        if not (paras and paras[0].text):
            return shape_id
        full_text = paras[0].text
        preview = full_text[:50]
        if len(full_text) > 50:
            preview += "..."
        return f"{shape_id} ('{preview}')"

    problems: List[str] = []
    for slide_id, requested in replacements.items():
        if not slide_id.startswith("slide-"):
            continue

        if slide_id not in inventory:
            problems.append(f"Slide '{slide_id}' not found in inventory")
            continue

        known = inventory[slide_id]
        # The summary is the same for every missing shape on this slide, so
        # build it once, on first use.
        summary = None
        for shape_id in requested.keys():
            if shape_id in known:
                continue
            if summary is None:
                unassigned = [
                    label(candidate, known[candidate])
                    for candidate in known.keys()
                    if candidate not in requested
                ]
                summary = ", ".join(sorted(unassigned)) if unassigned else "none"
            problems.append(
                f"Shape '{shape_id}' not found on '{slide_id}'. "
                f"Shapes without replacements: {summary}"
            )
    return problems
def check_duplicate_keys(pairs):
    """Build a dict from ``(key, value)`` pairs, rejecting repeated keys.

    Intended as a ``json.load`` ``object_pairs_hook``, so a JSON object that
    repeats a key fails loudly instead of silently keeping the last value.

    Args:
        pairs: Iterable of ``(key, value)`` tuples for one JSON object.

    Returns:
        Dict holding the pairs in their original order.

    Raises:
        ValueError: If the same key appears more than once.
    """
    collected = {}
    for name, item in pairs:
        if name not in collected:
            collected[name] = item
            continue
        raise ValueError(f"Duplicate key found in JSON: '{name}'")
    return collected
def apply_replacements(pptx_file: str, json_file: str, output_file: str):
    """Apply text replacements from JSON to PowerPoint presentation.

    Every shape in the inventory has its text frame cleared. Shapes whose
    replacement entry contains a ``"paragraphs"`` list are then refilled from
    that list. The edited deck is saved to a temporary file and inventoried
    again; the output file is written only if no shape's overflow grew and
    the fresh inventory reports no warnings.

    Args:
        pptx_file: Path of the presentation to read.
        json_file: Path of the replacements JSON (UTF-8), structured as
            slide key -> shape key -> ``{"paragraphs": [...]}``.
        output_file: Path the updated presentation is saved to.

    Raises:
        ValueError: If the JSON repeats a key, names a slide or shape that is
            not in the inventory, or if the result has worsened text overflow
            or formatting warnings.
    """
    import tempfile

    prs = Presentation(pptx_file)

    # Pass prs so the inventory's shape references belong to the very
    # Presentation instance that is edited and saved below.
    inventory = extract_text_inventory(Path(pptx_file), prs)

    # Baseline overflow, used later to tell "already overflowing" from
    # "made worse by the replacement".
    original_overflow = detect_frame_overflow(inventory)

    # JSON text is UTF-8; do not rely on the platform's locale encoding,
    # which would corrupt or reject non-ASCII replacement text.
    with open(json_file, "r", encoding="utf-8") as f:
        replacements = json.load(f, object_pairs_hook=check_duplicate_keys)

    errors = validate_replacements(inventory, replacements)
    if errors:
        print("ERROR: Invalid shapes in replacement JSON:")
        for error in errors:
            print(f" - {error}")
        print("\nPlease check the inventory and update your replacement JSON.")
        print(
            "You can regenerate the inventory with: python inventory.py <input.pptx> <output.json>"
        )
        raise ValueError(f"Found {len(errors)} validation error(s)")

    # Statistics for the final report.
    shapes_processed = 0
    shapes_cleared = 0
    shapes_replaced = 0

    for slide_key, shapes_dict in inventory.items():
        if not slide_key.startswith("slide-"):
            continue

        slide_index = int(slide_key.split("-")[1])
        if slide_index >= len(prs.slides):
            print(f"Warning: Slide {slide_index} not found")
            continue

        for shape_key, shape_data in shapes_dict.items():
            shapes_processed += 1

            shape = shape_data.shape
            if not shape:
                print(f"Warning: {shape_key} has no shape reference")
                continue

            # Every inventoried shape is cleared, with or without replacement.
            text_frame = shape.text_frame  # type: ignore
            text_frame.clear()  # type: ignore
            shapes_cleared += 1

            replacement_shape_data = replacements.get(slide_key, {}).get(shape_key, {})
            if "paragraphs" not in replacement_shape_data:
                continue

            shapes_replaced += 1

            # The first paragraph is reused; later ones are appended.
            for i, para_data in enumerate(replacement_shape_data["paragraphs"]):
                if i == 0:
                    p = text_frame.paragraphs[0]  # type: ignore
                else:
                    p = text_frame.add_paragraph()  # type: ignore
                apply_paragraph_properties(p, para_data)

    # Check the result on a saved copy rather than on prs itself:
    # extract_text_inventory accesses font.color, which adds empty
    # <a:solidFill/> elements to the presentation it inspects.
    with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp:
        tmp_path = Path(tmp.name)
    # The handle is closed before saving, and the save sits inside the try so
    # the temp file is removed even when saving or re-inventorying fails.
    try:
        prs.save(str(tmp_path))
        updated_inventory = extract_text_inventory(tmp_path)
        updated_overflow = detect_frame_overflow(updated_inventory)
    finally:
        tmp_path.unlink()

    # A shape is an error only if its overflow grew; shapes with no previous
    # overflow are compared against 0.
    overflow_errors = []
    for slide_key, shape_overflows in updated_overflow.items():
        for shape_key, new_overflow in shape_overflows.items():
            original = original_overflow.get(slide_key, {}).get(shape_key, 0.0)
            if new_overflow > original + 0.01:  # Small tolerance for rounding
                increase = new_overflow - original
                overflow_errors.append(
                    f'{slide_key}/{shape_key}: overflow worsened by {increase:.2f}" '
                    f'(was {original:.2f}", now {new_overflow:.2f}")'
                )

    # Warnings attached to the shapes by the fresh inventory.
    warnings = []
    for slide_key, shapes_dict in updated_inventory.items():
        for shape_key, shape_data in shapes_dict.items():
            if shape_data.warnings:
                for warning in shape_data.warnings:
                    warnings.append(f"{slide_key}/{shape_key}: {warning}")

    # Any issue aborts before the output file is written.
    if overflow_errors or warnings:
        print("\nERROR: Issues detected in replacement output:")
        if overflow_errors:
            print("\nText overflow worsened:")
            for error in overflow_errors:
                print(f" - {error}")
        if warnings:
            print("\nFormatting warnings:")
            for warning in warnings:
                print(f" - {warning}")
        print("\nPlease fix these issues before saving.")
        raise ValueError(
            f"Found {len(overflow_errors)} overflow error(s) and {len(warnings)} warning(s)"
        )

    prs.save(output_file)

    print(f"Saved updated presentation to: {output_file}")
    print(f"Processed {len(prs.slides)} slides")
    print(f" - Shapes processed: {shapes_processed}")
    print(f" - Shapes cleared: {shapes_cleared}")
    print(f" - Shapes replaced: {shapes_replaced}")
def main():
    """Command-line entry point.

    Expects exactly three arguments: input .pptx, replacements JSON and
    output .pptx. Prints the module docstring as usage text when the count is
    wrong, and exits with status 1 on bad usage, on a missing input or JSON
    file, or when applying the replacements raises.
    """
    import traceback

    cli_args = sys.argv
    if len(cli_args) != 4:
        print(__doc__)
        sys.exit(1)

    input_pptx, replacements_json, output_pptx = (Path(arg) for arg in cli_args[1:])

    # Both source files must exist before any work starts; checked in order.
    required = (
        (input_pptx, f"Error: Input file '{input_pptx}' not found"),
        (
            replacements_json,
            f"Error: Replacements JSON file '{replacements_json}' not found",
        ),
    )
    for candidate, complaint in required:
        if not candidate.exists():
            print(complaint)
            sys.exit(1)

    try:
        apply_replacements(str(input_pptx), str(replacements_json), str(output_pptx))
    except Exception as e:
        # Top-level boundary: report, show the traceback, signal failure.
        print(f"Error applying replacements: {e}")
        traceback.print_exc()
        sys.exit(1)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|