replace.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385
  1. #!/usr/bin/env python3
  2. """Apply text replacements to PowerPoint presentation.
  3. Usage:
  4. python replace.py <input.pptx> <replacements.json> <output.pptx>
  5. The replacements JSON should have the structure output by inventory.py.
  6. ALL text shapes identified by inventory.py will have their text cleared
  7. unless "paragraphs" is specified in the replacements for that shape.
  8. """
  9. import json
  10. import sys
  11. from pathlib import Path
  12. from typing import Any, Dict, List
  13. from inventory import InventoryData, extract_text_inventory
  14. from pptx import Presentation
  15. from pptx.dml.color import RGBColor
  16. from pptx.enum.dml import MSO_THEME_COLOR
  17. from pptx.enum.text import PP_ALIGN
  18. from pptx.oxml.xmlchemy import OxmlElement
  19. from pptx.util import Pt
  20. def clear_paragraph_bullets(paragraph):
  21. """Clear bullet formatting from a paragraph."""
  22. pPr = paragraph._element.get_or_add_pPr()
  23. # Remove existing bullet elements
  24. for child in list(pPr):
  25. if (
  26. child.tag.endswith("buChar")
  27. or child.tag.endswith("buNone")
  28. or child.tag.endswith("buAutoNum")
  29. or child.tag.endswith("buFont")
  30. ):
  31. pPr.remove(child)
  32. return pPr
  33. def apply_paragraph_properties(paragraph, para_data: Dict[str, Any]):
  34. """Apply formatting properties to a paragraph."""
  35. # Get the text but don't set it on paragraph directly yet
  36. text = para_data.get("text", "")
  37. # Get or create paragraph properties
  38. pPr = clear_paragraph_bullets(paragraph)
  39. # Handle bullet formatting
  40. if para_data.get("bullet", False):
  41. level = para_data.get("level", 0)
  42. paragraph.level = level
  43. # Calculate font-proportional indentation
  44. font_size = para_data.get("font_size", 18.0)
  45. level_indent_emu = int((font_size * (1.6 + level * 1.6)) * 12700)
  46. hanging_indent_emu = int(-font_size * 0.8 * 12700)
  47. # Set indentation
  48. pPr.attrib["marL"] = str(level_indent_emu)
  49. pPr.attrib["indent"] = str(hanging_indent_emu)
  50. # Add bullet character
  51. buChar = OxmlElement("a:buChar")
  52. buChar.set("char", "•")
  53. pPr.append(buChar)
  54. # Default to left alignment for bullets if not specified
  55. if "alignment" not in para_data:
  56. paragraph.alignment = PP_ALIGN.LEFT
  57. else:
  58. # Remove indentation for non-bullet text
  59. pPr.attrib["marL"] = "0"
  60. pPr.attrib["indent"] = "0"
  61. # Add buNone element
  62. buNone = OxmlElement("a:buNone")
  63. pPr.insert(0, buNone)
  64. # Apply alignment
  65. if "alignment" in para_data:
  66. alignment_map = {
  67. "LEFT": PP_ALIGN.LEFT,
  68. "CENTER": PP_ALIGN.CENTER,
  69. "RIGHT": PP_ALIGN.RIGHT,
  70. "JUSTIFY": PP_ALIGN.JUSTIFY,
  71. }
  72. if para_data["alignment"] in alignment_map:
  73. paragraph.alignment = alignment_map[para_data["alignment"]]
  74. # Apply spacing
  75. if "space_before" in para_data:
  76. paragraph.space_before = Pt(para_data["space_before"])
  77. if "space_after" in para_data:
  78. paragraph.space_after = Pt(para_data["space_after"])
  79. if "line_spacing" in para_data:
  80. paragraph.line_spacing = Pt(para_data["line_spacing"])
  81. # Apply run-level formatting
  82. if not paragraph.runs:
  83. run = paragraph.add_run()
  84. run.text = text
  85. else:
  86. run = paragraph.runs[0]
  87. run.text = text
  88. # Apply font properties
  89. apply_font_properties(run, para_data)
  90. def apply_font_properties(run, para_data: Dict[str, Any]):
  91. """Apply font properties to a text run."""
  92. if "bold" in para_data:
  93. run.font.bold = para_data["bold"]
  94. if "italic" in para_data:
  95. run.font.italic = para_data["italic"]
  96. if "underline" in para_data:
  97. run.font.underline = para_data["underline"]
  98. if "font_size" in para_data:
  99. run.font.size = Pt(para_data["font_size"])
  100. if "font_name" in para_data:
  101. run.font.name = para_data["font_name"]
  102. # Apply color - prefer RGB, fall back to theme_color
  103. if "color" in para_data:
  104. color_hex = para_data["color"].lstrip("#")
  105. if len(color_hex) == 6:
  106. r = int(color_hex[0:2], 16)
  107. g = int(color_hex[2:4], 16)
  108. b = int(color_hex[4:6], 16)
  109. run.font.color.rgb = RGBColor(r, g, b)
  110. elif "theme_color" in para_data:
  111. # Get theme color by name (e.g., "DARK_1", "ACCENT_1")
  112. theme_name = para_data["theme_color"]
  113. try:
  114. run.font.color.theme_color = getattr(MSO_THEME_COLOR, theme_name)
  115. except AttributeError:
  116. print(f" WARNING: Unknown theme color name '{theme_name}'")
  117. def detect_frame_overflow(inventory: InventoryData) -> Dict[str, Dict[str, float]]:
  118. """Detect text overflow in shapes (text exceeding shape bounds).
  119. Returns dict of slide_key -> shape_key -> overflow_inches.
  120. Only includes shapes that have text overflow.
  121. """
  122. overflow_map = {}
  123. for slide_key, shapes_dict in inventory.items():
  124. for shape_key, shape_data in shapes_dict.items():
  125. # Check for frame overflow (text exceeding shape bounds)
  126. if shape_data.frame_overflow_bottom is not None:
  127. if slide_key not in overflow_map:
  128. overflow_map[slide_key] = {}
  129. overflow_map[slide_key][shape_key] = shape_data.frame_overflow_bottom
  130. return overflow_map
  131. def validate_replacements(inventory: InventoryData, replacements: Dict) -> List[str]:
  132. """Validate that all shapes in replacements exist in inventory.
  133. Returns list of error messages.
  134. """
  135. errors = []
  136. for slide_key, shapes_data in replacements.items():
  137. if not slide_key.startswith("slide-"):
  138. continue
  139. # Check if slide exists
  140. if slide_key not in inventory:
  141. errors.append(f"Slide '{slide_key}' not found in inventory")
  142. continue
  143. # Check each shape
  144. for shape_key in shapes_data.keys():
  145. if shape_key not in inventory[slide_key]:
  146. # Find shapes without replacements defined and show their content
  147. unused_with_content = []
  148. for k in inventory[slide_key].keys():
  149. if k not in shapes_data:
  150. shape_data = inventory[slide_key][k]
  151. # Get text from paragraphs as preview
  152. paragraphs = shape_data.paragraphs
  153. if paragraphs and paragraphs[0].text:
  154. first_text = paragraphs[0].text[:50]
  155. if len(paragraphs[0].text) > 50:
  156. first_text += "..."
  157. unused_with_content.append(f"{k} ('{first_text}')")
  158. else:
  159. unused_with_content.append(k)
  160. errors.append(
  161. f"Shape '{shape_key}' not found on '{slide_key}'. "
  162. f"Shapes without replacements: {', '.join(sorted(unused_with_content)) if unused_with_content else 'none'}"
  163. )
  164. return errors
  165. def check_duplicate_keys(pairs):
  166. """Check for duplicate keys when loading JSON."""
  167. result = {}
  168. for key, value in pairs:
  169. if key in result:
  170. raise ValueError(f"Duplicate key found in JSON: '{key}'")
  171. result[key] = value
  172. return result
  173. def apply_replacements(pptx_file: str, json_file: str, output_file: str):
  174. """Apply text replacements from JSON to PowerPoint presentation."""
  175. # Load presentation
  176. prs = Presentation(pptx_file)
  177. # Get inventory of all text shapes (returns ShapeData objects)
  178. # Pass prs to use same Presentation instance
  179. inventory = extract_text_inventory(Path(pptx_file), prs)
  180. # Detect text overflow in original presentation
  181. original_overflow = detect_frame_overflow(inventory)
  182. # Load replacement data with duplicate key detection
  183. with open(json_file, "r") as f:
  184. replacements = json.load(f, object_pairs_hook=check_duplicate_keys)
  185. # Validate replacements
  186. errors = validate_replacements(inventory, replacements)
  187. if errors:
  188. print("ERROR: Invalid shapes in replacement JSON:")
  189. for error in errors:
  190. print(f" - {error}")
  191. print("\nPlease check the inventory and update your replacement JSON.")
  192. print(
  193. "You can regenerate the inventory with: python inventory.py <input.pptx> <output.json>"
  194. )
  195. raise ValueError(f"Found {len(errors)} validation error(s)")
  196. # Track statistics
  197. shapes_processed = 0
  198. shapes_cleared = 0
  199. shapes_replaced = 0
  200. # Process each slide from inventory
  201. for slide_key, shapes_dict in inventory.items():
  202. if not slide_key.startswith("slide-"):
  203. continue
  204. slide_index = int(slide_key.split("-")[1])
  205. if slide_index >= len(prs.slides):
  206. print(f"Warning: Slide {slide_index} not found")
  207. continue
  208. # Process each shape from inventory
  209. for shape_key, shape_data in shapes_dict.items():
  210. shapes_processed += 1
  211. # Get the shape directly from ShapeData
  212. shape = shape_data.shape
  213. if not shape:
  214. print(f"Warning: {shape_key} has no shape reference")
  215. continue
  216. # ShapeData already validates text_frame in __init__
  217. text_frame = shape.text_frame # type: ignore
  218. text_frame.clear() # type: ignore
  219. shapes_cleared += 1
  220. # Check for replacement paragraphs
  221. replacement_shape_data = replacements.get(slide_key, {}).get(shape_key, {})
  222. if "paragraphs" not in replacement_shape_data:
  223. continue
  224. shapes_replaced += 1
  225. # Add replacement paragraphs
  226. for i, para_data in enumerate(replacement_shape_data["paragraphs"]):
  227. if i == 0:
  228. p = text_frame.paragraphs[0] # type: ignore
  229. else:
  230. p = text_frame.add_paragraph() # type: ignore
  231. apply_paragraph_properties(p, para_data)
  232. # Check for issues after replacements
  233. # Save to a temporary file and reload to avoid modifying the presentation during inventory
  234. # (extract_text_inventory accesses font.color which adds empty <a:solidFill/> elements)
  235. import tempfile
  236. with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp:
  237. tmp_path = Path(tmp.name)
  238. prs.save(str(tmp_path))
  239. try:
  240. updated_inventory = extract_text_inventory(tmp_path)
  241. updated_overflow = detect_frame_overflow(updated_inventory)
  242. finally:
  243. tmp_path.unlink() # Clean up temp file
  244. # Check if any text overflow got worse
  245. overflow_errors = []
  246. for slide_key, shape_overflows in updated_overflow.items():
  247. for shape_key, new_overflow in shape_overflows.items():
  248. # Get original overflow (0 if there was no overflow before)
  249. original = original_overflow.get(slide_key, {}).get(shape_key, 0.0)
  250. # Error if overflow increased
  251. if new_overflow > original + 0.01: # Small tolerance for rounding
  252. increase = new_overflow - original
  253. overflow_errors.append(
  254. f'{slide_key}/{shape_key}: overflow worsened by {increase:.2f}" '
  255. f'(was {original:.2f}", now {new_overflow:.2f}")'
  256. )
  257. # Collect warnings from updated shapes
  258. warnings = []
  259. for slide_key, shapes_dict in updated_inventory.items():
  260. for shape_key, shape_data in shapes_dict.items():
  261. if shape_data.warnings:
  262. for warning in shape_data.warnings:
  263. warnings.append(f"{slide_key}/{shape_key}: {warning}")
  264. # Fail if there are any issues
  265. if overflow_errors or warnings:
  266. print("\nERROR: Issues detected in replacement output:")
  267. if overflow_errors:
  268. print("\nText overflow worsened:")
  269. for error in overflow_errors:
  270. print(f" - {error}")
  271. if warnings:
  272. print("\nFormatting warnings:")
  273. for warning in warnings:
  274. print(f" - {warning}")
  275. print("\nPlease fix these issues before saving.")
  276. raise ValueError(
  277. f"Found {len(overflow_errors)} overflow error(s) and {len(warnings)} warning(s)"
  278. )
  279. # Save the presentation
  280. prs.save(output_file)
  281. # Report results
  282. print(f"Saved updated presentation to: {output_file}")
  283. print(f"Processed {len(prs.slides)} slides")
  284. print(f" - Shapes processed: {shapes_processed}")
  285. print(f" - Shapes cleared: {shapes_cleared}")
  286. print(f" - Shapes replaced: {shapes_replaced}")
  287. def main():
  288. """Main entry point for command-line usage."""
  289. if len(sys.argv) != 4:
  290. print(__doc__)
  291. sys.exit(1)
  292. input_pptx = Path(sys.argv[1])
  293. replacements_json = Path(sys.argv[2])
  294. output_pptx = Path(sys.argv[3])
  295. if not input_pptx.exists():
  296. print(f"Error: Input file '{input_pptx}' not found")
  297. sys.exit(1)
  298. if not replacements_json.exists():
  299. print(f"Error: Replacements JSON file '{replacements_json}' not found")
  300. sys.exit(1)
  301. try:
  302. apply_replacements(str(input_pptx), str(replacements_json), str(output_pptx))
  303. except Exception as e:
  304. print(f"Error applying replacements: {e}")
  305. import traceback
  306. traceback.print_exc()
  307. sys.exit(1)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()