recalc.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. #!/usr/bin/env python3
  2. """
  3. Excel Formula Recalculation Script
  4. Recalculates all formulas in an Excel file using LibreOffice
  5. """
  6. import json
  7. import sys
  8. import subprocess
  9. import os
  10. import platform
  11. from pathlib import Path
  12. from openpyxl import load_workbook
  13. def setup_libreoffice_macro():
  14. """Setup LibreOffice macro for recalculation if not already configured"""
  15. if platform.system() == 'Darwin':
  16. macro_dir = os.path.expanduser('~/Library/Application Support/LibreOffice/4/user/basic/Standard')
  17. else:
  18. macro_dir = os.path.expanduser('~/.config/libreoffice/4/user/basic/Standard')
  19. macro_file = os.path.join(macro_dir, 'Module1.xba')
  20. if os.path.exists(macro_file):
  21. with open(macro_file, 'r') as f:
  22. if 'RecalculateAndSave' in f.read():
  23. return True
  24. if not os.path.exists(macro_dir):
  25. subprocess.run(['soffice', '--headless', '--terminate_after_init'],
  26. capture_output=True, timeout=10)
  27. os.makedirs(macro_dir, exist_ok=True)
  28. macro_content = '''<?xml version="1.0" encoding="UTF-8"?>
  29. <!DOCTYPE script:module PUBLIC "-//OpenOffice.org//DTD OfficeDocument 1.0//EN" "module.dtd">
  30. <script:module xmlns:script="http://openoffice.org/2000/script" script:name="Module1" script:language="StarBasic">
  31. Sub RecalculateAndSave()
  32. ThisComponent.calculateAll()
  33. ThisComponent.store()
  34. ThisComponent.close(True)
  35. End Sub
  36. </script:module>'''
  37. try:
  38. with open(macro_file, 'w') as f:
  39. f.write(macro_content)
  40. return True
  41. except Exception:
  42. return False
  43. def recalc(filename, timeout=30):
  44. """
  45. Recalculate formulas in Excel file and report any errors
  46. Args:
  47. filename: Path to Excel file
  48. timeout: Maximum time to wait for recalculation (seconds)
  49. Returns:
  50. dict with error locations and counts
  51. """
  52. if not Path(filename).exists():
  53. return {'error': f'File {filename} does not exist'}
  54. abs_path = str(Path(filename).absolute())
  55. if not setup_libreoffice_macro():
  56. return {'error': 'Failed to setup LibreOffice macro'}
  57. cmd = [
  58. 'soffice', '--headless', '--norestore',
  59. 'vnd.sun.star.script:Standard.Module1.RecalculateAndSave?language=Basic&location=application',
  60. abs_path
  61. ]
  62. # Handle timeout command differences between Linux and macOS
  63. if platform.system() != 'Windows':
  64. timeout_cmd = 'timeout' if platform.system() == 'Linux' else None
  65. if platform.system() == 'Darwin':
  66. # Check if gtimeout is available on macOS
  67. try:
  68. subprocess.run(['gtimeout', '--version'], capture_output=True, timeout=1, check=False)
  69. timeout_cmd = 'gtimeout'
  70. except (FileNotFoundError, subprocess.TimeoutExpired):
  71. pass
  72. if timeout_cmd:
  73. cmd = [timeout_cmd, str(timeout)] + cmd
  74. result = subprocess.run(cmd, capture_output=True, text=True)
  75. if result.returncode != 0 and result.returncode != 124: # 124 is timeout exit code
  76. error_msg = result.stderr or 'Unknown error during recalculation'
  77. if 'Module1' in error_msg or 'RecalculateAndSave' not in error_msg:
  78. return {'error': 'LibreOffice macro not configured properly'}
  79. else:
  80. return {'error': error_msg}
  81. # Check for Excel errors in the recalculated file - scan ALL cells
  82. try:
  83. wb = load_workbook(filename, data_only=True)
  84. excel_errors = ['#VALUE!', '#DIV/0!', '#REF!', '#NAME?', '#NULL!', '#NUM!', '#N/A']
  85. error_details = {err: [] for err in excel_errors}
  86. total_errors = 0
  87. for sheet_name in wb.sheetnames:
  88. ws = wb[sheet_name]
  89. # Check ALL rows and columns - no limits
  90. for row in ws.iter_rows():
  91. for cell in row:
  92. if cell.value is not None and isinstance(cell.value, str):
  93. for err in excel_errors:
  94. if err in cell.value:
  95. location = f"{sheet_name}!{cell.coordinate}"
  96. error_details[err].append(location)
  97. total_errors += 1
  98. break
  99. wb.close()
  100. # Build result summary
  101. result = {
  102. 'status': 'success' if total_errors == 0 else 'errors_found',
  103. 'total_errors': total_errors,
  104. 'error_summary': {}
  105. }
  106. # Add non-empty error categories
  107. for err_type, locations in error_details.items():
  108. if locations:
  109. result['error_summary'][err_type] = {
  110. 'count': len(locations),
  111. 'locations': locations[:20] # Show up to 20 locations
  112. }
  113. # Add formula count for context - also check ALL cells
  114. wb_formulas = load_workbook(filename, data_only=False)
  115. formula_count = 0
  116. for sheet_name in wb_formulas.sheetnames:
  117. ws = wb_formulas[sheet_name]
  118. for row in ws.iter_rows():
  119. for cell in row:
  120. if cell.value and isinstance(cell.value, str) and cell.value.startswith('='):
  121. formula_count += 1
  122. wb_formulas.close()
  123. result['total_formulas'] = formula_count
  124. return result
  125. except Exception as e:
  126. return {'error': str(e)}
  127. def main():
  128. if len(sys.argv) < 2:
  129. print("Usage: python recalc.py <excel_file> [timeout_seconds]")
  130. print("\nRecalculates all formulas in an Excel file using LibreOffice")
  131. print("\nReturns JSON with error details:")
  132. print(" - status: 'success' or 'errors_found'")
  133. print(" - total_errors: Total number of Excel errors found")
  134. print(" - total_formulas: Number of formulas in the file")
  135. print(" - error_summary: Breakdown by error type with locations")
  136. print(" - #VALUE!, #DIV/0!, #REF!, #NAME?, #NULL!, #NUM!, #N/A")
  137. sys.exit(1)
  138. filename = sys.argv[1]
  139. timeout = int(sys.argv[2]) if len(sys.argv) > 2 else 30
  140. result = recalc(filename, timeout)
  141. print(json.dumps(result, indent=2))
  142. if __name__ == '__main__':
  143. main()