Selaa lähdekoodia

improved health check

zehe 3 kuukautta sitten
vanhempi
commit
1677331aa5
2 muutettua tiedostoa, joissa 67 lisäystä ja 24 poistoa
  1. 22 6
      README.md
  2. 45 18
      main.py

+ 22 - 6
README.md

@@ -23,8 +23,10 @@ The system consists of two main endpoints:
    - Fails closed if notifications cannot be sent
 
 2. **Health Check Endpoint** (`/health-check-abc456`):
-   - Serves dummy content to verify system functionality
-   - Sends health status to monitoring backends
+   - Verifies all system components required for emergency access
+   - Tests both health and emergency notification backends
+   - Validates file system access for both dummy and key files
+   - Ensures complete emergency system readiness
    - Used for regular system verification
 
 **Log Monitoring**: All application logs (WARNING level and above by default) are automatically sent to the health backends for real-time monitoring and alerting.
@@ -134,7 +136,7 @@ Edit `/etc/emergency-access/config.json`:
 
 #### File Settings
 - `key_file`: Path to the actual key part file
-- `dummy_file`: Path to dummy content for health checks
+- `dummy_file`: Path to dummy content for health checks (health check also validates key file accessibility)
 
 #### Notification Settings
 - `key_backends`: List of backend names from `/etc/emergency-access/ntfy.yml` for key access alerts
@@ -294,12 +296,26 @@ curl https://your-domain.com/emergency-key-a7f9d2e1
 # Regular health monitoring
 curl https://your-domain.com/health-check-b8e3f4a2
 
-# Expected response:
+# Expected response (all systems operational):
 {
   "status": "ok",
   "timestamp": 1703123456.789,
-  "notified_backends": ["matrix_sec"],
-  "dummy_content_length": 14
+  "health_backends_notified": ["matrix_health"],
+  "key_backends_tested": ["matrix_sec", "pushover_emergency"],
+  "dummy_content_length": 14,
+  "key_file_accessible": true,
+  "emergency_system_ready": true
+}
+
+# Error response (when components fail):
+{
+  "status": "error",
+  "message": "System components failed",
+  "details": ["key notifications failed", "key file access failed: Permission denied"],
+  "health_notifications": true,
+  "key_notifications": false,
+  "dummy_file_access": true,
+  "key_file_access": false
 }
 ```
 

+ 45 - 18
main.py

@@ -169,47 +169,74 @@ def get_key_part():
         }), 500
 
 def health_check():
-    """Health check endpoint with dummy file access"""
+    """Health check endpoint that verifies both health monitoring and key request functionality"""
     logger.info("Health check requested")
 
     try:
-        # Send notification
-        notification_success, successful_backends = send_ntfy_notification(
+        # Test health notification system
+        health_notification_success, health_backends = send_ntfy_notification(
             config.ntfy_backends_health,
             config.ntfy_health_message,
             "Health Check"
         )
 
-        if not notification_success:
-            logger.error("Health check notification failed")
-            return jsonify({
-                'status': 'error',
-                'message': 'Notification system failure'
-            }), 500
+        # Test key notification system (without triggering emergency alert)
+        key_test_message = "🔧 Emergency access system health verification - key notification test"
+        key_notification_success, key_backends = send_ntfy_notification(
+            config.ntfy_backends_key,
+            key_test_message,
+            "System Health Check"
+        )
 
-        # Read dummy file
-        file_success, content = read_file_safely(config.dummy_file_path)
+        # Test dummy file access
+        dummy_file_success, dummy_content = read_file_safely(config.dummy_file_path)
 
-        if not file_success:
-            logger.error(f"Health check file read failed: {content}")
+        # Test actual key file access (without exposing content)
+        key_file_success, key_content = read_file_safely(config.key_file_path)
+
+        # Determine overall health status
+        all_systems_ok = (health_notification_success and key_notification_success and
+                         dummy_file_success and key_file_success)
+
+        if not all_systems_ok:
+            error_details = []
+            if not health_notification_success:
+                error_details.append("health notifications failed")
+            if not key_notification_success:
+                error_details.append("key notifications failed")
+            if not dummy_file_success:
+                error_details.append(f"dummy file access failed: {dummy_content}")
+            if not key_file_success:
+                error_details.append(f"key file access failed: {key_content}")
+
+            logger.error(f"Health check failed: {', '.join(error_details)}")
             return jsonify({
                 'status': 'error',
-                'message': 'File system failure'
+                'message': 'System components failed',
+                'details': error_details,
+                'health_notifications': health_notification_success,
+                'key_notifications': key_notification_success,
+                'dummy_file_access': dummy_file_success,
+                'key_file_access': key_file_success
             }), 500
 
-        logger.info("Health check completed successfully")
+        logger.info("Health check completed successfully - all systems operational")
         return jsonify({
             'status': 'ok',
             'timestamp': time.time(),
-            'notified_backends': successful_backends,
-            'dummy_content_length': len(content)
+            'health_backends_notified': health_backends,
+            'key_backends_tested': key_backends,
+            'dummy_content_length': len(dummy_content),
+            'key_file_accessible': True,
+            'emergency_system_ready': True
         })
 
     except Exception as e:
         logger.error(f"Health check error: {str(e)}")
         return jsonify({
             'status': 'error',
-            'message': 'System error'
+            'message': 'System error',
+            'error': str(e)
         }), 500
 
 @app.errorhandler(404)