check_bounding_boxes_test.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. import unittest
  2. import json
  3. import io
  4. from check_bounding_boxes import get_bounding_box_messages
# Currently this is not run automatically in CI; it's just for documentation and manual checking.
  6. class TestGetBoundingBoxMessages(unittest.TestCase):
  7. def create_json_stream(self, data):
  8. """Helper to create a JSON stream from data"""
  9. return io.StringIO(json.dumps(data))
  10. def test_no_intersections(self):
  11. """Test case with no bounding box intersections"""
  12. data = {
  13. "form_fields": [
  14. {
  15. "description": "Name",
  16. "page_number": 1,
  17. "label_bounding_box": [10, 10, 50, 30],
  18. "entry_bounding_box": [60, 10, 150, 30]
  19. },
  20. {
  21. "description": "Email",
  22. "page_number": 1,
  23. "label_bounding_box": [10, 40, 50, 60],
  24. "entry_bounding_box": [60, 40, 150, 60]
  25. }
  26. ]
  27. }
  28. stream = self.create_json_stream(data)
  29. messages = get_bounding_box_messages(stream)
  30. self.assertTrue(any("SUCCESS" in msg for msg in messages))
  31. self.assertFalse(any("FAILURE" in msg for msg in messages))
  32. def test_label_entry_intersection_same_field(self):
  33. """Test intersection between label and entry of the same field"""
  34. data = {
  35. "form_fields": [
  36. {
  37. "description": "Name",
  38. "page_number": 1,
  39. "label_bounding_box": [10, 10, 60, 30],
  40. "entry_bounding_box": [50, 10, 150, 30] # Overlaps with label
  41. }
  42. ]
  43. }
  44. stream = self.create_json_stream(data)
  45. messages = get_bounding_box_messages(stream)
  46. self.assertTrue(any("FAILURE" in msg and "intersection" in msg for msg in messages))
  47. self.assertFalse(any("SUCCESS" in msg for msg in messages))
  48. def test_intersection_between_different_fields(self):
  49. """Test intersection between bounding boxes of different fields"""
  50. data = {
  51. "form_fields": [
  52. {
  53. "description": "Name",
  54. "page_number": 1,
  55. "label_bounding_box": [10, 10, 50, 30],
  56. "entry_bounding_box": [60, 10, 150, 30]
  57. },
  58. {
  59. "description": "Email",
  60. "page_number": 1,
  61. "label_bounding_box": [40, 20, 80, 40], # Overlaps with Name's boxes
  62. "entry_bounding_box": [160, 10, 250, 30]
  63. }
  64. ]
  65. }
  66. stream = self.create_json_stream(data)
  67. messages = get_bounding_box_messages(stream)
  68. self.assertTrue(any("FAILURE" in msg and "intersection" in msg for msg in messages))
  69. self.assertFalse(any("SUCCESS" in msg for msg in messages))
  70. def test_different_pages_no_intersection(self):
  71. """Test that boxes on different pages don't count as intersecting"""
  72. data = {
  73. "form_fields": [
  74. {
  75. "description": "Name",
  76. "page_number": 1,
  77. "label_bounding_box": [10, 10, 50, 30],
  78. "entry_bounding_box": [60, 10, 150, 30]
  79. },
  80. {
  81. "description": "Email",
  82. "page_number": 2,
  83. "label_bounding_box": [10, 10, 50, 30], # Same coordinates but different page
  84. "entry_bounding_box": [60, 10, 150, 30]
  85. }
  86. ]
  87. }
  88. stream = self.create_json_stream(data)
  89. messages = get_bounding_box_messages(stream)
  90. self.assertTrue(any("SUCCESS" in msg for msg in messages))
  91. self.assertFalse(any("FAILURE" in msg for msg in messages))
  92. def test_entry_height_too_small(self):
  93. """Test that entry box height is checked against font size"""
  94. data = {
  95. "form_fields": [
  96. {
  97. "description": "Name",
  98. "page_number": 1,
  99. "label_bounding_box": [10, 10, 50, 30],
  100. "entry_bounding_box": [60, 10, 150, 20], # Height is 10
  101. "entry_text": {
  102. "font_size": 14 # Font size larger than height
  103. }
  104. }
  105. ]
  106. }
  107. stream = self.create_json_stream(data)
  108. messages = get_bounding_box_messages(stream)
  109. self.assertTrue(any("FAILURE" in msg and "height" in msg for msg in messages))
  110. self.assertFalse(any("SUCCESS" in msg for msg in messages))
  111. def test_entry_height_adequate(self):
  112. """Test that adequate entry box height passes"""
  113. data = {
  114. "form_fields": [
  115. {
  116. "description": "Name",
  117. "page_number": 1,
  118. "label_bounding_box": [10, 10, 50, 30],
  119. "entry_bounding_box": [60, 10, 150, 30], # Height is 20
  120. "entry_text": {
  121. "font_size": 14 # Font size smaller than height
  122. }
  123. }
  124. ]
  125. }
  126. stream = self.create_json_stream(data)
  127. messages = get_bounding_box_messages(stream)
  128. self.assertTrue(any("SUCCESS" in msg for msg in messages))
  129. self.assertFalse(any("FAILURE" in msg for msg in messages))
  130. def test_default_font_size(self):
  131. """Test that default font size is used when not specified"""
  132. data = {
  133. "form_fields": [
  134. {
  135. "description": "Name",
  136. "page_number": 1,
  137. "label_bounding_box": [10, 10, 50, 30],
  138. "entry_bounding_box": [60, 10, 150, 20], # Height is 10
  139. "entry_text": {} # No font_size specified, should use default 14
  140. }
  141. ]
  142. }
  143. stream = self.create_json_stream(data)
  144. messages = get_bounding_box_messages(stream)
  145. self.assertTrue(any("FAILURE" in msg and "height" in msg for msg in messages))
  146. self.assertFalse(any("SUCCESS" in msg for msg in messages))
  147. def test_no_entry_text(self):
  148. """Test that missing entry_text doesn't cause height check"""
  149. data = {
  150. "form_fields": [
  151. {
  152. "description": "Name",
  153. "page_number": 1,
  154. "label_bounding_box": [10, 10, 50, 30],
  155. "entry_bounding_box": [60, 10, 150, 20] # Small height but no entry_text
  156. }
  157. ]
  158. }
  159. stream = self.create_json_stream(data)
  160. messages = get_bounding_box_messages(stream)
  161. self.assertTrue(any("SUCCESS" in msg for msg in messages))
  162. self.assertFalse(any("FAILURE" in msg for msg in messages))
  163. def test_multiple_errors_limit(self):
  164. """Test that error messages are limited to prevent excessive output"""
  165. fields = []
  166. # Create many overlapping fields
  167. for i in range(25):
  168. fields.append({
  169. "description": f"Field{i}",
  170. "page_number": 1,
  171. "label_bounding_box": [10, 10, 50, 30], # All overlap
  172. "entry_bounding_box": [20, 15, 60, 35] # All overlap
  173. })
  174. data = {"form_fields": fields}
  175. stream = self.create_json_stream(data)
  176. messages = get_bounding_box_messages(stream)
  177. # Should abort after ~20 messages
  178. self.assertTrue(any("Aborting" in msg for msg in messages))
  179. # Should have some FAILURE messages but not hundreds
  180. failure_count = sum(1 for msg in messages if "FAILURE" in msg)
  181. self.assertGreater(failure_count, 0)
  182. self.assertLess(len(messages), 30) # Should be limited
  183. def test_edge_touching_boxes(self):
  184. """Test that boxes touching at edges don't count as intersecting"""
  185. data = {
  186. "form_fields": [
  187. {
  188. "description": "Name",
  189. "page_number": 1,
  190. "label_bounding_box": [10, 10, 50, 30],
  191. "entry_bounding_box": [50, 10, 150, 30] # Touches at x=50
  192. }
  193. ]
  194. }
  195. stream = self.create_json_stream(data)
  196. messages = get_bounding_box_messages(stream)
  197. self.assertTrue(any("SUCCESS" in msg for msg in messages))
  198. self.assertFalse(any("FAILURE" in msg for msg in messages))
  199. if __name__ == '__main__':
  200. unittest.main()