1. import argparse
  2. from bs4 import BeautifulSoup
  3. import json
  4. def validate_dom(html_content, schema_file):
  5. """
  6. Validates a DOM structure against a JSON schema.
  7. Args:
  8. html_content (str): The HTML content to validate.
  9. schema_file (str): Path to the JSON schema file.
  10. Returns:
  11. bool: True if the DOM is valid, False otherwise.
  12. """
  13. try:
  14. soup = BeautifulSoup(html_content, 'html.parser')
  15. except Exception as e:
  16. print(f"Error parsing HTML: {e}")
  17. return False
  18. with open(schema_file, 'r') as f:
  19. schema = json.load(f)
  20. def validate_element(element, schema_node):
  21. """Recursively validates an element against the schema."""
  22. if schema_node['type'] == 'string':
  23. if element.get('data-id') != schema_node['value']:
  24. print(f"Validation error: Element '{element.name}' data-id should be '{schema_node['value']}'")
  25. return False
  26. elif schema_node['type'] == 'attribute':
  27. attr_name = schema_node['attribute']
  28. attr_value = element.get(attr_name)
  29. if attr_value != schema_node['value']:
  30. print(f"Validation error: Element '{element.name}' attribute '{attr_name}' should be '{schema_node['value']}'")
  31. return False
  32. elif schema_node['type'] == 'children':
  33. if 'children' not in schema_node:
  34. return True # no children required
  35. for child in element.children:
  36. if not validate_element(child, schema_node['children']):
  37. return False
  38. return True
  39. elif schema_node['type'] == 'tag':
  40. if element.name != schema_node['value']:
  41. print(f"Validation error: Element '{element.name}' should be '{schema_node['value']}'")
  42. return False
  43. for attr in schema_node['attributes']:
  44. attr_name = attr['name']
  45. attr_value = element.get(attr_name)
  46. if attr_value != attr['value']:
  47. print(f"Validation error: Element '{element.name}' attribute '{attr_name}' should be '{attr['value']}'")
  48. return False
  49. for child in element.children:
  50. if not validate_element(child, schema_node['children']):
  51. return False
  52. return True
  53. else:
  54. print(f"Unknown schema type: {schema_node['type']}")
  55. return False
  56. return validate_element(soup.body, schema) # Start validation from the body
  57. def main():
  58. """Parses command-line arguments and performs DOM validation."""
  59. parser = argparse.ArgumentParser(description='Validate DOM structure against a JSON schema.')
  60. parser.add_argument('html_file', help='Path to the HTML file.')
  61. parser.add_argument('schema_file', help='Path to the JSON schema file.')
  62. args = parser.parse_args()
  63. try:
  64. with open(args.html_file, 'r') as f:
  65. html_content = f.read()
  66. except FileNotFoundError:
  67. print(f"Error: HTML file not found: {args.html_file}")
  68. return
  69. if validate_dom(html_content, args.schema_file):
  70. print("DOM is valid.")
  71. else:
  72. print("DOM is invalid.")
  73. if __name__ == "__main__":
  74. main()

Add your comment