import argparse
from bs4 import BeautifulSoup
import json
def validate_dom(html_content, schema_file):
    """Validate the ``<body>`` of an HTML document against a JSON schema.

    The schema file holds one JSON object (a "schema node") whose ``type``
    key selects the check applied to an element:

    * ``"string"``: the element's ``data-id`` attribute must equal
      ``value``.
    * ``"attribute"``: the attribute named by ``attribute`` must equal
      ``value``.
    * ``"children"``: every child tag must satisfy the schema node stored
      under ``children``; if that key is absent nothing is required.
    * ``"tag"``: the tag name must equal ``value``, every
      ``{"name": ..., "value": ...}`` entry of the optional ``attributes``
      list must match, and every child tag must satisfy the optional
      ``children`` schema node.

    Only tag children are checked; text, comments and whitespace between
    tags are ignored because no schema node type can describe them.
    Validation stops at the first failure, which is reported with
    ``print``.

    Note: attribute values are compared with ``!=`` against whatever
    ``element.get`` returns. BeautifulSoup returns a list for multi-valued
    attributes such as ``class``, so the schema ``value`` for those must be
    a list as well.

    Args:
        html_content (str): The HTML content to validate.
        schema_file (str): Path to the JSON schema file.

    Returns:
        bool: True if the DOM is valid, False otherwise (including when the
        HTML cannot be parsed or has no ``<body>`` element).

    Raises:
        OSError: If the schema file cannot be opened.
        json.JSONDecodeError: If the schema file is not valid JSON.
        KeyError: If a schema node lacks a key that its ``type`` requires
            (``value``, ``attribute``, or an attribute entry's
            ``name``/``value``).
    """
    try:
        soup = BeautifulSoup(html_content, 'html.parser')
    except Exception as e:
        print(f"Error parsing HTML: {e}")
        return False

    # Binary mode lets json.load detect UTF-8/16/32 itself instead of
    # depending on the platform's locale encoding.
    with open(schema_file, 'rb') as f:
        schema = json.load(f)

    def validate_children(element, schema_node):
        """Check every child tag of *element* against the child schema."""
        child_schema = schema_node.get('children')
        if child_schema is None:
            return True  # no children required
        # Non-tag nodes (text, comments, whitespace) report a name of None;
        # they have no attributes to validate, so they are skipped.
        child_tags = (
            child for child in element.children
            if getattr(child, 'name', None) is not None
        )
        # all() short-circuits, so validation stops at the first bad child.
        return all(validate_element(child, child_schema) for child in child_tags)

    def validate_element(element, schema_node):
        """Recursively validates an element against the schema."""
        node_type = schema_node.get('type')

        if node_type == 'string':
            if element.get('data-id') != schema_node['value']:
                print(f"Validation error: Element '{element.name}' data-id should be '{schema_node['value']}'")
                return False
            return True

        if node_type == 'attribute':
            attr_name = schema_node['attribute']
            if element.get(attr_name) != schema_node['value']:
                print(f"Validation error: Element '{element.name}' attribute '{attr_name}' should be '{schema_node['value']}'")
                return False
            return True

        if node_type == 'children':
            return validate_children(element, schema_node)

        if node_type == 'tag':
            if element.name != schema_node['value']:
                print(f"Validation error: Element '{element.name}' should be '{schema_node['value']}'")
                return False
            # 'attributes' is optional: a tag node may constrain only the name.
            for attr in schema_node.get('attributes') or ():
                attr_name = attr['name']
                if element.get(attr_name) != attr['value']:
                    print(f"Validation error: Element '{element.name}' attribute '{attr_name}' should be '{attr['value']}'")
                    return False
            return validate_children(element, schema_node)

        print(f"Unknown schema type: {node_type}")
        return False

    # Start validation from the body. html.parser does not synthesize a
    # <body>, so fragments and body-less documents yield None here.
    body = soup.body
    if body is None:
        print("Validation error: HTML document has no <body> element")
        return False
    return validate_element(body, schema)
def main(argv=None):
    """Parse command-line arguments and perform DOM validation.

    Args:
        argv (list[str] | None): Argument strings to parse, without the
            program name. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``.

    Returns:
        int: Process exit status. 0 if the DOM is valid, 1 if it is
        invalid, 2 if the HTML file or the schema file could not be
        loaded.
    """
    parser = argparse.ArgumentParser(description='Validate DOM structure against a JSON schema.')
    parser.add_argument('html_file', help='Path to the HTML file.')
    parser.add_argument('schema_file', help='Path to the JSON schema file.')
    args = parser.parse_args(argv)

    try:
        with open(args.html_file, 'r') as f:
            html_content = f.read()
    except FileNotFoundError:
        print(f"Error: HTML file not found: {args.html_file}")
        return 2
    except (OSError, UnicodeDecodeError) as exc:
        # Permission problems, directories, undecodable bytes, etc.
        print(f"Error: could not read HTML file {args.html_file}: {exc}")
        return 2

    try:
        is_valid = validate_dom(html_content, args.schema_file)
    except (OSError, json.JSONDecodeError, UnicodeDecodeError) as exc:
        # validate_dom loads the schema itself; report a missing or
        # malformed schema as an input error rather than a traceback.
        print(f"Error: could not load schema file {args.schema_file}: {exc}")
        return 2

    if is_valid:
        print("DOM is valid.")
        return 0
    print("DOM is invalid.")
    return 1


if __name__ == "__main__":
    # Propagate the result as the exit status so shell scripts and CI can
    # react to a failed validation.
    raise SystemExit(main())
# (Stray web-page text, not part of the program: "Add your comment")