import json
import os
from pathlib import Path
from bs4 import BeautifulSoup
# Maps each supported metadata key to the <meta> attribute that identifies its
# tag: standard metadata uses name="...", Open Graph uses property="...".
# 'title' is handled separately because it lives in <title>, not in <meta>.
_META_ATTR_BY_KEY = {
    'description': 'name',
    'keywords': 'name',
    'og:title': 'property',
    'og:description': 'property',
    'og:type': 'property',
    'og:image': 'property',
}


def _ensure_head(soup):
    """Return the document's <head>, creating an empty one if it is missing."""
    head = soup.head
    if head is None:
        head = soup.new_tag('head')
        # Nest under <html> when present; otherwise fall back to the root of
        # the parsed fragment so the new tags still end up in the output.
        parent = soup.html if soup.html is not None else soup
        parent.insert(0, head)
    return head


def _set_title(soup, text):
    """Set the <title> text, creating the tag inside <head> if it is missing."""
    title = soup.title
    if title is None:
        title = soup.new_tag('title')
        _ensure_head(soup).append(title)
    title.string = text


def _set_meta(soup, attr, key, text):
    """Set content=text on <meta attr=key>, creating the tag if it is missing."""
    tag = soup.find('meta', attrs={attr: key})
    if tag is None:
        tag = soup.new_tag('meta')
        tag[attr] = key
        _ensure_head(soup).append(tag)
    tag['content'] = text


def attach_metadata(html_file_path, metadata, override=None):
    """
    Attach metadata to an HTML file in place, with optional manual overrides.

    Supported keys are 'title', 'description', 'keywords', 'og:title',
    'og:description', 'og:type' and 'og:image'; any other key is ignored.
    A falsy value is written as an empty string. Tags that do not exist in
    the document yet (<title>, <meta>, and <head> if needed) are created
    instead of raising.

    The file is rewritten with BeautifulSoup's ``prettify()`` output, so its
    whitespace/formatting may change even for untouched markup.

    Args:
        html_file_path (str): Path to the HTML file.
        metadata (dict): Dictionary containing the metadata to attach.
            ``None`` is treated as an empty dictionary.
        override (dict, optional): Dictionary containing metadata overrides;
            its values win over ``metadata`` for the same key. Defaults to None.

    Returns:
        None. If the file does not exist, or writing fails, an error message
        is printed and the function returns without raising.
    """
    try:
        with open(html_file_path, 'r', encoding='utf-8') as f:
            html_content = f.read()
    except FileNotFoundError:
        print(f"Error: File not found at {html_file_path}")
        return

    soup = BeautifulSoup(html_content, 'html.parser')

    # Applying defaults and then overrides key-by-key yields the same final
    # document as applying one merged mapping in which overrides win.
    merged = dict(metadata or {})
    if override:
        merged.update(override)

    for key, value in merged.items():
        text = str(value) if value else ''
        if key == 'title':
            _set_title(soup, text)
            continue
        attr = _META_ATTR_BY_KEY.get(key)
        if attr is not None:
            _set_meta(soup, attr, key, text)

    # Render before opening the file: mode 'w' truncates immediately, so a
    # failure while rendering must not leave an emptied file behind.
    rendered = soup.prettify()

    # Write the modified HTML back to the file
    try:
        with open(html_file_path, 'w', encoding='utf-8') as f:
            f.write(rendered)
    except (OSError, UnicodeError) as e:
        print(f"Error writing to file: {e}")
if __name__ == '__main__':
# Example Usage
html_file = 'example.html'
# Create a dummy HTML file for testing
with open(html_file, 'w', encoding='utf-8') as f:
f.write('<html><head><title>Original Title</title></head><body><h1>Hello World</h1></body></html>')
default_metadata = {
'title': 'Sandbox Page',
'description': 'This is a sandbox page.',
'keywords': 'sandbox, html, metadata',
'og:title': 'Sandbox Page',
'og:description': 'This is a sandbox page.',
'og:type': 'website',
'og:image': 'https://example.com/image.jpg'
}
override_metadata = {
'title': 'Overridden Title
Add your comment