1. import json
  2. import os
  3. from pathlib import Path
  4. from bs4 import BeautifulSoup
  5. def attach_metadata(html_file_path, metadata, override=None):
  6. """
  7. Attaches metadata to an HTML file for sandbox usage with manual overrides.
  8. Args:
  9. html_file_path (str): Path to the HTML file.
  10. metadata (dict): Dictionary containing the metadata to attach.
  11. override (dict, optional): Dictionary containing metadata overrides. Defaults to None.
  12. """
  13. try:
  14. with open(html_file_path, 'r', encoding='utf-8') as f:
  15. html_content = f.read()
  16. except FileNotFoundError:
  17. print(f"Error: File not found at {html_file_path}")
  18. return
  19. soup = BeautifulSoup(html_content, 'html.parser')
  20. # Apply default metadata
  21. for key, value in metadata.items():
  22. if key == 'title':
  23. soup.title.string = str(value) if value else ''
  24. elif key == 'description':
  25. soup.find('meta', attrs={'name': 'description'})['content'] = str(value) if value else ''
  26. elif key == 'keywords':
  27. soup.find('meta', attrs={'name': 'keywords'})['content'] = str(value) if value else ''
  28. elif key == 'og:title':
  29. soup.find('meta', attrs={'property': 'og:title'})['content'] = str(value) if value else ''
  30. elif key == 'og:description':
  31. soup.find('meta', attrs={'property': 'og:description'})['content'] = str(value) if value else ''
  32. elif key == 'og:type':
  33. soup.find('meta', attrs={'property': 'og:type'})['content'] = str(value) if value else ''
  34. elif key == 'og:image':
  35. soup.find('meta', attrs={'property': 'og:image'})['content'] = str(value) if value else ''
  36. # Apply overrides
  37. if override:
  38. for key, value in override.items():
  39. if key == 'title':
  40. soup.title.string = str(value) if value else ''
  41. elif key == 'description':
  42. soup.find('meta', attrs={'name': 'description'})['content'] = str(value) if value else ''
  43. elif key == 'keywords':
  44. soup.find('meta', attrs={'name': 'keywords'})['content'] = str(value) if value else ''
  45. elif key == 'og:title':
  46. soup.find('meta', attrs={'property': 'og:title'})['content'] = str(value) if value else ''
  47. elif key == 'og:description':
  48. soup.find('meta', attrs={'property': 'og:description'})['content'] = str(value) if value else ''
  49. elif key == 'og:type':
  50. soup.find('meta', attrs={'property': 'og:type'})['content'] = str(value) if value else ''
  51. elif key == 'og:image':
  52. soup.find('meta', attrs={'property': 'og:image'})['content'] = str(value) if value else ''
  53. # Write the modified HTML back to the file
  54. try:
  55. with open(html_file_path, 'w', encoding='utf-8') as f:
  56. f.write(soup.prettify()) # Use prettify for better formatting
  57. except Exception as e:
  58. print(f"Error writing to file: {e}")
  59. if __name__ == '__main__':
  60. # Example Usage
  61. html_file = 'example.html'
  62. # Create a dummy HTML file for testing
  63. with open(html_file, 'w', encoding='utf-8') as f:
  64. f.write('<html><head><title>Original Title</title></head><body><h1>Hello World</h1></body></html>')
  65. default_metadata = {
  66. 'title': 'Sandbox Page',
  67. 'description': 'This is a sandbox page.',
  68. 'keywords': 'sandbox, html, metadata',
  69. 'og:title': 'Sandbox Page',
  70. 'og:description': 'This is a sandbox page.',
  71. 'og:type': 'website',
  72. 'og:image': 'https://example.com/image.jpg'
  73. }
  74. override_metadata = {
  75. 'title': 'Overridden Title

Add your comment