1. import time
  2. from bs4 import BeautifulSoup
  3. import requests
  4. import logging
# Configure logging at import time: emit records at INFO level and above,
# formatted as "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
  7. def wrap_html_task(html_content, task_function, timeout=10, retries=3):
  8. """
  9. Wraps an HTML document processing task with timeout and retry logic.
  10. Args:
  11. html_content (str): The HTML content to process.
  12. task_function (callable): The function to apply to the HTML content.
  13. Should accept the HTML content as input.
  14. timeout (int): Timeout in seconds for the task.
  15. retries (int): Number of retries if the task fails.
  16. Returns:
  17. The result of the task_function if successful, None otherwise.
  18. """
  19. for attempt in range(retries):
  20. try:
  21. # Start timer
  22. start_time = time.time()
  23. # Execute the task
  24. result = task_function(html_content)
  25. # Check if the task completed within the timeout
  26. if time.time() - start_time > timeout:
  27. logging.error(f"Task timed out after {timeout} seconds. Attempt: {attempt + 1}")
  28. continue # Retry
  29. return result # Return result if successful
  30. except Exception as e:
  31. logging.error(f"Task failed on attempt {attempt + 1}: {e}")
  32. if attempt < retries - 1:
  33. logging.warning(f"Retrying... Attempt: {attempt + 1}")
  34. time.sleep(2) # Wait before retrying
  35. else:
  36. logging.error("Task failed after all retries.")
  37. return None # Return None if all retries fail
  38. def example_task(html):
  39. """
  40. Example task function to extract title from HTML.
  41. Simulates a potentially slow task.
  42. """
  43. try:
  44. soup = BeautifulSoup(html, 'html.parser')
  45. title = soup.title.string if soup.title else "No Title"
  46. return title
  47. except Exception as e:
  48. logging.error(f"Error extracting title: {e}")
  49. return None
  50. if __name__ == '__main__':
  51. # Example Usage
  52. html_example = """
  53. <html>
  54. <head>
  55. <title>My Webpage</title>
  56. </head>
  57. <body>
  58. <h1>Hello World</h1>
  59. </body>
  60. </html>
  61. """
  62. title = wrap_html_task(html_example, example_task)
  63. if title:
  64. print(f"Extracted Title: {title}")
  65. else:
  66. print("Failed to extract title.")
  67. # Example with a failing task (simulated)
  68. def failing_task(html):
  69. raise ValueError("Simulated error in task")
  70. result = wrap_html_task(html_example, failing_task)
  71. if result:
  72. print(f"Task Result: {result}")
  73. else:
  74. print("Task failed.")

Add your comment