import time
from bs4 import BeautifulSoup
import requests
import logging
# Route INFO-and-above records through the root logger, prefixing each
# message with its timestamp and severity level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
def wrap_html_task(html_content, task_function, timeout=10, retries=3,
                   retry_delay=2):
    """
    Run an HTML processing task with a time budget and retry logic.

    The timeout is checked only *after* ``task_function`` returns; a running
    task is never interrupted. A call that finishes but took longer than
    ``timeout`` has its result discarded and is retried immediately (without
    the retry pause). A call that raises is retried after ``retry_delay``
    seconds.

    Args:
        html_content (str): The HTML content to process.
        task_function (callable): The function to apply to the HTML content.
            Should accept the HTML content as its only argument.
        timeout (int | float): Maximum accepted duration of one call, in
            seconds.
        retries (int): Total number of attempts. With ``retries <= 0`` the
            task is never called.
        retry_delay (int | float): Seconds to wait after a raised exception
            before the next attempt. Values ``<= 0`` skip the wait.

    Returns:
        The result of ``task_function`` from the first attempt that neither
        raised nor exceeded the timeout; ``None`` if every attempt failed.
    """
    for attempt in range(1, retries + 1):
        # A monotonic clock keeps the elapsed-time measurement immune to
        # system clock adjustments during the task.
        started = time.monotonic()
        try:
            # Only the task call itself is guarded, so that failures in the
            # bookkeeping below are not mistaken for task failures.
            result = task_function(html_content)
        except Exception as exc:
            logging.error("Task failed on attempt %d: %s", attempt, exc)
            if attempt < retries:
                logging.warning("Retrying... Attempt: %d", attempt)
                if retry_delay > 0:
                    time.sleep(retry_delay)  # Wait before retrying
            else:
                logging.error("Task failed after all retries.")
            continue

        if time.monotonic() - started > timeout:
            logging.error(
                "Task timed out after %s seconds. Attempt: %d",
                timeout, attempt,
            )
            if attempt >= retries:
                # Report exhaustion on the timeout path as well, so the final
                # failure is always visible in the log.
                logging.error("Task failed after all retries.")
            continue  # Retry

        return result  # Success within the time budget

    return None  # Every attempt raised or timed out
def example_task(html):
    """
    Example task function: extract the document title from HTML.

    Args:
        html (str): The HTML markup to parse.

    Returns:
        The value of the ``<title>`` element's ``.string`` attribute, the
        literal ``"No Title"`` when the document has no ``<title>`` element,
        or ``None`` if parsing raised an exception (the error is logged).
    """
    try:
        soup = BeautifulSoup(html, 'html.parser')
        title_tag = soup.title
        if not title_tag:
            return "No Title"
        # NOTE(review): per the BeautifulSoup docs, ``.string`` is None when
        # the tag does not hold exactly one child, so an empty <title> would
        # yield None here - confirm whether callers should treat that as a
        # failure.
        return title_tag.string
    except Exception as e:
        # Best-effort: report the problem and signal failure with None rather
        # than propagating the exception.
        logging.error(f"Error extracting title: {e}")
        return None
if __name__ == '__main__':
    # Example usage: extract the title from a small, well-formed document.
    html_example = """
<html>
<head>
<title>My Webpage</title>
</head>
<body>
<h1>Hello World</h1>
</body>
</html>
"""
    title = wrap_html_task(html_example, example_task)
    # wrap_html_task signals failure with None; compare against None
    # explicitly so a legitimately falsy result (e.g. an empty title) is not
    # reported as a failure.
    if title is not None:
        print(f"Extracted Title: {title}")
    else:
        print("Failed to extract title.")

    # Example with a failing task (simulated): every attempt raises, so the
    # wrapper exhausts its retries and returns None.
    def failing_task(html):
        """Always raise, to demonstrate the retry/failure path."""
        raise ValueError("Simulated error in task")

    result = wrap_html_task(html_example, failing_task)
    if result is not None:
        print(f"Task Result: {result}")
    else:
        print("Task failed.")
# Add your comment