import requests
from bs4 import BeautifulSoup
import json
import logging
import time
import random
from urllib.parse import urljoin

# Configure logging once at import time; all module logging uses this format.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
  10. def batch_cookie_operations(url, cookie_data, operation_type, max_retries=3):
  11. """
  12. Performs batch operations on a given URL using provided cookies.
  13. Args:
  14. url (str): The URL to perform operations on.
  15. cookie_data (dict): A dictionary containing session cookies.
  16. operation_type (str): The type of operation (e.g., 'get_data', 'post_data').
  17. max_retries (int): Maximum number of retries for each operation.
  18. Returns:
  19. list: A list of results from the operations. Returns None if a critical error occurs.
  20. """
  21. results = []
  22. for attempt in range(max_retries):
  23. try:
  24. if operation_type == 'get_data':
  25. response = requests.get(url, cookies=cookie_data, timeout=10) # Add timeout
  26. response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
  27. results.append(response.text)
  28. logging.info(f"Successfully retrieved data from {url} (attempt {attempt + 1})")
  29. break # Exit retry loop on success
  30. elif operation_type == 'post_data':
  31. data = {'key1': 'value1', 'key2': 'value2'} #example data
  32. response = requests.post(url, cookies=cookie_data, data=data, timeout=10)
  33. response.raise_for_status()
  34. results.append(response.text)
  35. logging.info(f"Successfully posted data to {url} (attempt {attempt + 1})")
  36. break #Exit retry loop on success
  37. else:
  38. logging.error(f"Unsupported operation type: {operation_type}")
  39. return None
  40. except requests.exceptions.RequestException as e:
  41. logging.warning(f"Request failed for {url} (attempt {attempt + 1}): {e}")
  42. if attempt < max_retries - 1:
  43. sleep_time = random.uniform(1, 5) #random sleep to avoid overwhelming server
  44. logging.info(f"Retrying in {sleep_time:.2f} seconds...")
  45. time.sleep(sleep_time)
  46. else:
  47. logging.error(f"Request failed for {url} after {max_retries} attempts.")
  48. return None #Critical failure, stop further operations
  49. except Exception as e:
  50. logging.exception(f"An unexpected error occurred for {url} (attempt {attempt + 1}): {e}")
  51. return None #Critical failure, stop further operations
  52. return results
  53. def process_data_migration(migration_config):
  54. """
  55. Processes data migration operations based on the provided configuration.
  56. Args:
  57. migration_config (dict): A dictionary containing migration parameters (URLs, cookies, operation types).
  58. Returns:
  59. dict: A dictionary containing the results of the operations. Returns None if a critical error occurs.
  60. """
  61. all_results = {}
  62. for url, cookie_data, operation_type in migration_config.items():
  63. logging.info(f"Processing {url} with {operation_type} operation.")
  64. results = batch_cookie_operations(url, cookie_data, operation_type)
  65. if results is None:
  66. logging.error(f"Data migration failed for {url}. Aborting.")
  67. return None
  68. else:
  69. all_results[url] = results
  70. return all_results
  71. if __name__ == '__main__':
  72. # Example usage
  73. migration_config = {
  74. "https://example.com/data1": {
  75. "cookies": {"session_id": "12345", "user_agent": "Mozilla/5.0"},
  76. "operation_type": "get_data"
  77. },
  78. "https://example.com/data2": {
  79. "cookies": {"auth_token": "abcdefg"},
  80. "operation_type": "post_data"
  81. },
  82. "https://example.com/data3": {
  83. "cookies": {"

# NOTE(review): stray "Add your comment" web-page footer removed — scrape residue, not code.