import json
from urllib.parse import urlparse
  3. def extract_urls_from_config(config_file):
  4. """
  5. Extracts URLs from a configuration file.
  6. Args:
  7. config_file (str): Path to the configuration file (JSON).
  8. Returns:
  9. list: A list of extracted URLs. Returns an empty list if the file
  10. doesn't exist or is invalid.
  11. """
  12. try:
  13. with open(config_file, 'r') as f:
  14. config = json.load(f)
  15. except FileNotFoundError:
  16. print(f"Error: Configuration file not found: {config_file}")
  17. return []
  18. except json.JSONDecodeError:
  19. print(f"Error: Invalid JSON format in {config_file}")
  20. return []
  21. urls = []
  22. # Iterate through the configuration data. Assumes 'urls' key.
  23. if 'urls' in config:
  24. for url_data in config['urls']:
  25. if isinstance(url_data, str): # Handle simple string URLs
  26. urls.append(url_data)
  27. elif isinstance(url_data, dict) and 'value' in url_data: #Handle dictionary with a value
  28. urls.append(url_data['value'])
  29. elif isinstance(url_data, list): #Handle list of strings
  30. for item in url_data:
  31. if isinstance(item, str):
  32. urls.append(item)
  33. else:
  34. print(f"Warning: Unexpected URL data format: {url_data}")
  35. return urls
  36. def validate_url(url):
  37. """
  38. Validates a URL using urllib.parse.urlparse.
  39. Args:
  40. url (str): The URL to validate.
  41. Returns:
  42. bool: True if the URL is valid, False otherwise.
  43. """
  44. try:
  45. result = urlparse(url)
  46. return all([result.scheme, result.netloc]) # Check scheme and network location
  47. except:
  48. return False
  49. def main():
  50. """
  51. Main function to demonstrate URL extraction.
  52. """
  53. config_file = 'config.json' # Replace with your config file name
  54. extracted_urls = extract_urls_from_config(config_file)
  55. valid_urls = [url for url in extracted_urls if validate_url(url)]
  56. print("Extracted URLs:")
  57. for url in valid_urls:
  58. print(url)
  59. if __name__ == "__main__":
  60. main()

Add your comment