1. import json
  2. def deduplicate_json(json_data):
  3. """
  4. Deduplicates records in a JSON response based on a defined key.
  5. Performs basic sanity checks on the data.
  6. Args:
  7. json_data (str or list): JSON string or a list of dictionaries.
  8. Returns:
  9. list: A deduplicated list of dictionaries. Returns an empty list if input is invalid.
  10. """
  11. if isinstance(json_data, str):
  12. try:
  13. data = json.loads(json_data)
  14. except json.JSONDecodeError:
  15. print("Error: Invalid JSON string.")
  16. return []
  17. elif isinstance(json_data, list):
  18. data = json_data
  19. else:
  20. print("Error: Input must be a JSON string or a list of dictionaries.")
  21. return []
  22. if not isinstance(data, list):
  23. print("Error: JSON data must represent a list of dictionaries.")
  24. return []
  25. if not data: #handle empty list
  26. return []
  27. # Sanity check: Ensure all elements are dictionaries
  28. for item in data:
  29. if not isinstance(item, dict):
  30. print("Error: List elements must be dictionaries.")
  31. return []
  32. seen = set()
  33. deduplicated_data = []
  34. for record in data:
  35. # Define the key to check for duplicates. Adapt as needed.
  36. key_to_check = 'id' # Change this to your unique identifier key
  37. if key_to_check not in record:
  38. print(f"Warning: Record missing key '{key_to_check}'. Skipping.")
  39. continue
  40. value = record[key_to_check]
  41. if value not in seen:
  42. deduplicated_data.append(record)
  43. seen.add(value)
  44. else:
  45. print(f"Duplicate record found with key '{key_to_check}': {value}. Skipping.")
  46. return deduplicated_data
  47. if __name__ == '__main__':
  48. # Example Usage:
  49. json_string = """
  50. [
  51. {"id": 1, "name": "Alice", "age": 30},
  52. {"id": 2, "name": "Bob", "age": 25},
  53. {"id": 1, "name": "Alice", "age": 30},
  54. {"id": 3, "name": "Charlie", "age": 35},
  55. {"name": "David", "age": 40}
  56. ]
  57. """
  58. deduplicated_records = deduplicate_json(json_string)
  59. print(json.dumps(deduplicated_records, indent=4))
  60. #Example with a list of dictionaries:
  61. data = [
  62. {"id": 4, "name": "Eve", "age": 28},
  63. {"id": 5, "name": "Frank", "age": 42},
  64. {"id": 4, "name": "Eve", "age": 28}
  65. ]
  66. deduplicated_records = deduplicate_json(data)
  67. print(json.dumps(deduplicated_records, indent=4))
  68. #Example with invalid JSON:
  69. invalid_json = "{'name': 'John'}"
  70. deduplicated_records = deduplicate_json(invalid_json)
  71. print(deduplicated_records)

Add your comment