import json
def deduplicate_json(json_data, key='id'):
    """Deduplicate a list of records by the value stored under ``key``.

    The first record carrying a given key value is kept; later records with
    an equal value are dropped. Records that do not contain ``key`` at all
    are skipped (they never appear in the result). Diagnostics are written
    to stdout with ``print``.

    Args:
        json_data (str or list): A JSON string that decodes to a list of
            objects, or an already-decoded list of dictionaries.
        key (str): Dictionary key whose value identifies a record.
            Defaults to ``'id'``.

    Returns:
        list: The surviving dictionaries in their original order. An empty
        list is returned when the input is not a string or list, is not
        valid JSON, does not decode to a list, or contains any element
        that is not a dictionary.
    """
    if isinstance(json_data, str):
        try:
            data = json.loads(json_data)
        except json.JSONDecodeError:
            print("Error: Invalid JSON string.")
            return []
    elif isinstance(json_data, list):
        data = json_data
    else:
        print("Error: Input must be a JSON string or a list of dictionaries.")
        return []

    # A JSON string may legally decode to an object, number, etc.
    if not isinstance(data, list):
        print("Error: JSON data must represent a list of dictionaries.")
        return []

    # Sanity check: reject the whole payload if any element is not a dict.
    if not all(isinstance(item, dict) for item in data):
        print("Error: List elements must be dictionaries.")
        return []

    seen = set()
    # Key values such as lists or dicts cannot live in a set; they are
    # tracked separately and compared by equality so they neither crash
    # the function nor escape deduplication.
    seen_unhashable = []
    deduplicated_data = []

    for record in data:
        if key not in record:
            print(f"Warning: Record missing key '{key}'. Skipping.")
            continue

        value = record[key]
        try:
            is_duplicate = value in seen
            if not is_duplicate:
                seen.add(value)
        except TypeError:
            # Raised when ``value`` is unhashable.
            is_duplicate = value in seen_unhashable
            if not is_duplicate:
                seen_unhashable.append(value)

        if is_duplicate:
            print(f"Duplicate record found with key '{key}': {value}. Skipping.")
        else:
            deduplicated_data.append(record)

    return deduplicated_data
if __name__ == '__main__':
    # Demo 1: a JSON string holding one repeated id and one record
    # that has no 'id' field.
    sample_text = """
[
{"id": 1, "name": "Alice", "age": 30},
{"id": 2, "name": "Bob", "age": 25},
{"id": 1, "name": "Alice", "age": 30},
{"id": 3, "name": "Charlie", "age": 35},
{"name": "David", "age": 40}
]
"""
    unique_from_text = deduplicate_json(sample_text)
    print(json.dumps(unique_from_text, indent=4))

    # Demo 2: an already-decoded list of dictionaries with a repeated id.
    sample_records = [
        {"id": 4, "name": "Eve", "age": 28},
        {"id": 5, "name": "Frank", "age": 42},
        {"id": 4, "name": "Eve", "age": 28},
    ]
    unique_from_list = deduplicate_json(sample_records)
    print(json.dumps(unique_from_list, indent=4))

    # Demo 3: single-quoted text is not valid JSON.
    malformed_text = "{'name': 'John'}"
    unique_from_malformed = deduplicate_json(malformed_text)
    print(unique_from_malformed)
# Add your comment