import argparse
import json
import os
  4. def transform_data(input_file, output_file, schema_file, overrides):
  5. """
  6. Transforms data from an input file based on a schema and manual overrides.
  7. Args:
  8. input_file (str): Path to the input data file (JSON).
  9. output_file (str): Path to the output data file (JSON).
  10. schema_file (str): Path to the schema file (JSON).
  11. overrides (dict): Dictionary of field overrides. Key is field name, value is new value.
  12. """
  13. try:
  14. with open(input_file, 'r') as f:
  15. data = json.load(f)
  16. except FileNotFoundError:
  17. print(f"Error: Input file not found: {input_file}")
  18. return
  19. except json.JSONDecodeError:
  20. print(f"Error: Invalid JSON in input file: {input_file}")
  21. return
  22. try:
  23. with open(schema_file, 'r') as f:
  24. schema = json.load(f)
  25. except FileNotFoundError:
  26. print(f"Error: Schema file not found: {schema_file}")
  27. return
  28. except json.JSONDecodeError:
  29. print(f"Error: Invalid JSON in schema file: {schema_file}")
  30. return
  31. transformed_data = []
  32. for item in data:
  33. transformed_item = {}
  34. for field in schema['fields']:
  35. field_name = field['name']
  36. field_type = field['type']
  37. #Apply override if present
  38. if field_name in overrides:
  39. transformed_item[field_name] = overrides[field_name]
  40. continue
  41. if field_type == 'string':
  42. transformed_item[field_name] = str(item.get(field_name, "")) #Handle missing fields
  43. elif field_type == 'integer':
  44. try:
  45. transformed_item[field_name] = int(item.get(field_name, 0)) #Handle missing fields
  46. except (ValueError, TypeError):
  47. transformed_item[field_name] = 0 #Default value if conversion fails
  48. elif field_type == 'float':
  49. try:
  50. transformed_item[field_name] = float(item.get(field_name, 0.0)) #Handle missing fields
  51. except (ValueError, TypeError):
  52. transformed_item[field_name] = 0.0 #Default value if conversion fails
  53. elif field_type == 'boolean':
  54. transformed_item[field_name] = bool(item.get(field_name, False)) #Handle missing fields
  55. elif field_type == 'array':
  56. transformed_item[field_name] = item.get(field_name, []) #Handle missing fields
  57. else:
  58. transformed_item[field_name] = item.get(field_name, "") #Default to empty string for unknown types
  59. transformed_data.append(transformed_item)
  60. try:
  61. with open(output_file, 'w') as f:
  62. json.dump(transformed_data, f, indent=4)
  63. except IOError:
  64. print(f"Error: Could not write to output file: {output_file}")
  65. return
  66. print(f"Data transformed and saved to: {output_file}")
  67. if __name__ == "__main__":
  68. parser = argparse.ArgumentParser(description="Transform data from a JSON file.")
  69. parser.add_argument("input_file", help="Path to the input JSON file.")
  70. parser.add_argument("output_file", help="Path to the output JSON file.")
  71. parser.add_argument("schema_file", help="Path to the schema JSON file.")
  72. parser.add_argument("--overrides", help="Path to a JSON file containing field overrides.")
  73. args = parser.parse_args()
  74. overrides = {}
  75. if args.overrides:
  76. try:
  77. with open(args.overrides, 'r') as f:
  78. overrides = json.load(f)
  79. except FileNotFoundError:
  80. print(f"Error: Overrides file not found: {args.overrides}")
  81. exit(1)
  82. except json.JSONDecodeError:
  83. print(f"Error: Invalid JSON in overrides file: {args.overrides}")
  84. exit(1)

Add your comment