import json
import argparse
import os
def _load_json(path, label):
    """Read and parse the JSON document stored at ``path``.

    Args:
        path (str): File to read.
        label (str): Role of the file ("input", "schema"), used in error messages.

    Returns:
        tuple: ``(True, parsed_value)`` on success, or ``(False, None)`` after an
        error message has been printed. A flag is returned alongside the value
        because a JSON document may legitimately parse to ``None``.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the platform default.
        with open(path, 'r', encoding='utf-8') as f:
            return True, json.load(f)
    except FileNotFoundError:
        print(f"Error: {label.capitalize()} file not found: {path}")
    except (json.JSONDecodeError, UnicodeDecodeError):
        print(f"Error: Invalid JSON in {label} file: {path}")
    except OSError:
        # Permission problems, path is a directory, and similar read failures.
        print(f"Error: Could not read {label} file: {path}")
    return False, None


def _to_bool(raw):
    """Convert ``raw`` to a boolean, recognising textual false values.

    Plain ``bool("false")`` is ``True`` because any non-empty string is truthy,
    so strings are compared against the usual spellings of "false" first.
    Every other value falls back to ordinary truthiness.
    """
    if isinstance(raw, str):
        return raw.strip().lower() not in {"", "0", "false", "no", "off", "n", "f"}
    return bool(raw)


def _coerce_field(item, field_name, field_type):
    """Return ``item[field_name]`` coerced to ``field_type``, or that type's default.

    A missing key and an explicit JSON ``null`` are treated the same way for the
    typed fields, so ``null`` never turns into the string ``"None"``.
    """
    raw = item.get(field_name)
    if field_type == 'string':
        return "" if raw is None else str(raw)
    if field_type == 'integer':
        try:
            return int(raw)
        except (ValueError, TypeError, OverflowError):
            # OverflowError: int(float('inf')), e.g. a JSON ``Infinity`` value.
            return 0
    if field_type == 'float':
        try:
            return float(raw)
        except (ValueError, TypeError, OverflowError):
            # OverflowError: an integer too large to be represented as a float.
            return 0.0
    if field_type == 'boolean':
        return _to_bool(raw)
    if field_type == 'array':
        return [] if raw is None else raw
    # Unknown types pass the value through untouched; only a missing key
    # defaults to the empty string.
    return item.get(field_name, "")


def transform_data(input_file, output_file, schema_file, overrides):
    """
    Transforms data from an input file based on a schema and manual overrides.

    The input file must hold a JSON array of objects. Every output record has
    exactly the fields listed in the schema, in schema order; each value is the
    override for that field when one exists, otherwise the input value coerced
    to the declared type (or the type's default when it is missing, null or
    cannot be converted).

    All failures are reported with a printed ``Error: ...`` message followed by
    an early return; nothing is written to the output file in that case.

    Args:
        input_file (str): Path to the input data file (JSON).
        output_file (str): Path to the output data file (JSON).
        schema_file (str): Path to the schema file (JSON). Expected shape:
            ``{"fields": [{"name": ..., "type": ...}, ...]}``.
        overrides (dict): Dictionary of field overrides. Key is field name, value is new value.
            ``None`` is accepted and means "no overrides".

    Returns:
        None
    """
    ok, data = _load_json(input_file, "input")
    if not ok:
        return
    ok, schema = _load_json(schema_file, "schema")
    if not ok:
        return

    if not isinstance(data, list):
        print(f"Error: Input data must be a JSON array of objects: {input_file}")
        return

    fields = schema.get('fields') if isinstance(schema, dict) else None
    if not isinstance(fields, list) or not all(
        isinstance(field, dict) and 'name' in field and 'type' in field
        for field in fields
    ):
        print(f"Error: Schema must contain a 'fields' list of objects with 'name' and 'type': {schema_file}")
        return

    # Resolve the schema once instead of re-reading it for every record.
    field_specs = [(field['name'], field['type']) for field in fields]
    if overrides is None:
        overrides = {}

    transformed_data = []
    for index, item in enumerate(data):
        if not isinstance(item, dict):
            print(f"Error: Record {index} in input file is not a JSON object: {input_file}")
            return
        transformed_data.append({
            # An override always wins over the value found in the record.
            name: overrides[name] if name in overrides else _coerce_field(item, name, kind)
            for name, kind in field_specs
        })

    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(transformed_data, f, indent=4)
    except OSError:
        print(f"Error: Could not write to output file: {output_file}")
        return
    print(f"Data transformed and saved to: {output_file}")
if __name__ == "__main__":
    # Command-line entry point: parse the file paths, load the optional
    # overrides file, then run the transformation.
    parser = argparse.ArgumentParser(description="Transform data from a JSON file.")
    parser.add_argument("input_file", help="Path to the input JSON file.")
    parser.add_argument("output_file", help="Path to the output JSON file.")
    parser.add_argument("schema_file", help="Path to the schema JSON file.")
    parser.add_argument("--overrides", help="Path to a JSON file containing field overrides.")
    args = parser.parse_args()

    overrides = {}
    if args.overrides:
        try:
            with open(args.overrides, 'r', encoding='utf-8') as f:
                overrides = json.load(f)
        except FileNotFoundError:
            print(f"Error: Overrides file not found: {args.overrides}")
            # SystemExit is used directly: the ``exit`` helper is installed by
            # the ``site`` module and is not guaranteed to exist.
            raise SystemExit(1)
        except json.JSONDecodeError:
            print(f"Error: Invalid JSON in overrides file: {args.overrides}")
            raise SystemExit(1)
        # Overrides are looked up by field name, so only a JSON object is usable.
        if not isinstance(overrides, dict):
            print(f"Error: Overrides file must contain a JSON object: {args.overrides}")
            raise SystemExit(1)

    # The original script stopped after loading the overrides and never
    # performed the transformation.
    transform_data(args.input_file, args.output_file, args.schema_file, overrides)