import re
import logging
# Configure the root logger at INFO level as soon as this module is loaded.
logging.basicConfig(level=logging.INFO)
# Matches every character that is not an ASCII letter, an ASCII digit or
# whitespace. Compiled once so it is not looked up again for every value.
_DISALLOWED_CHARS = re.compile(r'[^a-zA-Z0-9\s]')


def clean_form_data(data, dry_run=False):
    """
    Cleans data from web forms for hypothesis validation.

    String values are lowercased, stripped of every character that is not an
    ASCII letter, an ASCII digit or whitespace, and finally trimmed of
    leading/trailing whitespace. Note that this also removes punctuation such
    as ``@``, ``.`` and ``-`` and any non-ASCII letters. Numeric values
    (``int``/``float``, which includes ``bool``) and ``None`` are kept as
    they are. Any other value is converted with ``str()`` and a warning is
    logged for its key.

    Args:
        data (dict): A dictionary containing the form data.
        dry_run (bool): If True, print the original and the cleaned data and
            return the original ``data`` object instead of the cleaned copy.

    Returns:
        dict: A new dictionary with the cleaned data, or ``data`` itself
        (unmodified) when ``dry_run`` is True.
    """
    cleaned_data = {}
    for key, value in data.items():
        if isinstance(value, str):
            text = _DISALLOWED_CHARS.sub('', value.lower())
            # Trim last: removing punctuation at the edges (e.g. "hello !")
            # can expose whitespace that an earlier strip() would miss.
            cleaned_data[key] = text.strip()
        elif value is None or isinstance(value, (int, float)):
            # Numbers and nulls pass through untouched.
            cleaned_data[key] = value
        else:
            cleaned_data[key] = str(value)
            logging.warning("Type conversion detected for key: %s", key)

    if dry_run:
        print("Dry run mode: Changes will not be applied.")
        print("Original Data:", data)
        print("Cleaned Data:", cleaned_data)
        return data
    return cleaned_data
if __name__ == '__main__':
    # Demo: run the cleaner on a sample submission, first as a dry run and
    # then for real, printing whatever each call returns.
    sample_submission = dict([
        ("name", " John Doe! "),
        ("email", "john.doe@example.com"),
        ("age", "30"),
        ("city", None),
        ("phone", "123-456-7890"),
        ("other_field", 123.45),
    ])
    for dry_run_flag in (True, False):
        outcome = clean_form_data(sample_submission, dry_run=dry_run_flag)
        print("Cleaned Data:", outcome)
# Add your comment