import re
import csv
def transform_file(input_filepath, output_filepath, transform_function):
    """
    Transforms data in a file using a provided function.

    Every line of the input file is passed to ``transform_function`` and the
    result is written to the output file. Two safeguards keep the output
    well-formed:

    * If the function returns ``None`` for a line, that line is skipped
      instead of aborting the whole run with a write error.
    * If the input line ended with a newline but the transformed text does
      not, a newline is appended so that output lines are not fused together.

    Errors are reported with ``print`` rather than raised.

    Args:
        input_filepath (str): Path to the input file.
        output_filepath (str): Path to the output file.
        transform_function (callable): Function to apply to each line of data.
            Takes a single line (str, including its trailing newline if any)
            and returns the transformed line (str), or None to drop the line.
    """
    try:
        with open(input_filepath, 'r', encoding='utf-8') as infile, \
                open(output_filepath, 'w', encoding='utf-8') as outfile:
            for line in infile:
                transformed_line = transform_function(line)

                # A transform may signal "nothing to emit" with None;
                # writing None would raise TypeError and stop processing.
                if transformed_line is None:
                    continue

                # Preserve the one-record-per-line layout when the transform
                # removed the terminator (e.g. via str.strip()).
                if line.endswith('\n') and not transformed_line.endswith('\n'):
                    transformed_line += '\n'

                outfile.write(transformed_line)
    except FileNotFoundError:
        print(f"Error: Input file not found at {input_filepath}")
    except Exception as e:
        # Best-effort boundary: report the failure instead of propagating it.
        print(f"An error occurred: {e}")
def clean_data(line):
    """
    Returns the line with whitespace trimmed from both ends.

    Trailing newline characters count as whitespace and are removed too.
    """
    without_leading = line.lstrip()
    return without_leading.rstrip()
def convert_to_uppercase(line):
    """
    Returns an upper-cased copy of the given line.
    """
    uppercased = line.upper()
    return uppercased
def replace_pattern(line, pattern, replacement):
    """
    Substitutes every regex match of ``pattern`` in ``line`` with ``replacement``.

    Returns the resulting string; the line is returned unchanged when the
    pattern does not match.
    """
    compiled = re.compile(pattern)
    return compiled.sub(replacement, line)
def extract_field(line, field_regex):
    """
    Extracts a specific field from a line using a regular expression.

    The first match of ``field_regex`` in ``line`` is located with
    ``re.search``. If the pattern defines at least one capturing group, the
    text of the first group is returned; if it defines none, the whole match
    is returned instead of raising ``IndexError``.

    Args:
        line (str): Text to search.
        field_regex (str): Regular expression describing the field.

    Returns:
        The extracted field (str), or None if the pattern does not match
        (or if the first group did not take part in the match).
    """
    match = re.search(field_regex, line)
    if match is None:
        return None

    # Patterns without a capturing group have no group 1; use the full match.
    if match.re.groups == 0:
        return match.group(0)

    return match.group(1)
def split_and_combine(line, delimiter, combine_char):
    """
    Breaks ``line`` apart at each ``delimiter`` and rejoins the pieces using
    ``combine_char`` as the separator.
    """
    return combine_char.join(line.split(delimiter))
if __name__ == '__main__':
    # Example usage: run each demonstration transform over "input.txt",
    # writing every result to its own output file, in the order listed.
    demo_jobs = [
        # 1. Clean data and save to a new file
        ("output_cleaned.txt", clean_data),
        # 2. Convert to uppercase
        ("output_uppercase.txt", convert_to_uppercase),
        # 3. Replace a pattern
        ("output_replaced.txt",
         lambda line: replace_pattern(line, r"old_pattern", "new_string")),
        # 4. Extract a field using regex
        ("output_extracted.txt",
         lambda line: extract_field(line, r"(\d+)\s+name")),
        # 5. Split and combine
        ("output_combined.txt",
         lambda line: split_and_combine(line, ",", "-")),
    ]
    for destination, transform in demo_jobs:
        transform_file("input.txt", destination, transform)
# NOTE: stray "Add your comment" text left over from the page this file was copied from; not part of the program.