import re
# Leading ISO-8601-style timestamp: a date, optionally followed by a time
# (space- or 'T'-separated) with optional fractional seconds and UTC offset,
# plus the whitespace that separates it from the rest of the line.
_TIMESTAMP_RE = re.compile(
    r'^\d{4}-\d{2}-\d{2}'
    r'(?:[T ]\d{2}:\d{2}:\d{2}(?:[.,]\d+)?(?:Z|[+-]\d{2}:?\d{2})?)?'
    r'(?:\s+|$)'
)

# Built-in rewrites. An override with the same key replaces the default value.
_DEFAULT_REPLACEMENTS = {'ERROR': 'WARNING', 'DEBUG': 'INFO'}


def _load_overrides(override_file_path):
    """Read ``key=value`` replacement pairs from an override file.

    Blank lines are skipped. A line without ``=`` or with an empty key is
    reported and skipped; the remaining lines are still loaded. A missing
    file is reported and yields no overrides.

    Args:
        override_file_path (str): Path to the override file.

    Returns:
        dict: Mapping of text to replace -> replacement text, in file order.
    """
    overrides = {}
    try:
        with open(override_file_path, 'r') as f:
            for raw_line in f:
                entry = raw_line.strip()
                if not entry:
                    continue
                key, separator, value = entry.partition('=')
                key = key.strip()
                # An empty key must be rejected: str.replace('', value) would
                # insert the value between every character of the line.
                if not separator or not key:
                    print(f"Warning: Invalid format in override file: {override_file_path}")
                    continue
                overrides[key] = value.strip()
    except FileNotFoundError:
        print(f"Warning: Override file not found: {override_file_path}")
    return overrides


def clean_log_data(log_file_path, override_file_path=None):
    """
    Cleans log data from a file, allowing for manual overrides.

    Each line is stripped of surrounding whitespace, a leading
    ``YYYY-MM-DD[ HH:MM:SS]`` timestamp is removed if present (lines that do
    not start with one are left intact), and then text replacements are
    applied in order: the built-in ``ERROR -> WARNING`` and ``DEBUG -> INFO``
    rewrites first, followed by the overrides in file order. An override whose
    key is ``ERROR`` or ``DEBUG`` replaces the corresponding built-in rewrite.

    Args:
        log_file_path (str): Path to the log file.
        override_file_path (str, optional): Path to a file containing overrides
            (``key=value`` pairs for replacement, one per line).
            Defaults to None.

    Returns:
        list: A list of cleaned log lines. An empty list is returned (and a
            message printed) if the log file is missing or cannot be read.
    """
    overrides = _load_overrides(override_file_path) if override_file_path else {}

    # Defaults keep their position at the front; overrides with the same key
    # only swap the replacement value, all other overrides are appended.
    replacements = {**_DEFAULT_REPLACEMENTS, **overrides}

    cleaned_lines = []
    try:
        with open(log_file_path, 'r') as f:
            for line in f:
                cleaned_line = _TIMESTAMP_RE.sub('', line.strip(), count=1)
                for old, new in replacements.items():
                    cleaned_line = cleaned_line.replace(old, new)
                cleaned_lines.append(cleaned_line)
    except FileNotFoundError:
        print(f"Error: Log file not found: {log_file_path}")
        return []
    except (OSError, ValueError) as e:
        # OSError: unreadable file; ValueError: undecodable bytes or bad path.
        print(f"An error occurred: {e}")
        return []
    return cleaned_lines
if __name__ == '__main__':
    # Demo run: write a small log and an override file into the current
    # directory, clean the log with those overrides, and print each result.
    log_file = 'sample.log'
    override_file = 'overrides.txt'

    sample_log_lines = [
        "2023-10-26 10:00:00 INFO This is a log message.\n",
        "2023-10-26 10:00:05 ERROR Something went wrong.\n",
        "2023-10-26 10:00:10 DEBUG This is a debug message.\n",
        "Another log message with ERROR.\n",
        "Some other text.\n",
    ]
    sample_override_lines = [
        "ERROR=CRITICAL\n",
        "DEBUG=VERBOSE\n",
    ]

    with open(log_file, 'w') as log_handle:
        log_handle.writelines(sample_log_lines)

    with open(override_file, 'w') as override_handle:
        override_handle.writelines(sample_override_lines)

    for cleaned_entry in clean_log_data(log_file, override_file):
        print(cleaned_entry)
# Add your comment