1. import re
  2. def sanitize_file_contents(file_content, allowed_chars=None, min_length=0, max_length=None):
  3. """
  4. Sanitizes file contents for validation checks with fallback logic.
  5. Args:
  6. file_content (str): The content of the file to sanitize.
  7. allowed_chars (str, optional): A string containing the characters allowed in the file content.
  8. If None, all characters are allowed. Defaults to None.
  9. min_length (int, optional): The minimum length of the file content. Defaults to 0.
  10. max_length (int, optional): The maximum length of the file content. Defaults to None (no limit).
  11. Returns:
  12. str: The sanitized file content. Returns the original content if sanitization fails.
  13. """
  14. if allowed_chars is None:
  15. allowed_chars = r"^\w+$" # Allow alphanumeric characters
  16. if not isinstance(file_content, str):
  17. return file_content #or raise TypeError("Input must be a string")
  18. if not re.match(allowed_chars, file_content):
  19. # Fallback: Remove invalid characters
  20. sanitized_content = re.sub(r'[^\w\s]', '', file_content) #remove special characters
  21. if not re.match(allowed_chars, sanitized_content):
  22. return file_content #return original if still invalid
  23. if len(file_content) < min_length:
  24. # Fallback: Pad with spaces
  25. sanitized_content = file_content.ljust(min_length, ' ')
  26. if max_length is not None and len(file_content) > max_length:
  27. # Fallback: Truncate
  28. sanitized_content = file_content[:max_length]
  29. return sanitized_content

Add your comment