1. import re
  2. def filter_data(filepath, keywords):
  3. """
  4. Filters data from a file based on specified keywords.
  5. Args:
  6. filepath (str): Path to the input file.
  7. keywords (list): A list of keywords to filter for.
  8. Returns:
  9. list: A list of lines from the file containing at least one keyword.
  10. Returns an empty list if file not found or if no keywords are provided.
  11. """
  12. if not keywords:
  13. return []
  14. try:
  15. with open(filepath, 'r') as f:
  16. lines = f.readlines()
  17. except FileNotFoundError:
  18. print(f"Error: File not found at {filepath}")
  19. return []
  20. filtered_lines = []
  21. for line in lines:
  22. # Convert line to lowercase for case-insensitive matching
  23. line_lower = line.lower()
  24. for keyword in keywords:
  25. if re.search(r'\b' + re.escape(keyword.lower()) + r'\b', line_lower): # Use regex for word boundary
  26. filtered_lines.append(line.strip()) # Add line, removing leading/trailing whitespace
  27. break # Avoid adding the same line multiple times if multiple keywords match
  28. return filtered_lines
  29. if __name__ == '__main__':
  30. #Example Usage
  31. filepath = "data.txt" # Replace with your file path
  32. keywords = ["apple", "banana", "orange"]
  33. #Create a dummy data.txt file for testing
  34. with open(filepath, "w") as f:
  35. f.write("This is an apple.\n")
  36. f.write("I like banana smoothies.\n")
  37. f.write("Orange juice is refreshing.\n")
  38. f.write("This line has nothing.\n")
  39. f.write("Apple pie is delicious.\n")
  40. filtered_data = filter_data(filepath, keywords)
  41. if filtered_data:
  42. for line in filtered_data:
  43. print(line)
  44. else:
  45. print("No matching data found.")

Add your comment