import re
def filter_data(filepath, keywords):
    """
    Filters data from a file based on specified keywords.

    Matching is case-insensitive and whole-token: a keyword matches only when
    it is not directly preceded or followed by a word character, so "apple"
    matches "an apple." but not "pineapple". Keywords that begin or end with
    punctuation (e.g. "c++") are matched as well. Empty-string keywords are
    ignored.

    Args:
        filepath (str): Path to the input file.
        keywords (list): A list of keywords to filter for.

    Returns:
        list: A list of lines from the file containing at least one keyword,
            in file order, each with leading/trailing whitespace removed.
            Returns an empty list if file not found or if no keywords are
            provided.
    """
    if not keywords:
        return []

    # Lowercase and escape every keyword once, up front, rather than once per
    # line. Materialising the list here also lets one-shot iterables (e.g.
    # generators) work for every line instead of only the first.
    escaped = [re.escape(keyword.lower()) for keyword in keywords if keyword]
    if not escaped:
        return []

    # Lookarounds are used instead of \b: \b requires a word character on one
    # side of the position, so it can never match around a keyword that starts
    # or ends with punctuation such as "c++". For ordinary word keywords the
    # two forms are equivalent.
    pattern = re.compile(r'(?<!\w)(?:' + '|'.join(escaped) + r')(?!\w)')

    try:
        with open(filepath, 'r') as f:
            # Stream the file line by line; each line is lowercased only for
            # the comparison, and the original text is what gets returned.
            return [line.strip() for line in f if pattern.search(line.lower())]
    except FileNotFoundError:
        print(f"Error: File not found at {filepath}")
        return []
if __name__ == '__main__':
    # Demo: write a small sample file, then print the lines that mention
    # any of the keywords.
    filepath = "data.txt"  # Replace with your file path
    keywords = ["apple", "banana", "orange"]

    # Sample content for the demo; note this overwrites any existing file
    # at `filepath`.
    sample_lines = [
        "This is an apple.\n",
        "I like banana smoothies.\n",
        "Orange juice is refreshing.\n",
        "This line has nothing.\n",
        "Apple pie is delicious.\n",
    ]
    with open(filepath, "w") as f:
        f.writelines(sample_lines)

    filtered_data = filter_data(filepath, keywords)
    if not filtered_data:
        print("No matching data found.")
    else:
        # One matching line per output row.
        print(*filtered_data, sep="\n")
# Add your comment