import pandas as pd
import os
import json
def process_batch(file_path, limit=1000):
    """Process a single CSV file, capped at *limit* rows.

    Reads the file, truncates it to the first *limit* rows if needed,
    sums the ``value`` column, and writes the processed rows to
    ``<stem>_processed.csv`` next to the input file.

    Args:
        file_path (str): Path to the CSV file.
        limit (int): Maximum number of rows to process.

    Returns:
        The sum of the ``value`` column over the processed rows, or
        ``None`` if the file could not be processed.
    """
    try:
        df = pd.read_csv(file_path)
        if len(df) > limit:
            print(f"File {file_path} exceeds processing limit ({limit}). Processing first {limit} rows.")
            df = df.head(limit)  # Process only the first 'limit' rows.
        # Example aggregation; assumes the CSV has a 'value' column
        # (a missing column raises KeyError and is reported below).
        total_value = df['value'].sum()
        print(f"Processed {len(df)} rows from {file_path}. Total value: {total_value}")
        # to_csv accepts a path and manages the file handle itself, so
        # the manual open() wrapper of the original is unnecessary.
        out_path = f"{os.path.splitext(file_path)[0]}_processed.csv"
        df.to_csv(out_path, index=False)
        return total_value
    except Exception as e:
        # Per-file boundary handler: report and continue so one bad
        # file does not abort the whole batch.
        print(f"Error processing {file_path}: {e}")
        return None
def main(input_dir="data", processing_limit=1000):
    """Process every CSV file in *input_dir* with a shared row limit.

    Args:
        input_dir (str): Directory to scan for ``.csv`` files
            (defaults to "data", matching the original behavior).
        processing_limit (int): Row cap forwarded to process_batch().
    """
    for filename in os.listdir(input_dir):
        # Only CSV files are batch inputs; skip everything else.
        if filename.endswith(".csv"):
            file_path = os.path.join(input_dir, filename)
            process_batch(file_path, limit=processing_limit)
if __name__ == "__main__":
    # Seed a 'data' directory with sample files so the demo runs out of
    # the box. exist_ok avoids the check-then-create race of the
    # original `if not os.path.exists(...)` guard.
    os.makedirs("data", exist_ok=True)
    # BUG FIX: the original built the sample data with a dict
    # comprehension ({'value': [1, 2, 3, 4, 5] for _ in range(2000)}),
    # which collapses to a single 'value' key holding just 5 rows.
    # Repeating the pattern produces the intended row counts.
    df1 = pd.DataFrame({'value': [1, 2, 3, 4, 5] * 400})  # 2000 rows
    df1.to_csv("data/file1.csv", index=False)
    df2 = pd.DataFrame({'value': [6, 7, 8, 9, 10] * 100})  # 500 rows
    df2.to_csv("data/file2.csv", index=False)
    main()
# TODO: Add your comment