1. import pandas as pd
  2. import os
  3. import json
  4. def process_batch(file_path, limit=1000):
  5. """Processes a single file with metadata limits.
  6. Args:
  7. file_path (str): Path to the file.
  8. limit (int): Maximum number of rows to process.
  9. """
  10. try:
  11. # Read the file into a Pandas DataFrame
  12. df = pd.read_csv(file_path)
  13. # Check if the DataFrame exceeds the limit
  14. if len(df) > limit:
  15. print(f"File {file_path} exceeds processing limit ({limit}). Processing first {limit} rows.")
  16. df = df.head(limit) #Process only the first 'limit' rows.
  17. # Perform some operation on the DataFrame (example: calculate sum of a column)
  18. total_value = df['value'].sum() #Assuming 'value' is a column in the dataframe
  19. print(f"Processed {len(df)} rows from {file_path}. Total value: {total_value}")
  20. # Save the results (optional)
  21. with open(f"{os.path.splitext(file_path)[0]}_processed.csv", "w") as f:
  22. df.to_csv(f, index=False)
  23. except Exception as e:
  24. print(f"Error processing {file_path}: {e}")
  25. def main():
  26. """Main function to process multiple files with metadata limits."""
  27. # Define the directory containing the files
  28. input_dir = "data" # Replace with your directory
  29. # Define the processing limit
  30. processing_limit = 1000
  31. # Iterate through all files in the directory
  32. for filename in os.listdir(input_dir):
  33. if filename.endswith(".csv"): # Process only CSV files
  34. file_path = os.path.join(input_dir, filename)
  35. process_batch(file_path, limit=processing_limit)
  36. if __name__ == "__main__":
  37. # Create the 'data' directory if it doesn't exist, and populate it with sample files
  38. if not os.path.exists("data"):
  39. os.makedirs("data")
  40. #Sample CSV file creation
  41. data1 = {'value': [1, 2, 3, 4, 5] for _ in range(2000)}
  42. df1 = pd.DataFrame(data1)
  43. df1.to_csv("data/file1.csv", index=False)
  44. data2 = {'value': [6, 7, 8, 9, 10] for _ in range(500)}
  45. df2 = pd.DataFrame(data2)
  46. df2.to_csv("data/file2.csv", index=False)
  47. main()

Add your comment