1. import pandas as pd
  2. import numpy as np
  3. def flag_cookie_anomalies(df, threshold=3):
  4. """
  5. Flags potential cookie anomalies in a DataFrame.
  6. Args:
  7. df (pd.DataFrame): DataFrame containing cookie data. Expected columns: 'cookie_name', 'cookie_value', 'timestamp'.
  8. threshold (int): Number of standard deviations from the mean to consider a value anomalous.
  9. Returns:
  10. pd.DataFrame: DataFrame with an added 'is_anomalous' column, indicating if a cookie value is anomalous.
  11. """
  12. # Input validation: Check if DataFrame is valid and contains required columns
  13. if not isinstance(df, pd.DataFrame):
  14. raise TypeError("Input must be a pandas DataFrame.")
  15. required_columns = ['cookie_name', 'cookie_value', 'timestamp']
  16. if not all(col in df.columns for col in required_columns):
  17. raise ValueError(f"DataFrame must contain columns: {required_columns}")
  18. # Convert cookie_value to numeric, handling potential errors
  19. try:
  20. df['cookie_value'] = pd.to_numeric(df['cookie_value'])
  21. except ValueError:
  22. raise ValueError("The 'cookie_value' column must contain numeric values.")
  23. # Calculate mean and standard deviation of cookie values
  24. mean = df['cookie_value'].mean()
  25. std = df['cookie_value'].std()
  26. # Identify anomalous cookie values
  27. df['is_anomalous'] = (df['cookie_value'] > (mean + threshold * std)) | (df['cookie_value'] < (mean - threshold * std))
  28. return df
  29. if __name__ == '__main__':
  30. #Example Usage
  31. data = {'cookie_name': ['session_id', 'user_id', 'preferences', 'login_status', 'cart_items'],
  32. 'cookie_value': [12345, 67890, 10, 'true', 5],
  33. 'timestamp': ['2024-01-01 10:00:00', '2024-01-01 10:01:00', '2024-01-01 10:02:00', '2024-01-01 10:03:00', '2024-01-01 10:04:00']}
  34. df = pd.DataFrame(data)
  35. try:
  36. anomalies_df = flag_cookie_anomalies(df)
  37. print(anomalies_df)
  38. except (TypeError, ValueError) as e:
  39. print(f"Error: {e}")

Add your comment