import pandas as pd
import numpy as np
def flag_cookie_anomalies(df: pd.DataFrame, threshold: float = 3) -> pd.DataFrame:
    """Flag cookie values that lie far from the mean of the column.

    A value is flagged when it is more than ``threshold`` standard
    deviations above or below the mean of the 'cookie_value' column. The
    mean and the (sample) standard deviation are computed over the whole
    column, not per 'cookie_name'.

    The input DataFrame is not modified; the result is built on a copy.

    Args:
        df: DataFrame containing cookie data. Required columns:
            'cookie_name', 'cookie_value', 'timestamp'.
        threshold: Number of standard deviations from the mean beyond
            which a value is considered anomalous.

    Returns:
        A copy of ``df`` whose 'cookie_value' column has been converted to
        a numeric dtype and which has an added boolean 'is_anomalous'
        column.

    Raises:
        TypeError: If ``df`` is not a pandas DataFrame.
        ValueError: If a required column is missing, or if 'cookie_value'
            contains entries that cannot be parsed as numbers.
    """
    if not isinstance(df, pd.DataFrame):
        raise TypeError("Input must be a pandas DataFrame.")

    required_columns = ['cookie_name', 'cookie_value', 'timestamp']
    if not all(col in df.columns for col in required_columns):
        raise ValueError(f"DataFrame must contain columns: {required_columns}")

    # Convert before copying so a failed conversion costs nothing and the
    # caller's frame is never left half-modified.
    try:
        values = pd.to_numeric(df['cookie_value'])
    except ValueError as err:
        raise ValueError(
            "The 'cookie_value' column must contain numeric values."
        ) from err

    mean = values.mean()
    std = values.std()
    margin = threshold * std

    # Comparisons against NaN are False, so NaN values (and every value when
    # the standard deviation is NaN, e.g. fewer than two rows) are not flagged.
    is_anomalous = (values > (mean + margin)) | (values < (mean - margin))

    # Work on a copy: the caller's DataFrame must stay untouched.
    result = df.copy()
    result['cookie_value'] = values
    result['is_anomalous'] = is_anomalous
    return result
if __name__ == '__main__':
    # Example usage. One row carries the non-numeric value 'true', so this
    # run exercises the error-reporting path of flag_cookie_anomalies.
    rows = [
        ('session_id', 12345, '2024-01-01 10:00:00'),
        ('user_id', 67890, '2024-01-01 10:01:00'),
        ('preferences', 10, '2024-01-01 10:02:00'),
        ('login_status', 'true', '2024-01-01 10:03:00'),
        ('cart_items', 5, '2024-01-01 10:04:00'),
    ]
    df = pd.DataFrame(rows, columns=['cookie_name', 'cookie_value', 'timestamp'])

    try:
        anomalies_df = flag_cookie_anomalies(df)
        print(anomalies_df)
    except (TypeError, ValueError) as e:
        # Validation failures are reported rather than crashing the demo.
        print(f"Error: {e}")
# Add your comment