1. import pandas as pd
  2. def merge_date_datasets(df1, df2, date_column1, date_column2, merge_type='inner'):
  3. """
  4. Merges two pandas DataFrames based on date columns, handling edge cases.
  5. Args:
  6. df1 (pd.DataFrame): The first DataFrame.
  7. df2 (pd.DataFrame): The second DataFrame.
  8. date_column1 (str): Name of the date column in df1.
  9. date_column2 (str): Name of the date column in df2.
  10. merge_type (str, optional): Type of merge ('inner', 'outer', 'left', 'right'). Defaults to 'inner'.
  11. Returns:
  12. pd.DataFrame: The merged DataFrame. Returns None if either dataframe is invalid.
  13. Raises:
  14. TypeError: if df1 or df2 are not pandas DataFrames.
  15. ValueError: if date_column1 or date_column2 are not strings.
  16. """
  17. if not isinstance(df1, pd.DataFrame) or not isinstance(df2, pd.DataFrame):
  18. raise TypeError("df1 and df2 must be pandas DataFrames.")
  19. if not isinstance(date_column1, str) or not isinstance(date_column2, str):
  20. raise ValueError("date_column1 and date_column2 must be strings.")
  21. # Convert date columns to datetime objects, handling potential errors
  22. try:
  23. df1[date_column1] = pd.to_datetime(df1[date_column1], errors='coerce') #coerce invalid values to NaT
  24. df2[date_column2] = pd.to_datetime(df2[date_column2], errors='coerce') #coerce invalid values to NaT
  25. except KeyError as e:
  26. print(f"Error: Date column not found: {e}")
  27. return None
  28. except Exception as e:
  29. print(f"Error converting date columns: {e}")
  30. return None
  31. #Handle cases where date columns might be empty
  32. if df1[date_column1].empty or df2[date_column2].empty:
  33. print("Warning: One or both date columns are empty. Returning empty dataframe")
  34. return pd.DataFrame() #Return empty dataframe
  35. # Perform the merge
  36. try:
  37. merged_df = pd.merge(df1, df2, left_on=date_column1, right_on=date_column2, how=merge_type)
  38. except Exception as e:
  39. print(f"Error during merge: {e}")
  40. return None
  41. return merged_df

Add your comment