1. import pandas as pd
  2. def normalize_metadata(df, columns_to_normalize):
  3. """
  4. Normalizes metadata columns in a pandas DataFrame.
  5. Args:
  6. df (pd.DataFrame): The input DataFrame containing metadata.
  7. columns_to_normalize (list): A list of column names to normalize.
  8. Returns:
  9. pd.DataFrame: The DataFrame with normalized metadata. Returns original df if columns_to_normalize is empty.
  10. """
  11. if not columns_to_normalize:
  12. return df # Return original DataFrame if no columns to normalize
  13. for col in columns_to_normalize:
  14. if col in df.columns:
  15. # Convert column to numeric if possible
  16. try:
  17. df[col] = pd.to_numeric(df[col], errors='coerce')
  18. except ValueError:
  19. pass #if conversion fails, keep as is
  20. # Normalize the column (example: min-max scaling)
  21. min_val = df[col].min()
  22. max_val = df[col].max()
  23. if max_val - min_val > 0: # Avoid division by zero
  24. df[col] = (df[col] - min_val) / (max_val - min_val)
  25. else:
  26. df[col] = 0 # If min and max are equal, set to zero
  27. return df
  28. if __name__ == '__main__':
  29. # Example Usage
  30. data = {'col1': [10, 20, 30, 40, 50],
  31. 'col2': ['a', 'b', 'c', 'd', 'e'],
  32. 'col3': [1, 2, 3, 4, 5]}
  33. df = pd.DataFrame(data)
  34. columns_to_normalize = ['col1', 'col3'] # specify columns for normalization
  35. normalized_df = normalize_metadata(df.copy(), columns_to_normalize) # create a copy to avoid modifying original
  36. print(normalized_df)

Add your comment