import pandas as pd
def normalize_metadata(df, columns_to_normalize):
    """
    Min-max scale selected metadata columns of a pandas DataFrame.

    Each requested column is coerced to numeric with ``pd.to_numeric``
    (individual values that cannot be parsed become NaN) and then rescaled
    as ``(x - min) / (max - min)``. The DataFrame is modified in place and
    also returned; pass a copy if the original must be preserved.

    Args:
        df (pd.DataFrame): The input DataFrame containing metadata. The
            selected columns are overwritten in place.
        columns_to_normalize (list): A list of column names to normalize.
            Names that are not columns of ``df`` are skipped.

    Returns:
        pd.DataFrame: The same ``df`` object with the selected columns
        normalized. Returned untouched if ``columns_to_normalize`` is empty.
        Columns that hold data but yield no numeric values (for example a
        column of plain strings) are left unchanged. Columns whose min and
        max are equal (or that contain no values at all) are set to 0.0.
    """
    if not columns_to_normalize:
        return df  # Nothing requested: hand back the original DataFrame

    for col in columns_to_normalize:
        if col not in df.columns:
            continue

        # errors='coerce' turns unparseable values into NaN instead of
        # raising; an exception here means the column as a whole is not
        # convertible, so it is kept as is.
        try:
            numeric = pd.to_numeric(df[col], errors='coerce')
        except (ValueError, TypeError):
            continue

        # If coercion wiped out every value of a column that did hold data,
        # the column is not numeric. Keep it rather than overwrite it.
        if numeric.isna().all() and df[col].notna().any():
            continue

        min_val = numeric.min()
        max_val = numeric.max()
        span = max_val - min_val

        # The comparison is False for a zero span and for NaN (no values),
        # which also guards against division by zero.
        if span > 0:
            df[col] = (numeric - min_val) / span
        else:
            # Float zero keeps the dtype consistent with the scaled branch.
            df[col] = 0.0

    return df
if __name__ == '__main__':
    # Demonstration: build a small frame with two numeric columns and one
    # text column, then normalize only the numeric ones.
    sample_frame = pd.DataFrame(
        {
            'col1': [10, 20, 30, 40, 50],
            'col2': ['a', 'b', 'c', 'd', 'e'],
            'col3': [1, 2, 3, 4, 5],
        }
    )
    target_columns = ['col1', 'col3']  # columns chosen for normalization

    # Work on a copy so the sample frame itself is not modified.
    working_copy = sample_frame.copy()
    result = normalize_metadata(working_copy, target_columns)
    print(result)
# Add your comment