import io
import json

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
def deserialize_dataset(data, data_type="json"):
    """
    Deserializes dataset input based on the specified data type.

    Args:
        data (str or bytes): The input data. For "json" this is either a
            literal JSON document (text starting with "{" or "[") or any
            other source ``pandas.read_json`` accepts, which is passed
            through unchanged. For "parquet" this is either a source
            ``pyarrow.parquet.read_table`` accepts (e.g. a file path) or
            the raw Parquet file contents as bytes.
        data_type (str): The type of data ("json" or "parquet").

    Returns:
        pandas.DataFrame or pyarrow.Table or None: A pandas DataFrame for
            "json", a pyarrow Table for "parquet", or None if the data
            type is unsupported or deserialization fails (the error is
            printed, not raised).
    """
    try:
        if data_type == "json":
            source = data
            # Handing a literal JSON string directly to pd.read_json is
            # deprecated in recent pandas releases; wrap literal documents
            # in a text buffer so they are parsed as content, not as a path.
            if isinstance(data, str) and data.lstrip().startswith(("{", "[")):
                source = io.StringIO(data)
            return pd.read_json(source)

        if data_type == "parquet":
            source = data
            # Raw Parquet bytes are not a valid path; expose them through a
            # file-like object so they can be read from memory.
            if isinstance(data, (bytes, bytearray)):
                source = io.BytesIO(data)
            return pq.read_table(source)

        print(f"Error: Unsupported data type: {data_type}")
        return None  # Return None for unsupported types
    except json.JSONDecodeError as e:
        # NOTE(review): pandas appears to report malformed JSON as a plain
        # ValueError, so this handler is rarely hit; it is kept so callers
        # relying on the existing messages see no change.
        print(f"Error decoding JSON: {e}")
        return None
    except Exception as e:
        print(f"Error deserializing dataset: {e}")
        return None
def main():
    """Run small demonstrations of ``deserialize_dataset`` (for testing)."""
    # JSON example: a literal JSON document with two columns.
    json_data = '{"name": ["Alice", "Bob"], "age": [30, 25]}'
    df = deserialize_dataset(json_data, "json")
    if df is not None:
        print("JSON Deserialized DataFrame:")
        print(df)

    # Parquet example: nothing is printed if the file cannot be read,
    # because deserialize_dataset returns None on failure.
    parquet_data = "your_parquet_file.parquet"  # replace with path to your parquet file
    table = deserialize_dataset(parquet_data, "parquet")
    if table is not None:
        print("\nParquet Deserialized Table:")
        print(table)
        print(table.schema)

    # Example of error handling
    invalid_json = '{"name": "Alice", "age": 30'  # missing closing brace
    failed = deserialize_dataset(invalid_json, "json")
    if failed is None:
        print("\nJSON deserialization failed as expected.")


if __name__ == '__main__':
    main()
# Add your comment