#!/usr/bin/env python
# coding: utf-8
"""Facets Overview demo: summarize the UCI census (adult) train/test splits.

Exported from a Jupyter notebook; the ``# In[n]`` markers are the original
cell boundaries.  The script downloads both CSV splits, computes a feature
statistics proto with ``GenericFeatureStatisticsGenerator``, base64-encodes
it, and hands the result to an HTML template displayed through IPython.
"""

# In[1]:

# Add the facets overview python code to the python path
import sys

sys.path.append('./python')


# In[2]:

# Load UCI census train and test data into dataframes.
import pandas as pd

# Column names supplied to read_csv via `names=`.
# NOTE(review): "Martial Status" looks like a typo for "Marital Status"; it is
# left untouched because the label becomes a DataFrame column name.
features = [
    "Age", "Workclass", "fnlwgt", "Education", "Education-Num",
    "Martial Status", "Occupation", "Relationship", "Race", "Sex",
    "Capital Gain", "Capital Loss", "Hours per week", "Country", "Target",
]

# Parsing options shared by both splits, kept in one place so that train and
# test are always read the same way:
#   * the separator regex swallows whitespace around each comma, and a regex
#     separator is why the slower 'python' engine is requested;
#   * "?" cells are read as missing values.
_CSV_OPTIONS = {
    "names": features,
    "sep": r'\s*,\s*',
    "engine": 'python',
    "na_values": "?",
}

train_data = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",
    **_CSV_OPTIONS)
# The first row of the test file is skipped -- presumably a non-data header
# line; confirm against the downloaded file.
test_data = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test",
    skiprows=[0],
    **_CSV_OPTIONS)


# In[3]:

# Calculate the feature statistics proto from the datasets and stringify it
# for use in facets overview
from generic_feature_statistics_generator import GenericFeatureStatisticsGenerator
import base64

gfsg = GenericFeatureStatisticsGenerator()
proto = gfsg.ProtoFromDataFrames([
    {'name': 'train', 'table': train_data},
    {'name': 'test', 'table': test_data},
])
# Serialized proto bytes -> base64 -> str, so the value can be substituted
# into the text template below.
protostr = base64.b64encode(proto.SerializeToString()).decode("utf-8")


# In[4]:

# Display the facets overview visualization for this data
# `IPython.display` is the public import location; importing these names from
# `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# NOTE(review): the template is effectively empty and has no `{protostr}`
# placeholder, so nothing is rendered.  The original markup appears to have
# been stripped from this file -- restore it from the notebook before relying
# on this cell.
HTML_TEMPLATE = """ """
html = HTML_TEMPLATE.format(protostr=protostr)
display(HTML(html))