Photo by Joshua Aragon on Unsplash
def initial_eda(df, max_unique=12):
    """Plot a frequency chart for each low-cardinality object column of ``df``.

    Every column selected by ``df.select_dtypes('object')`` is examined.
    Columns with at most ``max_unique`` distinct values (as counted by
    ``value_counts()``) are drawn as a seaborn count plot whose bars follow
    the ``value_counts()`` order; for every other column a one-line notice
    is printed instead and the scan continues with the next column.

    The function relies on ``plt`` and ``sns`` being available in the
    enclosing module -- presumably ``matplotlib.pyplot`` and ``seaborn``;
    confirm against the file's imports.

    Args:
        df: DataFrame to explore.
        max_unique: Largest number of distinct values a column may have and
            still be plotted. Defaults to 12 to avoid overcrowded axes.

    Returns:
        None. The output is the displayed figures and the printed notices.
    """
    # List of categorical columns
    cat_cols = df.select_dtypes('object').columns

    for col in cat_cols:
        # Formatting; str() keeps non-string column labels from breaking .title()
        column_name = str(col).title().replace('_', ' ')

        # Count once per column and reuse the result for the threshold check,
        # the bar order and the notice text.
        counts = df[col].value_counts()

        # Too many distinct values would overcrowd the axis: report and move on.
        if len(counts) > max_unique:
            print(f'{column_name} has {len(counts)} unique values. Alternative EDA should be considered.')
            continue

        plt.figure(figsize=(8, 6))
        sns.countplot(x=df[col],
                      data=df,
                      palette="Paired",
                      order=counts.index)
        plt.title('Distribution of ' + column_name, fontsize=18, pad=12)
        plt.xlabel(column_name, fontsize=15)
        plt.xticks(rotation=20)
        plt.ylabel("Frequency", fontsize=15)
        plt.show()

Continue reading: https://towardsdatascience.com/a-function-that-makes-your-initial-eda-a-breeze-8e9549d69fb3?source=rss----7f60cf5620c9---4

Source: towardsdatascience.com