Source code for unipy.plots.boxplot

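"""Composite plotting toolkit.

Boxplots overlaid with jittered data points, and mosaic-style stacked
bar plots, built on pandas and matplotlib.

"""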


import numpy as np
import pandas as pd
import collections
import matplotlib.pyplot as plt


# Names exported by a star-import of this module. ``sector_plot`` is an
# unimplemented stub and is therefore not listed here.
__all__ = ['point_boxplot',
           'point_boxplot_axis',
           'mosaic_plot']


def point_boxplot(data, groupby=None, value=None, rot=90, spread=.2,
                  dot_size=15., dot_color='b', dot_alpha=.2,
                  figsize=(12, 9), *args, **kwargs):
    """Boxplot with points.

    Draw one boxplot per group of ``groupby`` for the ``value`` column on a
    single axis, and overlay every observation as a horizontally jittered
    dot.

    Parameters
    ----------
    data: pandas.DataFrame
        a dataset.

    groupby: str or list-like (default: None)
        A key column to separate. (X-axis, categorical)
        When ``str``, it should be a column name to groupby.
        When ``list-like``, it contains a column name to groupby.

    value: str or list-like (default: None)
        A key column to get values. (Y-axis, numerical)
        When ``str``, it should be a column name of values.
        When ``list-like``, it contains a column name of values.

    rot: int (default: 90)
        A rotation angle to show X-axis labels.

    spread: float (default: .2)
        A spread ratio of points. The standard deviation of the horizontal
        jitter is ``spread / number_of_groups``, so the bigger it is, the
        broader the point cloud around each box.

    dot_size: float (default: 15.)
        A size of each point.

    dot_color: str (default: 'b')
        A color name of each point.

    dot_alpha: float (default: .2)
        A transparency value of each point.

    figsize: tuple (default: (12, 9))
        Figure size handed to ``matplotlib.pyplot.subplots``.

    *args, **kwargs
        Accepted for backward compatibility; currently unused.

    Returns
    -------
    matplotlib.figure.Figure
        A plot figure.

    Raises
    ------
    TypeError
        When ``groupby`` or ``value`` is neither a ``str`` nor a list/tuple
        (this includes leaving them at the default ``None``).

    AssertionError
        It is raised when two or more names are given to
        ``groupby`` or ``value``.

    Notes
    -----
    The jitter is drawn from NumPy's global random state, so the dot
    positions differ between calls unless the caller seeds it.

    See also
    --------
    ``pandas.DataFrame.boxplot``

    ``matplotlib.pyplot``

    Examples
    --------
    >>> import unipy.dataset.api as dm
    >>> from unipy.plots import point_boxplot
    >>> dm.init()
    >>> data = dm.load('iris')
    Dataset : iris
    >>> tmp = point_boxplot(data, groupby='species', value='sepal_length')

    """
    # Normalize both selectors to one-element lists. Validation raises
    # explicitly (instead of ``assert``) so it also runs under ``python -O``;
    # AssertionError is kept for the multi-column case because it is the
    # documented exception type.
    selected = {}
    for arg_name, arg in (('groupby', groupby), ('value', value)):
        if isinstance(arg, str):
            names = [arg]
        elif isinstance(arg, (list, tuple)):
            names = list(arg)
        else:
            raise TypeError(
                "'{}': should be a str or a list/tuple of a single column "
                "name, got {}".format(arg_name, type(arg).__name__)
            )
        if len(names) != 1:
            raise AssertionError(
                "'{}': should be a single column".format(arg_name)
            )
        selected[arg_name] = names
    groupby_list = selected['groupby']
    value_list = selected['value']
    group_col = groupby_list[0]
    value_col = value_list[0]

    flierprops = dict(marker='o', markerfacecolor='white', alpha=1.,
                      markersize=5, linestyle='none', markeredgewidth=.7)

    grouped = data.groupby(group_col)[value_col]
    n_groups = len(grouped)

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=figsize)
    data.boxplot(by=groupby_list, column=value_list, rot=rot, ax=ax,
                 flierprops=flierprops, showfliers=True, showmeans=True)

    # pandas draws the i-th group's box at x-position i + 1, so the dots of
    # each group are scattered around that same position.
    for i, (_, axis_y_val) in enumerate(grouped):
        axis_x_loc = np.random.normal(i + 1, spread / n_groups,
                                      len(axis_y_val))
        ax.scatter(x=axis_x_loc, y=axis_y_val, s=dot_size, c=dot_color,
                   alpha=dot_alpha)

    fig.tight_layout()

    return fig
def point_boxplot_axis(data, groupby=None, value=None, rot=90, spread=.2,
                       dot_size=15., dot_color='b', dot_alpha=.2,
                       share_yrange=True, figsize=(12, 9), *args, **kwargs):
    """Boxplot with points, horizontally separated.

    Draw one subplot per group of ``groupby``, side by side. Each subplot
    holds the boxplot of the ``value`` column for that group, overlaid with
    its observations as horizontally jittered dots.

    Parameters
    ----------
    data: pandas.DataFrame
        a dataset.

    groupby: str or list-like (default: None)
        A key column to separate. (X-axis, categorical)
        When ``str``, it should be a column name to groupby.
        When ``list-like``, it contains a column name to groupby.

    value: str or list-like (default: None)
        A key column to get values. (Y-axis, numerical)
        When ``str``, it should be a column name of values.
        When ``list-like``, it contains a column name of values.

    rot: int (default: 90)
        A rotation angle to show X-axis labels.

    spread: float (default: .2)
        A spread ratio of points. The standard deviation of the horizontal
        jitter is ``spread / number_of_groups``, so the bigger it is, the
        broader the point cloud around each box.

    dot_size: float (default: 15.)
        A size of each point.

    dot_color: str (default: 'b')
        A color name of each point.

    dot_alpha: float (default: .2)
        A transparency value of each point.

    share_yrange: bool (default: True)
        When True, every subplot's Y-axis is limited to the overall
        minimum/maximum of the value column. When False, each subplot keeps
        its own automatically chosen Y-axis limits.

    figsize: tuple (default: (12, 9))
        Figure size handed to ``matplotlib.pyplot.subplots``.

    *args, **kwargs
        Accepted for backward compatibility; currently unused.

    Returns
    -------
    matplotlib.figure.Figure
        A plot figure.

    Raises
    ------
    TypeError
        When ``groupby`` or ``value`` is neither a ``str`` nor a list/tuple
        (this includes leaving them at the default ``None``).

    AssertionError
        It is raised when two or more names are given to
        ``groupby`` or ``value``.

    Notes
    -----
    The jitter is drawn from NumPy's global random state, so the dot
    positions differ between calls unless the caller seeds it.

    See also
    --------
    ``pandas.DataFrame.boxplot``

    ``matplotlib.pyplot``

    Examples
    --------
    >>> import unipy.dataset.api as dm
    >>> from unipy.plots import point_boxplot_axis
    >>> dm.init()
    >>> data = dm.load('iris')
    Dataset : iris
    >>> tmp = point_boxplot_axis(data,
    ...                          groupby='species',
    ...                          value='sepal_length',
    ...                          share_yrange=True)

    """
    # Normalize both selectors to one-element lists. Validation raises
    # explicitly (instead of ``assert``) so it also runs under ``python -O``;
    # AssertionError is kept for the multi-column case because it is the
    # documented exception type.
    selected = {}
    for arg_name, arg in (('groupby', groupby), ('value', value)):
        if isinstance(arg, str):
            names = [arg]
        elif isinstance(arg, (list, tuple)):
            names = list(arg)
        else:
            raise TypeError(
                "'{}': should be a str or a list/tuple of a single column "
                "name, got {}".format(arg_name, type(arg).__name__)
            )
        if len(names) != 1:
            raise AssertionError(
                "'{}': should be a single column".format(arg_name)
            )
        selected[arg_name] = names
    group_col = selected['groupby'][0]
    value_list = selected['value']
    value_col = value_list[0]

    flierprops = dict(marker='o', markerfacecolor='white', alpha=1.,
                      markersize=5, linestyle='none', markeredgewidth=.7)

    # Group one-column frames (not Series) because DataFrame.boxplot is
    # called on every group below.
    grouped = data.groupby(group_col)[value_list]
    n_groups = len(grouped)

    # ``.iloc[0]`` takes the single column's result by position; plain
    # ``[0]`` on a label-indexed Series is deprecated positional access.
    ylim_min = data[value_list].min().iloc[0]
    ylim_max = data[value_list].max().iloc[0]

    # squeeze=False always yields a 2-D array of Axes, so the loop below
    # also works when there is only one group.
    fig, axes = plt.subplots(nrows=1, ncols=n_groups, figsize=figsize,
                             squeeze=False)

    for ax, (_, subdata) in zip(axes.flatten(), grouped):
        subdata.boxplot(column=value_list, rot=rot, ax=ax,
                        flierprops=flierprops, showfliers=True,
                        showmeans=True)

        # Each subplot has a single box at x-position 1.
        axis_y_val = subdata[value_col]
        axis_x_loc = np.random.normal(1, spread / n_groups, len(axis_y_val))
        ax.scatter(x=axis_x_loc, y=axis_y_val, s=dot_size, c=dot_color,
                   alpha=dot_alpha)

        if share_yrange:
            ax.set_ylim(ylim_min, ylim_max)

    fig.tight_layout()

    return fig
def mosaic_plot(
        data,
        groupby=None,
        col_list=None,
        show_values=True,
        rot=90,
        width=.9,
        figsize=(12, 9),
        *args, **kwargs):
    """Mosaic Plot via Stacked bar plots.

    Draw one subplot per group of ``groupby``, stacked vertically. Each
    subplot is a stacked bar chart with one bar per column of ``col_list``;
    the segments of a bar are the value counts of that column within the
    group.

    Parameters
    ----------
    data: pandas.DataFrame
        a dataset.

    groupby: str or list-like (default: None)
        A key column to separate the subplots. (categorical)
        When ``str``, it should be a column name to groupby.
        When ``list-like``, it contains a column name to groupby.

    col_list: str or list-like (default: None)
        Key columns whose values are counted. (one bar per column)
        When ``str``, it should be a single column name.
        When ``list-like``, it contains column names.

    show_values: bool (default: True)
        Choose if the count ``n`` of every bar segment is annotated.

    rot: int (default: 90)
        A rotation angle to show X-axis labels.

    width: float (default: .9)
        Bar width handed to the pandas bar plot.

    figsize: tuple (default: (12, 9))
        Figure size handed to ``matplotlib.pyplot.subplots``.

    *args, **kwargs
        Accepted for backward compatibility; currently unused.

    Returns
    -------
    matplotlib.figure.Figure
        A plot figure.

    Raises
    ------
    TypeError
        When ``groupby`` or ``col_list`` is neither a ``str`` nor a
        list/tuple (this includes leaving them at the default ``None``).

    AssertionError
        It is raised when two or more names are given to ``groupby``.

    See also
    --------
    ``pandas.DataFrame.plot.bar``

    ``matplotlib.pyplot``

    Examples
    --------
    >>> import unipy.dataset.api as dm
    >>> from unipy.plots import mosaic_plot
    >>> dm.init()
    >>> data = dm.load('adult')
    Dataset : adult
    >>> tmp = mosaic_plot(data, groupby='native_country',
    ...                   col_list=['workclass', 'education'])

    """
    # Normalize both selectors to real lists so that they can be
    # concatenated below (a tuple would break ``list + tuple``).
    selected = {}
    for arg_name, arg in (('groupby', groupby), ('col_list', col_list)):
        if isinstance(arg, str):
            selected[arg_name] = [arg]
        elif isinstance(arg, (list, tuple)):
            selected[arg_name] = list(arg)
        else:
            raise TypeError(
                "'{}': should be a str or a list/tuple of column names, "
                "got {}".format(arg_name, type(arg).__name__)
            )
    groupby_list = selected['groupby']
    count_cols = selected['col_list']

    # Raised explicitly (instead of ``assert``) so the check also runs under
    # ``python -O``; AssertionError is the documented exception type.
    if len(groupby_list) != 1:
        raise AssertionError("'groupby': should be a single column")
    group_col = groupby_list[0]

    grouped = data[groupby_list + count_cols].groupby(group_col)

    # squeeze=False always yields a 2-D array of Axes, so the loop below
    # also works when there is only one group.
    fig, axes = plt.subplots(
        nrows=len(grouped),
        ncols=1,
        figsize=figsize,
        squeeze=False,
    )

    for ax, (_, grp) in zip(axes.flatten(), grouped):

        # Rows: distinct values, columns: ``count_cols``, cells: counts.
        freq_table = grp[count_cols].apply(lambda col: col.value_counts())

        # Transposed so that every counted column becomes one stacked bar.
        freq_table.T.plot(
            kind='bar',
            stacked=True,
            ax=ax,
            rot=rot,
            width=width,
            edgecolor='white',
        )
        ax.legend(
            title=group_col,
            loc='center right',
            bbox_to_anchor=(1.11, .5),
        )

        if show_values:
            for patch in ax.patches:
                # Local names must not rebind the ``width`` parameter, which
                # is still needed for the remaining subplots.
                bar_width = patch.get_width()
                bar_height = patch.get_height()
                ax.annotate(
                    '{height:.0f}'.format(height=bar_height),
                    (
                        patch.get_x() + .5 * bar_width,
                        patch.get_y() + .5 * bar_height,
                    ),
                    ha='center',
                    va='bottom',
                )

    return fig
def sector_plot():
    """Placeholder for a sector plot; not implemented yet, returns None."""
    return None