r"""
.. include:: tutorials/retention/Retention_Example.rst
Retention Class
===============
"""
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import pickle
import sys
from scipy.optimize import curve_fit
from loguru import logger
from typing import List, Union, Callable
from pyfreya.retention.fit_functions import power
from uncertainties.core import Variable, correlated_values
from uncertainties.unumpy import nominal_values, std_devs
plt.style.use('ggplot')
font = {'size': 20}
matplotlib.rc('font', **font)
[docs]class Retention:
r"""
Retention class Start
"""
def __init__(self,
days_since_install: List[int],
retention_values: List[Union[float, Variable]]):
r"""
Initializes the retention class by setting retention values and corresponding days since
install values. Retention values can either be values below 1 like 0.8 or in hundreds like
80, both denoting 80%.
:param retention_values: Retention values.
:param days_since_install:
"""
self.logger = logger
try:
self.logger.remove()
except ValueError:
pass
finally:
self.logger.add(sys.stderr, level='INFO')
logger.disable(__name__)
if isinstance(retention_values[0], Variable):
retention_values = np.array(retention_values)
else:
retention_values = pd.to_numeric(retention_values)
days_since_install = pd.to_numeric(days_since_install)
assert len(retention_values) == len(days_since_install), \
'retention values and days since install must have equal length.'
if retention_values[0] > 1:
self.logger.debug('Normalizing retention values')
retention_values = retention_values / 100
if (retention_values > 1).sum() > 0:
self.logger.error('retention value determination error')
raise ValueError(
'There are retention values above one - is input values of mixed notation? '
'(10 and 0.1 for 10%)')
if std_devs(retention_values).sum() > 0:
std = std_devs(retention_values)
else:
std = None
self.fit_data = {'dsi': days_since_install,
'ret': nominal_values(retention_values),
'ret_unc': std
}
self.df_retention = pd.DataFrame(data=retention_values,
index=days_since_install,
columns=['Retention'])
self.df_retention.index.name = 'DaysSinceInstall'
self.fit_func = power
self.fitted_params = np.array([])
def __call__(self, days_since_install: Union[list, np.ndarray]) -> np.ndarray:
r"""
Returns retention from a fitted function for the days given in *days_since_install*.
:param days_since_install: Days since install that retention is to be calculated for.
:return: Retention.
"""
self.logger.debug('Calling retention func')
return self.fit_func(days_since_install, *self.fitted_params)
def __str__(self):
r"""
Print the dataframe with retention as percent. This is done here with specific options for
width and length, therefore, return is emtpy.
:return: Empty string.
"""
retention_print = self.df_retention.copy()
retention_print['Retention'] = retention_print['Retention'].map('{0:3.1%}'.format)
if 'RetentionFit' in retention_print.columns:
retention_print['RetentionFit'] = retention_print['RetentionFit'].map('{0:3.1%}'.format)
with pd.option_context('display.max_rows', 40, 'display.max_columns', None,
'display.width', 200):
return retention_print.__str__()
def __repr__(self):
r"""
Print the dataframe with retention as percent. This is done here with specific options for
width and length, therefore, return is emtpy.
:return: Empty string.
"""
retention_print = self.df_retention.copy()
retention_print['Retention'] = retention_print['Retention'].map('{0:3.1%}'.format)
if 'RetentionFit' in retention_print.columns:
retention_print['RetentionFit'] = retention_print['RetentionFit'].map('{0:3.1%}'.format)
with pd.option_context('display.max_rows', 40, 'display.max_columns', None,
'display.width', 200):
return retention_print.__str__()
[docs] def fit(self, function: Union[str, Callable] = 'power', **kwargs):
r"""
Fits given values to a function. *function* can either be an identifier (string) of these:
* *power*: Calls a power function.
*function* can also be a custom callable function. Additional arguments can be passed to
**scipy** s curve fitting tool: `curve fitting function <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html>`_
:param function: String (identifier) or callable function.
:return: 0
"""
self.logger.debug('Fitting')
if function == 'power':
self.logger.debug('Using power fit function')
self.fit_func = power
else:
assert callable(function), 'Custom provided function must be callable'
self.logger.debug('Using custom fitting function')
self.fit_func = function
if 'p0' in kwargs.keys():
self.logger.debug('Using custom start guess')
start_guess = kwargs['p0']
kwargs.pop('p0')
else:
self.logger.debug('Using standard start guess')
start_guess = self.fit_func.fit_start_guess
self.fitted_params, cov_matrix = curve_fit(self.fit_func,
self.fit_data['dsi'],
self.fit_data['ret'],
start_guess,
sigma=self.fit_data['ret_unc'],
**kwargs)
if self.fit_data['ret_unc'] is not None:
self.fitted_params = correlated_values(self.fitted_params, cov_matrix)
if self.df_retention.index.max() < 30:
self.logger.debug('Using standard mac days since install')
index = np.arange(1, 31)
else:
self.logger.debug('Using custom max days since install')
index = np.arange(1, self.df_retention.index.max() + 1)
df_retention = pd.DataFrame(index=index, columns=['Retention', 'RetentionFit'])
df_retention.loc[self.df_retention.index, 'Retention'] = self.df_retention['Retention']
df_retention['RetentionFit'] = self.fit_func(index, *self.fitted_params)
df_retention.index.name = 'DaysSinceInstall'
self.df_retention = df_retention
return 0
[docs] def plot(self):
r"""
Plots the retention. If a fit have been performed it that is plotted too.
:return: 0
"""
self.logger.debug('Plotting')
plt.figure(figsize=(16, 9))
ret = nominal_values(self.df_retention['Retention'])
ret_unc = std_devs(self.df_retention['Retention'])
if ret_unc.sum() > 0:
plt.errorbar(self.df_retention.index, ret, ret_unc, fmt='o', markersize=10, capsize=10,
capthick=5)
else:
plt.plot(self.df_retention.index, ret, 'o')
ax = plt.gca()
if 'RetentionFit' in self.df_retention.columns:
self.logger.debug('Plotting fit')
ret = nominal_values(self.df_retention['RetentionFit'])
ret_unc = std_devs(self.df_retention['RetentionFit'])
if ret_unc.sum() > 0:
plt.errorbar(self.df_retention.index, ret, ret_unc, capsize=10, capthick=5)
else:
plt.plot(self.df_retention.index, ret)
ax.set_xlim([-1, self.df_retention.index.max() + 1])
ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1))
if len(self.fitted_params) > 0:
self.logger.debug('Calculating sum of retention')
try:
title = f'Sum of Retention day 180 = {self.retention_sum(180):.1f} days'
except Exception as error:
self.logger.error(f'Not possible to calculate integration, gave message: {error}')
title = 'Retention'
else:
title = 'Retention'
plt.title(title)
plt.tight_layout()
plt.show()
return 0
[docs] def retention_sum(self, dsi_end=180) -> float:
r"""
Calculates the sum of retention (mean average days the game have been opened at least once).
It uses the parameters from a fit to calculate it and is only possible if the fit function
is of the :class:`BaseFitFunction` class.
:param dsi_end: The last day in the integration.
:return: Sum of retention.
"""
self.logger.debug('Calculating sum of retention.')
return self.fit_func.integrate_func(dsi_end, self.fitted_params)
[docs] def save(self, filename: str):
r"""
Saves an instance to the retention class as a pickled object.
:param filename: Filename to be used.
:return: 0
"""
self.logger.debug('Saving instance.')
# WTF some bug that only exist on linux (and windows?)
# seems to make it impossible to pickle with the logger. WTF!?!
self.logger = None
with open(filename, 'wb') as file_handle:
pickle.dump(self, file_handle, pickle.HIGHEST_PROTOCOL)
return 0