Source code for datto.Experiments

import hashlib

import numpy as np


class Experiments:
    """Design & run experiments."""

    def assign_condition_by_id(
        self, user_id, conditions, proportions_by_conditions, random_state
    ):
        """
        Assign a given id to the same experimental condition every time for a
        consistent user experience. I.e. customer #15 will always be in the
        treatment condition.

        The id is hashed with MD5, the last 8 hex characters of the digest are
        converted to an integer, and that integer modulo ``random_state``
        selects a bucket. Buckets are split between the conditions according
        to the cumulative proportions.

        Parameters
        --------
        user_id: int
            Identifier to assign. It is converted with ``str()`` before hashing.
        conditions: numpy array
            E.g. ['treatment', 'control']
        proportions_by_conditions: numpy array
            Should add up to 1, e.g. [0.8, 0.2]
        random_state: int
            Divisor used for consistent assignment. Condition boundaries are
            ``floor(cumulative_proportion * random_state)``, so it also sets
            the granularity of the split.

        Returns
        --------
        chosen_condition: str
            Chooses one of the conditions you provided

        Raises
        --------
        AssertionError
            If the number of conditions and proportions differ, or if the
            proportions do not add up to 1 (within floating point tolerance).
        """
        # Raised explicitly (rather than via ``assert``) so the validation is
        # not stripped when Python runs with -O; the exception type and
        # messages are the same as before.
        if len(conditions) != len(proportions_by_conditions):
            raise AssertionError(
                "Need a proportion of assignment for each condition (and vice versa)."
            )

        proportions = np.asarray(proportions_by_conditions, dtype=float)

        # Compare with a tolerance: an exact ``== 1.0`` rejects valid inputs
        # such as [0.7, 0.2, 0.1], whose float sum is 0.9999999999999999.
        if not np.isclose(proportions.sum(), 1.0, rtol=0.0, atol=1e-9):
            raise AssertionError("Need proportions to add up to 1.")

        hex_string = hashlib.md5(str(user_id).encode()).hexdigest()

        # Only the last 8 hex characters (32 bits) of the digest are used.
        # Python integers are arbitrary precision, so this is not required for
        # safety, but the truncation determines which condition every id maps
        # to and is therefore kept unchanged.
        numeric_result = int(hex_string[-8:], 16)

        # Force the integer between 0 and random_state-1 via the modulo.
        remainder = numeric_result % random_state

        thresholds_for_condition_assignment = np.floor(
            proportions.cumsum() * random_state
        )
        # The final boundary must cover every possible remainder. Float error
        # in the cumulative sum can otherwise leave it one short of
        # random_state, so that the top bucket matches no condition.
        thresholds_for_condition_assignment[-1] = random_state

        # Get the first index where the remainder is less than the condition
        # boundary.
        condition_index = np.flatnonzero(
            remainder < thresholds_for_condition_assignment
        )[0]

        chosen_condition = np.array(conditions)[condition_index]

        return chosen_condition