import hashlib
import numpy as np
class Experiments:
    """
    Design & run experiments
    """

    def assign_condition_by_id(
        self, user_id, conditions, proportions_by_conditions, random_state
    ):
        """
        Assign a given id to the same experimental condition every time for a
        consistent user experience.
        I.e. customer #15 will always be in the treatment condition.

        The id is hashed with MD5, the last 8 hex characters of the digest are
        read as an integer, and that integer modulo ``random_state`` selects a
        bucket. Buckets are split between the conditions according to the
        cumulative proportions.

        Parameters
        --------
        user_id: int
            Identifier to assign. It is converted with ``str()`` before
            hashing, so any value with a stable string form works.
        conditions: numpy array
            E.g. ['treatment', 'control']
        proportions_by_conditions: numpy array
            Should add up to 1, e.g. [0.8, 0.2]
        random_state: int
            Divisor used for consistent assignment, i.e. the number of
            buckets. Must be positive.

        Returns
        --------
        chosen_condition: str
            Chooses one of the conditions you provided

        Raises
        --------
        AssertionError
            If ``conditions`` and ``proportions_by_conditions`` differ in
            length, or the proportions do not add up to 1.
        ValueError
            If ``random_state`` is not positive.
        """
        # Raised explicitly (rather than via ``assert``) so the validation is
        # not stripped when Python runs with -O; the exception type and
        # messages are unchanged for existing callers.
        if len(conditions) != len(proportions_by_conditions):
            raise AssertionError(
                "Need a proportion of assignment for each condition (and vice versa)."
            )
        proportions = np.array(proportions_by_conditions)
        # Compare with a tolerance: e.g. 0.7 + 0.2 + 0.1 is 0.9999999999999999
        # in floating point, which an exact ``== 1.0`` check would reject.
        if not np.isclose(proportions.sum(), 1.0, rtol=0.0, atol=1e-9):
            raise AssertionError("Need proportions to add up to 1.")
        if random_state <= 0:
            raise ValueError("random_state must be a positive number.")

        hex_string = hashlib.md5(str(user_id).encode()).hexdigest()
        # Each hexadecimal character carries 4 bits of information, so the
        # last 8 characters give a 32-bit value. Python integers are
        # arbitrary precision, so the truncation is not needed for safety,
        # but it is kept because changing it would reassign existing ids.
        hex_string_truncated = hex_string[-8:]
        # Hexadecimal is a base 16 representation, so convert the hex
        # characters to an integer.
        numeric_result = int(hex_string_truncated, 16)

        # Force the integer into the range [0, random_state) via the modulo.
        remainder = numeric_result % random_state

        # Upper (exclusive) bucket boundary of each condition.
        # NOTE: interior boundaries are floored exactly as before so ids that
        # were already assigned keep their condition.
        thresholds_for_condition_assignment = np.floor(
            proportions.cumsum() * random_state
        )
        # The final boundary must cover every possible remainder. Rounding
        # error in the cumulative sum could otherwise floor it to
        # random_state - 1 and leave the top bucket without a condition.
        thresholds_for_condition_assignment[-1] = random_state

        # Get the first index where the remainder is less than the condition
        # boundary. A match always exists because the last boundary equals
        # random_state and remainder < random_state.
        condition_index = int(
            np.argmax(remainder < thresholds_for_condition_assignment)
        )
        chosen_condition = np.array(conditions)[condition_index]
        return chosen_condition