From b209a9b7c1c4ab9e2941aa33e415dacfcc44486e Mon Sep 17 00:00:00 2001 From: Christos Natsis Date: Sat, 2 Apr 2022 22:05:31 +0300 Subject: [PATCH] added documentation --- faker_clickstream/clickstream.py | 92 ++++++++++++++++++++++++++++++-- 1 file changed, 88 insertions(+), 4 deletions(-) diff --git a/faker_clickstream/clickstream.py b/faker_clickstream/clickstream.py index bc59ff4..5046eb1 100644 --- a/faker_clickstream/clickstream.py +++ b/faker_clickstream/clickstream.py @@ -15,6 +15,7 @@ class ClickstreamProvider(BaseProvider): """ A Provider for clickstream related test data. + >>> from faker import Faker >>> from faker_clickstream import ClickstreamProvider >>> fake = Faker() @@ -23,15 +24,39 @@ class ClickstreamProvider(BaseProvider): """ def user_agent(self): + """ + Generate random user agent. + + :return: User agent string + """ return choice(user_agents) def event(self): + """ + Generate random event type name for e-commerce site. + + :return: Event type string + """ return choice(events) def weighted_event(self): + """ + Generate a random event object according to popularity weight. Higher popularity increases the + chances of occurrence. + + :return: Event object (JSON) + """ return random.choices(weighted_events, weights=[e['popularity'] for e in weighted_events], k=1)[0] def session_clickstream(self, rand_session_max_size: int = 25): + """ + Generate session clickstream events. + + :param rand_session_max_size: Max number of possible events in session. Defaults to 25. + :return: List of session events + """ + + # Initialize static session values session_events = [] user_id = _get_user_id() user_agent = self.user_agent() @@ -40,11 +65,17 @@ def session_clickstream(self, rand_session_max_size: int = 25): channel_type = _get_channel() random_session_size = randint(1, rand_session_max_size) incremental_delta_delay = randint(1, 60) + + # Keep track of unique values in a session unique_session_events = set() product_codes = set() + for s in range(random_session_size): + # Mock time delay between events incremental_delta_delay = incremental_delta_delay + (s * randint(1, 60)) event_time = _format_time(_get_event_time(delta=incremental_delta_delay)) + + # Fetch weighted event event = self.weighted_event() if (event['name'] == 'Login' and event['name'] in unique_session_events) \ @@ -85,6 +116,7 @@ def session_clickstream(self, rand_session_max_size: int = 25): if 'DecreaseQuantity' in unique_session_events: unique_session_events.remove('DecreaseQuantity') + # Fill metadata object conditionally metadata = {} if event['name'] == 'Search': sample_product = _get_weighted_mobile_phone() @@ -106,6 +138,7 @@ def session_clickstream(self, rand_session_max_size: int = 25): if event['name'] == 'CheckOrderStatus': metadata['order_id'] = _get_order_id() + # Construct final event object r = { "ip": ip, "user_id": user_id, @@ -121,6 +154,11 @@ def session_clickstream(self, rand_session_max_size: int = 25): def _get_session_id(): + """ + Generate session ID + + :return: Session ID string + """ return hashlib.sha256( ('%s%s%s' % ( datetime.now().strftime("%d/%m/%Y %H:%M:%S.%f"), @@ -131,40 +169,86 @@ def _get_session_id(): def _get_product_code(): + """ + Generate random product code from range 1 to 999999. + + :return: Random integer number + """ return randint(1, 999999) def _get_order_id(): + """ + Generate random order id from range 1 to 999999. + + :return: Random integer number + """ return randint(1, 999999) def _get_user_id(start: int = 0, end: int = 999999): + """ + Generate random user id from range 0 to 999999. Zero value may identify null user. + + :param start: Index start (Default: 0) + :param end: Index end (Default: 999999) + :return: + """ return randint(start, end) def _get_event_time(delta): + """ + Generate current event time, added by some delta value. + + :param delta: Delta time value in seconds + :return: Event time + """ return datetime.now() + timedelta(seconds=delta) def _format_time(t): - return t.strftime("%d/%m/%Y %H:%M:%S.%f") - + """ + Format time to string. -def _get_event_name(): - return choice(events) + :param t: Time object + :return: Time string in format like 28/03/2022 23:22:15.360252 + """ + return t.strftime("%d/%m/%Y %H:%M:%S.%f") def _get_quantity(): + """ + Get random product order quantity from 1 to 5. Values are given a weight, decreasing as the quantity number + increases. + + :return: Product quantity number + """ return random.choices([1, 2, 3, 4, 5], weights=[50, 20, 20, 5, 5], k=1)[0] def _get_weighted_mobile_phone(): + """ + Get mobile phone object according to popularity + + :return: Mobile phone object + """ return random.choices(mobile_phones, weights=[e['popularity'] for e in mobile_phones], k=1)[0] def _get_ip(): + """ + Get random IP address from list. + + :return: IP address string + """ return choice(ip_list) def _get_channel(): + """ + Get user origin channel (e.g. "Organic search", "Direct", "Social media", "Referral", "Other") + + :return: Origin channel string + """ return choice(channel)