import boto3
from boto3.dynamodb.conditions import Key
from collections import deque
from typing import Dict, List
import calendar
import datetime
import json
import os
import statistics

# Map this function's region to the nearest region holding a DynamoDB replica.
dynamo_region_map = {
    'us-west-1': 'us-west-1',
    'us-west-2': 'us-west-2',
    'us-east-1': 'us-east-1',
    'us-east-2': 'us-east-2',
    'ap-south-1': 'eu-north-1',
    'ap-northeast-3': 'ap-northeast-1',
    'ap-northeast-2': 'ap-northeast-1',
    'ap-southeast-1': 'ap-southeast-1',
    'ap-southeast-2': 'ap-southeast-2',
    'ap-northeast-1': 'ap-northeast-1',
    'ca-central-1': 'us-east-1',
    'eu-central-1': 'eu-north-1',
    'eu-west-1': 'eu-west-1',
    'eu-west-2': 'eu-west-1',
    'eu-west-3': 'eu-west-3',
    'eu-north-1': 'eu-north-1',
    'sa-east-1': 'sa-east-1',
    'eu-south-1': 'eu-north-1'
}

# This is a rough first pass at an intelligent region selector based on what is replicated.
if os.environ.get('AWS_REGION') in dynamo_region_map:
    local_dynamo_region = dynamo_region_map[os.environ['AWS_REGION']]
else:
    local_dynamo_region = 'eu-central-1'

local_timestream_region = 'us-east-1'
timestream_client = boto3.client('timestream-query', region_name=local_timestream_region)
dynamodb_client = boto3.resource('dynamodb', region_name=local_dynamo_region)

tables = {
    'retail': {
        'recent': 'wow-token-price-recent',
        'current': 'wow-token-price',
        'compacted': 'wow-token-compacted',
        'timestream': 'wow-token-price-history'
    },
    'classic': {
        'recent': 'wow-token-classic-price-recent',
        'current': 'wow-token-classic-price',
        'compacted': 'wow-token-compacted',
        'timestream': 'wow-token-classic-price-history'
    }
}


def historical_data(time, region, version):
    # This shim permanently rewrites 30d to 720h for local caching. There seems
    # to be at least one person still using 30d (strangely with no .json), which
    # was deprecated as the data source for one month of data years ago.
    if time == '30d':
        time = '720h'
    if time[-1] == 'h':
        return dynamo_data(time, region, version)
    else:
        return dynamo_compacted(time, region, version)


def _get_dynamo_compacted(time: str, region: str, version: str) -> List[Dict[str, int | str]]:
    table = dynamodb_client.Table(tables[version]['compacted'])
    pk = f'{region}-{version}-{time}'
    response = table.query(
        KeyConditionExpression=Key('region-flavor-timestamp').eq(pk)
    )
    # Each compacted item holds a {unix_timestamp: price} map; sort by timestamp.
    response_data = sorted(response['Items'][0]['data'].items())
    data = []
    for timestamp, price in response_data:
        data.append({
            'time': datetime.datetime.fromtimestamp(
                int(timestamp), tz=datetime.timezone.utc).isoformat(),
            'value': int(price)
        })
    return data


def dynamo_compacted(time: str, region: str, version: str) -> List[Dict[str, int | str]]:
    return _get_dynamo_compacted(time, region, version)


def dynamo_data(time, region, version):
    print(f"Function region: {os.environ.get('AWS_REGION')}\tDynamo region: {local_dynamo_region}")
    time_stripped = int(time[:-1])
    start_time_utc = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(hours=time_stripped)
    table = dynamodb_client.Table(tables[version]['recent'])
    response = table.query(
        KeyConditionExpression=(
            Key('region').eq(region)
            & Key('timestamp').gte(int(start_time_utc.timestamp()))))
    data = []
    last_price = 0
    for item in response['Items']:
        # Prices are stored in copper; 10,000 copper = 1 gold.
        price = int(item['price']) // 10000
        # Only emit a point when the price actually changed.
        if last_price != price:
            item_time = datetime.datetime.fromtimestamp(
                int(item['timestamp']), tz=datetime.timezone.utc).isoformat()
            data.append({
                'time': item_time,
                'value': price
            })
            last_price = price
    return data


def aggregate_data(aggregate_function: str, data: list):
    if aggregate_function == 'daily_max':
        return max_min(1, 1, data)
    elif aggregate_function == 'daily_min':
        return max_min(-1, 1, data)
    elif aggregate_function == 'daily_mean':
        return mean(1, data)
    elif aggregate_function == 'weekly_max':
        return max_min(1, 7, data)
    elif aggregate_function == 'weekly_min':
        return max_min(-1, 7, data)
    elif aggregate_function == 'weekly_mean':
        return mean(7, data)
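

# A hedged illustration (synthetic values; the helper name _example_aggregate
# is not part of the original API) of the shape flowing through the functions
# above: historical_data yields {'time': ISO-8601 str, 'value': int gold}
# points, which aggregate_data reduces to one point per day/week bucket.
def _example_aggregate() -> list:
    sample = [
        {'time': '2024-07-02T00:00:00+00:00', 'value': 250000},
        {'time': '2024-07-02T12:00:00+00:00', 'value': 251341},
        {'time': '2024-07-03T00:00:00+00:00', 'value': 249800},
    ]
    # Expect the 2024-07-02 bucket to reduce to its maximum (251341); the
    # trailing partial bucket is dropped, matching max_min's behavior.
    return aggregate_data('daily_max', sample)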


def date_in_range(day_range: tuple, date: datetime.datetime) -> bool:
    if day_range[0] <= date.day < day_range[1]:
        return True
    elif date.day < day_range[1] and date.day < day_range[0]:
        # TODO: I am probably missing a sanity check here, come back to it
        return True
    else:
        return False


def day_bucket(bucket_size: int, date: datetime.datetime) -> tuple[datetime.datetime, datetime.datetime]:
    # We want the bucket boundaries for a bucket size of 7 to fall on the reset
    # day (weekday index 1), and for a month (31) to fall on the actual
    # boundaries of that month. This means bucket sizes vary month to month.
    # TODO: monthly boundaries (calendar.monthrange will be needed for that)
    days_to_reset = {0: 1, 1: 0, 2: 6, 3: 5, 4: 4, 5: 3, 6: 2}
    if bucket_size == 7 and date.weekday() != 1:
        # This is WoW: the week starts on Tuesday (weekday index 1), so shorten
        # the first bucket to end on the next reset.
        bucket_size = days_to_reset[date.weekday()]
    return date, date + datetime.timedelta(days=bucket_size)


def is_new_bucket(d_datetime: datetime.datetime, current_bucket_day: int, bucket: tuple) -> bool:
    # current_bucket_day guards against re-triggering for a day that has
    # already started a new bucket.
    return d_datetime.day != current_bucket_day and (d_datetime >= bucket[1] or d_datetime.weekday() == 1)


def _sum_total(data: list) -> int:
    return sum(d['value'] for d in data)


def max_min(fn: int, bucket_size: int, data: list) -> list:
    # fn == 1 keeps each bucket's maximum; fn == -1 keeps its minimum.
    new_data = []
    first_date = datetime.datetime.fromisoformat(data[0]['time'])
    current_bucket_day = first_date.day  # I hate working with dates
    bucket = day_bucket(bucket_size, first_date)
    min_max = {'minimum': 999_999_999, 'maximum': 0}
    min_max_date = {
        'minimum_date': datetime.datetime.min.isoformat(),
        'maximum_date': datetime.datetime.max.isoformat()
    }
    for d in data:
        d_datetime = datetime.datetime.fromisoformat(d['time'])
        if is_new_bucket(d_datetime, current_bucket_day, bucket):
            current_bucket_day = d_datetime.day
            bucket = day_bucket(bucket_size, d_datetime)
            if fn == -1:  # Minimum function
                new_data.append({'time': min_max_date['minimum_date'], 'value': min_max['minimum']})
            elif fn == 1:  # Maximum function
                new_data.append({'time': min_max_date['maximum_date'], 'value': min_max['maximum']})
            min_max = {'minimum': 999_999_999, 'maximum': 0}
            min_max_date = {
                'minimum_date': datetime.datetime.min.isoformat(),
                'maximum_date': datetime.datetime.max.isoformat()
            }
        if d['value'] < min_max['minimum']:
            min_max['minimum'] = d['value']
            min_max_date['minimum_date'] = d_datetime.isoformat()
        if d['value'] > min_max['maximum']:
            min_max['maximum'] = d['value']
            min_max_date['maximum_date'] = d_datetime.isoformat()
    return new_data


def mean(bucket_size: int, data: list) -> list:
    new_data = []
    first_date = datetime.datetime.fromisoformat(data[0]['time'])
    current_bucket_day = first_date.day
    bucket = day_bucket(bucket_size, first_date)
    mean_bucket = []
    for d in data:
        d_datetime = datetime.datetime.fromisoformat(d['time'])
        if is_new_bucket(d_datetime, current_bucket_day, bucket):
            # Close out the previous bucket, stamped with that bucket's start,
            # before rolling over to the new one.
            new_data.append({'time': bucket[0].isoformat(), 'value': int(statistics.mean(mean_bucket))})
            mean_bucket = []
            current_bucket_day = d_datetime.day
            bucket = day_bucket(bucket_size, d_datetime)
        mean_bucket.append(d['value'])
    return new_data
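

# A quick sketch of the Tuesday-anchored bucketing used by the aggregates
# above (dates are synthetic; _example_day_bucket is illustrative only).
# 2024-07-04 is a Thursday (weekday() == 3), so a "weekly" bucket opened there
# is shortened to 5 days so that the next boundary lands on the WoW reset day.
def _example_day_bucket() -> tuple:
    start = datetime.datetime(2024, 7, 4, tzinfo=datetime.timezone.utc)
    bucket = day_bucket(7, start)
    assert bucket[1].weekday() == 1  # closes on Tuesday 2024-07-09
    return bucket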


# TODO FIXME
def simple_moving_average(hours: int, data: list) -> list:
    # The cyclomatic complexity of this function is getting high; I need to
    # figure out a more elegant solution.
    new_data = []
    queue = deque()
    hours_in_queue = 0
    head_date = datetime.datetime.fromisoformat(data[0]['time'])
    for datum in data:
        datum_datetime = datetime.datetime.fromisoformat(datum['time'])
        if datum_datetime.hour == head_date.hour:
            queue.append(datum)
        else:
            if hours_in_queue == hours:
                # Window is full: emit the average, then evict the head hour.
                q_list = list(queue)
                total = _sum_total(q_list)
                new_datum = {
                    'value': int(total / len(q_list)),
                    'time': head_date.isoformat()
                }
                new_data.append(new_datum)
                deque_val = 0
                for d in q_list:
                    _dt = datetime.datetime.fromisoformat(d['time'])
                    if _dt.hour == head_date.hour and _dt.day == head_date.day:
                        deque_val += 1
                while deque_val != 0:
                    queue.popleft()  # evict from the oldest end of the window
                    deque_val -= 1
                hours_in_queue -= 1
                head_date = datum_datetime
            elif hours_in_queue < hours:
                queue.append(datum)
                hours_in_queue += 1
    return new_data


def moving_weighted_average(days: int, data: list) -> list:
    # TODO: not yet implemented.
    pass


def validate_path(split_uri: list) -> bool:
    if not split_uri[-1].endswith('json'):
        return False
    if not validate_region(split_uri[-2]):
        return False
    if not validate_time(split_uri[-1].split('.')[0]):
        return False
    return True


def validate_time(time: str) -> bool:
    # These can probably be rewritten as a lambda, but this is a first pass.
    try:
        if time[-1] == 'h':
            return 24 <= int(time[:-1]) < 1000
        if time[-1] == 'd':
            return 30 <= int(time[:-1]) <= 100
        if time[-1] == 'm':
            return 1 <= int(time[:-1]) <= 12
        if time[-1] == 'y':
            return 1 <= int(time[:-1]) <= 10
    except ValueError:
        # Non-numeric prefix (e.g. 'xxh') is not a valid time window.
        return False
    return time == 'all'


def validate_region(region: str) -> bool:
    valid_regions = ['us', 'eu', 'tw', 'kr']
    return region in valid_regions


def validate_aggregate(aggregate_function: str) -> bool:
    valid_aggregates = ['daily_max', 'daily_min', 'daily_mean',
                        'weekly_max', 'weekly_min', 'weekly_mean']
    return aggregate_function in valid_aggregates


def lambda_handler(event, context):
    # Lambda@Edge origin-request handler; the URI encodes
    # .../[aggregate]/region/time.json, with an optional 'classic' segment.
    uri = event['Records'][0]['cf']['request']['uri']
    split_uri = uri.split('/')
    if validate_path(split_uri):
        version = 'classic' if 'classic' in split_uri else 'retail'
        time = split_uri[-1].split('.')[0]
        region = split_uri[-2]
        aggregate_function = split_uri[-3]
        data = historical_data(time, region, version)
        if validate_aggregate(aggregate_function):
            data = aggregate_data(aggregate_function, data)
        response = {'status': '200', 'statusDescription': 'OK', 'headers': {}}
        response['headers']['content-type'] = [{'key': 'Content-Type', 'value': 'application/json'}]
        response['body'] = json.dumps(data)
        return response
    else:
        return {'status': '404', 'statusDescription': 'NotFound', 'headers': {}}


def main():
    # Local debugging entry point.
    # data = dynamo_compacted('1y', 'us', 'retail')
    # print(data)
    pass


if __name__ == '__main__':
    main()
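

# A minimal local smoke test (sketch): drives lambda_handler with a synthetic
# CloudFront origin-request event. The URI fixture is hypothetical, and running
# this for real requires AWS credentials and the DynamoDB tables defined above.
def _example_local_invoke():
    event = {'Records': [{'cf': {'request': {'uri': '/daily_max/us/72h.json'}}}]}
    return lambda_handler(event, None)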