import numpy as np
import pandas as pd
from segment import segment_byparameters
from processdata import get_lengthofstaylist

''' This function obtains the duration and probability lookup table from the historical data
	Inputs:
	adt_dataframe- pandas dataframe of historical adt information
	Outputs:
	duration_array- a 4x7x4x2 array which is divided into 4 seasons, 7 days of the week, and 4
					time ranges, it contains a duration distribution list and a probability
					distribution for the number of patients arriving per hour
'''
def create_lookup(adt_dataframe):
	"""Build the duration / arrival-probability lookup table from historical ADT data.

	For every (season, dayofweek, time_range) combination the ADT dataframe is
	segmented with ``segment_byparameters`` and two objects are stored:

	* index 0 - the value returned by ``get_lengthofstaylist`` for that subset
	* index 1 - the patients-per-hour distribution returned alongside the subset

	Time ranges are given to ``segment_byparameters`` as (start, end) hours:
	0 -> (7, 15), 1 -> (15, 19), 2 -> (19, 23), and 3 -> overnight, which is
	assembled from the two calls (23, 24) and (0, 7).
	NOTE(review): whether the end hour is inclusive or exclusive is decided by
	``segment_byparameters`` - confirm there.

	Inputs:
	adt_dataframe- pandas dataframe of historical adt information
	Outputs:
	duration_array- a 4x7x4x2 numpy object array indexed as
					[season, dayofweek, time_range, 0 or 1]

	Side effect: prints a running count (1..112) of filled cells as a progress
	indicator.
	"""
	# Hour windows for the three time ranges that do not cross midnight
	day_windows = ((7, 15), (15, 19), (19, 23))

	# Object array so each cell can hold an arbitrary list/array
	duration_array = np.empty([4, 7, 4, 2], dtype=object)
	count = 0
	for season in range(4):
		for dayofweek in range(7):
			for time_range in range(4):
				if time_range < len(day_windows):
					start_hour, end_hour = day_windows[time_range]
					subset, pat_probs = segment_byparameters(
						adt_dataframe, season, dayofweek, start_hour, end_hour)
				else:
					# Overnight range wraps midnight, so it is queried in two pieces
					late_adts, late_probs = segment_byparameters(
						adt_dataframe, season, dayofweek, 23, 24)
					early_adts, early_probs = segment_byparameters(
						adt_dataframe, season, dayofweek, 0, 7)
					subset = pd.concat([late_adts, early_adts])
					pat_probs = _merge_hourly_probs(late_probs, early_probs)

				# Duration distribution for this temporal subset
				duration_array[season, dayofweek, time_range, 0] = get_lengthofstaylist(subset)
				# Patients-per-hour probability distribution for the same subset
				duration_array[season, dayofweek, time_range, 1] = pat_probs

				count += 1
				print(count)

	return duration_array


def _merge_hourly_probs(probs_a, probs_b):
	"""Add two patients-per-hour distributions of possibly different length and renormalize."""
	# The two inputs may differ in length (different max patients per hour):
	# add over the common prefix and keep the tail of the longer one as-is.
	if len(probs_a) < len(probs_b):
		shorter, longer = probs_a, probs_b
	else:
		shorter, longer = probs_b, probs_a
	overlap = len(shorter)
	merged = np.concatenate([np.add(shorter, longer[0:overlap]), longer[overlap:]])
	# NOTE(review): both inputs are weighted equally even though they cover
	# 1 hour and 7 hours respectively - confirm this is intended.
	# NOTE(review): a zero total would produce NaNs here - confirm it cannot occur.
	return merged / np.sum(merged)



