import numpy as np
import pandas as pd

''' This file uses the historical adt dataframe in order to create dictionaries that will be used
    in creating the census array used to determine an initial census for the simulation.
	Inputs:
	adt_dataframe-  a pandas dataframe of historical adt data
	Outputs:
	In_Time_Dict-        a dictionary whose keys are each hour of arrival of patients and whose values are a list 
						 of the visit IDs of patients entering at that hour
	FourPDurations_Dict- a dictionary whose keys are visit IDs and values are the corresponding total duration in 4P
'''
def get_dictionaries(adt_dataframe, department='HCGH 4P ACUTE'):
	"""Build the arrival-hour and total-duration lookups for one department.

	Args:
		adt_dataframe: pandas DataFrame of historical ADT events. Must contain the
			columns 'VisitID', 'ADT_DEPARTMENT_NAME', 'IN_DTTM' and 'OUT_DTTM';
			the two timestamp columns must be datetime-typed so that they can be
			subtracted from each other.
		department: value of 'ADT_DEPARTMENT_NAME' to summarise. Defaults to the
			4P unit ('HCGH 4P ACUTE').

	Returns:
		A tuple (In_Time_Dict, FourPDurations_Dict):
		In_Time_Dict        - maps each hour (timestamp truncated to the hour) to the
		                      list of visit IDs whose earliest event in `department`
		                      started in that hour. Visits that never enter
		                      `department` do not appear.
		FourPDurations_Dict - maps every visit ID in the dataframe to the total
		                      number of whole minutes spent in `department`, summed
		                      over all of the visit's events there (so patients who
		                      leave and come back are counted in full). Visits that
		                      never enter `department` map to 0.

	Side effects:
		Adds (or overwrites) the columns 'Duration' (event length in whole minutes,
		as a float) and 'In_Hour' (IN_DTTM truncated to the hour) on the dataframe
		that was passed in.
	"""
	# Event length in whole minutes. Floor division reproduces the truncation to
	# minute resolution that astype('timedelta64[m]') used to perform; that cast is
	# rejected by pandas 2.x, which only supports s/ms/us/ns resolutions.
	elapsed = adt_dataframe['OUT_DTTM'] - adt_dataframe['IN_DTTM']
	adt_dataframe['Duration'] = elapsed.dt.total_seconds() // 60
	adt_dataframe['In_Hour'] = adt_dataframe['IN_DTTM'].values.astype('datetime64[h]')

	# Visit IDs in order of first appearance; this fixes the order of the IDs
	# inside each In_Time_Dict list.
	visit_ids = adt_dataframe['VisitID'].unique()

	# Stable sort so that events sharing an IN_DTTM keep a deterministic order.
	events = adt_dataframe.sort_values('IN_DTTM', kind='mergesort')
	in_department = events['ADT_DEPARTMENT_NAME'] == department
	dept_events = events.loc[in_department].dropna(subset=['VisitID'])

	# Earliest arrival hour into the department, per visit that has one.
	first_hour_by_visit = dept_events.groupby('VisitID')['In_Hour'].min().to_dict()

	# Invert to hour -> [visit IDs]. Visits with no event in the department are
	# skipped instead of raising, as min() over a missing group used to do.
	In_Time_Dict = dict()
	for visit_id in visit_ids:
		if visit_id not in first_hour_by_visit:
			continue
		In_Time_Dict.setdefault(first_hour_by_visit[visit_id], []).append(visit_id)

	# Every visit gets an entry, starting at 0. Merging consecutive stays in the
	# same department before summing is unnecessary: the total over all of a
	# visit's events in the department is the same either way.
	FourPDurations_Dict = dict()
	for visit_id in events.groupby('VisitID').size().index:
		FourPDurations_Dict[visit_id] = 0
	for visit_id, minutes in zip(dept_events['VisitID'], dept_events['Duration']):
		FourPDurations_Dict[visit_id] = FourPDurations_Dict[visit_id] + minutes

	return In_Time_Dict, FourPDurations_Dict


