import numpy as np
import pandas as pd

''' This file converts csv files of basic info and ADT data into dataframes.
	Inputs:
	bi_csv_name-  the name of a csv file which contains basic patient information
	adt_csv_name- the name of a csv file which contains ADT information
	cutoff-       the minimum number of ADT rows a unit must have (counted over visits that
	              included 4P); any visit that passed through a unit with fewer rows is
	              treated as an outlier and removed entirely
	Outputs:
	BIs-  a pandas dataframe with the basic info for only the retained visits that were in 4P
	ADTs- a pandas dataframe with the ADT info for only the retained visits that were in 4P
	Side effect:
	The filtered ADT dataframe is also saved to 'ADTs.pkl' in the current working directory.
'''
def import_datafromfiles(bi_csv_name, adt_csv_name, cutoff, unit_name='HCGH 4P ACUTE', pickle_path='ADTs.pkl'):
	"""Load basic-info and ADT csv files and keep only visits that included one unit.

	Rows are selected with boolean masks (rather than ``DataFrame.where``
	followed by a not-null filter) so that column dtypes read from the csv,
	e.g. an integer ``VisitID``, are not upcast to float in the results.

	Args:
		bi_csv_name: Path of the csv file with basic patient information.
			Must contain ``VisitID``, ``DEPARTMENT_NAME`` and the time
			columns converted below.
		adt_csv_name: Path of the csv file with ADT information. Must contain
			``VisitID``, ``ADT_DEPARTMENT_NAME``, ``IN_DTTM`` and ``OUT_DTTM``.
		cutoff: Minimum number of ADT rows a department must have (counted
			over the visits that included ``unit_name``). Every visit that
			has a row in a department below this count is removed entirely.
		unit_name: Department name a visit must include to be kept.
			Defaults to ``'HCGH 4P ACUTE'``.
		pickle_path: Where the filtered ADT dataframe is pickled. Defaults to
			``'ADTs.pkl'``; pass ``None`` to skip writing the file.

	Returns:
		A ``(BIs, ADTs)`` tuple of dataframes: the basic-info rows and the
		ADT rows of the retained visits.
	"""
	# Reads in data from csv files
	bi = pd.read_csv(bi_csv_name)
	adt = pd.read_csv(adt_csv_name)

	# Converts time variables to datetime type
	bi_datetime_columns = [
		'HospitalAdmissionOrEDRoomedTime',
		'EDArrivalTime',
		'TimePatientGotInpatientStatus',
		'ED_DEPARTURE_TIME',
		'HOSP_DISCH_time',
		'EXP_DISCHARGE_DATE',
		'ExpectedDisDateEntered',
		'ExpectedDisTimeEntered',
	]
	for column in bi_datetime_columns:
		bi[column] = pd.to_datetime(bi[column])
	# ExpectedDisTime is kept as a time of day only, not a full timestamp
	bi['ExpectedDisTime'] = pd.to_datetime(bi['ExpectedDisTime']).dt.time

	for column in ('IN_DTTM', 'OUT_DTTM'):
		adt[column] = pd.to_datetime(adt[column])

	# VisitIDs of every visit with at least one ADT row in the requested unit
	adt_unit_visit_ids = adt.loc[adt['ADT_DEPARTMENT_NAME'] == unit_name, 'VisitID'].unique()

	# All ADT rows of those visits; rows with a missing VisitID are never kept
	ADTs = adt[adt['VisitID'].notna() & adt['VisitID'].isin(adt_unit_visit_ids)]

	# Same selection for the basic info, keyed on its own department column
	bi_unit_visit_ids = bi.loc[bi['DEPARTMENT_NAME'] == unit_name, 'VisitID'].unique()
	BIs = bi[bi['VisitID'].notna() & bi['VisitID'].isin(bi_unit_visit_ids)]

	# Departments with fewer than `cutoff` ADT rows among the retained visits
	# (value_counts ignores missing department names, as groupby would)
	department_counts = ADTs['ADT_DEPARTMENT_NAME'].value_counts()
	rare_departments = department_counts[department_counts < cutoff].index

	# Visits that touched a rare department are considered outliers because
	# they go to "unusual" departments; drop all of their ADT rows
	outlier_visit_ids = ADTs.loc[ADTs['ADT_DEPARTMENT_NAME'].isin(rare_departments), 'VisitID'].unique()
	ADTs = ADTs[~ADTs['VisitID'].isin(outlier_visit_ids)]
	if pickle_path is not None:
		ADTs.to_pickle(pickle_path)

	# Keep basic info only for visits that still have ADT rows
	BIs = BIs[BIs['VisitID'].isin(ADTs['VisitID'].unique())]

	return BIs, ADTs
