# officeAvailability.py
# coding=utf-8
import datetime
import logging

# only for plot method
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd

from web.models.appointment import Appointment
from web.models.appointment_type_link import AppointmentTypeLink
from web.models.availability import Availability
from web.models.constants import AVAILABILITY_EXTRA
from web.models.holiday import Holiday
from web.utils import get_today_midnight_date

# Switch pyplot to the 'Agg' backend at import time (Agg is matplotlib's non-interactive, file-rendering backend),
# presumably so that plot_availability can run on a server without a display.
matplotlib.pyplot.switch_backend('Agg')

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class OfficeAvailability:
    """
        start: datetime-like indicating when the range starts. If None, today at midnight is used.
        end: datetime-like indicating when the range ends. If None, tomorrow at midnight is used.
        office_start: when the office hours begin
        office_end: when the office hours finish
        minimum_slot: frequency of the pandas series. T stands for minutes.
        Docs: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.date_range.html
    """

    def __init__(self, name, start=None, end=None, office_start='8:00', office_end='18:00', minimum_slot='1T'):
        """
        Build an all-zero (nothing available) time series covering [start, end] at `minimum_slot` frequency.

        name: label used in log messages, the plot title and the plot file name
        start: beginning of the range; today's midnight when None
        end: end of the range; tomorrow's midnight (start default + 1 day) when None
        office_start / office_end: office hours, used by the `only_working_hours` options
        minimum_slot: pandas frequency string of the series
        """
        # business_hours is only filled in by plot_availability
        self.business_hours = None

        midnight_today = get_today_midnight_date()
        midnight_tomorrow = midnight_today + datetime.timedelta(days=1)

        self.start = midnight_today if start is None else start
        self.end = midnight_tomorrow if end is None else end

        self.name = name
        self.office_start = office_start
        self.office_end = office_end
        self.minimum_slot = minimum_slot

        self.range = pd.date_range(start=self.start, end=self.end, freq=self.minimum_slot)
        logger.debug('Name: %s. Min index: %s. Max index: %s', self.name, self.start, self.end)
        # every slot starts as 0, i.e. not available
        self.availability = pd.Series(index=self.range, data=0)

    def _get_duration(self):
        '''
        Private method. Returns the differ
        '''
        return self.availability.index[-1] - self.availability.index[0]

    def add_availability(self, availability_range, only_working_hours=False):
        '''
        Receives a pandas date_range `pd.date_range` object.
        Sets the availability to one for the specific interval of the provided range.
        '''
        availability_range = availability_range.round(self.minimum_slot)
        if only_working_hours:
            availability_range = availability_range.to_series().between_time(self.office_start, self.office_end).index
        self.availability[availability_range] = 1

    def remove_availability(self, availability_range, only_working_hours=False):
        '''
        Receives a pandas date_range `pd.date_range` object.
        Sets the availability to zero for the specific interval of the provided range.
        '''
        availability_range = availability_range.round(self.minimum_slot)
        if only_working_hours:
            availability_range = availability_range.to_series().between_time(self.office_start, self.office_end).index
        self.availability[availability_range] = 0

    def _ensure_dates_are_in_bounds(self, given_start, given_end):
        '''
        given_start and given_end should not be a string but if so, they must comply with pd.Timestamp requirements
        '''
        # sort dates to ensure start < end
        start, end = sorted([pd.Timestamp(given_start), pd.Timestamp(given_end)])

        if start < self.availability.index.min():
            start = self.availability.index.min()

        #  check if end is in bounds
        if end > self.availability.index.max():
            end = self.availability.index.max()

        # this could only happen if both start and end dates are higher or lower than index.max/min since start and
        # end dates are sorted this means that the two dates are out of bounds and then its time range doesn't
        # overlap with the self.availability
        if start > self.availability.index.max() or end < self.availability.index.min():
            raise ValueError

        return start, end

    def _select_bounded_portion(self, start, end, label):
        """
            Private helper of consider_this.

            Returns the part of self.availability between start and end (both inclusive) after clamping the
            period to the object's time space, or None when the period does not overlap the series at all.

            :label only used in the debug message written when the period is ignored.
        """
        try:
            start, end = self._ensure_dates_are_in_bounds(start, end)
        except ValueError:
            logger.debug('%s range does not overlap the availability range. Ignoring %s.', label, label)
            return None
        # Label-based slicing picks every existing slot inside [start, end]. Unlike indexing with a freshly built
        # pd.date_range, it does not raise KeyError when start is not aligned with the series grid and it does
        # not need to materialise an intermediate range.
        return self.availability.loc[start:end]

    def consider_this(self, appointment_availability_or_holiday, only_working_hours=False):
        """
            :appointment_availability_or_holiday can be an object from the following classes: Availability, Holiday,
            Appointment, AppointmentTypeLink.
            :only_working_hours if true, changes are limited to the defined working hours
            (office_start to office_end).

            Availability repeats every week: every slot of the series on the matching weekday, between
            available_from and available_till, is set to 1.
            Availability always refers to a moment in which the worker should be working. Never the opposite.

            Holiday has higher preference because it refers to extraordinary events like extra availability or lack of
            availability. Its period is set to 1 when its kind is AVAILABILITY_EXTRA and to 0 otherwise.

            Appointment and AppointmentTypeLink set their period (start plus duration in minutes) to 0.

            Periods of Holiday, Appointment and AppointmentTypeLink are limited to the time space of this object.
            If a period does not overlap the time space at all, it is ignored (a debug message is logged).

            Raises TypeError if the object is not an instance of one of the supported classes.
        """
        item = appointment_availability_or_holiday

        if isinstance(item, Availability):
            start = item.available_from
            end = item.available_till
            weekday = item.day_number
            logger.debug('Considering Availability from %s to %s for weekday %d', start, end, weekday)
            # pandas numbers weekdays from Monday=0, hence the shift of day_number by one
            on_weekday = self.availability[self.availability.index.weekday == (weekday - 1)]
            # within those days, keep only the specific hours
            portion = on_weekday.between_time(start, end)
            set_to = 1
        elif isinstance(item, Holiday):
            start = item.datetime_start
            end = item.datetime_end
            is_extra = item.kind == AVAILABILITY_EXTRA
            logger.debug('Considering %s from %s to %s',
                         'Extra Availability' if is_extra else 'Holiday',
                         start, end)
            portion = self._select_bounded_portion(start, end, 'Holiday')
            set_to = 1 if is_extra else 0
        elif isinstance(item, Appointment):
            start = item.datetime_when
            end = start + datetime.timedelta(minutes=item.length)
            logger.debug('Considering General Appointment from %s to %s', start, end)
            portion = self._select_bounded_portion(start, end, 'Appointment')
            set_to = 0
        elif isinstance(item, AppointmentTypeLink):
            start = item.date_when
            end = start + datetime.timedelta(minutes=item.appointment_type.default_duration)
            logger.debug('Considering Subject Appointment from %s to %s', start, end)
            portion = self._select_bounded_portion(start, end, 'AppointmentTypeLink')
            set_to = 0
        else:
            message = 'Expected Availability, Holiday, Appointment or AppointmentTypeLink objects.'
            logger.error(message)
            raise TypeError(message)

        # the period does not overlap the time space of this object: nothing to change
        if portion is None:
            return

        if only_working_hours:
            portion = portion.between_time(self.office_start, self.office_end)

        # portion is always a subset of self.availability, so every label of portion.index exists in the series
        self.availability.loc[portion.index] = set_to

    def get_availability_percentage(self, only_working_hours=False):
        """
        For multiple values this is the solution: return self.availability.value_counts().div(len(s))[1] * 100
        But since it's 0 or 1, this works as well and is faster: return self.availability.mean() * 100

        To test it:
        import pandas as pd
        range = pd.date_range(start='2018-10-1', end='2018-10-2 01:00', freq='5T', closed=None)
        s = pd.Series(index=range, data=0)
        range2 = pd.date_range(start='2018-10-1 1:00', end='2018-10-1 2:30', freq='5T')
        s[range2] = 1
        print(s.value_counts().div(len(s))[1]*100)   # prints 6.312292358803987
        print(s.mean()*100)							 # prints 6.312292358803987
        %timeit s.value_counts().div(len(s))[1]*100  # 504 µs ± 19.2 µs per loop (mean ± std. dev. of 7 runs,
                                                       1000 loops each)
        %timeit s.mean()*100                         # 56.3 µs ± 1.66 µs per loop (mean ± std. dev. of 7 runs,
                                                       10000 loops each)
        """
        if only_working_hours:
            availability = self.availability.between_time(self.office_start, self.office_end)
        else:
            availability = self.availability

        return availability.mean() * 100  # better to isolate the operation in case we change it later

    def is_available(self, only_working_hours=False):
        """
        Returns True if on the selected period is available at least 50% of the time
        Otherwise returns False
        """
        return self.get_availability_percentage(only_working_hours=only_working_hours) > 50.0

    def plot_availability(self):
        """
        Plot availability chart.

        Draws the business hours as a shaded area and the availability series as a hatched line on the same
        axes, then saves the chart as '<name>_<start>_<end>.pdf' (dates formatted as %Y%m%d%H%M) in the current
        working directory.

        Side effects: stores the business-hours series in self.business_hours and sets the global matplotlib
        rcParam 'hatch.linewidth' to 1.
        """
        fig = plt.figure()  # create new figure instead of reusing pyplot's current one
        try:
            axes = fig.gca()  # get current axes
            matplotlib.rcParams['hatch.linewidth'] = 1
            logger.debug('business_hours: %s %s', self.office_start, self.office_end)

            # series with 1 during office hours and 0 elsewhere, drawn as background
            business_hours = self.business_hours = pd.Series(index=self.range, data=0)
            mask = business_hours.between_time(self.office_start, self.office_end).index
            business_hours[mask] = 1
            axes = business_hours.plot(kind='area', alpha=0.33, color='#1190D8', label='Business Hours', legend=True,
                                       ax=axes)

            # calculate good xticks: one tick every <number of whole days> hours, or, for ranges shorter than a
            # day, one tick every <number of whole hours> minutes (at least every minute)
            duration_seconds = self._get_duration().total_seconds()
            n_ticks = int(duration_seconds / 3600 / 24)
            if n_ticks == 0:
                n_ticks = int(duration_seconds / 60 / 60)
                if n_ticks == 0:
                    n_ticks = 1
                # 'min' and 'h' are used instead of the deprecated 'T' and 'H' offset aliases
                xticks = self.availability.asfreq(f'{n_ticks}min').index
            else:
                xticks = self.availability.asfreq(f'{n_ticks}h').index

            # both parts must be f-strings, otherwise the end date placeholder is printed literally
            title = (f'Availability for {self.name} from {self.start.strftime("%Y/%m/%d %H:%M")}'
                     f' to {self.end.strftime("%Y/%m/%d %H:%M")}')

            axes = self.availability.plot(figsize=(16, 8), grid=True,
                                          title=title, legend=True, label='Availability', color='#00af52',
                                          xticks=xticks, ax=axes, yticks=[0, 1])

            axes.fill_between(self.availability.index, self.availability.tolist(), facecolor="none", hatch='//',
                              edgecolor="#00af52", alpha=1, linewidth=0.5)
            axes.set_axisbelow(True)
            axes.yaxis.grid(color='gray', linewidth=0.5, alpha=0)
            axes.xaxis.grid(color='gray', linewidth=0.5, alpha=1)
            axes.set_yticklabels(['False', 'True'])
            axes.set_ylabel('Is Available ?')
            axes.set_xlabel('Date & Time')

            fig.tight_layout()
            fig.savefig(
                f"{self.name}_{self.start.strftime('%Y%m%d%H%M')}_{self.end.strftime('%Y%m%d%H%M')}.pdf")
        finally:
            # figures created through pyplot stay registered (and in memory) until they are explicitly closed
            plt.close(fig)