#!/bin/bash

### Include ymclibnettools
. /usr/lib/lib-fliwi/ymc-networktools.bash || exit 1

### Settings: the flag file that marks this check as enabled, and the
### directory holding the config files processed further below.
HA_ENABLED_FILE=/var/run/fliwi-ha-only-one-running/enabled
conf_dir="/etc/fliwi/host/ha_only_one_running"

### Leave quietly (exit code 0, warning on stderr) while the check has not
### been enabled.
[ -f "$HA_ENABLED_FILE" ] || {
  echo "WARNING: fliwi-ha-only-one-running is not enabled (,yet)!" 1>&2
  exit 0
}

### Same treatment when the config directory is missing.
[ -d "$conf_dir" ] || {
  echo "WARNING: Directory $conf_dir does not exists!" 1>&2
  exit 0
}

lockFile="/tmp/fliwiHA_only_one_running.lock"

### Do not run if another instance is still running.
### The lock is taken atomically: with noclobber set, the redirection fails
### when the file already exists, so two instances that start at the same
### moment cannot both get past this point. The subshell keeps noclobber
### from leaking into the rest of the script. The lock content is the
### start time in seconds since the epoch.
lock_stamp=$(/bin/date +%s)
if ! ( set -o noclobber; echo "$lock_stamp" > "$lockFile" ) 2>/dev/null; then
  if [ -e "$lockFile" ] || [ -L "$lockFile" ]; then
    hostname=$(/bin/hostname)
    echo "$(/bin/date '+%F %T') The only_one_running HA-check on '$hostname' is running too slow!"
    exit 0
  fi

  ### The lock does not exist, yet it could not be created either.
  echo "ERROR: Unable to create the lock file $lockFile" 1>&2
  exit 1
fi

### From here on this instance owns the lock: release it on every exit path,
### so an aborted run does not block all following runs with a stale lock.
### The trap is installed only after the lock was obtained, hence the early
### exits above never remove a lock held by another instance.
trap '/bin/rm -f -- "$lockFile"' EXIT
trap 'exit 1' HUP INT TERM


### Forget every setting a previously sourced config file may have defined,
### so optional settings (PROTOCOL, PID_FILE, PROCESS_USER, PROCESS_NAME)
### cannot leak from one service into the next one.
ha_reset_service_config() {
  unset SERVICE_NAME PROTOCOL CHECK_PORT TRACK_IP INIT_D \
        PID_FILE PROCESS_USER PROCESS_NAME
}

### Print the space separated names of all settings that are used
### unconditionally below but are empty or unset. Prints an empty line if
### nothing is missing.
ha_missing_settings() {
  local name
  local missing=''
  for name in SERVICE_NAME CHECK_PORT TRACK_IP INIT_D
  do
    if [ -z "${!name}" ]; then
      missing="${missing:+$missing }$name"
    fi
  done
  echo "$missing"
}

### Run netcat to check the target using PROTOCOL and CHECK_PORT.
###  $1      host or ip to probe
###  returns the exit code of nc (0 = something accepts on the port)
ha_port_is_open() {
  local target=$1
  local -a nc_args=(-w 2 -z)
  if [ "$PROTOCOL" == 'udp' ]; then
    nc_args+=(-u)
  fi
  nc "${nc_args[@]}" "$target" "$CHECK_PORT" 2>/dev/null 1>/dev/null
}

### Print the pid stored in PID_FILE (first word of its first line).
###  returns 1 and prints nothing if PID_FILE is not set, not readable or
###          does not start with a plain number
ha_read_pid() {
  local pid=''
  if [ -n "$PID_FILE" ] && [ -r "$PID_FILE" ]; then
    read -r pid _ < "$PID_FILE"
  fi
  case "$pid" in
    ''|*[!0-9]*) return 1 ;;
  esac
  echo "$pid"
}

### Check if ps lists a line matching a pattern for the given pid or for one
### of its direct children.
###  $1      pid
###  $2      extended regular expression applied to each line of ps output
###  $3...   additional switches handed to ps (e.g. -f)
###  returns 0 if at least one line matches
ha_ps_matches() {
  local pid=$1
  local pattern=$2
  shift 2
  LC_ALL=C /bin/ps "$@" --pid "$pid" --ppid "$pid" | grep -q -E -e "$pattern"
}

### Handle every config file, one service per file. The glob replaces the
### parsing of ls output, which broke on file names containing whitespace.
for conf_path in "$conf_dir"/*
do
  ### Skip anything that is not a regular file (sub-directories, or the
  ### unexpanded pattern of an empty directory). The check for an empty
  ### conf_dir makes sure the glob can never end up walking "/".
  if [ -z "$conf_dir" ] || [ ! -f "$conf_path" ]; then
    continue
  fi
  conf_file=${conf_path##*/}

  ### Source the config it provides:
  ###  string  SERVICE_NAME
  ###  string  PROTOCOL
  ###  integer CHECK_PORT
  ###  string  TRACK_IP
  ###  string  INIT_D
  ###  string  PID_FILE
  ###  string  PROCESS_USER
  ###  string  PROCESS_NAME
  ha_reset_service_config
  . "$conf_dir/$conf_file"

  ### Without these settings no sane decision is possible; in particular an
  ### empty INIT_D would make "/etc/init.d/" (a directory) pass the -x test.
  missing_settings=$(ha_missing_settings)
  if [ -n "$missing_settings" ]; then
    echo "ERROR: Config file $conf_dir/$conf_file does not define: $missing_settings - skipping it" 1>&2
    continue
  fi

  own_services=$(fliwi-get-my-services)

  ### Check if the TRACK_IP is online on the host
  ### (the list of configured ips is split into words on purpose)
  if [ "$(ymc_contains "$TRACK_IP" $(ymc_get_all_configured_ips))" -gt 0 ]; then
    ### The ip is online - bring up the service
    echo "INFO: Service $SERVICE_NAME needs to be started..."

    ### Before starting the service - make sure no other is running on the cluster
    start_allowed=1
    start_needed=1
    for service_target in $(fliwi-get-services "$SERVICE_NAME")
    do
      echo "INFO: Checking service-node $service_target..."
      if [ "$(ymc_is_host_alive "$service_target")" -ne 1 ]; then
        ### own_services is a list and split into words on purpose
        if [ "$(ymc_contains "$service_target" $own_services)" -gt 0 ]; then
          echo "WARNING: Impossible to bring up service $SERVICE_NAME on this host, as the service-ip is offline !" 1>&2
          start_allowed=0
          break
        else
          echo "INFO: Service-node $service_target does not run the service, as the service-ip seems to be offline."
        fi
      elif ha_port_is_open "$service_target"; then
        if [ "$(ymc_contains "$service_target" $own_services)" -gt 0 ]; then
          echo "INFO: Service is already online on the local host - no start need..."
          start_needed=0
          break
        else
          echo "WARNING: Service on service-node $service_target using port $CHECK_PORT and protocol $PROTOCOL is still running !" 1>&2
          start_allowed=0
          break
        fi
      else
        echo "INFO: Service on service-node $service_target using port $CHECK_PORT and protocol $PROTOCOL is offline."
      fi
    done

    if [ "$start_allowed" -eq 1 ] && [ "$start_needed" -eq 1 ]; then
      echo "INFO: Staring service $SERVICE_NAME on this host..."
      if [ -x "/etc/init.d/$INIT_D" ]; then
        "/etc/init.d/$INIT_D" start
      else
        echo "ERROR: The init-script /etc/init.d/$INIT_D is not executable or does not exists" 1>&2
      fi
    elif [ "$start_needed" -eq 0 ]; then
      echo "INFO: Not starting service $SERVICE_NAME as it is already running on this host..."
    else
      echo "WARNING: Not staring service $SERVICE_NAME on this host due to previous checks !" 1>&2
    fi
  else
    ### The ip is offline - stop the service
    echo "INFO: Checking if service $SERVICE_NAME is running on this host..."

    ### Before stopping the service - find out if it is running on the local host
    stop_needed=1
    stop_allowed=1
    service_target=$(fliwi-get-my-services --limit="$SERVICE_NAME")
    if [ "$(ymc_is_host_alive "$service_target")" -ne 1 ]; then
      echo "WARNING: Impossible to get the state of service $SERVICE_NAME on this host, as the service-ip is offline !" 1>&2
      stop_allowed=0
    elif ha_port_is_open "$service_target"; then
      echo "INFO: Service is online on the local host..."
    else
      echo "INFO: Service is not online on the local host - no stop needed..."
      stop_needed=0
    fi

    ### Check if the process really isn't running based on the pid and the process-user or process-name...
    ### A match overrules a "no stop needed" from the port check above.
    ### The pid file is read only once and ignored unless it holds a number.
    if service_pid=$(ha_read_pid); then
      if [ -n "$PROCESS_USER" ] && \
         ha_ps_matches "$service_pid" "^${PROCESS_USER}[[:space:]]" -f; then
        echo "INFO: Service seems to be running (based on PID_FILE and PROCESS_USER)..."
        stop_needed=1
      fi

      if [ -n "$PROCESS_NAME" ] && \
         ha_ps_matches "$service_pid" "[[:space:]]${PROCESS_NAME}"'$'; then
        echo "INFO: Service seems to be running (based on PID_FILE and PROCESS_NAME)..."
        stop_needed=1
      fi
    fi

    if [ "$stop_allowed" -eq 1 ] && [ "$stop_needed" -eq 1 ]; then
      echo "INFO: Stopping service $SERVICE_NAME on this host..."
      if [ -x "/etc/init.d/$INIT_D" ]; then
        "/etc/init.d/$INIT_D" stop
      else
        echo "ERROR: The init-script /etc/init.d/$INIT_D is not executable or does not exists" 1>&2
      fi
    elif [ "$stop_needed" -eq 0 ]; then
      echo "INFO: Not stopping service $SERVICE_NAME as it is not running on this host..."
    else
      echo "WARNING: Not stopping service $SERVICE_NAME on this host due to previous checks !" 1>&2
    fi
  fi
done

### unlock: remove the lock file so the next run may start
/bin/rm -f -- "$lockFile"

exit 0

