/*
 * Decompiled with CFR 0.152.
 */
package io.imply.cloud.manager.notice.operations;

import com.codahale.metrics.Gauge;
import com.codahale.metrics.Metric;
import com.codahale.metrics.MetricRegistry;
import com.google.common.collect.ImmutableMap;
import com.google.inject.Singleton;
import io.imply.cloud.manager.ManagerToolbox;
import io.imply.cloud.manager.notice.operations.OperationsNotice;
import io.imply.cloud.model.CloudResources;
import io.imply.cloud.model.Cluster;
import io.imply.cloud.model.EntityType;
import io.imply.cloud.model.ImplyNodeType;
import io.imply.cloud.model.Info;
import io.imply.cloud.model.Instance;
import io.imply.cloud.model.InstanceHealth;
import io.imply.cloud.model.ServiceHealth;
import io.imply.cloud.model.ServiceType;
import io.imply.cloud.model.State;
import io.imply.cloud.model.User;
import io.imply.cloud.telemetry.CloudEvent;
import io.imply.cloud.util.ISE;
import io.imply.cloud.util.Logger;
import io.imply.cloud.util.ThreadLocalContext;
import io.imply.cloud.util.ThreadUtils;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
import java.util.stream.Collectors;
import javax.inject.Inject;
import org.joda.time.DateTime;

@Singleton
public class CheckInstanceHealthNotice
extends OperationsNotice {
    private static final Logger log = new Logger(CheckInstanceHealthNotice.class);

    /**
     * Latest consecutive-failure count per "failure_consecutive" metric name; the gauges registered in
     * {@link #metrics} read their value from here.
     *
     * <p>Entries are written by the health-check tasks submitted to the worker executor, removed by
     * {@link #innerHandle()} on the notice thread, and read whenever a gauge is sampled, so the map has to be
     * safe for concurrent access.
     */
    private final Map<String, Integer> failureCache = new ConcurrentHashMap<>();

    /** Registry in which the per-instance "failure_consecutive" gauges are registered and removed. */
    private final MetricRegistry metrics;

    /**
     * Creates the notice; invoked through dependency injection.
     *
     * @param toolbox manager toolbox, handed to the {@code OperationsNotice} superclass
     * @param metrics registry used for the per-instance "failure_consecutive" gauges
     */
    @Inject
    public CheckInstanceHealthNotice(ManagerToolbox toolbox, MetricRegistry metrics) {
        super(toolbox);
        this.metrics = metrics;
    }

    /**
     * Runs one health-check pass over every cluster whose entity state reports {@code isRunning()}.
     *
     * <p>For each such cluster the pass:
     * <ol>
     *   <li>skips it when its id is in the configured health-check exclusion list;</li>
     *   <li>skips it when the previous check is more recent than the applicable period (failing, updating or
     *       running, see the {@code get*InstanceHealthCheckPeriodInSeconds()} methods);</li>
     *   <li>loads the cluster's stack details and schedules asynchronous checks for each node tier;</li>
     *   <li>deletes instance-health entries (plus their gauge and cached failure count) for instances that are
     *       no longer listed in the stack details;</li>
     *   <li>records the current time as the cluster's last instance health check.</li>
     * </ol>
     *
     * <p>Any exception raised while handling one cluster is logged and the loop moves on to the next cluster.
     */
    @Override
    public void innerHandle() {
        List<Info> runningClusters = this.toolbox.getEntityStateDataManager()
                .getAllWithType(EntityType.CLUSTER)
                .stream()
                .filter(candidate -> candidate.getState().isRunning())
                .collect(Collectors.toList());

        for (Info info : runningClusters) {
            try {
                String clusterId = info.getEntityId();
                Cluster cluster = this.toolbox.getClusterDataManager().get(clusterId);

                if (this.toolbox.getRefreshableConstants().getClusterHealthCheckExclusions() != null
                        && this.toolbox.getRefreshableConstants().getClusterHealthCheckExclusions().contains(clusterId)) {
                    log.info("Skipping health check for cluster [%s] which is in exclusion list", clusterId);
                    continue;
                }

                // Published on this thread so that the tasks scheduled below can snapshot it.
                ThreadLocalContext.setSystemContext("isMock", info.isMockEntity());

                List<InstanceHealth> clusterInstancesHealth =
                        this.toolbox.getInstanceHealthDataManager().getAllForCluster(clusterId);
                boolean hasFailedCheck = clusterInstancesHealth.stream().anyMatch(InstanceHealth::hasFailedCheck);

                // Throttle: a cluster is only re-checked once its period has fully elapsed.
                DateTime lastCheck = info.getLastInstanceHealthCheck();
                if (lastCheck != null) {
                    int periodSeconds;
                    if (hasFailedCheck) {
                        periodSeconds = this.getFailingInstanceHealthCheckPeriodInSeconds();
                    } else if (info.getState().isTransitionState()) {
                        periodSeconds = this.getUpdatingInstanceHealthCheckPeriodInSeconds();
                    } else {
                        periodSeconds = this.getRunningInstanceHealthCheckPeriodInSeconds();
                    }
                    if (!lastCheck.plusSeconds(periodSeconds).isBeforeNow()) {
                        continue;
                    }
                }

                log.debug("Running instance health checks for cluster [%s]", clusterId);

                CloudResources clusterStackInfo;
                try {
                    clusterStackInfo = this.toolbox.getClusterStackHelper().getClusterDetails(clusterId, false, false, false);
                } catch (Exception e) {
                    log.info(e, "Exception while retrieving stack details for cluster [%s], skipping health check: ", clusterId);
                    continue;
                }
                if (clusterStackInfo == null) {
                    log.warn("Failed to retrieve stack details for cluster [%s], skipping health check", clusterId);
                    continue;
                }

                Map<ImplyNodeType, List<Instance>> hosts = clusterStackInfo.getHosts();
                if (hosts == null) {
                    log.debug("No hosts found for cluster [%s], skipping health check", clusterId);
                    continue;
                }

                this.scheduleMasterNodeChecks(cluster, hosts.get(ImplyNodeType.MASTER));
                // Every node type that shares the DATA service type is checked as a data node.
                for (Map.Entry<ImplyNodeType, List<Instance>> tier : hosts.entrySet()) {
                    if (tier.getKey().getNodeServiceType().equals(ImplyNodeType.DATA.getNodeServiceType())) {
                        this.scheduleDataNodeChecks(cluster, tier.getValue(), tier.getKey());
                    }
                }
                this.scheduleQueryNodeChecks(cluster, hosts.get(ImplyNodeType.QUERY));
                this.scheduleColdQueryNodeChecks(cluster, hosts.get(ImplyNodeType.COLD_TIER_QUERY));
                this.scheduleColdDataNodeChecks(cluster, hosts.get(ImplyNodeType.COLD_TIER_DATA));

                // Health entries whose instance is no longer part of the cluster are stale and get purged.
                List<String> knownInstanceIds = hosts.values().stream()
                        .flatMap(Collection::stream)
                        .map(Instance::getInstanceId)
                        .collect(Collectors.toList());
                List<InstanceHealth> entriesToRemove = clusterInstancesHealth.stream()
                        .filter(entry -> !knownInstanceIds.contains(entry.getInstanceId()))
                        .collect(Collectors.toList());
                for (InstanceHealth staleEntry : entriesToRemove) {
                    String key = staleEntry.getKey();
                    log.info("Removing entry [%s] no longer in cluster from instance health table", key);
                    try {
                        Integer consecutiveFailures = staleEntry.getConsecutiveFailureCount();
                        if (consecutiveFailures != null && consecutiveFailures >= 5) {
                            log.info(true, "CLU [%s] INS [%s] IP [%s] can no longer be found", clusterId, staleEntry.getInstanceId(), staleEntry.getHost());
                        }
                        String metricName = CloudEvent.named(CheckInstanceHealthNotice.class, "failure_consecutive", cluster, new String[]{staleEntry.getInstanceId()});
                        this.metrics.remove(metricName);
                        this.failureCache.remove(metricName);
                        this.toolbox.getInstanceHealthDataManager().delete(key);
                    } catch (Exception e) {
                        log.warn(e, "Exception while removing [%s] from instance health table", key);
                    }
                }

                this.toolbox.getEntityStateDataManager().insert(info, Info.builder().withLastInstanceHealthCheck(DateTime.now()).build());
            } catch (Exception e) {
                log.warn(e, "Exception while checking health of instances in cluster [%s]", info != null ? info.getEntityId() : "null");
            }
        }
    }

    /**
     * Minimum time between health-check passes for a cluster that already has at least one instance with a
     * failed check. Protected so that subclasses can override the interval.
     *
     * @return the period in seconds
     */
    protected int getFailingInstanceHealthCheckPeriodInSeconds() {
        return 105;
    }

    /**
     * Minimum time between health-check passes for a cluster with no failed checks whose state is a transition
     * state. Protected so that subclasses can override the interval.
     *
     * @return the period in seconds
     */
    protected int getUpdatingInstanceHealthCheckPeriodInSeconds() {
        return 105;
    }

    /**
     * Minimum time between health-check passes for a cluster with no failed checks that is not in a transition
     * state. Protected so that subclasses can override the interval.
     *
     * @return the period in seconds
     */
    protected int getRunningInstanceHealthCheckPeriodInSeconds() {
        return 225;
    }

    /**
     * Submits one asynchronous health-check task to the worker executor for every instance in
     * {@code instances} that has a usable IP address.
     *
     * <p>The calling thread's {@link ThreadLocalContext} (principal, request/span/cache ids, user context and
     * system context) is snapshotted here and re-installed inside each task, then cleared when the task ends.
     * The system context is snapshotted independently of the user context, so entries such as the "isMock"
     * flag set by {@link #innerHandle()} reach the worker even when no user context is present.
     *
     * <p>Raw {@code Map} types are used for the two context snapshots because the generic signature of the
     * {@code ThreadLocalContext} accessors is not visible from this class.
     *
     * @param cluster       cluster owning the instances; dereferenced whenever {@code instances} is non-null
     * @param instances     instances to check; {@code null} means there is nothing to schedule
     * @param implyNodeType node type stored on the health entry and used to look up the existing entry
     * @param fn            probe mapping an instance IP address to a healthy/unhealthy flag per service
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    protected void scheduleNodeChecks(Cluster cluster, List<Instance> instances, ImplyNodeType implyNodeType, Function<String, Map<ServiceType, Boolean>> fn) {
        if (instances == null) {
            return;
        }
        String clusterId = cluster.getClusterId();

        // Snapshot of the caller's thread-local state, to be replayed on the worker thread.
        User principal = ThreadLocalContext.getPrincipal();
        String requestId = ThreadLocalContext.getRequestId();
        String parentSpanId = ThreadLocalContext.getParentSpanId();
        String spanId = ThreadLocalContext.getSpanId();
        String cacheId = ThreadLocalContext.getCacheId();
        Class<?> noticeOrActionClass = this.getClass();
        Map callerUserContext = ThreadLocalContext.getUserContext();
        Map userContext = callerUserContext != null ? new HashMap(callerUserContext) : null;
        // Each context gets its own null check: guarding this copy on the user context either threw an NPE
        // or silently dropped the system context.
        Map callerSystemContext = ThreadLocalContext.getSystemContext();
        Map systemContext = callerSystemContext != null ? new HashMap(callerSystemContext) : null;

        for (Instance instance : instances) {
            log.trace("Scheduling health check for cluster [%s] instance [%s]", clusterId, instance.getInstanceId());
            String ipAddress = this.toolbox.getManagerConfig().isUseInstancePublicIp()
                    ? instance.getPublicIpAddress()
                    : instance.getPrivateIpAddress();
            if (ipAddress == null) {
                log.info("No IP address available for instance [%s], skipping health check", instance.getInstanceId());
                continue;
            }
            this.toolbox.getWorkerExecutor().submit(() -> {
                String originalThreadName = Thread.currentThread().getName();
                Thread.currentThread().setName(ThreadUtils.getExtendedThreadName(originalThreadName, principal != null ? principal.getDetailedUserId() : null, requestId));
                try {
                    ThreadLocalContext.setPrincipal(principal);
                    ThreadLocalContext.setRequestId(requestId);
                    ThreadLocalContext.setParentSpanId(parentSpanId);
                    ThreadLocalContext.setSpanId(spanId);
                    ThreadLocalContext.setCacheId(cacheId);
                    ThreadLocalContext.setUserContext(userContext);
                    ThreadLocalContext.setNoticeOrActionClass((Class) noticeOrActionClass);
                    ThreadLocalContext.setSystemContext(systemContext);
                    this.checkInstanceAndRecord(cluster, instance, ipAddress, implyNodeType, fn);
                } catch (Throwable e) {
                    // Last line of defence on the worker thread: nothing may escape the task.
                    log.error(e, "Exception checking health of instance [%s] @ [%s]", instance.getInstanceId(), ipAddress);
                } finally {
                    Thread.currentThread().setName(originalThreadName);
                    ThreadLocalContext.clear();
                }
            });
        }
    }

    /**
     * Performs the health check of a single instance and writes the resulting entry to the instance health
     * table. Runs on a worker thread, inside the context installed by
     * {@link #scheduleNodeChecks(Cluster, List, ImplyNodeType, Function)}.
     *
     * <p>When no entry exists yet, the entry has no creation time, or it was created less than 180 seconds
     * ago, the probe is skipped and only the identifying fields are written. Otherwise the instance counts as
     * failed when its agent reports offline, when Imply is reported as not running, or when any probed
     * service is unhealthy, and the success/failure counters are updated accordingly.
     */
    private void checkInstanceAndRecord(Cluster cluster, Instance instance, String ipAddress, ImplyNodeType implyNodeType, Function<String, Map<ServiceType, Boolean>> fn) {
        String clusterId = cluster.getClusterId();
        String instanceId = instance.getInstanceId();
        InstanceHealth.Builder updatedHealthBuilder = InstanceHealth.builder()
                .withClusterId(clusterId)
                .withInstanceId(instanceId)
                .withHost(ipAddress)
                .withImplyNodeType(implyNodeType);
        InstanceHealth existingHealth = this.toolbox.getInstanceHealthDataManager().getOrNull(clusterId, instanceId, implyNodeType);

        if (existingHealth == null || existingHealth.getCreated() == null || existingHealth.getCreated().plusSeconds(180).isAfterNow()) {
            log.debug("Skipping CLU [%s] INS [%s] IP [%s] in grace period", clusterId, instanceId, ipAddress);
        } else {
            boolean allGood;
            String healthMessage;
            if (instance.isOnline() != null && !instance.isOnline()) {
                healthMessage = "Agent offline";
                allGood = false;
            } else if (instance.isImplyRunning() != null && !instance.isImplyRunning()) {
                healthMessage = "Imply not running";
                allGood = false;
            } else {
                Map<ServiceType, Boolean> results = fn.apply(ipAddress);
                updatedHealthBuilder.withServiceHealth(results.entrySet().stream()
                        .map(result -> this.buildServiceHealthWithLastHealthy(result.getKey(), result.getValue(), existingHealth))
                        .collect(Collectors.toList()));
                allGood = results.values().stream().allMatch(healthy -> healthy);
                healthMessage = this.getHealthMessage(results);
            }
            updatedHealthBuilder.withLastStatusCheck(DateTime.now()).withLastMessage(healthMessage);

            String gaugeName = CloudEvent.named(CheckInstanceHealthNotice.class, "failure_consecutive", cluster, new String[]{instanceId});
            if (allGood) {
                log.debug("CLU [%s] INS [%s] IP [%s]: %s", clusterId, instanceId, ipAddress, healthMessage);
                Integer previousSuccesses = existingHealth.getConsecutiveSuccessCount();
                int consecutiveSuccessCount = previousSuccesses == null ? 1 : Math.min(previousSuccesses + 1, Short.MAX_VALUE);

                // The failure count is capped at 4 and decays by one on every second consecutive success.
                Integer previousFailures = existingHealth.getFailureCount();
                int failureCount;
                if (previousFailures == null || previousFailures <= 0) {
                    failureCount = 0;
                } else {
                    int decayed = consecutiveSuccessCount % 2 == 0 ? previousFailures - 1 : previousFailures;
                    failureCount = Math.min(decayed, 4);
                }
                updatedHealthBuilder
                        .withConsecutiveSuccessCount(consecutiveSuccessCount)
                        .withFailureCount(failureCount)
                        .withConsecutiveFailureCount(0);

                // Keep an already-registered gauge in step with the stored entry; without this it would keep
                // reporting the last failure streak after the instance has recovered.
                this.failureCache.computeIfPresent(gaugeName, (name, staleCount) -> 0);

                if (existingHealth.getConsecutiveFailureCount() != null && existingHealth.getConsecutiveFailureCount() >= 5) {
                    // NOTE(review): the leading boolean is passed through to the project Logger; its exact
                    // meaning is not visible here.
                    log.info(true, "CLU [%s] INS [%s] IP [%s] OK: %s", clusterId, existingHealth.getInstanceId(), existingHealth.getHost(), healthMessage);
                }
            } else {
                log.info("CLU [%s] INS [%s] IP [%s]: %s", clusterId, instanceId, ipAddress, healthMessage);
                Integer previousFailures = existingHealth.getFailureCount();
                Integer previousConsecutiveFailures = existingHealth.getConsecutiveFailureCount();
                int failureCount = previousFailures == null ? 1 : Math.min(previousFailures + 1, Short.MAX_VALUE);
                int consecutiveFailureCount = previousConsecutiveFailures == null ? 1 : previousConsecutiveFailures + 1;
                updatedHealthBuilder
                        .withFailureCount(failureCount)
                        .withConsecutiveFailureCount(consecutiveFailureCount)
                        .withConsecutiveSuccessCount(0);

                // The gauge reads its value from failureCache, so the cache is updated before registration.
                this.failureCache.put(gaugeName, consecutiveFailureCount);
                if (!this.metrics.getGauges().containsKey(gaugeName)) {
                    Gauge<Integer> consecutiveFailuresGauge = () -> this.failureCache.get(gaugeName);
                    this.metrics.register(gaugeName, consecutiveFailuresGauge);
                }
            }
        }
        this.toolbox.getInstanceHealthDataManager().insert(updatedHealthBuilder.build());
    }

    /**
     * Schedules health checks for master nodes; each instance is probed for Coordinator and Overlord status.
     *
     * @param cluster   cluster owning the instances
     * @param instances master instances, forwarded unchanged to
     *                  {@link #scheduleNodeChecks(Cluster, List, ImplyNodeType, Function)}
     */
    private void scheduleMasterNodeChecks(Cluster cluster, List<Instance> instances) {
        Function<String, Map<ServiceType, Boolean>> masterProbe = ipAddress -> {
            Boolean coordinatorOk = this.toolbox.getDruidApiClient().isCoordinatorStatusOK(ipAddress, cluster.getClusterId());
            Boolean overlordOk = this.toolbox.getDruidApiClient().isOverlordStatusOK(ipAddress, cluster.getClusterId());
            return ImmutableMap.of(ServiceType.COORDINATOR, coordinatorOk, ServiceType.OVERLORD, overlordOk);
        };
        this.scheduleNodeChecks(cluster, instances, ImplyNodeType.MASTER, masterProbe);
    }

    /**
     * Schedules health checks for data nodes; each instance is probed for Historical and MiddleManager status.
     *
     * @param cluster   cluster owning the instances
     * @param instances data instances; nothing is scheduled when {@code null}
     * @param nodeType  concrete node type to record on the health entries
     */
    protected void scheduleDataNodeChecks(Cluster cluster, List<Instance> instances, ImplyNodeType nodeType) {
        if (instances != null) {
            Function<String, Map<ServiceType, Boolean>> dataProbe = ipAddress -> {
                Boolean historicalOk = this.toolbox.getDruidApiClient().isHistoricalStatusOK(ipAddress, cluster.getClusterId());
                Boolean middleManagerOk = this.toolbox.getDruidApiClient().isMiddleManagerStatusOK(ipAddress, cluster.getClusterId());
                return ImmutableMap.of(ServiceType.HISTORICAL, historicalOk, ServiceType.MIDDLE_MANAGER, middleManagerOk);
            };
            this.scheduleNodeChecks(cluster, instances, nodeType, dataProbe);
        }
    }

    /**
     * Schedules health checks for query nodes; each instance is probed for Broker, Pivot and Router status.
     *
     * @param cluster   cluster owning the instances
     * @param instances query instances, forwarded unchanged to
     *                  {@link #scheduleNodeChecks(Cluster, List, ImplyNodeType, Function)}
     */
    protected void scheduleQueryNodeChecks(Cluster cluster, List<Instance> instances) {
        Function<String, Map<ServiceType, Boolean>> queryProbe = ipAddress -> {
            Boolean brokerOk = this.toolbox.getDruidApiClient().isBrokerStatusOK(ipAddress, cluster.getClusterId());
            Boolean pivotOk = this.toolbox.getDruidApiClient().isPivotHealthOK(ipAddress, cluster.getClusterId());
            Boolean routerOk = this.toolbox.getDruidApiClient().isRouterStatusOK(ipAddress, cluster.getClusterId());
            return ImmutableMap.of(ServiceType.BROKER, brokerOk, ServiceType.PIVOT, pivotOk, ServiceType.ROUTER, routerOk);
        };
        this.scheduleNodeChecks(cluster, instances, ImplyNodeType.QUERY, queryProbe);
    }

    /**
     * Schedules health checks for cold-tier query nodes; each instance is probed for Broker status only.
     *
     * @param cluster   cluster owning the instances
     * @param instances cold-tier query instances; nothing is scheduled when {@code null}
     */
    protected void scheduleColdQueryNodeChecks(Cluster cluster, List<Instance> instances) {
        if (instances != null) {
            Function<String, Map<ServiceType, Boolean>> coldQueryProbe = ipAddress -> {
                Boolean brokerOk = this.toolbox.getDruidApiClient().isBrokerStatusOK(ipAddress, cluster.getClusterId());
                return ImmutableMap.of(ServiceType.BROKER, brokerOk);
            };
            this.scheduleNodeChecks(cluster, instances, ImplyNodeType.COLD_TIER_QUERY, coldQueryProbe);
        }
    }

    /**
     * Schedules health checks for cold-tier data nodes; each instance is probed with the Historical status
     * check and the result is recorded under {@code ServiceType.COLD_HISTORICAL}.
     *
     * @param cluster   cluster owning the instances
     * @param instances cold-tier data instances; nothing is scheduled when {@code null}
     */
    protected void scheduleColdDataNodeChecks(Cluster cluster, List<Instance> instances) {
        if (instances != null) {
            Function<String, Map<ServiceType, Boolean>> coldDataProbe = ipAddress -> {
                Boolean historicalOk = this.toolbox.getDruidApiClient().isHistoricalStatusOK(ipAddress, cluster.getClusterId());
                return ImmutableMap.of(ServiceType.COLD_HISTORICAL, historicalOk);
            };
            this.scheduleNodeChecks(cluster, instances, ImplyNodeType.COLD_TIER_DATA, coldDataProbe);
        }
    }

    /**
     * Renders the probe results as one line such as {@code "<short name> [OK] / <short name> [FAIL]"}.
     * Entries are sorted by their rendered text and joined with {@code " / "}.
     *
     * @param results healthy/unhealthy flag per probed service
     * @return the joined status line; empty when {@code results} is empty
     */
    private String getHealthMessage(Map<ServiceType, Boolean> results) {
        return results.entrySet().stream()
                .map(result -> String.format("%s [%s]", result.getKey().getShortName(), result.getValue() ? "OK" : "FAIL"))
                .sorted()
                .collect(Collectors.joining(" / "));
    }

    /**
     * Builds the health record of one service, preserving the time it was last seen healthy.
     *
     * <p>A healthy service is stamped with the current time. An unhealthy one inherits the last-healthy time
     * of the first matching service entry in {@code existingHealth}, or {@code null} when there is none.
     *
     * @param serviceType    service the record is for
     * @param healthy        outcome of the current probe
     * @param existingHealth previously stored instance health; may be {@code null}
     * @return the new service health record
     */
    private ServiceHealth buildServiceHealthWithLastHealthy(ServiceType serviceType, boolean healthy, InstanceHealth existingHealth) {
        if (healthy) {
            return new ServiceHealth(serviceType, true, DateTime.now());
        }
        DateTime lastHealthy = null;
        if (existingHealth != null && existingHealth.getServiceHealth() != null) {
            for (ServiceHealth previous : existingHealth.getServiceHealth()) {
                if (previous.getService().equals(serviceType)) {
                    lastHealthy = previous.getLastHealthy();
                    break;
                }
            }
        }
        return new ServiceHealth(serviceType, false, lastHealthy);
    }

    /**
     * Not supported for this notice: always throws.
     *
     * @return never returns normally
     * @throws ISE on every call
     */
    @Override
    protected State getTimeoutState() {
        throw new ISE("getTimeoutState() should not be getting called for CheckInstanceHealthNotice", new Object[0]);
    }
}

