171 lines
4.8 KiB
Go
171 lines
4.8 KiB
Go
/*
|
|
Copyright IBM Corp. All Rights Reserved.
|
|
|
|
SPDX-License-Identifier: Apache-2.0
|
|
*/
|
|
|
|
// Package healthz provides an HTTP handler which returns the health status of
|
|
// one or more components of an application or service.
|
|
package healthz
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"net/http"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// AlreadyRegisteredError is the error returned when a component name is
// registered more than once. Its underlying string value is the name of the
// component that was already registered.
type AlreadyRegisteredError string

// Error implements the error interface and reports which component name was
// already registered.
func (are AlreadyRegisteredError) Error() string {
	// Convert to a plain string before formatting. Passing the receiver itself
	// to Sprintf with %s makes fmt call Error() again (the value implements
	// error), which recurses until the stack overflows.
	return fmt.Sprintf("'%s' is already registered", string(are))
}
|
|
|
|
// Values reported in the "status" field of a health response.
const (
	// StatusOK indicates that every registered health check succeeded.
	StatusOK = "OK"

	// StatusUnavailable indicates that at least one health check failed.
	StatusUnavailable = "Service Unavailable"
)
|
|
|
|
//go:generate counterfeiter -o mock/health_checker.go -fake-name HealthChecker . HealthChecker
|
|
|
|
// HealthChecker is implemented by any component that wants to be included in
// the health status reported by the Handler.
//
// HealthCheck receives a context whose Done channel is closed when the check
// times out or is cancelled. A nil return means the component is healthy; a
// non-nil error marks the component as failed, and the error text is reported
// as the reason.
type HealthChecker interface {
	HealthCheck(context.Context) error
}
|
|
|
|
// FailedCheck describes a single component whose health check did not pass.
type FailedCheck struct {
	// Component is the name the failing checker was registered under.
	Component string `json:"component"`
	// Reason is the text of the error returned by the health check.
	Reason string `json:"reason"`
}
|
|
|
|
// HealthStatus represents the current health status of all registered components.
|
|
type HealthStatus struct {
|
|
Status string `json:"status"`
|
|
Time time.Time `json:"time"`
|
|
FailedChecks []FailedCheck `json:"failed_checks,omitempty"`
|
|
}
|
|
|
|
// NewHealthHandler returns a new HealthHandler instance.
|
|
func NewHealthHandler() *HealthHandler {
|
|
return &HealthHandler{
|
|
healthCheckers: map[string]HealthChecker{},
|
|
now: time.Now,
|
|
timeout: 30 * time.Second,
|
|
}
|
|
}
|
|
|
|
// HealthHandler is responsible for executing registered health checks. It
|
|
// provides an HTTP handler which returns the health status for all registered
|
|
// components.
|
|
type HealthHandler struct {
|
|
mutex sync.RWMutex
|
|
healthCheckers map[string]HealthChecker
|
|
now func() time.Time
|
|
timeout time.Duration
|
|
}
|
|
|
|
// RegisterChecker registers a HealthChecker for a named component and adds it to
|
|
// the list of status checks to run. It returns an error if the component has
|
|
// already been registered.
|
|
func (h *HealthHandler) RegisterChecker(component string, checker HealthChecker) error {
|
|
h.mutex.Lock()
|
|
defer h.mutex.Unlock()
|
|
|
|
if _, ok := h.healthCheckers[component]; ok {
|
|
return AlreadyRegisteredError(component)
|
|
}
|
|
h.healthCheckers[component] = checker
|
|
return nil
|
|
}
|
|
|
|
// DeregisterChecker deregisters a named HealthChecker.
|
|
func (h *HealthHandler) DeregisterChecker(component string) {
|
|
h.mutex.Lock()
|
|
defer h.mutex.Unlock()
|
|
delete(h.healthCheckers, component)
|
|
}
|
|
|
|
// SetTimeout sets the timeout for handling HTTP requests. If not explicitly
|
|
// set, defaults to 30 seconds.
|
|
func (h *HealthHandler) SetTimeout(timeout time.Duration) {
|
|
h.timeout = timeout
|
|
}
|
|
|
|
// ServerHTTP is an HTTP handler (see http.Handler) which can be used as
|
|
// an HTTP endpoint for health checks. If all registered checks pass, it returns
|
|
// an HTTP status `200 OK` with a JSON payload of `{"status": "OK"}`. If all
|
|
// checks do not pass, it returns an HTTP status `503 Service Unavailable` with
|
|
// a JSON payload of `{"status": "Service Unavailable","failed_checks":[...]}.
|
|
func (h *HealthHandler) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
|
|
if req.Method != "GET" {
|
|
rw.WriteHeader(http.StatusMethodNotAllowed)
|
|
return
|
|
}
|
|
checksCtx, cancel := context.WithTimeout(req.Context(), h.timeout)
|
|
defer cancel()
|
|
|
|
failedCheckCh := make(chan []FailedCheck)
|
|
go func() {
|
|
failedCheckCh <- h.RunChecks(checksCtx)
|
|
}()
|
|
|
|
select {
|
|
case failedChecks := <-failedCheckCh:
|
|
hs := HealthStatus{
|
|
Status: StatusOK,
|
|
Time: h.now(),
|
|
}
|
|
if len(failedChecks) > 0 {
|
|
hs.Status = StatusUnavailable
|
|
hs.FailedChecks = failedChecks
|
|
}
|
|
writeHTTPResponse(rw, hs)
|
|
case <-checksCtx.Done():
|
|
if checksCtx.Err() == context.DeadlineExceeded {
|
|
rw.WriteHeader(http.StatusRequestTimeout)
|
|
}
|
|
}
|
|
}
|
|
|
|
// RunChecks runs all healthCheckers and returns any failures.
|
|
func (h *HealthHandler) RunChecks(ctx context.Context) []FailedCheck {
|
|
h.mutex.RLock()
|
|
defer h.mutex.RUnlock()
|
|
|
|
var failedChecks []FailedCheck
|
|
for component, checker := range h.healthCheckers {
|
|
if err := checker.HealthCheck(ctx); err != nil {
|
|
failedCheck := FailedCheck{
|
|
Component: component,
|
|
Reason: err.Error(),
|
|
}
|
|
failedChecks = append(failedChecks, failedCheck)
|
|
}
|
|
}
|
|
return failedChecks
|
|
}
|
|
|
|
// write the HTTP response
|
|
func writeHTTPResponse(rw http.ResponseWriter, hs HealthStatus) {
|
|
var resp []byte
|
|
rc := http.StatusOK
|
|
rw.Header().Set("Content-Type", "application/json")
|
|
if len(hs.FailedChecks) > 0 {
|
|
rc = http.StatusServiceUnavailable
|
|
}
|
|
resp, err := json.Marshal(hs)
|
|
if err != nil {
|
|
rc = http.StatusInternalServerError
|
|
}
|
|
rw.WriteHeader(rc)
|
|
rw.Write(resp)
|
|
}
|