Details

    • Type: Task
    • Status: Resolved
    • Resolution: Done
    • Affects Version/s: None
    • Fix Version/s: 0.9.0
    • Component/s: None
    • Labels:

      Description

      • The health check manager now tracks health checks by appId + version
      • Added TaskTracker.getVersion
      • Call healthCheckManager.addAllFor from
        DeploymentActor during start and restart to
        avoid a race between health check reconciliation
        and the initial task health update from the
        executor health check system
      • Added missing health check reconciliation calls
      • Fixes MGI-688
      • Fixes MGI-697

        Here is an app running with a single command health check:

      connor@iota:marathon (697-health-checks-by-app-version) $ http mesos.vm:8080/v2/apps?embed=apps.tasks
      HTTP/1.1 200 OK
      Content-Type: application/json
      Server: Jetty(8.y.z-SNAPSHOT)
      Transfer-Encoding: chunked
      
      {
          "apps": [
              {
                  "args": null, 
                  "backoffFactor": 1.15, 
                  "backoffSeconds": 1, 
                  "cmd": "python3 -m http.server 8080", 
                  "constraints": [], 
                  "container": {
                      "docker": {
                          "image": "python:3", 
                          "network": "BRIDGE", 
                          "portMappings": [
                              {
                                  "containerPort": 8080, 
                                  "hostPort": 0, 
                                  "protocol": "tcp", 
                                  "servicePort": 9000
                              }, 
                              {
                                  "containerPort": 161, 
                                  "hostPort": 0, 
                                  "protocol": "udp", 
                                  "servicePort": 10000
                              }
                          ]
                      }, 
                      "type": "DOCKER", 
                      "volumes": []
                  }, 
                  "cpus": 0.25, 
                  "dependencies": [], 
                  "deployments": [], 
                  "disk": 0.0, 
                  "env": {}, 
                  "executor": "", 
                  "healthChecks": [
                      {
                          "command": {
                              "value": "curl -f -X GET http://$HOST:$PORT"
                          }, 
                          "gracePeriodSeconds": 5, 
                          "intervalSeconds": 20, 
                          "maxConsecutiveFailures": 3, 
                          "path": "/", 
                          "portIndex": 0, 
                          "protocol": "COMMAND", 
                          "timeoutSeconds": 20
                      }
                  ], 
                  "id": "/bridged-webapp", 
                  "instances": 2, 
                  "mem": 64.0, 
                  "ports": [
                      9000, 
                      10000
                  ], 
                  "requirePorts": false, 
                  "storeUrls": [], 
                  "tasks": [
                      {
                          "appId": "/bridged-webapp", 
                          "healthCheckResults": [
                              {
                                  "alive": true, 
                                  "consecutiveFailures": 0, 
                                  "firstSuccess": "2014-10-29T22:34:16.686Z", 
                                  "lastFailure": null, 
                                  "lastSuccess": "2014-10-29T22:34:16.686Z", 
                                  "taskId": "bridged-webapp.b5da4126-5fbb-11e4-ad9b-6e94cab8a4fe"
                              }
                          ], 
                          "host": "10.141.141.10", 
                          "id": "bridged-webapp.b5da4126-5fbb-11e4-ad9b-6e94cab8a4fe", 
                          "ports": [
                              31002, 
                              31003
                          ], 
                          "stagedAt": "2014-10-29T22:34:14.248Z", 
                          "startedAt": "2014-10-29T22:34:31.572Z", 
                          "version": "2014-10-29T22:29:38.355Z"
                      }, 
                      {
                          "appId": "/bridged-webapp", 
                          "healthCheckResults": [
                              {
                                  "alive": true, 
                                  "consecutiveFailures": 0, 
                                  "firstSuccess": "2014-10-29T22:34:22.728Z", 
                                  "lastFailure": null, 
                                  "lastSuccess": "2014-10-29T22:34:22.728Z", 
                                  "taskId": "bridged-webapp.b83d8588-5fbb-11e4-ad9b-6e94cab8a4fe"
                              }
                          ], 
                          "host": "10.141.141.10", 
                          "id": "bridged-webapp.b83d8588-5fbb-11e4-ad9b-6e94cab8a4fe", 
                          "ports": [
                              31004, 
                              31005
                          ], 
                          "stagedAt": "2014-10-29T22:34:18.255Z", 
                          "startedAt": "2014-10-29T22:34:31.579Z", 
                          "version": "2014-10-29T22:29:38.355Z"
                      }
                  ], 
                  "tasksRunning": 2, 
                  "tasksStaged": 0, 
                  "upgradeStrategy": {
                      "minimumHealthCapacity": 1.0
                  }, 
                  "uris": [], 
                  "user": null, 
                  "version": "2014-10-29T22:29:38.355Z"
              }
          ]
      }
      

      Swapping the command health check for an HTTP health check:

      connor@iota:marathon (697-health-checks-by-app-version) $ http put mesos.vm:8080/v2/apps/bridged-webapp < examples/bridge-http.json
      HTTP/1.1 200 OK
      Content-Type: application/json
      Server: Jetty(8.y.z-SNAPSHOT)
      Transfer-Encoding: chunked
      
      {
          "deploymentId": "cb8cfa2d-93e6-4108-83e6-458bd1dc7581", 
          "version": "2014-10-29T22:35:14.032Z"
      }
      
      connor@iota:marathon (697-health-checks-by-app-version) $ http mesos.vm:8080/v2/apps?embed=apps.tasks
      HTTP/1.1 200 OK
      Content-Type: application/json
      Server: Jetty(8.y.z-SNAPSHOT)
      Transfer-Encoding: chunked
      
      {
          "apps": [
              {
                  "args": null, 
                  "backoffFactor": 1.15, 
                  "backoffSeconds": 1, 
                  "cmd": "python3 -m http.server 8080", 
                  "constraints": [], 
                  "container": {
                      "docker": {
                          "image": "python:3", 
                          "network": "BRIDGE", 
                          "portMappings": [
                              {
                                  "containerPort": 8080, 
                                  "hostPort": 0, 
                                  "protocol": "tcp", 
                                  "servicePort": 9000
                              }, 
                              {
                                  "containerPort": 161, 
                                  "hostPort": 0, 
                                  "protocol": "udp", 
                                  "servicePort": 10000
                              }
                          ]
                      }, 
                      "type": "DOCKER", 
                      "volumes": []
                  }, 
                  "cpus": 0.25, 
                  "dependencies": [], 
                  "deployments": [], 
                  "disk": 0.0, 
                  "env": {}, 
                  "executor": "", 
                  "healthChecks": [
                      {
                          "command": null, 
                          "gracePeriodSeconds": 5, 
                          "intervalSeconds": 20, 
                          "maxConsecutiveFailures": 3, 
                          "path": "/", 
                          "portIndex": 0, 
                          "protocol": "HTTP", 
                          "timeoutSeconds": 20
                      }
                  ], 
                  "id": "/bridged-webapp", 
                  "instances": 2, 
                  "mem": 64.0, 
                  "ports": [
                      9000, 
                      10000
                  ], 
                  "requirePorts": false, 
                  "storeUrls": [], 
                  "tasks": [
                      {
                          "appId": "/bridged-webapp", 
                          "healthCheckResults": [
                              {
                                  "alive": true, 
                                  "consecutiveFailures": 0, 
                                  "firstSuccess": "2014-10-29T22:35:34.102Z", 
                                  "lastFailure": null, 
                                  "lastSuccess": "2014-10-29T22:35:54.123Z", 
                                  "taskId": "bridged-webapp.dad89f41-5fbb-11e4-ad9b-6e94cab8a4fe"
                              }
                          ], 
                          "host": "10.141.141.10", 
                          "id": "bridged-webapp.dad89f41-5fbb-11e4-ad9b-6e94cab8a4fe", 
                          "ports": [
                              31006, 
                              31007
                          ], 
                          "stagedAt": "2014-10-29T22:35:16.313Z", 
                          "startedAt": "2014-10-29T22:36:01.568Z", 
                          "version": "2014-10-29T22:35:14.032Z"
                      }, 
                      {
                          "appId": "/bridged-webapp", 
                          "healthCheckResults": [
                              {
                                  "alive": true, 
                                  "consecutiveFailures": 0, 
                                  "firstSuccess": "2014-10-29T22:35:54.125Z", 
                                  "lastFailure": null, 
                                  "lastSuccess": "2014-10-29T22:35:54.125Z", 
                                  "taskId": "bridged-webapp.e62e62d5-5fbb-11e4-ad9b-6e94cab8a4fe"
                              }
                          ], 
                          "host": "10.141.141.10", 
                          "id": "bridged-webapp.e62e62d5-5fbb-11e4-ad9b-6e94cab8a4fe", 
                          "ports": [
                              31008, 
                              31009
                          ], 
                          "stagedAt": "2014-10-29T22:35:35.330Z", 
                          "startedAt": "2014-10-29T22:36:01.576Z", 
                          "version": "2014-10-29T22:35:14.032Z"
                      }
                  ], 
                  "tasksRunning": 2, 
                  "tasksStaged": 0, 
                  "upgradeStrategy": {
                      "minimumHealthCapacity": 1.0
                  }, 
                  "uris": [], 
                  "user": null, 
                  "version": "2014-10-29T22:35:14.032Z"
              }
          ]
      }
      

      Swapping the HTTP health check back to a command health check:

      connor@iota:marathon (697-health-checks-by-app-version) $ http put mesos.vm:8080/v2/apps/bridged-webapp < examples/bridge-command.json
      HTTP/1.1 200 OK
      Content-Type: application/json
      Server: Jetty(8.y.z-SNAPSHOT)
      Transfer-Encoding: chunked
      
      {
          "deploymentId": "9fbb78ca-bf9a-4669-8703-8e8ede78e057", 
          "version": "2014-10-29T22:36:28.015Z"
      }
      
      connor@iota:marathon (697-health-checks-by-app-version) $ http mesos.vm:8080/v2/apps?embed=apps.tasks
      HTTP/1.1 200 OK
      Content-Type: application/json
      Server: Jetty(8.y.z-SNAPSHOT)
      Transfer-Encoding: chunked
      
      {
          "apps": [
              {
                  "args": null, 
                  "backoffFactor": 1.15, 
                  "backoffSeconds": 1, 
                  "cmd": "python3 -m http.server 8080", 
                  "constraints": [], 
                  "container": {
                      "docker": {
                          "image": "python:3", 
                          "network": "BRIDGE", 
                          "portMappings": [
                              {
                                  "containerPort": 8080, 
                                  "hostPort": 0, 
                                  "protocol": "tcp", 
                                  "servicePort": 9000
                              }, 
                              {
                                  "containerPort": 161, 
                                  "hostPort": 0, 
                                  "protocol": "udp", 
                                  "servicePort": 10000
                              }
                          ]
                      }, 
                      "type": "DOCKER", 
                      "volumes": []
                  }, 
                  "cpus": 0.25, 
                  "dependencies": [], 
                  "deployments": [], 
                  "disk": 0.0, 
                  "env": {}, 
                  "executor": "", 
                  "healthChecks": [
                      {
                          "command": {
                              "value": "curl -f -X GET http://$HOST:$PORT"
                          }, 
                          "gracePeriodSeconds": 5, 
                          "intervalSeconds": 20, 
                          "maxConsecutiveFailures": 3, 
                          "path": "/", 
                          "portIndex": 0, 
                          "protocol": "COMMAND", 
                          "timeoutSeconds": 20
                      }
                  ], 
                  "id": "/bridged-webapp", 
                  "instances": 2, 
                  "mem": 64.0, 
                  "ports": [
                      9000, 
                      10000
                  ], 
                  "requirePorts": false, 
                  "storeUrls": [], 
                  "tasks": [
                      {
                          "appId": "/bridged-webapp", 
                          "healthCheckResults": [
                              {
                                  "alive": true, 
                                  "consecutiveFailures": 0, 
                                  "firstSuccess": "2014-10-29T22:36:33.121Z", 
                                  "lastFailure": null, 
                                  "lastSuccess": "2014-10-29T22:36:33.121Z", 
                                  "taskId": "bridged-webapp.07000729-5fbc-11e4-ad9b-6e94cab8a4fe"
                              }
                          ], 
                          "host": "10.141.141.10", 
                          "id": "bridged-webapp.07000729-5fbc-11e4-ad9b-6e94cab8a4fe", 
                          "ports": [
                              31010, 
                              31011
                          ], 
                          "stagedAt": "2014-10-29T22:36:30.391Z", 
                          "startedAt": "2014-10-29T22:37:01.534Z", 
                          "version": "2014-10-29T22:36:28.015Z"
                      }, 
                      {
                          "appId": "/bridged-webapp", 
                          "healthCheckResults": [
                              {
                                  "alive": true, 
                                  "consecutiveFailures": 0, 
                                  "firstSuccess": "2014-10-29T22:36:49.214Z", 
                                  "lastFailure": null, 
                                  "lastSuccess": "2014-10-29T22:36:49.214Z", 
                                  "taskId": "bridged-webapp.108c2e4f-5fbc-11e4-ad9b-6e94cab8a4fe"
                              }
                          ], 
                          "host": "10.141.141.10", 
                          "id": "bridged-webapp.108c2e4f-5fbc-11e4-ad9b-6e94cab8a4fe", 
                          "ports": [
                              31002, 
                              31003
                          ], 
                          "stagedAt": "2014-10-29T22:36:46.408Z", 
                          "startedAt": "2014-10-29T22:37:01.540Z", 
                          "version": "2014-10-29T22:36:28.015Z"
                      }
                  ], 
                  "tasksRunning": 2, 
                  "tasksStaged": 0, 
                  "upgradeStrategy": {
                      "minimumHealthCapacity": 1.0
                  }, 
                  "uris": [], 
                  "user": null, 
                  "version": "2014-10-29T22:36:28.015Z"
              }
          ]
      }
      

      Adding the HTTP health check alongside the command health check (each task now reports two health check results):

      connor@iota:marathon (697-health-checks-by-app-version) $ http put mesos.vm:8080/v2/apps/bridged-webapp < examples/bridge.json
      HTTP/1.1 200 OK
      Content-Type: application/json
      Server: Jetty(8.y.z-SNAPSHOT)
      Transfer-Encoding: chunked
      
      {
          "deploymentId": "c170724e-a8db-4883-8275-dc05faec6cc7", 
          "version": "2014-10-29T22:42:37.485Z"
      }
      
      connor@iota:marathon (697-health-checks-by-app-version) $ http mesos.vm:8080/v2/apps?embed=apps.tasks
      HTTP/1.1 200 OK
      Content-Type: application/json
      Server: Jetty(8.y.z-SNAPSHOT)
      Transfer-Encoding: chunked
      
      {
          "apps": [
              {
                  "args": null, 
                  "backoffFactor": 1.15, 
                  "backoffSeconds": 1, 
                  "cmd": "python3 -m http.server 8080", 
                  "constraints": [], 
                  "container": {
                      "docker": {
                          "image": "python:3", 
                          "network": "BRIDGE", 
                          "portMappings": [
                              {
                                  "containerPort": 8080, 
                                  "hostPort": 0, 
                                  "protocol": "tcp", 
                                  "servicePort": 9000
                              }, 
                              {
                                  "containerPort": 161, 
                                  "hostPort": 0, 
                                  "protocol": "udp", 
                                  "servicePort": 10000
                              }
                          ]
                      }, 
                      "type": "DOCKER", 
                      "volumes": []
                  }, 
                  "cpus": 0.25, 
                  "dependencies": [], 
                  "deployments": [], 
                  "disk": 0.0, 
                  "env": {}, 
                  "executor": "", 
                  "healthChecks": [
                      {
                          "command": null, 
                          "gracePeriodSeconds": 5, 
                          "intervalSeconds": 20, 
                          "maxConsecutiveFailures": 3, 
                          "path": "/", 
                          "portIndex": 0, 
                          "protocol": "HTTP", 
                          "timeoutSeconds": 20
                      }, 
                      {
                          "command": {
                              "value": "curl -f -X GET http://$HOST:$PORT"
                          }, 
                          "gracePeriodSeconds": 5, 
                          "intervalSeconds": 20, 
                          "maxConsecutiveFailures": 3, 
                          "path": "/", 
                          "portIndex": 0, 
                          "protocol": "COMMAND", 
                          "timeoutSeconds": 20
                      }
                  ], 
                  "id": "/bridged-webapp", 
                  "instances": 2, 
                  "mem": 64.0, 
                  "ports": [
                      9000, 
                      10000
                  ], 
                  "requirePorts": false, 
                  "storeUrls": [], 
                  "tasks": [
                      {
                          "appId": "/bridged-webapp", 
                          "healthCheckResults": [
                              {
                                  "alive": true, 
                                  "consecutiveFailures": 0, 
                                  "firstSuccess": "2014-10-29T22:42:57.587Z", 
                                  "lastFailure": null, 
                                  "lastSuccess": "2014-10-29T22:43:37.608Z", 
                                  "taskId": "bridged-webapp.e6256cc3-5fbc-11e4-ad9b-6e94cab8a4fe"
                              }, 
                              {
                                  "alive": true, 
                                  "consecutiveFailures": 0, 
                                  "firstSuccess": "2014-10-29T22:42:47.031Z", 
                                  "lastFailure": null, 
                                  "lastSuccess": "2014-10-29T22:42:47.031Z", 
                                  "taskId": "bridged-webapp.e6256cc3-5fbc-11e4-ad9b-6e94cab8a4fe"
                              }
                          ], 
                          "host": "10.141.141.10", 
                          "id": "bridged-webapp.e6256cc3-5fbc-11e4-ad9b-6e94cab8a4fe", 
                          "ports": [
                              31000, 
                              31001
                          ], 
                          "stagedAt": "2014-10-29T22:42:44.768Z", 
                          "startedAt": "2014-10-29T22:43:31.519Z", 
                          "version": "2014-10-29T22:42:37.485Z"
                      }, 
                      {
                          "appId": "/bridged-webapp", 
                          "healthCheckResults": [
                              {
                                  "alive": true, 
                                  "consecutiveFailures": 0, 
                                  "firstSuccess": "2014-10-29T22:42:57.585Z", 
                                  "lastFailure": null, 
                                  "lastSuccess": "2014-10-29T22:43:37.614Z", 
                                  "taskId": "bridged-webapp.e290d451-5fbc-11e4-ad9b-6e94cab8a4fe"
                              }, 
                              {
                                  "alive": true, 
                                  "consecutiveFailures": 0, 
                                  "firstSuccess": "2014-10-29T22:42:40.988Z", 
                                  "lastFailure": null, 
                                  "lastSuccess": "2014-10-29T22:42:40.988Z", 
                                  "taskId": "bridged-webapp.e290d451-5fbc-11e4-ad9b-6e94cab8a4fe"
                              }
                          ], 
                          "host": "10.141.141.10", 
                          "id": "bridged-webapp.e290d451-5fbc-11e4-ad9b-6e94cab8a4fe", 
                          "ports": [
                              31004, 
                              31005
                          ], 
                          "stagedAt": "2014-10-29T22:42:38.761Z", 
                          "startedAt": "2014-10-29T22:43:31.527Z", 
                          "version": "2014-10-29T22:42:37.485Z"
                      }
                  ], 
                  "tasksRunning": 2, 
                  "tasksStaged": 0, 
                  "upgradeStrategy": {
                      "minimumHealthCapacity": 1.0
                  }, 
                  "uris": [], 
                  "user": null, 
                  "version": "2014-10-29T22:42:37.485Z"
              }
          ]
      }
      

        Attachments

          Activity

            People

            • Assignee:
              Unassigned
              Reporter:
              GitHub_ConnorDoyle Connor Doyle (Inactive)
              Team:
              Orchestration Team
            • Watchers:
              0 Start watching this issue

              Dates

              • Created:
                Updated:
                Resolved: