Uploaded image for project: 'Marathon'
  1. Marathon
  2. MARATHON-7375

Marathon incorrectly passes port 0 to Mesos for health check and discovery info

    Details

      Description

      Background

      Marathon will automatically assign host and container ports for tasks when a value of 0 is provided. For example, the following app definition would tell Marathon to allocate any available port from the agent's offer, and assign it to the task for both the container port and the host port:

      {
        "id": "/container-host-port-0",
        "cmd": "set; nc -kl -p $PORT_HTTP -e sh -c $'sleep 1; echo -e \"HTTP/1.1 200 OK\\\\r\\\\nContent-Length: 3\\\\r\\\\n\\\\r\\\\nHi\"'",
        "env": {},
        "instances": 1,
        "cpus": 0.001,
        "mem": 64,
        "container": {
          "type": "DOCKER",
          "docker": {
            "image": "alpine",
            "network": "BRIDGE",
            "portMappings": [
              {
                "containerPort": 0,
                "hostPort": 0,
                "servicePort": 10000,
                "protocol": "tcp",
                "name": "http"
              }
            ],
            "forcePullImage": false
          }
        },
        "healthChecks": [
          {
            "gracePeriodSeconds": 300,
            "intervalSeconds": 60,
            "timeoutSeconds": 20,
            "maxConsecutiveFailures": 3,
            "portIndex": 0,
            "path": "/test",
            "protocol": "MESOS_HTTP",
            "delaySeconds": 15
          }
        ]
      }
      

      Mesos will perform the health check from within the container networking space against the specified container port. However, with this bug, Marathon incorrectly passes `0` as the port for the health check instead of the allocated container port, causing the health check to fail.

      Original issue

      To reproduce, deploy the following app definition with Marathon:

      {
        "id": "mesos-http",
        "cpus": 0.1,
        "mem": 256,
        "cmd": "cat <<EOF > /etc/Caddyfile\n0.0.0.0:$PORT0\nbrowse\nlog stdout\nerrors stdout\nEOF\n/usr/bin/caddy --conf /etc/Caddyfile --log stdout\n",
        "container": {
          "type": "DOCKER",
          "docker": {
            "image": "abiosoft/caddy",
            "network": "BRIDGE",
            "portMappings": [
              {
                "containerPort": 0,
                "hostPort": 0,
                "protocol": "tcp",
                "name": "default"
              }
            ],
            "privileged": false,
            "forcePullImage": false
          }
        },
        "healthChecks": [
          {
            "gracePeriodSeconds": 300,
            "intervalSeconds": 60,
            "timeoutSeconds": 20,
            "maxConsecutiveFailures": 3,
            "portIndex": 0,
            "path": "/",
            "protocol": "MESOS_HTTP",
            "delaySeconds": 15
          }
        ]
      }
      

      I intercept curl commands by replacing curl with the following shim:

      #!/bin/bash
      
      echo $0 "$@" >> /tmp/curl-invocations
      /bin/curl-orig "$@"
      

      (after first moving the original /bin/curl to /bin/curl-orig)

      I see that the following curl invocations are attempted:

      /usr/bin/curl -s -S -L -k -w %{http_code} -o /dev/null http://127.0.0.1:0/
      

      Mesos has the following data on the task:

      {
        "id": "mesos-http.e5d4308f-3b6a-11e7-8d3e-acde48001122",
        "name": "mesos-http",
        "host": "mesos-2.dev.vagrant",
        "framework_id": "5dd6ed11-d5fc-49a5-9014-097133156fad-0000",
        "executor_id": "",
        "slave_id": "93dcd16c-d525-446c-943f-fe0a627b6c89-S0",
        "state": "TASK_RUNNING",
        "resources": {
          "disk": 0,
          "mem": 256,
          "gpus": 0,
          "cpus": 0.1,
          "ports": "[31864-31864]"
        },
        "statuses": [
          {
            "state": "TASK_RUNNING",
            "timestamp": 1495071597.57199,
            "labels": [
              {
                "key": "Docker.NetworkSettings.IPAddress",
                "value": "172.17.0.2"
              }
            ],
            "container_status": {
              "container_id": {
                "value": "acef6ca0-9e82-459a-93d1-c2c7d0007e8d"
              },
              "network_infos": [
                {
                  "ip_addresses": [
                    {
                      "ip_address": "172.17.0.2"
                    }
                  ]
                }
              ]
            }
          }
        ],
        "discovery": {
          "visibility": "FRAMEWORK",
          "name": "mesos-http",
          "ports": {
            "ports": [
              {
                "number": 31864,
                "name": "default",
                "protocol": "tcp",
                "labels": {
                  "labels": [
                    {
                      "key": "network-scope",
                      "value": "host"
                    }
                  ]
                }
              }
            ]
          }
        },
        "container": {
          "type": "DOCKER",
          "docker": {
            "image": "abiosoft/caddy",
            "network": "BRIDGE",
            "port_mappings": [
              {
                "host_port": 31864,
                "container_port": 31864,
                "protocol": "tcp"
              }
            ],
            "privileged": false,
            "parameters": [
              {
                "key": "label",
                "value": "MESOS_TASK_ID=mesos-http.e5d4308f-3b6a-11e7-8d3e-acde48001122"
              }
            ],
            "force_pull_image": false
          }
        }
      }
      

      The assumption that the container IP is routable from the host is probably true in most cases, but it is not guaranteed in all cases. It is probably safer to rely on the CNI port mapping functionality being implemented properly when such a port mapping is specified.

      Marathon sends the following launch request (from the logs, debugging enabled):

      marathon [2017-05-17 19:39:56,779] DEBUG Operations on value: "bac68e1f-9c25-41ef-b299-4517162c7282-O594"
      marathon :
      marathon type: LAUNCH
      marathon launch {
      marathon   task_infos {
      marathon     name: "mesos-http"
      marathon     task_id {
      marathon       value: "mesos-http.e5d4308f-3b6a-11e7-8d3e-acde48001122"
      marathon     }
      marathon     slave_id {
      marathon       value: "93dcd16c-d525-446c-943f-fe0a627b6c89-S0"
      marathon     }
      marathon     resources {
      marathon       name: "cpus"
      marathon       type: SCALAR
      marathon       scalar {
      marathon         value: 0.1
      marathon       }
      marathon       role: "*"
      marathon     }
      marathon     resources {
      marathon       name: "mem"
      marathon       type: SCALAR
      marathon       scalar {
      marathon         value: 256.0
      marathon       }
      marathon       role: "*"
      marathon     }
      marathon     resources {
      marathon       name: "ports"
      marathon       type: RANGES
      marathon       ranges {
      marathon         range {
      marathon           begin: 31864
      marathon           end: 31864
      marathon         }
      marathon       }
      marathon       role: "*"
      marathon     }
      marathon     command {
      marathon       environment {
      marathon         variables {
      marathon           name: "MARATHON_APP_VERSION"
      marathon           value: "2017-05-18T01:39:56.180Z"
      marathon         }
      marathon         variables {
      marathon           name: "HOST"
      marathon           value: "mesos-2.dev.vagrant"
      marathon         }
      marathon         variables {
      marathon           name: "MARATHON_APP_RESOURCE_CPUS"
      marathon           value: "0.1"
      marathon         }
      marathon         variables {
      marathon           name: "MARATHON_APP_RESOURCE_GPUS"
      marathon           value: "0"
      marathon         }
      marathon         variables {
      marathon           name: "MARATHON_APP_DOCKER_IMAGE"
      marathon           value: "abiosoft/caddy"
      marathon         }
      marathon         variables {
      marathon           name: "MESOS_TASK_ID"
      marathon           value: "mesos-http.e5d4308f-3b6a-11e7-8d3e-acde48001122"
      marathon         }
      marathon         variables {
      marathon           name: "PORT"
      marathon           value: "31864"
      marathon         }
      marathon         variables {
      marathon           name: "MARATHON_APP_RESOURCE_MEM"
      marathon           value: "256.0"
      marathon         }
      marathon         variables {
      marathon           name: "PORTS"
      marathon           value: "31864"
      marathon         }
      marathon         variables {
      marathon           name: "MARATHON_APP_RESOURCE_DISK"
      marathon           value: "0.0"
      marathon         }
      marathon         variables {
      marathon           name: "MARATHON_APP_LABELS"
      marathon           value: ""
      marathon         }
      marathon         variables {
      marathon           name: "MARATHON_APP_ID"
      marathon           value: "/mesos-http"
      marathon         }
      marathon         variables {
      marathon           name: "PORT0"
      marathon           value: "31864"
      marathon         }
      marathon         variables {
      marathon           name: "PORT_DEFAULT"
      marathon           value: "31864"
      marathon         }
      marathon         variables {
      marathon           name: "PORT_31864"
      marathon           value: "31864"
      marathon         }
      marathon       }
      marathon       value: "cat <<EOF > /etc/Caddyfile\n0.0.0.0:$PORT0\nbrowse\nlog stdout\nerrors stdout\nEOF\n/usr/bin/caddy --conf /etc/Caddyfile --log stdout\n"
      marathon     }
      marathon     health_check {
      marathon       http {
      marathon         port: 0
      marathon         path: "/"
      marathon         scheme: "http"
      marathon       }
      marathon       delay_seconds: 15.0
      marathon       interval_seconds: 60.0
      marathon       timeout_seconds: 20.0
      marathon       consecutive_failures: 3
      marathon       grace_period_seconds: 300.0
      marathon       type: HTTP
      marathon     }
      marathon     container {
      marathon       type: DOCKER
      marathon       docker {
      marathon         image: "abiosoft/caddy"
      marathon         network: BRIDGE
      marathon         port_mappings {
      marathon           host_port: 31864
      marathon           container_port: 31864
      marathon           protocol: "tcp"
      marathon         }
      marathon         privileged: false
      marathon         parameters {
      marathon           key: "label"
      marathon           value: "MESOS_TASK_ID=mesos-http.e5d4308f-3b6a-11e7-8d3e-acde48001122"
      marathon         }
      marathon         force_pull_image: false
      marathon       }
      marathon     }
      marathon     discovery {
      marathon       visibility: FRAMEWORK
      marathon       name: "mesos-http"
      marathon       ports {
      marathon         ports {
      marathon           number: 31864
      marathon           name: "default"
      marathon           protocol: "tcp"
      marathon           labels {
      marathon             labels {
      marathon               key: "network-scope"
      marathon               value: "host"
      marathon             }
      marathon           }
      marathon         }
      marathon       }
      marathon     }
      marathon   }
      marathon }
      

      The health_check record is clearly incorrect: it specifies port 0 instead of the allocated port 31864.

        Attachments

          Issue Links

            Activity

              People

              • Assignee:
                tharper Tim Harper
                Reporter:
                tharper Tim Harper
                Team:
                Orchestration Team
                Watchers:
                bydga, Deepak Goel, Jie Yu, Marco Monaco, Tim Harper
              • Watchers:
                5 Start watching this issue

                Dates

                • Created:
                  Updated:
                  Resolved: