Uploaded image for project: 'DC/OS'
  1. DC/OS
  2. DCOS_OSS-1167

dcos-docker test failure: test_applications.test_octarine

    Details

    • Type: Bug
    • Status: Resolved
    • Priority: Medium
    • Resolution: Duplicate
    • Affects Version/s: DC/OS 1.9.0
    • Fix Version/s: None
    • Component/s: networking
    • Labels:
      None

      Description

      Integration test failure on dcos-docker (DC/OS 1.9.0).

      Test Name: test_applications.test_octarine

      Error Details

      Exception: Application deployment failed - operation was not completed in 120 seconds.

      Stack Trace
      self = <test_util.marathon.Marathon object at 0x7f1ef9b0e8d0>
      app_definition = {'acceptedResourceRoles': ['slave_public'], 'cmd': '/opt/mesosphere/bin/dcos-shell python /opt/mesosphere/active/dcos-...n_test_server.py $PORT0', 'cpus': 0.1, 'env': {'DCOS_TEST_UUID': '0b17cc6c0e3549258583f67c116135d7', 'HOME': '/'}, ...}
      timeout = 120, check_health = True, ignore_failed_tasks = False
      
          def deploy_app(self, app_definition, timeout=120, check_health=True, ignore_failed_tasks=False):
              """Deploy an app to marathon
          
                  This function deploys an an application and then waits for marathon to
                  acknowledge it's successful creation or fails the test.
          
                  The wait for application is immediately aborted if Marathon returns
                  nonempty 'lastTaskFailure' field. Otherwise it waits until all the
                  instances reach tasksRunning and then tasksHealthy state.
          
                  Args:
                      app_definition: a dict with application definition as specified in
                                      Marathon API (https://mesosphere.github.io/marathon/docs/rest-api.html#post-v2-apps)
                      timeout: a time to wait for the application to reach 'Healthy' status
                               after which the test should be failed.
                      check_health: wait until Marathon reports tasks as healthy before
                                    returning
          
                  Returns:
                      A list of named tuples which represent service points of deployed
                      applications. I.E:
                          [Endpoint(host='172.17.10.202', port=10464), Endpoint(host='172.17.10.201', port=1630)]
                  """
              r = self.post('v2/apps', json=app_definition)
              log.info('Response from marathon: {}'.format(repr(r.json())))
              r.raise_for_status()
          
              @retrying.retry(wait_fixed=1000, stop_max_delay=timeout * 1000,
                              retry_on_result=lambda ret: ret is None,
                              retry_on_exception=lambda x: False)
              def _poll_marathon_for_app_deployment(app_id):
                  Endpoint = collections.namedtuple("Endpoint", ["host", "port", "ip"])
                  # Some of the counters need to be explicitly enabled now and/or in
                  # future versions of Marathon:
                  req_params = (('embed', 'apps.lastTaskFailure'),
                                ('embed', 'apps.counts'))
          
                  r = self.get(path_join('v2/apps', app_id), params=req_params)
                  r.raise_for_status()
          
                  data = r.json()
          
                  if not ignore_failed_tasks:
                      assert 'lastTaskFailure' not in data['app'], (
                          'Application deployment failed, reason: {}'.format(data['app']['lastTaskFailure']['message'])
                      )
          
                  check_tasks_running = (data['app']['tasksRunning'] == app_definition['instances'])
                  check_tasks_healthy = (not check_health or data['app']['tasksHealthy'] == app_definition['instances'])
          
                  if check_tasks_running and check_tasks_healthy:
                      res = [Endpoint(t['host'], t['ports'][0], t['ipAddresses'][0]['ipAddress'])
                             if len(t['ports']) is not 0
                             else Endpoint(t['host'], 0, t['ipAddresses'][0]['ipAddress'])
                             for t in data['app']['tasks']]
                      log.info('Application deployed, running on {}'.format(res))
                      return res
                  elif not check_tasks_running:
                      log.info('Waiting for application to be deployed: '
                               'Not all instances are running: {}'.format(repr(data)))
                      return None
                  elif not check_tasks_healthy:
                      log.info('Waiting for application to be deployed: '
                               'Not all instances are healthy: {}'.format(repr(data)))
                      return None
                  else:
                      log.info('Waiting for application to be deployed: {}'.format(repr(data)))
                      return None
          
              try:
      >           return _poll_marathon_for_app_deployment(app_definition['id'])
      
      ../../lib/python3.5/site-packages/test_util/marathon.py:223: 
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
      
      args = ('integration-test-0b17cc6c0e3549258583f67c116135d7',), kw = {}
      
          @six.wraps(f)
          def wrapped_f(*args, **kw):
      >       return Retrying(*dargs, **dkw).call(f, *args, **kw)
      
      ../../lib/python3.5/site-packages/retrying.py:49: 
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
      
      self = <retrying.Retrying object at 0x7f1ef9b0ee80>
      fn = <function Marathon.deploy_app.<locals>._poll_marathon_for_app_deployment at 0x7f1ef9afae18>
      args = ('integration-test-0b17cc6c0e3549258583f67c116135d7',), kwargs = {}
      start_time = 1496268116500, attempt_number = 120
      attempt = Attempts: 120, Value: None, delay_since_first_attempt_ms = 120164
      sleep = 1000
      
          def call(self, fn, *args, **kwargs):
              start_time = int(round(time.time() * 1000))
              attempt_number = 1
              while True:
                  try:
                      attempt = Attempt(fn(*args, **kwargs), attempt_number, False)
                  except:
                      tb = sys.exc_info()
                      attempt = Attempt(tb, attempt_number, True)
          
                  if not self.should_reject(attempt):
                      return attempt.get(self._wrap_exception)
          
                  delay_since_first_attempt_ms = int(round(time.time() * 1000)) - start_time
                  if self.stop(attempt_number, delay_since_first_attempt_ms):
                      if not self._wrap_exception and attempt.has_exception:
                          # get() on an attempt with an exception should cause it to be raised, but raise just in case
                          raise attempt.get()
                      else:
      >                   raise RetryError(attempt)
      E                   retrying.RetryError: RetryError[Attempts: 120, Value: None]
      
      ../../lib/python3.5/site-packages/retrying.py:214: RetryError
      
      During handling of the above exception, another exception occurred:
      
      dcos_api_session = <test_util.dcos_api_session.DcosApiSession object at 0x7f1efa427da0>
      timeout = 30
      
          def test_octarine(dcos_api_session, timeout=30):
              # This app binds to port 80. This is only required by the http (not srv)
              # transparent mode test. In transparent mode, we use ".mydcos.directory"
              # to go to localhost, the port attached there is only used to
              # determine which port to send traffic to on localhost. When it
              # reaches the proxy, the port is not used, and a request is made
              # to port 80.
          
              app, uuid = get_test_app()
              app['acceptedResourceRoles'] = ["slave_public"]
              app['portDefinitions'][0]["port"] = 80
              app['requirePorts'] = True
          
      >       with dcos_api_session.marathon.deploy_and_cleanup(app) as service_points:
      
      test_applications.py:128: 
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
      ../python--b7a144a49577a223d37d447c568f51330ee95390/lib/python3.5/contextlib.py:59: in __enter__
          return next(self.gen)
      ../../lib/python3.5/site-packages/test_util/marathon.py:370: in deploy_and_cleanup
          app_definition, timeout, check_health, ignore_failed_tasks)
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
      
      self = <test_util.marathon.Marathon object at 0x7f1ef9b0e8d0>
      app_definition = {'acceptedResourceRoles': ['slave_public'], 'cmd': '/opt/mesosphere/bin/dcos-shell python /opt/mesosphere/active/dcos-...n_test_server.py $PORT0', 'cpus': 0.1, 'env': {'DCOS_TEST_UUID': '0b17cc6c0e3549258583f67c116135d7', 'HOME': '/'}, ...}
      timeout = 120, check_health = True, ignore_failed_tasks = False
      
          def deploy_app(self, app_definition, timeout=120, check_health=True, ignore_failed_tasks=False):
              """Deploy an app to marathon
          
                  This function deploys an an application and then waits for marathon to
                  acknowledge it's successful creation or fails the test.
          
                  The wait for application is immediately aborted if Marathon returns
                  nonempty 'lastTaskFailure' field. Otherwise it waits until all the
                  instances reach tasksRunning and then tasksHealthy state.
          
                  Args:
                      app_definition: a dict with application definition as specified in
                                      Marathon API (https://mesosphere.github.io/marathon/docs/rest-api.html#post-v2-apps)
                      timeout: a time to wait for the application to reach 'Healthy' status
                               after which the test should be failed.
                      check_health: wait until Marathon reports tasks as healthy before
                                    returning
          
                  Returns:
                      A list of named tuples which represent service points of deployed
                      applications. I.E:
                          [Endpoint(host='172.17.10.202', port=10464), Endpoint(host='172.17.10.201', port=1630)]
                  """
              r = self.post('v2/apps', json=app_definition)
              log.info('Response from marathon: {}'.format(repr(r.json())))
              r.raise_for_status()
          
              @retrying.retry(wait_fixed=1000, stop_max_delay=timeout * 1000,
                              retry_on_result=lambda ret: ret is None,
                              retry_on_exception=lambda x: False)
              def _poll_marathon_for_app_deployment(app_id):
                  Endpoint = collections.namedtuple("Endpoint", ["host", "port", "ip"])
                  # Some of the counters need to be explicitly enabled now and/or in
                  # future versions of Marathon:
                  req_params = (('embed', 'apps.lastTaskFailure'),
                                ('embed', 'apps.counts'))
          
                  r = self.get(path_join('v2/apps', app_id), params=req_params)
                  r.raise_for_status()
          
                  data = r.json()
          
                  if not ignore_failed_tasks:
                      assert 'lastTaskFailure' not in data['app'], (
                          'Application deployment failed, reason: {}'.format(data['app']['lastTaskFailure']['message'])
                      )
          
                  check_tasks_running = (data['app']['tasksRunning'] == app_definition['instances'])
                  check_tasks_healthy = (not check_health or data['app']['tasksHealthy'] == app_definition['instances'])
          
                  if check_tasks_running and check_tasks_healthy:
                      res = [Endpoint(t['host'], t['ports'][0], t['ipAddresses'][0]['ipAddress'])
                             if len(t['ports']) is not 0
                             else Endpoint(t['host'], 0, t['ipAddresses'][0]['ipAddress'])
                             for t in data['app']['tasks']]
                      log.info('Application deployed, running on {}'.format(res))
                      return res
                  elif not check_tasks_running:
                      log.info('Waiting for application to be deployed: '
                               'Not all instances are running: {}'.format(repr(data)))
                      return None
                  elif not check_tasks_healthy:
                      log.info('Waiting for application to be deployed: '
                               'Not all instances are healthy: {}'.format(repr(data)))
                      return None
                  else:
                      log.info('Waiting for application to be deployed: {}'.format(repr(data)))
                      return None
          
              try:
                  return _poll_marathon_for_app_deployment(app_definition['id'])
              except retrying.RetryError:
                  raise Exception("Application deployment failed - operation was not "
      >                           "completed in {} seconds.".format(timeout))
      E           Exception: Application deployment failed - operation was not completed in 120 seconds.
      
      ../../lib/python3.5/site-packages/test_util/marathon.py:226: Exception
      

        Attachments

          Issue Links

            Activity

              People

              • Assignee:
                jp Jan-Philip Gehrcke
                Reporter:
                karl Karl Isenberg (Inactive)
                Team:
                Networking Team
                Watchers:
                Jan-Philip Gehrcke, Karl Isenberg (Inactive)
              • Watchers:
                2 Start watching this issue

                Dates

                • Created:
                  Updated:
                  Resolved: