# Source code for airflow.contrib.operators.awsbatch_operator
# -*- coding: utf-8 -*-
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
import sys
from math import pow
from time import sleep

from airflow.contrib.hooks.aws_hook import AwsHook
from airflow.exceptions import AirflowException
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults

class AWSBatchOperator(BaseOperator):
    """
    Execute a job on AWS Batch Service

    .. warning: the queue parameter was renamed to job_queue to segregate the
        internal CeleryExecutor queue from the AWS Batch internal queue.

    :param job_name: the name for the job that will run on AWS Batch (templated)
    :type job_name: str
    :param job_definition: the job definition name on AWS Batch
    :type job_definition: str
    :param job_queue: the queue name on AWS Batch
    :type job_queue: str
    :param overrides: the same parameter that boto3 will receive on
        containerOverrides (templated):
        http://boto3.readthedocs.io/en/latest/reference/services/batch.html#submit_job
    :type overrides: dict
    :param max_retries: exponential backoff retries while waiter is not
        merged, 4200 = 48 hours
    :type max_retries: int
    :param aws_conn_id: connection id of AWS credentials / region name. If None,
        credential boto3 strategy will be used
        (http://boto3.readthedocs.io/en/latest/guide/configuration.html).
    :type aws_conn_id: str
    :param region_name: region name to use in AWS Hook.
        Override the region_name in connection (if provided)
    :type region_name: str
    """

    ui_color = '#c3dae0'
    # boto3 batch client, created lazily in execute()
    client = None
    arn = None
    template_fields = ('job_name', 'overrides',)

    @apply_defaults
    def __init__(self, job_name, job_definition, job_queue, overrides,
                 max_retries=4200, aws_conn_id=None, region_name=None,
                 **kwargs):
        super(AWSBatchOperator, self).__init__(**kwargs)

        self.job_name = job_name
        self.aws_conn_id = aws_conn_id
        self.region_name = region_name
        self.job_definition = job_definition
        self.job_queue = job_queue
        self.overrides = overrides
        self.max_retries = max_retries

        # Populated from the submit_job response in execute(); kept in the
        # boto3 camelCase naming because on_kill() and the polling helpers
        # read them back.
        self.jobId = None
        self.jobName = None

        self.hook = self.get_hook()

    def execute(self, context):
        """Submit the job to AWS Batch and block until it terminates.

        :raises AirflowException: if submission fails or the job does not
            finish successfully.
        """
        self.log.info(
            'Running AWS Batch Job - Job definition: %s - on queue %s',
            self.job_definition, self.job_queue
        )
        self.log.info('AWSBatchOperator overrides: %s', self.overrides)

        self.client = self.hook.get_client_type(
            'batch',
            region_name=self.region_name
        )

        try:
            response = self.client.submit_job(
                jobName=self.job_name,
                jobQueue=self.job_queue,
                jobDefinition=self.job_definition,
                containerOverrides=self.overrides)

            self.log.info('AWS Batch Job started: %s', response)

            self.jobId = response['jobId']
            self.jobName = response['jobName']

            self._wait_for_task_ended()
            self._check_success_task()

            self.log.info('AWS Batch Job has been successfully executed: %s',
                          response)
        except Exception as e:
            self.log.info('AWS Batch Job has failed execution')
            raise AirflowException(e)

    def _wait_for_task_ended(self):
        """
        Try to use a waiter from the below pull request

            * https://github.com/boto/botocore/pull/1307

        If the waiter is not available apply a exponential backoff

            * docs.aws.amazon.com/general/latest/gr/api-retries.html
        """
        try:
            waiter = self.client.get_waiter('job_execution_complete')
            # Timeout is managed by Airflow, so let the waiter poll
            # indefinitely.
            waiter.config.max_attempts = sys.maxsize
            waiter.wait(jobs=[self.jobId])
        except ValueError:
            # If the waiter is not available, poll with exponential backoff.
            retry = True
            retries = 0

            while retries < self.max_retries and retry:
                # Delay grows quadratically with the attempt number
                # (1s, 1.01s, 1.04s, ... up to ~48h total over 4200 retries).
                delay = 1 + pow(retries * 0.1, 2)
                self.log.info('AWS Batch retry in the next %s seconds', delay)
                response = self.client.describe_jobs(
                    jobs=[self.jobId]
                )
                if response['jobs'][-1]['status'] in ['SUCCEEDED', 'FAILED']:
                    # Terminal state reached: stop polling without sleeping
                    # one extra time.
                    retry = False
                else:
                    sleep(delay)
                retries += 1

    def _check_success_task(self):
        """Inspect the final job description and raise on any non-success.

        :raises AirflowException: if no job is found, the job FAILED, or the
            job is still in a non-terminal state.
        """
        response = self.client.describe_jobs(
            jobs=[self.jobId],
        )

        self.log.info('AWS Batch stopped, check status: %s', response)
        if len(response.get('jobs')) < 1:
            raise AirflowException('No job found for {}'.format(response))

        for job in response['jobs']:
            job_status = job['status']
            if job_status == 'FAILED':
                reason = job['statusReason']
                raise AirflowException('Job failed with status {}'.format(reason))
            elif job_status in [
                    'SUBMITTED',
                    'PENDING',
                    'RUNNABLE',
                    'STARTING',
                    'RUNNING'
            ]:
                raise AirflowException(
                    'This task is still pending {}'.format(job_status))

    def get_hook(self):
        """Return the AwsHook used to build the boto3 batch client."""
        return AwsHook(
            aws_conn_id=self.aws_conn_id
        )

    def on_kill(self):
        """Terminate the running AWS Batch job when the task is killed."""
        response = self.client.terminate_job(
            jobId=self.jobId,
            reason='Task killed by the user')
        self.log.info(response)