Source code for airflow.hooks.dbapi_hook

# -*- coding: utf-8 -*-## Licensed to the Apache Software Foundation (ASF) under one# or more contributor license agreements. See the NOTICE file# distributed with this work for additional information# regarding copyright ownership. The ASF licenses this file# to you under the Apache License, Version 2.0 (the# "License"); you may not use this file except in compliance# with the License. You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing,# software distributed under the License is distributed on an# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY# KIND, either express or implied. See the License for the# specific language governing permissions and limitations# under the License.frombuiltinsimportstrfrompast.builtinsimportbasestringfromdatetimeimportdatetimefromcontextlibimportclosingimportsysfromtypingimportOptionalfromsqlalchemyimportcreate_enginefromairflow.hooks.base_hookimportBaseHookfromairflow.exceptionsimportAirflowException

class DbApiHook(BaseHook):
    """
    Abstract base class for sql hooks.

    A concrete hook sets ``conn_name_attr`` to the name of the attribute that
    stores its connection id. The methods below rely on ``self.get_conn()``,
    ``self._serialize_cell()`` and ``self.log``, none of which are defined in
    this part of the file -- presumably they come from ``BaseHook`` or from
    the rest of this class / its subclasses (TODO confirm).
    """

    # Override to provide the connection name.
    conn_name_attr = None  # type: Optional[str]
    # Override to have a default connection id for a particular dbHook
    default_conn_name = 'default_conn_id'
    # Override if this db supports autocommit.
    supports_autocommit = False
    # Override with the object that exposes the connect method
    connector = None

    def __init__(self, *args, **kwargs):
        """
        Store the connection id on the attribute named by ``conn_name_attr``.

        The id is taken, in order of precedence, from the single positional
        argument, from the keyword argument named ``conn_name_attr``, or from
        ``default_conn_name``.

        :raises AirflowException: if the subclass did not set
            ``conn_name_attr``.
        """
        if not self.conn_name_attr:
            raise AirflowException("conn_name_attr is not defined")

        if len(args) == 1:
            conn_id = args[0]
        else:
            # Any other number of positional arguments is ignored and the
            # keyword / default lookup is used instead.
            conn_id = kwargs.get(self.conn_name_attr, self.default_conn_name)
        setattr(self, self.conn_name_attr, conn_id)

    def get_pandas_df(self, sql, parameters=None):
        """
        Executes the sql and returns a pandas dataframe

        :param sql: the sql statement to be executed. It is handed to
            ``pandas.io.sql.read_sql`` as a single query (and encoded to
            utf-8 first on Python 2), so a list of statements is not
            split up here.
        :type sql: str
        :param parameters: The parameters to render the SQL query with.
        :type parameters: mapping or iterable
        :return: whatever ``pandas.io.sql.read_sql`` returns for the query.
        """
        if sys.version_info[0] < 3:
            sql = sql.encode('utf-8')
        # Imported lazily so that pandas is only needed when this method
        # is actually called.
        import pandas.io.sql as psql

        with closing(self.get_conn()) as conn:
            return psql.read_sql(sql, con=conn, params=parameters)

    def get_records(self, sql, parameters=None):
        """
        Executes the sql and returns a set of records.

        :param sql: the sql statement to be executed. It is passed to a
            single ``cursor.execute`` call (and encoded to utf-8 first on
            Python 2), so a list of statements is not split up here.
        :type sql: str
        :param parameters: The parameters to render the SQL query with.
        :type parameters: mapping or iterable
        :return: the result of ``cursor.fetchall()``.
        """
        if sys.version_info[0] < 3:
            sql = sql.encode('utf-8')

        with closing(self.get_conn()) as conn:
            with closing(conn.cursor()) as cur:
                # Only pass parameters when given: some drivers treat an
                # explicit ``None`` differently from an omitted argument.
                if parameters is not None:
                    cur.execute(sql, parameters)
                else:
                    cur.execute(sql)
                # Fetch before the cursor and connection are closed.
                return cur.fetchall()

    def get_first(self, sql, parameters=None):
        """
        Executes the sql and returns the first resulting row.

        :param sql: the sql statement to be executed. It is passed to a
            single ``cursor.execute`` call (and encoded to utf-8 first on
            Python 2), so a list of statements is not split up here.
        :type sql: str
        :param parameters: The parameters to render the SQL query with.
        :type parameters: mapping or iterable
        :return: the result of ``cursor.fetchone()``.
        """
        if sys.version_info[0] < 3:
            sql = sql.encode('utf-8')

        with closing(self.get_conn()) as conn:
            with closing(conn.cursor()) as cur:
                if parameters is not None:
                    cur.execute(sql, parameters)
                else:
                    cur.execute(sql)
                # Fetch before the cursor and connection are closed.
                return cur.fetchone()

    def run(self, sql, autocommit=False, parameters=None):
        """
        Runs a command or a list of commands. Pass a list of sql
        statements to the sql parameter to get them to execute
        sequentially

        :param sql: the sql statement to be executed (str) or a list of
            sql statements to execute
        :type sql: str or list
        :param autocommit: What to set the connection's autocommit setting to
            before executing the query.
        :type autocommit: bool
        :param parameters: The parameters to render the SQL query with.
            The same parameters are used for every statement.
        :type parameters: mapping or iterable
        """
        # Normalise a single statement to a one-element list.
        if isinstance(sql, basestring):
            sql = [sql]

        with closing(self.get_conn()) as conn:
            if self.supports_autocommit:
                self.set_autocommit(conn, autocommit)

            with closing(conn.cursor()) as cur:
                for s in sql:
                    if sys.version_info[0] < 3:
                        s = s.encode('utf-8')
                    if parameters is not None:
                        # Lazy %-style arguments: the message is only built
                        # if the record is actually emitted.
                        self.log.info("%s with parameters %s", s, parameters)
                        cur.execute(s, parameters)
                    else:
                        self.log.info(s)
                        cur.execute(s)

            # If autocommit was set to False for db that supports autocommit,
            # or if db does not supports autocommit, we do a manual commit.
            if not self.get_autocommit(conn):
                conn.commit()

    def set_autocommit(self, conn, autocommit):
        """
        Sets the autocommit flag on the connection

        A warning is logged when autocommit is requested on a hook whose
        ``supports_autocommit`` is false; the flag is assigned to
        ``conn.autocommit`` in every case.

        :param conn: Connection to set the autocommit flag on.
        :type conn: connection object
        :param autocommit: The value to assign to ``conn.autocommit``.
        :type autocommit: bool
        """
        if not self.supports_autocommit and autocommit:
            # ``Logger.warn`` is a deprecated alias of ``Logger.warning``.
            self.log.warning(
                ("%s connection doesn't support "
                 "autocommit but autocommit activated."),
                getattr(self, self.conn_name_attr))
        conn.autocommit = autocommit

    def get_autocommit(self, conn):
        """
        Get autocommit setting for the provided connection.

        The result is truthy only when ``conn.autocommit`` is truthy and this
        hook's ``supports_autocommit`` is truthy. It is falsy when
        ``conn.autocommit`` is not set or falsy, or when the hook does not
        support autocommit.

        :param conn: Connection to get autocommit setting from.
        :type conn: connection object.
        :return: connection autocommit setting.
        :rtype: bool
        """
        return getattr(conn, 'autocommit', False) and self.supports_autocommit

    def insert_rows(self, table, rows, target_fields=None, commit_every=1000,
                    replace=False):
        """
        A generic way to insert a set of tuples into a table,
        a new transaction is created every commit_every rows

        ``table`` and ``target_fields`` are interpolated into the SQL text
        as-is; only the cell values are passed as query parameters, using
        ``%s`` placeholders.

        :param table: Name of the target table
        :type table: str
        :param rows: The rows to insert into the table
        :type rows: iterable of tuples
        :param target_fields: The names of the columns to fill in the table
        :type target_fields: iterable of strings
        :param commit_every: The maximum number of rows to insert in one
            transaction. Set to 0 to insert all rows in one transaction.
        :type commit_every: int
        :param replace: Whether to replace instead of insert
        :type replace: bool
        """
        if target_fields:
            target_fields = "({})".format(", ".join(target_fields))
        else:
            target_fields = ''

        # The statement keyword does not depend on the row, so pick it once.
        statement = "REPLACE INTO " if replace else "INSERT INTO "

        # Stays 0 when ``rows`` is empty so the final log line is still valid.
        i = 0
        with closing(self.get_conn()) as conn:
            if self.supports_autocommit:
                self.set_autocommit(conn, False)

            conn.commit()

            with closing(conn.cursor()) as cur:
                for i, row in enumerate(rows, 1):
                    values = tuple(
                        self._serialize_cell(cell, conn) for cell in row)
                    # Placeholders are rebuilt per row because the code does
                    # not require every row to have the same length.
                    placeholders = ["%s", ] * len(values)
                    sql = statement + "{0}{1} VALUES ({2})".format(
                        table,
                        target_fields,
                        ",".join(placeholders))
                    cur.execute(sql, values)
                    if commit_every and i % commit_every == 0:
                        conn.commit()
                        self.log.info(
                            "Loaded %s rows into %s so far", i, table)

            # Commit whatever is left after the last full batch.
            conn.commit()
        self.log.info("Done loading. Loaded a total of %s rows", i)

    def bulk_dump(self, table, tmp_file):
        """
        Dumps a database table into a tab-delimited file

        Not implemented in this base class.

        :param table: The name of the source table
        :type table: str
        :param tmp_file: The path of the target file
        :type tmp_file: str
        :raises NotImplementedError: always.
        """
        raise NotImplementedError()

    def bulk_load(self, table, tmp_file):
        """
        Loads a tab-delimited file into a database table

        Not implemented in this base class.

        :param table: The name of the target table
        :type table: str
        :param tmp_file: The path of the file to load into the table
        :type tmp_file: str
        :raises NotImplementedError: always.
        """
        raise NotImplementedError()