Navigation

Quick search

Source code for apache_beam.testing.pipeline_verifiers

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""End-to-end test result verifiers

A set of verifiers that are used in end-to-end tests to verify state/output
of test pipeline job. Customized verifier should extend
`hamcrest.core.base_matcher.BaseMatcher` and override _matches.
"""

import logging
import time

from hamcrest.core.base_matcher import BaseMatcher

from apache_beam.io.filesystems import FileSystems
from apache_beam.runners.runner import PipelineState
from apache_beam.testing import test_utils as utils
from apache_beam.utils import retry

__all__ = [
    'PipelineStateMatcher',
    'FileChecksumMatcher',
    'retry_on_io_error_and_server_error',
]

try:
  from apitools.base.py.exceptions import HttpError
except ImportError:
  # apitools is an optional dependency; when it is unavailable, HttpError
  # retries are simply never triggered (see the retry filter below).
  HttpError = None

# Maximum number of retries for reading output files before giving up.
MAX_RETRIES = 4


def retry_on_io_error_and_server_error(exception):
  """Retry filter: retry on IOError and (if apitools is present) HttpError.

  NOTE(review): this function is referenced by ``__all__`` above and by the
  ``retry.with_exponential_backoff`` decorator in ``FileChecksumMatcher``,
  but its body was missing from this extraction of the file; restored here
  so the module is importable. Confirm against the upstream source.
  """
  return isinstance(exception, IOError) or \
      (HttpError is not None and isinstance(exception, HttpError))

class PipelineStateMatcher(BaseMatcher):
  """Matcher asserting that a pipeline job terminated in the expected state.

  Compares the terminal state reported by the pipeline result against the
  expected state supplied at construction time; ``PipelineState.DONE`` is
  the default expectation.
  """

  def __init__(self, expected_state=PipelineState.DONE):
    # Terminal state the pipeline result is required to report.
    self.expected_state = expected_state

  def _matches(self, pipeline_result):
    # hamcrest hook: True iff the job ended in the expected state.
    return self.expected_state == pipeline_result.state

class FileChecksumMatcher(BaseMatcher):
  """Matcher that verifies file(s) content by comparing a content checksum.

  Files are fetched from the given path via apache_beam.io.filesystems.
  The checksum is a hash string computed from the content of all matched
  files and compared against the expected checksum.
  """

  def __init__(self, file_path, expected_checksum, sleep_secs=None):
    """Initialize a FileChecksumMatcher object.

    Args:
      file_path: A string that is the full path of output file. This path
        can contain globs.
      expected_checksum: A hash string that is computed from the expected
        result.
      sleep_secs: Number of seconds to wait before verification starts.
        Extra time is given to make sure output files are ready on FS.

    Raises:
      ValueError: If ``sleep_secs`` is provided but is not an int.
    """
    # Guard clause: reject a non-int sleep value up front.
    if sleep_secs is not None and not isinstance(sleep_secs, int):
      raise ValueError('Sleep seconds, if received, must be int. '
                       'But received: %r, %s' % (sleep_secs,
                                                 type(sleep_secs)))
    self.sleep_secs = sleep_secs
    self.file_path = file_path
    self.expected_checksum = expected_checksum

  @retry.with_exponential_backoff(
      num_retries=MAX_RETRIES,
      retry_filter=retry_on_io_error_and_server_error)
  def _read_with_retry(self):
    """Read path with retry if I/O failed"""
    match_result = FileSystems.match([self.file_path])[0]
    paths = [metadata.path for metadata in match_result.metadata_list]
    if not paths:
      # No match: raise IOError so the retry filter triggers a retry.
      raise IOError('No such file or directory: %s' % self.file_path)

    logging.info('Find %d files in %s: \n%s',
                 len(paths), self.file_path, '\n'.join(paths))

    lines = []
    for path in paths:
      # NOTE(review): second positional arg of FileSystems.open is
      # mime_type, not a mode string — preserved as-is; confirm upstream.
      with FileSystems.open(path, 'r') as f:
        lines.extend(f)
    return lines

  def _matches(self, _):
    # Optionally give the filesystem extra time to publish output files.
    if self.sleep_secs:
      logging.info('Wait %d seconds...', self.sleep_secs)
      time.sleep(self.sleep_secs)

    # Read the matched file(s) and compute the checksum of their content.
    content = self._read_with_retry()
    self.checksum = utils.compute_hash(content)
    logging.info('Read from given path %s, %d lines, checksum: %s.',
                 self.file_path, len(content), self.checksum)
    return self.checksum == self.expected_checksum