import os
import re
import time
import mimetypes
import calendar
from datetime import datetime

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO  # noqa

from django.conf import settings
from django.core.files.base import File
from django.core.files.storage import Storage
from django.core.exceptions import ImproperlyConfigured, SuspiciousOperation
from django.utils.encoding import force_unicode, smart_str

try:
    from boto.s3.connection import S3Connection, SubdomainCallingFormat
    from boto.exception import S3ResponseError
    from boto.s3.key import Key
except ImportError:
    raise ImproperlyConfigured("Could not load Boto's S3 bindings.\n"
                               "See http://code.google.com/p/boto/")

ACCESS_KEY_NAME = getattr(settings, 'AWS_ACCESS_KEY_ID', None)
SECRET_KEY_NAME = getattr(settings, 'AWS_SECRET_ACCESS_KEY', None)
HEADERS = getattr(settings, 'AWS_HEADERS', {})
STORAGE_BUCKET_NAME = getattr(settings, 'AWS_STORAGE_BUCKET_NAME', None)
AUTO_CREATE_BUCKET = getattr(settings, 'AWS_AUTO_CREATE_BUCKET', False)
DEFAULT_ACL = getattr(settings, 'AWS_DEFAULT_ACL', 'public-read')
BUCKET_ACL = getattr(settings, 'AWS_BUCKET_ACL', DEFAULT_ACL)
QUERYSTRING_AUTH = getattr(settings, 'AWS_QUERYSTRING_AUTH', True)
QUERYSTRING_EXPIRE = getattr(settings, 'AWS_QUERYSTRING_EXPIRE', 3600)
REDUCED_REDUNDANCY = getattr(settings, 'AWS_REDUCED_REDUNDANCY', False)
LOCATION = getattr(settings, 'AWS_LOCATION', '')
CUSTOM_DOMAIN = getattr(settings, 'AWS_S3_CUSTOM_DOMAIN', None)
CALLING_FORMAT = getattr(settings, 'AWS_S3_CALLING_FORMAT',
                         SubdomainCallingFormat())
SECURE_URLS = getattr(settings, 'AWS_S3_SECURE_URLS', True)
FILE_NAME_CHARSET = getattr(settings, 'AWS_S3_FILE_NAME_CHARSET', 'utf-8')
FILE_OVERWRITE = getattr(settings, 'AWS_S3_FILE_OVERWRITE', True)
FILE_BUFFER_SIZE = getattr(settings, 'AWS_S3_FILE_BUFFER_SIZE', 5242880)
IS_GZIPPED = getattr(settings, 'AWS_IS_GZIPPED', False)
PRELOAD_METADATA = getattr(settings, 'AWS_PRELOAD_METADATA', False)
GZIP_CONTENT_TYPES = getattr(settings, 'GZIP_CONTENT_TYPES', (
    'text/css',
    'application/javascript',
    'application/x-javascript',
))

if IS_GZIPPED:
    from gzip import GzipFile


def safe_join(base, *paths):
    """
    A version of django.utils._os.safe_join for S3 paths.

    Joins one or more path components to the base path component
    intelligently. Returns a normalized version of the final path.

    The final path must be located inside of the base path component
    (otherwise a ValueError is raised).

    Paths outside the base path indicate a possible security sensitive
    operation.
    """
    from urlparse import urljoin
    base_path = force_unicode(base)
    base_path = base_path.rstrip('/')
    paths = [force_unicode(p) for p in paths]

    final_path = base_path
    for path in paths:
        final_path = urljoin(final_path.rstrip('/') + "/", path.rstrip("/"))

    # Ensure final_path starts with base_path and that the next character
    # after the final path is '/' (or nothing, in which case final_path must
    # be equal to base_path).
    base_path_len = len(base_path)
    if (not final_path.startswith(base_path) or
            final_path[base_path_len:base_path_len + 1] not in ('', '/')):
        raise ValueError('the joined path is located outside of the base path'
                         ' component')

    return final_path.lstrip('/')


# Dates returned from S3's API look something like this:
# "Sun, 11 Mar 2012 17:01:41 GMT"
MONTH_NAMES = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
DATESTR_RE = re.compile(
    r"^.+, (?P<day>\d{1,2}) (?P<month_name>%s) (?P<year>\d{4}) "
    r"(?P<hour>\d{1,2}):(?P<minute>\d{1,2}):(?P<second>\d{1,2}) (GMT|UTC)$"
    % ("|".join(MONTH_NAMES)))


def _parse_datestring(dstr):
    """
    Parse a simple datestring returned by the S3 API and return a
    datetime object in the local timezone.
    """
    # This regular expression, and thus this function,
    # assumes the date is GMT/UTC.
    m = DATESTR_RE.match(dstr)
    if m:
        # This code could raise a ValueError if there is some
        # bad data or the date is invalid.
        datedict = m.groupdict()
        utc_datetime = datetime(
            int(datedict['year']),
            MONTH_NAMES.index(datedict['month_name']) + 1,
            int(datedict['day']),
            int(datedict['hour']),
            int(datedict['minute']),
            int(datedict['second']),
        )
    else:
        # Fall back to an ISO 8601-style datestring.
        try:
            format = '%Y-%m-%dT%H:%M:%S'
            if 'T' not in dstr:
                # No 'T' separator; assume a space between date and time.
                format = format.replace('T', ' ')
            if '.' in dstr:
                format += ".%f"
            if dstr.endswith('Z'):
                dstr = dstr[:-1]
            elif dstr.endswith(('UTC', 'GMT')):
                dstr = dstr[:-3]
            utc_datetime = datetime.strptime(dstr, format)
        except ValueError:
            raise ValueError("Could not parse date string: " + dstr)

    # Convert the UTC datetime object to local time.
    return datetime(
        *time.localtime(calendar.timegm(utc_datetime.timetuple()))[:6])


class S3BotoStorage(Storage):
    """
    Amazon Simple Storage Service using Boto

    This storage backend supports opening files in read or write mode
    and supports streaming (buffering) data in chunks to S3 when writing.
    """

    def __init__(self, bucket=STORAGE_BUCKET_NAME, access_key=None,
                 secret_key=None, bucket_acl=BUCKET_ACL, acl=DEFAULT_ACL,
                 headers=HEADERS, gzip=IS_GZIPPED,
                 gzip_content_types=GZIP_CONTENT_TYPES,
                 querystring_auth=QUERYSTRING_AUTH,
                 querystring_expire=QUERYSTRING_EXPIRE,
                 reduced_redundancy=REDUCED_REDUNDANCY,
                 custom_domain=CUSTOM_DOMAIN, secure_urls=SECURE_URLS,
                 location=LOCATION, file_name_charset=FILE_NAME_CHARSET,
                 preload_metadata=PRELOAD_METADATA,
                 calling_format=CALLING_FORMAT):
        self.bucket_acl = bucket_acl
        self.bucket_name = bucket
        self.acl = acl
        self.headers = headers
        self.preload_metadata = preload_metadata
        self.gzip = gzip
        self.gzip_content_types = gzip_content_types
        self.querystring_auth = querystring_auth
        self.querystring_expire = querystring_expire
        self.reduced_redundancy = reduced_redundancy
        self.custom_domain = custom_domain
        self.secure_urls = secure_urls
        self.location = location or ''
        self.location = self.location.lstrip('/')
        self.file_name_charset = file_name_charset

        if not access_key and not secret_key:
            access_key, secret_key = self._get_access_keys()

        self.connection = S3Connection(access_key, secret_key,
                                       calling_format=calling_format)
        self._entries = {}

    @property
    def bucket(self):
        """
        Get the current bucket. If there is no current bucket object
        create it.
        """
        if not hasattr(self, '_bucket'):
            self._bucket = self._get_or_create_bucket(self.bucket_name)
        return self._bucket

    @property
    def entries(self):
        """
        Get the locally cached files for the bucket.
        """
        if self.preload_metadata and not self._entries:
            self._entries = dict((self._decode_name(entry.key), entry)
                                 for entry in self.bucket.list())
        return self._entries

    def _get_access_keys(self):
        """
        Gets the access keys to use when accessing S3. If none are
        provided to the class in the constructor or in the settings
        then get them from the environment variables.
        """
"""access_key=ACCESS_KEY_NAMEsecret_key=SECRET_KEY_NAMEif(access_keyorsecret_key)and(notaccess_keyornotsecret_key):# TODO: this seems to be brokenaccess_key=os.environ.get(ACCESS_KEY_NAME)secret_key=os.environ.get(SECRET_KEY_NAME)ifaccess_keyandsecret_key:# Both were provided, so use themreturnaccess_key,secret_keyreturnNone,Nonedef_get_or_create_bucket(self,name):"""Retrieves a bucket if it exists, otherwise creates it."""try:returnself.connection.get_bucket(name,validate=AUTO_CREATE_BUCKET)exceptS3ResponseError:ifAUTO_CREATE_BUCKET:bucket=self.connection.create_bucket(name)bucket.set_acl(self.bucket_acl)returnbucketraiseImproperlyConfigured("Bucket specified by ""AWS_STORAGE_BUCKET_NAME does not exist. ""Buckets can be automatically created by setting ""AWS_AUTO_CREATE_BUCKET=True")def_clean_name(self,name):""" Cleans the name so that Windows style paths work """# Useful for windows' pathsreturnos.path.normpath(name).replace('\\','/')def_normalize_name(self,name):""" Normalizes the name so that paths like /path/to/ignored/../something.txt work. We check to make sure that the path pointed to is not outside the directory specified by the LOCATION setting. """try:returnsafe_join(self.location,name)exceptValueError:raiseSuspiciousOperation("Attempted access to '%s' denied."%name)def_encode_name(self,name):returnsmart_str(name,encoding=self.file_name_charset)def_decode_name(self,name):returnforce_unicode(name,encoding=self.file_name_charset)def_compress_content(self,content):"""Gzip a given string content."""zbuf=StringIO()zfile=GzipFile(mode='wb',compresslevel=6,fileobj=zbuf)zfile.write(content.read())zfile.close()content.file=zbufreturncontentdef_open(self,name,mode='rb'):name=self._normalize_name(self._clean_name(name))f=S3BotoStorageFile(name,mode,self)ifnotf.key:raiseIOError('File does not exist: %s'%name)returnfdef_save(self,name,content):cleaned_name=self._clean_name(name)name=self._normalize_name(cleaned_name)headers=self.headers.copy()content_type=getattr(content,'content_type',mimetypes.guess_type(name)[0]orKey.DefaultContentType)ifself.gzipandcontent_typeinself.gzip_content_types:content=self._compress_content(content)headers.update({'Content-Encoding':'gzip'})content.name=cleaned_nameencoded_name=self._encode_name(name)key=self.bucket.get_key(encoded_name)ifnotkey:key=self.bucket.new_key(encoded_name)ifself.preload_metadata:self._entries[encoded_name]=keykey.set_metadata('Content-Type',content_type)key.set_contents_from_file(content,headers=headers,policy=self.acl,reduced_redundancy=self.reduced_redundancy)returncleaned_namedefdelete(self,name):name=self._normalize_name(self._clean_name(name))self.bucket.delete_key(self._encode_name(name))defexists(self,name):name=self._normalize_name(self._clean_name(name))ifself.entries:returnnameinself.entriesk=self.bucket.new_key(self._encode_name(name))returnk.exists()deflistdir(self,name):name=self._normalize_name(self._clean_name(name))# for the bucket.list and logic below name needs to end in /# But for the root path "" we leave it as an empty stringifname:name+='/'dirlist=self.bucket.list(self._encode_name(name))files=[]dirs=set()base_parts=name.split("/")[:-1]foritemindirlist:parts=item.name.split("/")parts=parts[len(base_parts):]iflen(parts)==1:# Filefiles.append(parts[0])eliflen(parts)>1:# 
                # Directory
                dirs.add(parts[0])
        return list(dirs), files

    def size(self, name):
        name = self._normalize_name(self._clean_name(name))
        if self.entries:
            entry = self.entries.get(name)
            if entry:
                return entry.size
            return 0
        return self.bucket.get_key(self._encode_name(name)).size

    def modified_time(self, name):
        name = self._normalize_name(self._clean_name(name))
        entry = self.entries.get(name)
        if entry is None:
            entry = self.bucket.get_key(self._encode_name(name))
        # Parse the last_modified string to a local datetime object.
        return _parse_datestring(entry.last_modified)

    def url(self, name):
        name = self._normalize_name(self._clean_name(name))
        if self.custom_domain:
            return "%s://%s/%s" % ('https' if self.secure_urls else 'http',
                                   self.custom_domain, name)
        return self.connection.generate_url(
            self.querystring_expire, method='GET', bucket=self.bucket.name,
            key=self._encode_name(name), query_auth=self.querystring_auth,
            force_http=not self.secure_urls)

    def get_available_name(self, name):
        """ Overwrite existing file with the same name. """
        if FILE_OVERWRITE:
            name = self._clean_name(name)
            return name
        return super(S3BotoStorage, self).get_available_name(name)


class S3BotoStorageFile(File):
    """
    The default file object used by the S3BotoStorage backend.

    This file implements file streaming using boto's multipart
    uploading functionality. The file can be opened in read or
    write mode.

    This class extends Django's File class. However, the contained
    data is only the data contained in the current buffer. So you
    should not access the contained file object directly. You should
    access the data via this class.

    Warning: This file *must* be closed using the close() method in
    order to properly write the file to S3. Be sure to close the file
    in your application.
    """
    # TODO: Read/Write (rw) mode may be a bit undefined at the moment.
    #       Needs testing.
    # TODO: When Django drops support for Python 2.5, rewrite to use the
    #       BufferedIO streams in the Python 2.6 io module.

    def __init__(self, name, mode, storage, buffer_size=FILE_BUFFER_SIZE):
        self._storage = storage
        self.name = name[len(self._storage.location):].lstrip('/')
        self._mode = mode
        self.key = storage.bucket.get_key(self._storage._encode_name(name))
        if not self.key and 'w' in mode:
            self.key = storage.bucket.new_key(storage._encode_name(name))
        self._is_dirty = False
        self._file = None
        self._multipart = None
        # 5 MB is the minimum part size (if there is more than one part).
        # Amazon allows up to 10,000 parts. The default supports uploads
        # up to roughly 50 GB.
        # Increase the part size to accommodate for files larger than this.
        self._write_buffer_size = buffer_size
        self._write_counter = 0

    @property
    def size(self):
        return self.key.size

    @property
    def file(self):
        if self._file is None:
            self._file = StringIO()
            content_type = (mimetypes.guess_type(self.name)[0] or
                            Key.DefaultContentType)
            if (self._storage.gzip and
                    content_type in self._storage.gzip_content_types):
                self._file = GzipFile(fileobj=self._file)
            if 'r' in self._mode:
                self._is_dirty = False
                self.key.get_contents_to_file(self._file)
                self._file.seek(0)
        return self._file

    def read(self, *args, **kwargs):
        if 'r' not in self._mode:
            raise AttributeError("File was not opened in read mode.")
        return super(S3BotoStorageFile, self).read(*args, **kwargs)

    def write(self, *args, **kwargs):
        if 'w' not in self._mode:
            raise AttributeError("File was not opened in write mode.")
        self._is_dirty = True
        if self._multipart is None:
            provider = self.key.bucket.connection.provider
            upload_headers = {provider.acl_header: self._storage.acl}
            upload_headers.update(self._storage.headers)
            self._multipart = self._storage.bucket.initiate_multipart_upload(
                self.key.name,
                headers=upload_headers,
                reduced_redundancy=self._storage.reduced_redundancy)
        if self._write_buffer_size <= self._buffer_file_size:
            self._flush_write_buffer()
        return super(S3BotoStorageFile, self).write(*args, **kwargs)

    @property
    def _buffer_file_size(self):
        pos = self.file.tell()
        self.file.seek(0, os.SEEK_END)
        length = self.file.tell()
        self.file.seek(pos)
        return length

    def _flush_write_buffer(self):
        """
        Flushes the write buffer.
        """
        if self._buffer_file_size:
            self._write_counter += 1
            self.file.seek(0)
            self._multipart.upload_part_from_file(
                self.file, self._write_counter,
                headers=self._storage.headers)
            self.file.close()
            self._file = None

    def close(self):
        if self._is_dirty:
            self._flush_write_buffer()
            self._multipart.complete_upload()
        else:
            if self._multipart is not None:
                self._multipart.cancel_upload()
        self.key.close()
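
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the backend). It assumes this
# module is importable as storages.backends.s3boto and uses placeholder
# credentials and a hypothetical bucket name "my-bucket":
#
#   # settings.py
#   DEFAULT_FILE_STORAGE = 'storages.backends.s3boto.S3BotoStorage'
#   AWS_ACCESS_KEY_ID = '...'
#   AWS_SECRET_ACCESS_KEY = '...'
#   AWS_STORAGE_BUCKET_NAME = 'my-bucket'
#
#   # application code
#   from django.core.files.base import ContentFile
#   from django.core.files.storage import default_storage
#
#   name = default_storage.save('uploads/hello.txt', ContentFile('hello'))
#   url = default_storage.url(name)   # signed URL (or custom-domain URL)
#   default_storage.exists(name)      # True
#   default_storage.delete(name)
# ---------------------------------------------------------------------------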