#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2011-2012 Codernity (http://codernity.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from CodernityDB.index import (Index,
                               IndexException,
                               DocIdNotFound,
                               ElemNotFound,
                               TryReindexException,
                               IndexPreconditionsException)

import os
import marshal
import io
import struct
import shutil

from CodernityDB.storage import IU_Storage, DummyStorage

from CodernityDB.env import cdb_environment

if cdb_environment.get('rlock_obj'):
    from CodernityDB import patch
    patch.patch_cache_rr(cdb_environment['rlock_obj'])

from CodernityDB.rr_cache import cache1lvl

from CodernityDB.misc import random_hex_32

try:
    from CodernityDB import __version__
except ImportError:
    from __init__ import __version__


class IU_HashIndex(Index):
    """
    That class is for Internal Use only, if you want to use HashIndex just
    subclass the :py:class:`HashIndex` instead this one.

    That design is because main index logic should be always in database not
    in custom user indexes.
    """

    def __init__(self, db_path, name, entry_line_format='<32s{key}IIcI',
                 hash_lim=0xfffff, storage_class=None, key_format='c'):
        """
        The index is capable to solve conflicts by `Separate chaining`

        :param db_path: database path
        :type db_path: string
        :param name: index name
        :type name: ascii string
        :param line_format: line format, `key_format` parameter value will
            replace `{key}` if present.
        :type line_format: string (32s{key}IIcI by default)
            {doc_id}{hash_key}{start}{size}{status}{next}
        :param hash_lim: maximum hash functon results (remember about
            birthday problem) count from 0
        :type hash_lim: integer
        :param storage_class: Storage class by default it will open standard
            :py:class:`CodernityDB.storage.Storage` (if string has to be
            accesible by globals()[storage_class])
        :type storage_class: class name which will be instance of
            CodernityDB.storage.Storage instance or None
        :param key_format: a index key format
        """
        if key_format and '{key}' in entry_line_format:
            entry_line_format = entry_line_format.replace('{key}', key_format)
        super(IU_HashIndex, self).__init__(db_path, name)
        self.hash_lim = hash_lim
        if not storage_class:
            storage_class = IU_Storage
        # store the class *name*, so it can be marshal'ed into the index
        # header and later resolved via globals() in _open_storage
        if storage_class and not isinstance(storage_class, basestring):
            storage_class = storage_class.__name__
        self.storage_class = storage_class
        self.storage = None

        self.bucket_line_format = "<I"
        self.bucket_line_size = struct.calcsize(self.bucket_line_format)
        self.entry_line_format = entry_line_format
        self.entry_line_size = struct.calcsize(self.entry_line_format)

        # wrap the hot lookup methods with a small 1-level cache
        cache = cache1lvl(100)
        self._find_key = cache(self._find_key)
        self._locate_doc_id = cache(self._locate_doc_id)
        self.bucket_struct = struct.Struct(self.bucket_line_format)
        self.entry_struct = struct.Struct(self.entry_line_format)
        # first byte offset after the bucket table (entries live past it)
        self.data_start = (
            self.hash_lim + 1) * self.bucket_line_size + self._start_ind + 2

    def _fix_params(self):
        # recompute the struct helpers / offsets after params were (re)loaded
        super(IU_HashIndex, self)._fix_params()
        self.bucket_line_size = struct.calcsize(self.bucket_line_format)
        self.entry_line_size = struct.calcsize(self.entry_line_format)
        self.bucket_struct = struct.Struct(self.bucket_line_format)
        self.entry_struct = struct.Struct(self.entry_line_format)
        self.data_start = (
            self.hash_lim + 1) * self.bucket_line_size + self._start_ind + 2

    def open_index(self):
        if not os.path.isfile(os.path.join(self.db_path, self.name + '_buck')):
            raise IndexException("Doesn't exists")
        self.buckets = io.open(
            os.path.join(self.db_path, self.name + "_buck"), 'r+b', buffering=0)
        self._fix_params()
        self._open_storage()

    def create_index(self):
        if os.path.isfile(os.path.join(self.db_path, self.name + '_buck')):
            raise IndexException('Already exists')
        with io.open(os.path.join(self.db_path, self.name + "_buck"), 'w+b') as f:
            props = dict(name=self.name,
                         bucket_line_format=self.bucket_line_format,
                         entry_line_format=self.entry_line_format,
                         hash_lim=self.hash_lim,
                         version=self.__version__,
                         storage_class=self.storage_class)
            f.write(marshal.dumps(props))
        self.buckets = io.open(
            os.path.join(self.db_path, self.name + "_buck"), 'r+b', buffering=0)
        self._create_storage()

    def destroy(self):
        super(IU_HashIndex, self).destroy()
        self._clear_cache()

    def _open_storage(self):
        s = globals()[self.storage_class]
        if not self.storage:
            self.storage = s(self.db_path, self.name)
        self.storage.open()

    def _create_storage(self):
        s = globals()[self.storage_class]
        if not self.storage:
            self.storage = s(self.db_path, self.name)
        self.storage.create()

    # def close_index(self):
    #     self.buckets.flush()
    #     self.buckets.close()
    #     self.storage.close()

    # @lfu_cache(100)
    def _find_key(self, key):
        """
        Find the key position

        :param key: the key to find
        """
        start_position = self._calculate_position(key)
        self.buckets.seek(start_position)
        curr_data = self.buckets.read(self.bucket_line_size)
        if curr_data:
            location = self.bucket_struct.unpack(curr_data)[0]
            if not location:
                return None, None, 0, 0, 'u'
            found_at, doc_id, l_key, start, size, status, _next = self._locate_key(
                key, location)
            if status == 'd':  # when first record from many is deleted
                while True:
                    found_at, doc_id, l_key, start, size, status, _next = self._locate_key(
                        key, _next)
                    if status != 'd':
                        break
            return doc_id, l_key, start, size, status
        else:
            return None, None, 0, 0, 'u'

    def _find_key_many(self, key, limit=1, offset=0):
        # generator over all non-deleted entries matching `key`
        location = None
        start_position = self._calculate_position(key)
        self.buckets.seek(start_position)
        curr_data = self.buckets.read(self.bucket_line_size)
        if curr_data:
            location = self.bucket_struct.unpack(curr_data)[0]
        # first skip `offset` matching entries...
        while offset:
            if not location:
                break
            try:
                found_at, doc_id, l_key, start, size, status, _next = self._locate_key(
                    key, location)
            except IndexException:
                break
            else:
                if status != 'd':
                    if l_key == key:  # in case of hash function conflicts
                        offset -= 1
                location = _next
        # ...then yield up to `limit` of them
        while limit:
            if not location:
                break
            try:
                found_at, doc_id, l_key, start, size, status, _next = self._locate_key(
                    key, location)
            except IndexException:
                break
            else:
                if status != 'd':
                    if self.compare_keys(l_key, key):  # in case of hash function conflicts
                        yield doc_id, start, size, status
                        limit -= 1
                location = _next

    def compare_keys(self, a, b):
        return a == b

    def _calculate_position(self, key):
        # bucket slot for the key's hash, as an absolute file offset
        return abs(hash(key) & self.hash_lim) * self.bucket_line_size + self._start_ind

    # TODO add cache!
    def _locate_key(self, key, start):
        """
        Locate position of the key, it will iterate using `next` field in
        record until required key will be find.

        :param key: the key to locate
        :param start: position to start from
        """
        location = start
        while True:
            self.buckets.seek(location)
            data = self.buckets.read(self.entry_line_size)
            # todo, maybe partial read there...
            try:
                doc_id, l_key, start, size, status, _next = self.entry_struct.unpack(data)
            except struct.error:
                raise ElemNotFound(
                    "Not found")  # not found but might be also broken
            if self.compare_keys(l_key, key):
                break
            else:
                if not _next:
                    # not found
                    raise ElemNotFound("Not found")
                else:
                    location = _next  # go to next record
        return location, doc_id, l_key, start, size, status, _next

    # @lfu_cache(100)
    def _locate_doc_id(self, doc_id, key, start):
        """
        Locate position of the doc_id, it will iterate using `next` field in
        record until required key will be find.

        :param doc_id: the doc_id to locate
        :param key: key value
        :param start: position to start from
        """
        location = start
        while True:
            self.buckets.seek(location)
            data = self.buckets.read(self.entry_line_size)
            try:
                l_doc_id, l_key, start, size, status, _next = self.entry_struct.unpack(data)
            except struct.error:
                # short/broken read -> entry chain ends here
                raise DocIdNotFound(
                    "Doc_id '%s' for '%s' not found" % (doc_id, key))
            if l_doc_id == doc_id and l_key == key:  # added for consistency
                break
            else:
                if not _next:
                    # not found
                    raise DocIdNotFound(
                        "Doc_id '%s' for '%s' not found" % (doc_id, key))
                else:
                    location = _next  # go to next record
        return location, doc_id, l_key, start, size, status, _next

    def _find_place(self, start):
        """
        Find a place to where put the key. It will iterate using `next` field
        in record, until empty `next` found

        :param start: position to start from
        """
        location = start
        while True:
            self.buckets.seek(location)
            data = self.buckets.read(self.entry_line_size)
            # todo, maybe partial read there...
            doc_id, l_key, start, size, status, _next = self.entry_struct.unpack(data)
            if not _next or status == 'd':
                return self.buckets.tell() - self.entry_line_size, doc_id, l_key, start, size, status, _next
            else:
                location = _next  # go to next record

    def update(self, doc_id, key, u_start=0, u_size=0, u_status='o'):
        start_position = self._calculate_position(key)
        self.buckets.seek(start_position)
        curr_data = self.buckets.read(self.bucket_line_size)
        # test if it's unique or not really unique hash
        if curr_data:
            location = self.bucket_struct.unpack(curr_data)[0]
        else:
            raise ElemNotFound("Location '%s' not found" % doc_id)
        found_at, _doc_id, _key, start, size, status, _next = self._locate_doc_id(
            doc_id, key, location)
        self.buckets.seek(found_at)
        self.buckets.write(self.entry_struct.pack(
            doc_id, key, u_start, u_size, u_status, _next))
        self.flush()
        self._find_key.delete(key)
        self._locate_doc_id.delete(doc_id)
        return True

    def insert(self, doc_id, key, start, size, status='o'):
        start_position = self._calculate_position(key)
        self.buckets.seek(start_position)
        curr_data = self.buckets.read(self.bucket_line_size)
        # conflict occurs?
        if curr_data:
            location = self.bucket_struct.unpack(curr_data)[0]
        else:
            location = 0
        if location:
            # last key with that hash
            try:
                found_at, _doc_id, _key, _start, _size, _status, _next = self._locate_doc_id(
                    doc_id, key, location)
            except DocIdNotFound:
                # doc_id not in chain yet: append a new entry at EOF and
                # re-link the chain tail to it
                found_at, _doc_id, _key, _start, _size, _status, _next = self._find_place(
                    location)
                self.buckets.seek(0, 2)
                wrote_at = self.buckets.tell()
                self.buckets.write(self.entry_struct.pack(
                    doc_id, key, start, size, status, _next))
                self.flush()
                self.buckets.seek(found_at)
                self.buckets.write(self.entry_struct.pack(
                    _doc_id, _key, _start, _size, _status, wrote_at))
            else:
                # doc_id already present: overwrite entry in place
                self.buckets.seek(found_at)
                self.buckets.write(self.entry_struct.pack(
                    doc_id, key, start, size, status, _next))
            self.flush()
            self._locate_doc_id.delete(doc_id)
            self._find_key.delete(_key)
            # self._find_key.delete(key)
            # self._locate_key.delete(_key)
            return True
            # raise NotImplementedError
        else:
            self.buckets.seek(0, 2)
            wrote_at = self.buckets.tell()
            # check if position is bigger than all hash entries...
            if wrote_at < self.data_start:
                self.buckets.seek(self.data_start)
                wrote_at = self.buckets.tell()
            self.buckets.write(self.entry_struct.pack(
                doc_id, key, start, size, status, 0))
            self.flush()
            self._find_key.delete(key)
            self.buckets.seek(start_position)
            self.buckets.write(self.bucket_struct.pack(wrote_at))
            self.flush()
            return True

    def get(self, key):
        return self._find_key(self.make_key(key))

    def get_many(self, key, limit=1, offset=0):
        return self._find_key_many(self.make_key(key), limit, offset)

    def all(self, limit=-1, offset=0):
        # linear scan over the entry area, skipping deleted records
        self.buckets.seek(self.data_start)
        while offset:
            curr_data = self.buckets.read(self.entry_line_size)
            if not curr_data:
                break
            try:
                doc_id, key, start, size, status, _next = self.entry_struct.unpack(curr_data)
            except IndexException:
                # NOTE(review): unpack raises struct.error, not
                # IndexException — this except looks dead; kept as-is
                break
            else:
                if status != 'd':
                    offset -= 1
        while limit:
            curr_data = self.buckets.read(self.entry_line_size)
            if not curr_data:
                break
            try:
                doc_id, key, start, size, status, _next = self.entry_struct.unpack(curr_data)
            except IndexException:
                break
            else:
                if status != 'd':
                    yield doc_id, key, start, size, status
                    limit -= 1

    def _fix_link(self, key, pos_prev, pos_next):
        # CHECKIT why I need that hack
        if pos_prev >= self.data_start:
            self.buckets.seek(pos_prev)
            data = self.buckets.read(self.entry_line_size)
            if data:
                doc_id, l_key, start, size, status, _next = self.entry_struct.unpack(data)
                self.buckets.seek(pos_prev)
                self.buckets.write(self.entry_struct.pack(
                    doc_id, l_key, start, size, status, pos_next))
                self.flush()
        if pos_next:
            self.buckets.seek(pos_next)
            data = self.buckets.read(self.entry_line_size)
            if data:
                doc_id, l_key, start, size, status, _next = self.entry_struct.unpack(data)
                self.buckets.seek(pos_next)
                self.buckets.write(self.entry_struct.pack(
                    doc_id, l_key, start, size, status, _next))
                self.flush()
        return

    def delete(self, doc_id, key, start=0, size=0):
        start_position = self._calculate_position(key)
        self.buckets.seek(start_position)
        curr_data = self.buckets.read(self.bucket_line_size)
        if curr_data:
            location = self.bucket_struct.unpack(curr_data)[0]
        else:
            # case happens when trying to delete element with new index key in data
            # after adding new index to database without reindex
            raise TryReindexException()
        found_at, _doc_id, _key, start, size, status, _next = self._locate_doc_id(
            doc_id, key, location)
        # mark as deleted ('d') instead of physically removing the entry
        self.buckets.seek(found_at)
        self.buckets.write(self.entry_struct.pack(
            doc_id, key, start, size, 'd', _next))
        self.flush()
        # self._fix_link(_key, _prev, _next)
        self._find_key.delete(key)
        self._locate_doc_id.delete(doc_id)
        return True

    def compact(self, hash_lim=None):
        # rewrite live entries + their storage values into a fresh index,
        # then move the compacted files over the current ones
        if not hash_lim:
            hash_lim = self.hash_lim
        compact_ind = self.__class__(
            self.db_path, self.name + '_compact', hash_lim=hash_lim)
        compact_ind.create_index()

        gen = self.all()
        while True:
            try:
                doc_id, key, start, size, status = gen.next()
            except StopIteration:
                break
            self.storage._f.seek(start)
            value = self.storage._f.read(size)
            start_ = compact_ind.storage._f.tell()
            compact_ind.storage._f.write(value)
            compact_ind.insert(doc_id, key, start_, size, status)

        compact_ind.close_index()
        original_name = self.name
        # os.unlink(os.path.join(self.db_path, self.name + "_buck"))
        self.close_index()
        shutil.move(os.path.join(compact_ind.db_path, compact_ind.name + "_buck"),
                    os.path.join(self.db_path, self.name + "_buck"))
        shutil.move(os.path.join(compact_ind.db_path, compact_ind.name + "_stor"),
                    os.path.join(self.db_path, self.name + "_stor"))
        # self.name = original_name
        self.open_index()  # reload...
        self.name = original_name
        self._save_params(dict(name=original_name))
        self._fix_params()
        self._clear_cache()
        return True

    def make_key(self, key):
        return key

    def make_key_value(self, data):
        return '1', data

    def _clear_cache(self):
        self._find_key.clear()
        self._locate_doc_id.clear()

    def close_index(self):
        super(IU_HashIndex, self).close_index()
        self._clear_cache()


class IU_UniqueHashIndex(IU_HashIndex):
    """
    Index for *unique* keys! Designed to be a **id** index.

    That class is for Internal Use only, if you want to use UniqueHashIndex
    just subclass the :py:class:`UniqueHashIndex` instead this one.

    That design is because main index logic should be always in database not
    in custom user indexes.
    """

    def __init__(self, db_path, name, entry_line_format="<32s8sIIcI",
                 *args, **kwargs):
        if 'key' in kwargs:
            raise IndexPreconditionsException(
                "UniqueHashIndex doesn't accept key parameter")
        super(IU_UniqueHashIndex, self).__init__(db_path, name,
                                                 entry_line_format,
                                                 *args, **kwargs)
        self.create_key = random_hex_32  # : set the function to create random key when no _id given
        # self.entry_struct=struct.Struct(entry_line_format)

    # @lfu_cache(100)
    def _find_key(self, key):
        """
        Find the key position

        :param key: the key to find
        """
        start_position = self._calculate_position(key)
        self.buckets.seek(start_position)
        curr_data = self.buckets.read(self.bucket_line_size)
        if curr_data:
            location = self.bucket_struct.unpack(curr_data)[0]
            found_at, l_key, rev, start, size, status, _next = self._locate_key(
                key, location)
            return l_key, rev, start, size, status
        else:
            return None, None, 0, 0, 'u'

    def _find_key_many(self, *args, **kwargs):
        # keys are unique here, "many" makes no sense
        raise NotImplementedError

    def _find_place(self, start, key):
        """
        Find a place to where put the key. It will iterate using `next` field
        in record, until empty `next` found

        :param start: position to start from
        """
        location = start
        while True:
            self.buckets.seek(location)
            data = self.buckets.read(self.entry_line_size)
            # todo, maybe partial read there...
            l_key, rev, start, size, status, _next = self.entry_struct.unpack(data)
            if l_key == key:
                # uniqueness violation while walking the chain
                raise IndexException("The '%s' key already exists" % key)
            if not _next or status == 'd':
                return self.buckets.tell() - self.entry_line_size, l_key, rev, start, size, status, _next
            else:
                location = _next  # go to next record

    # @lfu_cache(100)
    def _locate_key(self, key, start):
        """
        Locate position of the key, it will iterate using `next` field in
        record until required key will be find.

        :param key: the key to locate
        :param start: position to start from
        """
        location = start
        while True:
            self.buckets.seek(location)
            data = self.buckets.read(self.entry_line_size)
            # todo, maybe partial read there...
            try:
                l_key, rev, start, size, status, _next = self.entry_struct.unpack(data)
            except struct.error:
                raise ElemNotFound("Location '%s' not found" % key)
            if l_key == key:
                break
            else:
                if not _next:
                    # not found
                    raise ElemNotFound("Location '%s' not found" % key)
                else:
                    location = _next  # go to next record
        return self.buckets.tell() - self.entry_line_size, l_key, rev, start, size, status, _next

    def update(self, key, rev, u_start=0, u_size=0, u_status='o'):
        start_position = self._calculate_position(key)
        self.buckets.seek(start_position)
        curr_data = self.buckets.read(self.bucket_line_size)
        # test if it's unique or not really unique hash
        if curr_data:
            location = self.bucket_struct.unpack(curr_data)[0]
        else:
            raise ElemNotFound("Location '%s' not found" % key)
        found_at, _key, _rev, start, size, status, _next = self._locate_key(
            key, location)
        # 0 means "keep the current value"
        if u_start == 0:
            u_start = start
        if u_size == 0:
            u_size = size
        self.buckets.seek(found_at)
        self.buckets.write(self.entry_struct.pack(
            key, rev, u_start, u_size, u_status, _next))
        self.flush()
        self._find_key.delete(key)
        return True

    def insert(self, key, rev, start, size, status='o'):
        start_position = self._calculate_position(key)
        self.buckets.seek(start_position)
        curr_data = self.buckets.read(self.bucket_line_size)
        # conflict occurs?
        if curr_data:
            location = self.bucket_struct.unpack(curr_data)[0]
        else:
            location = 0
        if location:
            # last key with that hash
            found_at, _key, _rev, _start, _size, _status, _next = self._find_place(
                location, key)
            self.buckets.seek(0, 2)
            wrote_at = self.buckets.tell()
            # check if position is bigger than all hash entries...
            if wrote_at < self.data_start:
                self.buckets.seek(self.data_start)
                wrote_at = self.buckets.tell()
            self.buckets.write(self.entry_struct.pack(
                key, rev, start, size, status, _next))
            self.flush()
            self.buckets.seek(found_at)
            self.buckets.write(self.entry_struct.pack(
                _key, _rev, _start, _size, _status, wrote_at))
            self.flush()
            self._find_key.delete(_key)
            # self._locate_key.delete(_key)
            return True
            # raise NotImplementedError
        else:
            self.buckets.seek(0, 2)
            wrote_at = self.buckets.tell()
            # check if position is bigger than all hash entries...
            if wrote_at < self.data_start:
                self.buckets.seek(self.data_start)
                wrote_at = self.buckets.tell()
            self.buckets.write(self.entry_struct.pack(
                key, rev, start, size, status, 0))
            self.flush()
            self.buckets.seek(start_position)
            self.buckets.write(self.bucket_struct.pack(wrote_at))
            self.flush()
            self._find_key.delete(key)
            return True

    def all(self, limit=-1, offset=0):
        self.buckets.seek(self.data_start)
        while offset:
            curr_data = self.buckets.read(self.entry_line_size)
            if not curr_data:
                break
            try:
                doc_id, rev, start, size, status, _next = self.entry_struct.unpack(curr_data)
            except IndexException:
                break
            else:
                if status != 'd':
                    offset -= 1
        while limit:
            curr_data = self.buckets.read(self.entry_line_size)
            if not curr_data:
                break
            try:
                doc_id, rev, start, size, status, _next = self.entry_struct.unpack(curr_data)
            except IndexException:
                break
            else:
                if status != 'd':
                    yield doc_id, rev, start, size, status
                    limit -= 1

    def get_many(self, *args, **kwargs):
        # keys are unique here, "many" makes no sense
        raise NotImplementedError

    def delete(self, key, start=0, size=0):
        self.update(key, '0000', start, size, 'd')

    def make_key_value(self, data):
        _id = data['_id']
        try:
            _id = bytes(data['_id'])
        except Exception:
            raise IndexPreconditionsException(
                "_id must be valid string/bytes object")
        if len(_id) != 32:
            raise IndexPreconditionsException("Invalid _id length")
        del data['_id']
        del data['_rev']
        return _id, data

    def destroy(self):
        Index.destroy(self)
        self._clear_cache()

    def _clear_cache(self):
        # only _find_key is cached on this class
        self._find_key.clear()


class DummyHashIndex(IU_HashIndex):
    """No-op index backed by :py:class:`DummyStorage`."""

    def __init__(self, db_path, name, entry_line_format="<32s4sIIcI",
                 *args, **kwargs):
        super(DummyHashIndex, self).__init__(db_path, name,
                                             entry_line_format,
                                             *args, **kwargs)
        self.create_key = random_hex_32  # : set the function to create random key when no _id given
        # self.entry_struct=struct.Struct(entry_line_format)

    def update(self, *args, **kwargs):
        return True

    def insert(self, *args, **kwargs):
        return True

    def all(self, *args, **kwargs):
        raise StopIteration

    def get(self, *args, **kwargs):
        raise ElemNotFound

    def get_many(self, *args, **kwargs):
        raise StopIteration

    def delete(self, *args, **kwargs):
        pass

    def make_key_value(self, data):
        return '1', {'_': 1}

    def destroy(self):
        pass

    def _clear_cache(self):
        pass

    def _open_storage(self):
        if not self.storage:
            self.storage = DummyStorage()
        self.storage.open()

    def _create_storage(self):
        if not self.storage:
            self.storage = DummyStorage()
        self.storage.create()


# classes for public use, done in this way because of
# generation static files with indexes (_index directory)


class HashIndex(IU_HashIndex):
    """
    That class is designed to be used in custom indexes.
    """
    pass


class UniqueHashIndex(IU_UniqueHashIndex):
    """
    That class is designed to be used in custom indexes. It's designed to
    be **id** index.
    """
    pass