import string
import struct
import types

try:
    import cStringIO
except ImportError:
    # cStringIO is not available on every interpreter (it was removed in
    # Python 3); io.BytesIO offers the same read/seek/tell/close interface
    # for in-memory binary data.
    cStringIO = None
    import io


class IndexedPage:
    """A page of keyed entries, either read from a file or held in memory.

    A serialized page starts with a fixed header (page type, page length,
    directory length), followed by the directory and then the entries.
    """

    #-- pageType,pageLength,directoryLength
    PAGE_HEADER_STRUCTURE = "!III"
    PAGE_HEADER_STRUCTURE_length = struct.calcsize(PAGE_HEADER_STRUCTURE)

    #-- Layout of one directory record (consumed by parseDirectory).
    DIRECTORY_STRUCTURE = "!IIB"
    DIRECTORY_STRUCTURE_length = struct.calcsize(DIRECTORY_STRUCTURE)

    def __init__(self, kwdict=None, **kwargs):
        """Create an empty page, or load one from a file or a byte string.

        Options are given as keywords, or as a single dict in ``kwdict``
        (which then replaces the keywords entirely):

        source -- file-like object positioned at the start of a page.
                  Entries are read from it on demand and the page is
                  read only, unless 'cache' is also given.
        data   -- serialized page as a string.  It is wrapped in a
                  temporary in-memory file and loaded completely.
        cache  -- if present (any value), every entry is loaded into
                  memory immediately.

        With neither 'source' nor 'data' the page starts out empty and
        lives in memory.

        Raises struct.error if the source holds fewer bytes than the page
        header requires.
        """
        self.storage = "memory"
        self.directory = {}
        self.cache = {}
        self.pageType = -1
        self.source = None
        self.sourceOffset = 0
        self.baseAddress = 0

        if kwdict is not None:
            kwargs = kwdict

        if 'source' in kwargs:
            self.storage = "disk"
            self.source = kwargs['source']
        elif 'data' in kwargs:
            #-- Use data to construct a temporary memory file.
            if cStringIO is not None:
                self.source = cStringIO.StringIO(kwargs['data'])
            else:
                self.source = io.BytesIO(kwargs['data'])

        if self.source is None:
            #-- Nothing to load: keep the empty in-memory page.
            return

        headerLength = IndexedPage.PAGE_HEADER_STRUCTURE_length

        #-- The offset of this index page relative to the file.
        self.sourceOffset = self.source.tell()

        #-- Read the page header.
        self.pageType, pageLength, directoryLength = struct.unpack(
            IndexedPage.PAGE_HEADER_STRUCTURE,
            self.source.read(headerLength))

        #-- Compute the base address of the entries (not the directory).
        self.baseAddress = self.sourceOffset + headerLength + directoryLength

        #-- Read the directory, and parse it.
        self.parseDirectory(self.source.read(directoryLength))

        #-- pageLength is measured from the start of the page.
        pageEnd = self.sourceOffset + pageLength

        #-- Check for aggressive caching.
        if 'cache' in kwargs:
            #-- Yep, load the entire page into memory.
            self.cache = self.readAll()
            self.storage = "memory"
            #-- readAll() leaves the pointer behind whichever entry it
            #-- happened to read last, so move it to the end of the page
            #-- just like the non-cached path does.
            self.source.seek(pageEnd)
        elif 'data' in kwargs:
            self.cache = self.readAll()
            self.source.close()
        else:
            #-- Nope. Position the file pointer at the end of the page.
            self.source.seek(pageEnd)

    #-- Returns a list of keys that are in the index.
def keys(self): if self.storage=="memory": return self.cache.keys() if self.storage=="disk": return self.directory.keys() def has_key(self,key): if self.storage=="memory": return self.cache.has_key(key) if self.storage=="disk": return self.directory.has_key(key) def __getitem__(self,indexTerm): if self.storage=="disk": return self.readEntry(indexTerm) else: return self.cache[indexTerm] def __setitem__(self,index,value): if self.storage=="disk": raise Exception("IndexedPage is on disk and is therefore read only") self.cache[index]=value def readEntry(self,indexTerm): entryOffset,entryLength=self.directory[indexTerm] self.source.seek(self.baseAddress+entryOffset) return self.unserializeEntry(self.source.read(entryLength)) def readAll(self): cache={} for indexTerm in self.directory.keys(): cache[indexTerm]=self.readEntry(indexTerm) return cache def parseDirectory(self,directoryData): i=0 while i1: size=len(l) l2=[] for i in range(size/2): l2.append(f(l[i*2],l[i*2+1])) if size%2!=0: l2.append(l[size-1]) l=l2 return l[0]