filters

pole.cpp

00001 /* POLE - Portable C++ library to access OLE Storage 
00002    Copyright (C) 2002-2005 Ariya Hidayat <ariya@kde.org>
00003 
00004    Redistribution and use in source and binary forms, with or without 
00005    modification, are permitted provided that the following conditions 
00006    are met:
00007    * Redistributions of source code must retain the above copyright notice, 
00008      this list of conditions and the following disclaimer.
00009    * Redistributions in binary form must reproduce the above copyright notice, 
00010      this list of conditions and the following disclaimer in the documentation 
00011      and/or other materials provided with the distribution.
00012    * Neither the name of the authors nor the names of its contributors may be 
00013      used to endorse or promote products derived from this software without 
00014      specific prior written permission.
00015 
00016    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
00017    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
00018    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
00019    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
00020    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
00021    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
00022    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
00023    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
00024    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
00025    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 
00026    THE POSSIBILITY OF SUCH DAMAGE.
00027 */
00028 
00029 #include <fstream>
00030 #include <iostream>
00031 #include <list>
00032 #include <string>
00033 #include <vector>
00034 
00035 #include <string.h>
00036 
00037 #include "pole.h"
00038 
00039 // enable to activate debugging output
00040 // #define POLE_DEBUG
00041 
00042 namespace POLE
00043 {
00044 
// In-memory copy of the fixed fields stored at the start of a compound
// document. load()/save() convert between this structure and the raw bytes.
class Header
{
  public:
    Header();

    bool valid();
    void load( const unsigned char* buffer );
    void save( unsigned char* buffer );
    void debug();

    unsigned char id[8];          // magic identifier (file signature)
    unsigned b_shift;             // big block size is 1 << b_shift
    unsigned s_shift;             // small block size is 1 << s_shift
    unsigned num_bat;             // number of blocks used by the big bat
    unsigned dirent_start;        // first block of the directory info
    unsigned threshold;           // small/big file cutoff (usually 4K)
    unsigned sbat_start;          // first block of the small bat
    unsigned num_sbat;            // number of blocks used by the small bat
    unsigned mbat_start;          // first block of the meta bat
    unsigned num_mbat;            // number of blocks used by the meta bat
    unsigned long bb_blocks[109]; // big bat block indices kept in the header
};
00066 
// Block allocation table: entry i holds the index of the block that follows
// block i in its chain, or one of the special marker values below.
class AllocTable
{
  public:
    // special entry values
    static const unsigned Avail;
    static const unsigned Eof;
    static const unsigned Bat;
    static const unsigned MetaBat;

    // size in bytes of the blocks this table describes
    unsigned blockSize;

    AllocTable();

    // table size management
    void clear();
    unsigned long count();
    void resize( unsigned long newsize );
    void preserve( unsigned long n );
    unsigned unused();

    // entry access
    unsigned long operator[](unsigned long index );
    void set( unsigned long index, unsigned long val );
    void setChain( std::vector<unsigned long> );
    std::vector<unsigned long> follow( unsigned long start );

    // conversion from/to the raw little-endian representation
    void load( const unsigned char* buffer, unsigned len );
    void save( unsigned char* buffer );
    unsigned size();

    void debug();

  private:
    // copying is not supported
    AllocTable( const AllocTable& );
    AllocTable& operator=( const AllocTable& );

    std::vector<unsigned long> data;
};
00094 
// One record of the directory tree, already decoded from its raw form.
class DirEntry
{
  public:
    bool valid;            // entries with valid == false should be skipped
    std::string name;      // entry name as a plain (non-Unicode) string
    bool dir;              // true for a directory, false for a file
    unsigned long size;    // size of the data (meaningless for directories)
    unsigned long start;   // first block of the data
    unsigned prev;         // index of the previous sibling
    unsigned next;         // index of the next sibling
    unsigned child;        // index of the first child
};
00107 
00108 class DirTree
00109 {
00110   public:
00111     static const unsigned End;
00112     DirTree();
00113     void clear();
00114     unsigned entryCount();
00115     DirEntry* entry( unsigned index );
00116     DirEntry* entry( const std::string& name, bool create=false );
00117     int indexOf( DirEntry* e );
00118     int parent( unsigned index );
00119     std::string fullName( unsigned index );
00120     std::vector<unsigned> children( unsigned index );
00121     void load( unsigned char* buffer, unsigned len );
00122     void save( unsigned char* buffer );
00123     unsigned size();
00124     void debug();
00125   private:
00126     std::vector<DirEntry> entries;
00127     DirTree( const DirTree& );
00128     DirTree& operator=( const DirTree& );
00129 };
00130 
00131 class StorageIO
00132 {
00133   public:
00134     Storage* storage;         // owner
00135     std::string filename;     // filename
00136     std::fstream file;        // associated with above name
00137     int result;               // result of operation
00138     bool opened;              // true if file is opened
00139     unsigned long filesize;   // size of the file
00140     
00141     Header* header;           // storage header 
00142     DirTree* dirtree;         // directory tree
00143     AllocTable* bbat;         // allocation table for big blocks
00144     AllocTable* sbat;         // allocation table for small blocks
00145     
00146     std::vector<unsigned long> sb_blocks; // blocks for "small" files
00147        
00148     std::list<Stream*> streams;
00149 
00150     StorageIO( Storage* storage, const char* filename );
00151     ~StorageIO();
00152     
00153     bool open();
00154     void close();
00155     void flush();
00156     void load();
00157     void create();
00158 
00159     unsigned long loadBigBlocks( std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen );
00160 
00161     unsigned long loadBigBlock( unsigned long block, unsigned char* buffer, unsigned long maxlen );
00162 
00163     unsigned long loadSmallBlocks( std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen );
00164 
00165     unsigned long loadSmallBlock( unsigned long block, unsigned char* buffer, unsigned long maxlen );
00166     
00167     StreamIO* streamIO( const std::string& name ); 
00168 
00169   private:  
00170     // no copy or assign
00171     StorageIO( const StorageIO& );
00172     StorageIO& operator=( const StorageIO& );
00173 
00174 };
00175 
00176 class StreamIO
00177 {
00178   public:
00179     StorageIO* io;
00180     DirEntry* entry;
00181     std::string fullName;
00182     bool eof;
00183     bool fail;
00184 
00185     StreamIO( StorageIO* io, DirEntry* entry );
00186     ~StreamIO();
00187     unsigned long size();
00188     void seek( unsigned long pos );
00189     unsigned long tell();
00190     int getch();
00191     unsigned long read( unsigned char* data, unsigned long maxlen );
00192     unsigned long read( unsigned long pos, unsigned char* data, unsigned long maxlen );
00193 
00194 
00195   private:
00196     std::vector<unsigned long> blocks;
00197 
00198     // no copy or assign
00199     StreamIO( const StreamIO& );
00200     StreamIO& operator=( const StreamIO& );
00201 
00202     // pointer for read
00203     unsigned long m_pos;
00204 
00205     // simple cache system to speed-up getch()
00206     unsigned char* cache_data;
00207     unsigned long cache_size;
00208     unsigned long cache_pos;
00209     void updateCache();
00210 };
00211 
00212 } // namespace POLE
00213 
00214 using namespace POLE;
00215 
// Decode an unsigned 16-bit little-endian value from the two bytes at ptr.
static inline unsigned long readU16( const unsigned char* ptr )
{
  const unsigned long low  = ptr[0];
  const unsigned long high = ptr[1];
  return low | ( high << 8 );
}
00220 
// Decode an unsigned 32-bit little-endian value from the four bytes at ptr.
//
// Each byte is widened to unsigned long before it is shifted. Shifting the
// promoted int instead would push ptr[3] into the sign bit, and where
// unsigned long is 64 bits wide the negative int would be sign-extended,
// so 0xffffffff would no longer compare equal to the 32-bit markers.
static inline unsigned long readU32( const unsigned char* ptr )
{
  return   (unsigned long)ptr[0]
       | ( (unsigned long)ptr[1] << 8 )
       | ( (unsigned long)ptr[2] << 16 )
       | ( (unsigned long)ptr[3] << 24 );
}
00225 
// Store the low 16 bits of data at ptr, least significant byte first.
static inline void writeU16( unsigned char* ptr, unsigned long data )
{
  for( int k = 0; k < 2; k++ )
    ptr[k] = (unsigned char)( ( data >> ( 8 * k ) ) & 0xff );
}
00231 
// Store the low 32 bits of data at ptr, least significant byte first.
static inline void writeU32( unsigned char* ptr, unsigned long data )
{
  for( int k = 0; k < 4; k++ )
    ptr[k] = (unsigned char)( ( data >> ( 8 * k ) ) & 0xff );
}
00239 
// the eight signature bytes compared against the start of a file in
// StorageIO::load() and written by Header::save()
static const unsigned char pole_magic[] = {
  0xd0, 0xcf, 0x11, 0xe0,
  0xa1, 0xb1, 0x1a, 0xe1
};
00242 
00243 // =========== Header ==========
00244 
00245 Header::Header()
00246 {
00247   b_shift = 9;
00248   s_shift = 6;
00249   num_bat = 0;
00250   dirent_start = 0;
00251   threshold = 4096;
00252   sbat_start = 0;
00253   num_sbat = 0;
00254   mbat_start = 0;
00255   num_mbat = 0;
00256 
00257   for( unsigned i = 0; i < 8; i++ )
00258     id[i] = pole_magic[i];  
00259   for( unsigned i=0; i<109; i++ )
00260     bb_blocks[i] = AllocTable::Avail;
00261 }
00262 
00263 bool Header::valid()
00264 {
00265   if( threshold != 4096 ) return false;
00266   if( num_bat == 0 ) return false;
00267   if( (num_bat > 109) && (num_bat > (num_mbat * 127) + 109)) return false;
00268   if( (num_bat < 109) && (num_mbat != 0) ) return false;
00269   if( s_shift > b_shift ) return false;
00270   if( b_shift <= 6 ) return false;
00271   if( b_shift >=31 ) return false;
00272   
00273   return true;
00274 }
00275 
00276 void Header::load( const unsigned char* buffer )
00277 {
00278   b_shift      = readU16( buffer + 0x1e );
00279   s_shift      = readU16( buffer + 0x20 );
00280   num_bat      = readU32( buffer + 0x2c );
00281   dirent_start = readU32( buffer + 0x30 );
00282   threshold    = readU32( buffer + 0x38 );
00283   sbat_start   = readU32( buffer + 0x3c );
00284   num_sbat     = readU32( buffer + 0x40 );
00285   mbat_start   = readU32( buffer + 0x44 );
00286   num_mbat     = readU32( buffer + 0x48 );
00287   
00288   for( unsigned i = 0; i < 8; i++ )
00289     id[i] = buffer[i];  
00290   for( unsigned i=0; i<109; i++ )
00291     bb_blocks[i] = readU32( buffer + 0x4C+i*4 );
00292 }
00293 
00294 void Header::save( unsigned char* buffer )
00295 {
00296   memset( buffer, 0, 0x4c );
00297   memcpy( buffer, pole_magic, 8 );        // ole signature
00298   writeU32( buffer + 8, 0 );              // unknown 
00299   writeU32( buffer + 12, 0 );             // unknown
00300   writeU32( buffer + 16, 0 );             // unknown
00301   writeU16( buffer + 24, 0x003e );        // revision ?
00302   writeU16( buffer + 26, 3 );             // version ?
00303   writeU16( buffer + 28, 0xfffe );        // unknown
00304   writeU16( buffer + 0x1e, b_shift );
00305   writeU16( buffer + 0x20, s_shift );
00306   writeU32( buffer + 0x2c, num_bat );
00307   writeU32( buffer + 0x30, dirent_start );
00308   writeU32( buffer + 0x38, threshold );
00309   writeU32( buffer + 0x3c, sbat_start );
00310   writeU32( buffer + 0x40, num_sbat );
00311   writeU32( buffer + 0x44, mbat_start );
00312   writeU32( buffer + 0x48, num_mbat );
00313   
00314   for( unsigned i=0; i<109; i++ )
00315     writeU32( buffer + 0x4C+i*4, bb_blocks[i] );
00316 }
00317 
00318 void Header::debug()
00319 {
00320   std::cout << std::endl;
00321   std::cout << "b_shift " << b_shift << std::endl;
00322   std::cout << "s_shift " << s_shift << std::endl;
00323   std::cout << "num_bat " << num_bat << std::endl;
00324   std::cout << "dirent_start " << dirent_start << std::endl;
00325   std::cout << "threshold " << threshold << std::endl;
00326   std::cout << "sbat_start " << sbat_start << std::endl;
00327   std::cout << "num_sbat " << num_sbat << std::endl;
00328   std::cout << "mbat_start " << mbat_start << std::endl;
00329   std::cout << "num_mbat " << num_mbat << std::endl;
00330   
00331   unsigned s = (num_bat<=109) ? num_bat : 109;
00332   std::cout << "bat blocks: ";
00333   for( unsigned i = 0; i < s; i++ )
00334     std::cout << bb_blocks[i] << " ";
00335   std::cout << std::endl;
00336 }
00337  
00338 // =========== AllocTable ==========
00339 
00340 const unsigned AllocTable::Avail = 0xffffffff;
00341 const unsigned AllocTable::Eof = 0xfffffffe;
00342 const unsigned AllocTable::Bat = 0xfffffffd;
00343 const unsigned AllocTable::MetaBat = 0xfffffffc;
00344 
00345 AllocTable::AllocTable()
00346 {
00347   blockSize = 4096;
00348   // initial size
00349   resize( 128 );
00350 }
00351 
00352 unsigned long AllocTable::count()
00353 {
00354   return data.size();
00355 }
00356 
00357 void AllocTable::resize( unsigned long newsize )
00358 {
00359   unsigned oldsize = data.size();
00360   data.resize( newsize );
00361   if( newsize > oldsize )
00362     for( unsigned i = oldsize; i<newsize; i++ )
00363       data[i] = Avail;
00364 }
00365 
00366 // make sure there're still free blocks
00367 void AllocTable::preserve( unsigned long n )
00368 {
00369   std::vector<unsigned long> pre;
00370   for( unsigned i=0; i < n; i++ )
00371     pre.push_back( unused() );
00372 }
00373 
00374 unsigned long AllocTable::operator[]( unsigned long index )
00375 {
00376   unsigned long result;
00377   result = data[index];
00378   return result;
00379 }
00380 
00381 void AllocTable::set( unsigned long index, unsigned long value )
00382 {
00383   if( index >= count() ) resize( index + 1);
00384   data[ index ] = value;
00385 }
00386 
00387 void AllocTable::setChain( std::vector<unsigned long> chain )
00388 {
00389   if( chain.size() )
00390   {
00391     for( unsigned i=0; i<chain.size()-1; i++ )
00392       set( chain[i], chain[i+1] );
00393     set( chain[ chain.size()-1 ], AllocTable::Eof );
00394   }
00395 }
00396 
00397 // follow 
00398 std::vector<unsigned long> AllocTable::follow( unsigned long start )
00399 {
00400   std::vector<unsigned long> chain;
00401 
00402   if( start >= count() ) return chain; 
00403 
00404   unsigned long p = start;
00405   while( p < count() )
00406   {
00407     if( p == (unsigned long)Eof ) break;
00408     if( p == (unsigned long)Bat ) break;
00409     if( p == (unsigned long)MetaBat ) break;
00410     if( p >= count() ) break;
00411     chain.push_back( p );
00412     if( data[p] >= count() ) break;
00413     p = data[ p ];
00414   }
00415 
00416   return chain;
00417 }
00418 
00419 unsigned AllocTable::unused()
00420 {
00421   // find first available block
00422   for( unsigned i = 0; i < data.size(); i++ )
00423     if( data[i] == Avail )
00424       return i;
00425   
00426   // completely full, so enlarge the table
00427   unsigned block = data.size();
00428   resize( data.size()+10 );
00429   return block;      
00430 }
00431 
00432 void AllocTable::load( const unsigned char* buffer, unsigned len )
00433 {
00434   resize( len / 4 );
00435   for( unsigned i = 0; i < count(); i++ )
00436     set( i, readU32( buffer + i*4 ) );
00437 }
00438 
00439 // return space required to save this dirtree
00440 unsigned AllocTable::size()
00441 {
00442   return count() * 4;
00443 }
00444 
00445 void AllocTable::save( unsigned char* buffer )
00446 {
00447   for( unsigned i = 0; i < count(); i++ )
00448     writeU32( buffer + i*4, data[i] );
00449 }
00450 
00451 void AllocTable::debug()
00452 {
00453   std::cout << "block size " << data.size() << std::endl;
00454   for( unsigned i=0; i< data.size(); i++ )
00455   {
00456      if( data[i] == Avail ) continue;
00457      std::cout << i << ": ";
00458      if( data[i] == Eof ) std::cout << "[eof]";
00459      else if( data[i] == Bat ) std::cout << "[bat]";
00460      else if( data[i] == MetaBat ) std::cout << "[metabat]";
00461      else std::cout << data[i];
00462      std::cout << std::endl;
00463   }
00464 }
00465 
00466 // =========== DirTree ==========
00467 
00468 const unsigned DirTree::End = 0xffffffff;
00469 
00470 DirTree::DirTree()
00471 {
00472   clear();
00473 }
00474 
00475 void DirTree::clear()
00476 {
00477   // leave only root entry
00478   entries.resize( 1 );
00479   entries[0].valid = true;
00480   entries[0].name = "Root Entry";
00481   entries[0].dir = true;
00482   entries[0].size = 0;
00483   entries[0].start = End;
00484   entries[0].prev = End;
00485   entries[0].next = End;
00486   entries[0].child = End;
00487 }
00488 
00489 unsigned DirTree::entryCount()
00490 {
00491   return entries.size();
00492 }
00493 
00494 DirEntry* DirTree::entry( unsigned index )
00495 {
00496   if( index >= entryCount() ) return (DirEntry*) 0;
00497   return &entries[ index ];
00498 }
00499 
00500 int DirTree::indexOf( DirEntry* e )
00501 {
00502   for( unsigned i = 0; i < entryCount(); i++ )
00503     if( entry( i ) == e ) return i;
00504     
00505   return -1;
00506 }
00507 
00508 int DirTree::parent( unsigned index )
00509 {
00510   // brute-force, basically we iterate for each entries, find its children
00511   // and check if one of the children is 'index'
00512   for( unsigned j=0; j<entryCount(); j++ )
00513   {
00514     std::vector<unsigned> chi = children( j );
00515     for( unsigned i=0; i<chi.size();i++ )
00516       if( chi[i] == index )
00517         return j;
00518   }
00519         
00520   return -1;
00521 }
00522 
00523 std::string DirTree::fullName( unsigned index )
00524 {
00525   // don't use root name ("Root Entry"), just give "/"
00526   if( index == 0 ) return "/";
00527 
00528   std::string result = entry( index )->name;
00529   result.insert( 0,  "/" );
00530   int p = parent( index );
00531   DirEntry * _entry = 0;
00532   while( p > 0 )
00533   {
00534     _entry = entry( p );
00535     if (_entry->dir && _entry->valid)
00536     {
00537       result.insert( 0,  _entry->name);
00538       result.insert( 0,  "/" );
00539     }
00540     --p;
00541     index = p;
00542     if( index <= 0 ) break;
00543   }
00544   return result;
00545 }
00546 
00547 // given a fullname (e.g "/ObjectPool/_1020961869"), find the entry
00548 // if not found and create is false, return 0
00549 // if create is true, a new entry is returned
00550 DirEntry* DirTree::entry( const std::string& name, bool create )
00551 {
00552    if( !name.length() ) return (DirEntry*)0;
00553  
00554    // quick check for "/" (that's root)
00555    if( name == "/" ) return entry( 0 );
00556    
00557    // split the names, e.g  "/ObjectPool/_1020961869" will become:
00558    // "ObjectPool" and "_1020961869" 
00559    std::list<std::string> names;
00560    std::string::size_type start = 0, end = 0;
00561    if( name[0] == '/' ) start++;
00562    while( start < name.length() )
00563    {
00564      end = name.find_first_of( '/', start );
00565      if( end == std::string::npos ) end = name.length();
00566      names.push_back( name.substr( start, end-start ) );
00567      start = end+1;
00568    }
00569   
00570    // start from root 
00571    int index = 0 ;
00572 
00573    // trace one by one   
00574    std::list<std::string>::iterator it; 
00575 
00576    for( it = names.begin(); it != names.end(); ++it )
00577    {
00578      // find among the children of index
00579      std::vector<unsigned> chi = children( index );
00580      unsigned child = 0;
00581      for( unsigned i = 0; i < chi.size(); i++ )
00582      {
00583        DirEntry* ce = entry( chi[i] );
00584        if( ce ) 
00585        if( ce->valid && ( ce->name.length()>1 ) )
00586        if( ce->name == *it )
00587              child = chi[i];
00588      }
00589      
00590      // traverse to the child
00591      if( child > 0 ) index = child;
00592      else
00593      {
00594        // not found among children
00595        if( !create ) return (DirEntry*)0;
00596        
00597        // create a new entry
00598        unsigned parent = index;
00599        entries.push_back( DirEntry() );
00600        index = entryCount()-1;
00601        DirEntry* e = entry( index );
00602        e->valid = true;
00603        e->name = *it;
00604        e->dir = false;
00605        e->size = 0;
00606        e->start = 0;
00607        e->child = End;
00608        e->prev = End;
00609        e->next = entry(parent)->child;
00610        entry(parent)->child = index;
00611      }
00612    }
00613 
00614    return entry( index );
00615 }
00616 
00617 // helper function: recursively find siblings of index
00618 void dirtree_find_siblings( DirTree* dirtree, std::vector<unsigned>& result, 
00619   unsigned index )
00620 {
00621   DirEntry* e = dirtree->entry( index );
00622   if( !e ) return;
00623   if( !e->valid ) return;
00624 
00625   // prevent infinite loop  
00626   for( unsigned i = 0; i < result.size(); i++ )
00627     if( result[i] == index ) return;
00628 
00629   // add myself    
00630   result.push_back( index );
00631   
00632   // visit previous sibling, don't go infinitely
00633   unsigned prev = e->prev;
00634   if( ( prev > 0 ) && ( prev < dirtree->entryCount() ) )
00635   {
00636     for( unsigned i = 0; i < result.size(); i++ )
00637       if( result[i] == prev ) prev = 0;
00638     if( prev ) dirtree_find_siblings( dirtree, result, prev );
00639   }
00640     
00641   // visit next sibling, don't go infinitely
00642   unsigned next = e->next;
00643   if( ( next > 0 ) && ( next < dirtree->entryCount() ) )
00644   {
00645     for( unsigned i = 0; i < result.size(); i++ )
00646       if( result[i] == next ) next = 0;
00647     if( next ) dirtree_find_siblings( dirtree, result, next );
00648   }
00649 }
00650 
00651 std::vector<unsigned> DirTree::children( unsigned index )
00652 {
00653   std::vector<unsigned> result;
00654   
00655   DirEntry* e = entry( index );
00656   if( e ) if( e->valid && e->child < entryCount() )
00657     dirtree_find_siblings( this, result, e->child );
00658     
00659   return result;
00660 }
00661 
00662 void DirTree::load( unsigned char* buffer, unsigned size )
00663 {
00664   entries.clear();
00665   
00666   for( unsigned i = 0; i < size/128; i++ )
00667   {
00668     unsigned p = i * 128;
00669     
00670     // would be < 32 if first char in the name isn't printable
00671     unsigned prefix = 32;
00672     
00673     // parse name of this entry, which stored as Unicode 16-bit
00674     std::string name;
00675     int name_len = readU16( buffer + 0x40+p );
00676     if( name_len > 64 ) name_len = 64;
00677     for( int j=0; ( buffer[j+p]) && (j<name_len); j+= 2 )
00678       name.append( 1, buffer[j+p] );
00679       
00680     // first char isn't printable ? remove it...
00681     if( buffer[p] < 32 )
00682     { 
00683       prefix = buffer[0]; 
00684       name.erase( 0,1 ); 
00685     }
00686     
00687     // 2 = file (aka stream), 1 = directory (aka storage), 5 = root
00688     unsigned type = buffer[ 0x42 + p];
00689     
00690     DirEntry e;
00691     e.valid = true;
00692     e.name = name;
00693     e.start = readU32( buffer + 0x74+p );
00694     e.size = readU32( buffer + 0x78+p );
00695     e.prev = readU32( buffer + 0x44+p );
00696     e.next = readU32( buffer + 0x48+p );
00697     e.child = readU32( buffer + 0x4C+p );
00698     e.dir = ( type!=2 );
00699     
00700     // sanity checks
00701     if( (type != 2) && (type != 1 ) && (type != 5 ) ) e.valid = false;
00702     if( name_len < 1 ) e.valid = false;
00703     
00704     entries.push_back( e );
00705   }  
00706 }
00707 
00708 // return space required to save this dirtree
00709 unsigned DirTree::size()
00710 {
00711   return entryCount() * 128;
00712 }
00713 
00714 void DirTree::save( unsigned char* buffer )
00715 {
00716   memset( buffer, 0, size() );
00717   
00718   // root is fixed as "Root Entry"
00719   DirEntry* root = entry( 0 );
00720   std::string name = "Root Entry";
00721   for( unsigned j = 0; j < name.length(); j++ )
00722     buffer[ j*2 ] = name[j];
00723   writeU16( buffer + 0x40, name.length()*2 + 2 );    
00724   writeU32( buffer + 0x74, 0xffffffff );
00725   writeU32( buffer + 0x78, 0 );
00726   writeU32( buffer + 0x44, 0xffffffff );
00727   writeU32( buffer + 0x48, 0xffffffff );
00728   writeU32( buffer + 0x4c, root->child );
00729   buffer[ 0x42 ] = 5;
00730   buffer[ 0x43 ] = 1; 
00731 
00732   for( unsigned i = 1; i < entryCount(); i++ )
00733   {
00734     DirEntry* e = entry( i );
00735     if( !e ) continue;
00736     if( e->dir )
00737     {
00738       e->start = 0xffffffff;
00739       e->size = 0;
00740     }
00741     
00742     // max length for name is 32 chars
00743     std::string name = e->name;
00744     if( name.length() > 32 )
00745       name.erase( 32, name.length() );
00746       
00747     // write name as Unicode 16-bit
00748     for( unsigned j = 0; j < name.length(); j++ )
00749       buffer[ i*128 + j*2 ] = name[j];
00750 
00751     writeU16( buffer + i*128 + 0x40, name.length()*2 + 2 );    
00752     writeU32( buffer + i*128 + 0x74, e->start );
00753     writeU32( buffer + i*128 + 0x78, e->size );
00754     writeU32( buffer + i*128 + 0x44, e->prev );
00755     writeU32( buffer + i*128 + 0x48, e->next );
00756     writeU32( buffer + i*128 + 0x4c, e->child );
00757     buffer[ i*128 + 0x42 ] = e->dir ? 1 : 2;
00758     buffer[ i*128 + 0x43 ] = 1; // always black
00759   }  
00760 }
00761 
00762 void DirTree::debug()
00763 {
00764   for( unsigned i = 0; i < entryCount(); i++ )
00765   {
00766     DirEntry* e = entry( i );
00767     if( !e ) continue;
00768     std::cout << i << ": ";
00769     if( !e->valid ) std::cout << "INVALID ";
00770     std::cout << e->name << " ";
00771     if( e->dir ) std::cout << "(Dir) ";
00772     else std::cout << "(File) ";
00773     std::cout << e->size << " ";
00774     std::cout << "s:" << e->start << " ";
00775     std::cout << "(";
00776     if( e->child == End ) std::cout << "-"; else std::cout << e->child;
00777     std::cout << " ";
00778     if( e->prev == End ) std::cout << "-"; else std::cout << e->prev;
00779     std::cout << ":";
00780     if( e->next == End ) std::cout << "-"; else std::cout << e->next;
00781     std::cout << ")";    
00782     std::cout << std::endl;
00783   }
00784 }
00785 
00786 // =========== StorageIO ==========
00787 
00788 StorageIO::StorageIO( Storage* st, const char* fname )
00789 {
00790   storage = st;
00791   filename = fname;
00792   result = Storage::Ok;
00793   opened = false;
00794   
00795   header = new Header();
00796   dirtree = new DirTree();
00797   bbat = new AllocTable();
00798   sbat = new AllocTable();
00799   
00800   filesize = 0;
00801   bbat->blockSize = 1 << header->b_shift;
00802   sbat->blockSize = 1 << header->s_shift;
00803 }
00804 
00805 StorageIO::~StorageIO()
00806 {
00807   if( opened ) close();
00808   delete sbat;
00809   delete bbat;
00810   delete dirtree;
00811   delete header;
00812 }
00813 
00814 bool StorageIO::open()
00815 {
00816   // already opened ? close first
00817   if( opened ) close();
00818   
00819   load();
00820   
00821   return result == Storage::Ok;
00822 }
00823 
00824 void StorageIO::load()
00825 {
00826   unsigned char* buffer = 0;
00827   unsigned long buflen = 0;
00828   std::vector<unsigned long> blocks;
00829   
00830   // open the file, check for error
00831   result = Storage::OpenFailed;
00832   file.open( filename.c_str(), std::ios::binary | std::ios::in );
00833   if( !file.good() ) return;
00834   
00835   // find size of input file
00836   file.seekg( 0, std::ios::end );
00837   filesize = file.tellg();
00838 
00839   // load header
00840   buffer = new unsigned char[512];
00841   file.seekg( 0 ); 
00842   file.read( (char*)buffer, 512 );
00843   header->load( buffer );
00844   delete[] buffer;
00845 
00846   // check OLE magic id
00847   result = Storage::NotOLE;
00848   for( unsigned i=0; i<8; i++ )
00849     if( header->id[i] != pole_magic[i] )
00850       return;
00851   
00852   // sanity checks
00853   result = Storage::BadOLE;
00854   if( !header->valid() ) return;
00855   if( header->threshold != 4096 ) return;
00856 
00857   // important block size
00858   bbat->blockSize = 1 << header->b_shift;
00859   sbat->blockSize = 1 << header->s_shift;
00860   
00861   // find blocks allocated to store big bat
00862   // the first 109 blocks are in header, the rest in meta bat
00863   blocks.clear();
00864   blocks.resize( header->num_bat );
00865   for( unsigned i = 0; i < 109; i++ )
00866     if( i >= header->num_bat ) break;
00867     else blocks[i] = header->bb_blocks[i];
00868   if( (header->num_bat > 109) && (header->num_mbat > 0) )
00869   {
00870     unsigned char* buffer2 = new unsigned char[ bbat->blockSize ];
00871     unsigned k = 109;
00872     for( unsigned r = 0; r < header->num_mbat; r++ )
00873     {
00874       loadBigBlock( header->mbat_start+r, buffer2, bbat->blockSize );
00875       for( unsigned s=0; s < bbat->blockSize; s+=4 )
00876       {
00877         if( k >= header->num_bat ) break;
00878         else  blocks[k++] = readU32( buffer2 + s );
00879       }  
00880      }    
00881     delete[] buffer2;
00882   }
00883 
00884   // load big bat
00885   buflen = blocks.size()*bbat->blockSize;
00886   if( buflen > 0 )
00887   {
00888     buffer = new unsigned char[ buflen ];  
00889     loadBigBlocks( blocks, buffer, buflen );
00890     bbat->load( buffer, buflen );
00891     delete[] buffer;
00892   }  
00893 
00894   // load small bat
00895   blocks.clear();
00896   blocks = bbat->follow( header->sbat_start );
00897   buflen = blocks.size()*bbat->blockSize;
00898   if( buflen > 0 )
00899   {
00900     buffer = new unsigned char[ buflen ];  
00901     loadBigBlocks( blocks, buffer, buflen );
00902     sbat->load( buffer, buflen );
00903     delete[] buffer;
00904   }  
00905   
00906   // load directory tree
00907   blocks.clear();
00908   blocks = bbat->follow( header->dirent_start );
00909   buflen = blocks.size()*bbat->blockSize;
00910   buffer = new unsigned char[ buflen ];  
00911   loadBigBlocks( blocks, buffer, buflen );
00912   dirtree->load( buffer, buflen );
00913   unsigned sb_start = readU32( buffer + 0x74 );
00914   delete[] buffer;
00915   
00916   // fetch block chain as data for small-files
00917   sb_blocks = bbat->follow( sb_start ); // small files
00918   
00919   // for troubleshooting, just enable this block
00920 #if 0
00921   header->debug();
00922   sbat->debug();
00923   bbat->debug();
00924   dirtree->debug();
00925 #endif
00926   
00927   // so far so good
00928   result = Storage::Ok;
00929   opened = true;
00930 }
00931 
00932 void StorageIO::create()
00933 {
00934   // std::cout << "Creating " << filename << std::endl; 
00935   
00936   file.open( filename.c_str(), std::ios::out|std::ios::binary );
00937   if( !file.good() )
00938   {
00939     std::cerr << "Can't create " << filename << std::endl;
00940     result = Storage::OpenFailed;
00941     return;
00942   }
00943   
00944   // so far so good
00945   opened = true;
00946   result = Storage::Ok;
00947 }
00948 
00949 void StorageIO::flush()
00950 {
00951   /* Note on Microsoft implementation:
00952      - directory entries are stored in the last block(s)
00953      - BATs are as second to the last
00954      - Meta BATs are third to the last  
00955   */
00956 }
00957 
00958 void StorageIO::close()
00959 {
00960   if( !opened ) return;
00961   
00962   file.close(); 
00963   opened = false;
00964   
00965   std::list<Stream*>::iterator it;
00966   for( it = streams.begin(); it != streams.end(); ++it )
00967     delete *it;
00968 }
00969 
00970 StreamIO* StorageIO::streamIO( const std::string& name )
00971 {
00972   // sanity check
00973   if( !name.length() ) return (StreamIO*)0;
00974 
00975   // search in the entries
00976   DirEntry* entry = dirtree->entry( name );
00977   //if( entry) std::cout << "FOUND\n";
00978   if( !entry ) return (StreamIO*)0;
00979   //if( !entry->dir ) std::cout << "  NOT DIR\n";
00980   if( entry->dir ) return (StreamIO*)0;
00981 
00982   StreamIO* result = new StreamIO( this, entry );
00983   result->fullName = name;
00984   
00985   return result;
00986 }
00987 
00988 unsigned long StorageIO::loadBigBlocks( std::vector<unsigned long> blocks,
00989   unsigned char* data, unsigned long maxlen )
00990 {
00991   // sentinel
00992   if( !data ) return 0;
00993   if( !file.good() ) return 0;
00994   if( blocks.size() < 1 ) return 0;
00995   if( maxlen == 0 ) return 0;
00996 
00997   // read block one by one, seems fast enough
00998   unsigned long bytes = 0;
00999   for( unsigned long i=0; (i < blocks.size() ) & ( bytes<maxlen ); i++ )
01000   {
01001     unsigned long block = blocks[i];
01002     unsigned long pos =  bbat->blockSize * ( block+1 );
01003     unsigned long p = (bbat->blockSize < maxlen-bytes) ? bbat->blockSize : maxlen-bytes;
01004     if( pos + p > filesize ) p = filesize - pos;
01005     file.seekg( pos );
01006     file.read( (char*)data + bytes, p );
01007     bytes += p;
01008   }
01009 
01010   return bytes;
01011 }
01012 
01013 unsigned long StorageIO::loadBigBlock( unsigned long block,
01014   unsigned char* data, unsigned long maxlen )
01015 {
01016   // sentinel
01017   if( !data ) return 0;
01018   if( !file.good() ) return 0;
01019   
01020   // wraps call for loadBigBlocks
01021   std::vector<unsigned long> blocks;
01022   blocks.resize( 1 );
01023   blocks[ 0 ] = block;
01024   
01025   return loadBigBlocks( blocks, data, maxlen );
01026 }
01027 
01028 // return number of bytes which has been read
01029 unsigned long StorageIO::loadSmallBlocks( std::vector<unsigned long> blocks,
01030   unsigned char* data, unsigned long maxlen )
01031 {
01032   // sentinel
01033   if( !data ) return 0;
01034   if( !file.good() ) return 0;
01035   if( blocks.size() < 1 ) return 0;
01036   if( maxlen == 0 ) return 0;
01037 
01038   // our own local buffer
01039   unsigned char* buf = new unsigned char[ bbat->blockSize ];
01040 
01041   // read small block one by one
01042   unsigned long bytes = 0;
01043   for( unsigned long i=0; ( i<blocks.size() ) & ( bytes<maxlen ); i++ )
01044   {
01045     unsigned long block = blocks[i];
01046 
01047     // find where the small-block exactly is
01048     unsigned long pos = block * sbat->blockSize;
01049     unsigned long bbindex = pos / bbat->blockSize;
01050     if( bbindex >= sb_blocks.size() ) break;
01051 
01052     loadBigBlock( sb_blocks[ bbindex ], buf, bbat->blockSize );
01053 
01054     // copy the data
01055     unsigned offset = pos % bbat->blockSize;
01056     unsigned long p = (maxlen-bytes < bbat->blockSize-offset ) ? maxlen-bytes :  bbat->blockSize-offset;
01057     p = (sbat->blockSize<p ) ? sbat->blockSize : p;
01058     memcpy( data + bytes, buf + offset, p );
01059     bytes += p;
01060   }
01061   
01062   delete[] buf;
01063 
01064   return bytes;
01065 }
01066 
01067 unsigned long StorageIO::loadSmallBlock( unsigned long block,
01068   unsigned char* data, unsigned long maxlen )
01069 {
01070   // sentinel
01071   if( !data ) return 0;
01072   if( !file.good() ) return 0;
01073 
01074   // wraps call for loadSmallBlocks
01075   std::vector<unsigned long> blocks;
01076   blocks.resize( 1 );
01077   blocks.assign( 1, block );
01078 
01079   return loadSmallBlocks( blocks, data, maxlen );
01080 }
01081 
01082 // =========== StreamIO ==========
01083 
01084 StreamIO::StreamIO( StorageIO* s, DirEntry* e)
01085 {
01086   io = s;
01087   entry = e;
01088   eof = false;
01089   fail = false;
01090   
01091   m_pos = 0;
01092 
01093   if( entry->size >= io->header->threshold ) 
01094     blocks = io->bbat->follow( entry->start );
01095   else
01096     blocks = io->sbat->follow( entry->start );
01097 
01098   // prepare cache
01099   cache_pos = 0;
01100   cache_size = 4096; // optimal ?
01101   cache_data = new unsigned char[cache_size];
01102   updateCache();
01103 }
01104 
01105 // FIXME tell parent we're gone
01106 StreamIO::~StreamIO()
01107 {
01108   delete[] cache_data;  
01109 }
01110 
01111 void StreamIO::seek( unsigned long pos )
01112 {
01113   m_pos = pos;
01114 }
01115 
01116 unsigned long StreamIO::tell()
01117 {
01118   return m_pos;
01119 }
01120 
01121 int StreamIO::getch()
01122 {
01123   // past end-of-file ?
01124   if( m_pos > entry->size ) return -1;
01125 
01126   // need to update cache ?
01127   if( !cache_size || ( m_pos < cache_pos ) ||
01128     ( m_pos >= cache_pos + cache_size ) )
01129       updateCache();
01130 
01131   // something bad if we don't get good cache
01132   if( !cache_size ) return -1;
01133 
01134   int data = cache_data[m_pos - cache_pos];
01135   m_pos++;
01136 
01137   return data;
01138 }
01139 
01140 unsigned long StreamIO::read( unsigned long pos, unsigned char* data, unsigned long maxlen )
01141 {
01142   // sanity checks
01143   if( !data ) return 0;
01144   if( maxlen == 0 ) return 0;
01145 
01146   unsigned long totalbytes = 0;
01147   
01148   if ( entry->size < io->header->threshold )
01149   {
01150     // small file
01151     unsigned long index = pos / io->sbat->blockSize;
01152 
01153     if( index >= blocks.size() ) return 0;
01154 
01155     unsigned char* buf = new unsigned char[ io->sbat->blockSize ];
01156     unsigned long offset = pos % io->sbat->blockSize;
01157     while( totalbytes < maxlen )
01158     {
01159       if( index >= blocks.size() ) break;
01160       io->loadSmallBlock( blocks[index], buf, io->bbat->blockSize );
01161       unsigned long count = io->sbat->blockSize - offset;
01162       if( count > maxlen-totalbytes ) count = maxlen-totalbytes;
01163       memcpy( data+totalbytes, buf + offset, count );
01164       totalbytes += count;
01165       offset = 0;
01166       index++;
01167     }
01168     delete[] buf;
01169 
01170   }
01171   else
01172   {
01173     // big file
01174     unsigned long index = pos / io->bbat->blockSize;
01175     
01176     if( index >= blocks.size() ) return 0;
01177     
01178     unsigned char* buf = new unsigned char[ io->bbat->blockSize ];
01179     unsigned long offset = pos % io->bbat->blockSize;
01180     while( totalbytes < maxlen )
01181     {
01182       if( index >= blocks.size() ) break;
01183       io->loadBigBlock( blocks[index], buf, io->bbat->blockSize );
01184       unsigned long count = io->bbat->blockSize - offset;
01185       if( count > maxlen-totalbytes ) count = maxlen-totalbytes;
01186       memcpy( data+totalbytes, buf + offset, count );
01187       totalbytes += count;
01188       index++;
01189       offset = 0;
01190     }
01191     delete [] buf;
01192 
01193   }
01194 
01195   return totalbytes;
01196 }
01197 
01198 unsigned long StreamIO::read( unsigned char* data, unsigned long maxlen )
01199 {
01200   unsigned long bytes = read( tell(), data, maxlen );
01201   m_pos += bytes;
01202   return bytes;
01203 }
01204 
01205 void StreamIO::updateCache()
01206 {
01207   // sanity check
01208   if( !cache_data ) return;
01209 
01210   cache_pos = m_pos - ( m_pos % cache_size );
01211   unsigned long bytes = cache_size;
01212   if( cache_pos + bytes > entry->size ) bytes = entry->size - cache_pos;
01213   cache_size = read( cache_pos, cache_data, bytes );
01214 }
01215 
01216 
01217 // =========== Storage ==========
01218 
01219 Storage::Storage( const char* filename )
01220 {
01221   io = new StorageIO( this, filename );
01222 }
01223 
01224 Storage::~Storage()
01225 {
01226   delete io;
01227 }
01228 
01229 int Storage::result()
01230 {
01231   return io->result;
01232 }
01233 
01234 bool Storage::open()
01235 {
01236   return io->open();
01237 }
01238 
01239 void Storage::close()
01240 {
01241   io->close();
01242 }
01243 
01244 std::list<std::string> Storage::entries( const std::string& path )
01245 {
01246   std::list<std::string> result;
01247   DirTree* dt = io->dirtree;
01248   DirEntry* e = dt->entry( path, false );
01249   if( e  && e->dir )
01250   {
01251     unsigned parent = dt->indexOf( e );
01252     std::vector<unsigned> children = dt->children( parent );
01253     for( unsigned i = 0; i < children.size(); i++ )
01254       result.push_back( dt->entry( children[i] )->name );
01255   }
01256   
01257   return result;
01258 }
01259 
01260 bool Storage::isDirectory( const std::string& name )
01261 {
01262   DirEntry* e = io->dirtree->entry( name, false );
01263   return e ? e->dir : false;
01264 }
01265 
01266 // =========== Stream ==========
01267 
01268 Stream::Stream( Storage* storage, const std::string& name )
01269 {
01270   io = storage->io->streamIO( name );
01271 }
01272 
01273 // FIXME tell parent we're gone
01274 Stream::~Stream()
01275 {
01276   delete io;
01277 }
01278 
01279 std::string Stream::fullName()
01280 {
01281   return io ? io->fullName : std::string();
01282 }
01283 
01284 unsigned long Stream::tell()
01285 {
01286   return io ? io->tell() : 0;
01287 }
01288 
01289 void Stream::seek( unsigned long newpos )
01290 {
01291   if( io ) io->seek( newpos );
01292 }
01293 
01294 unsigned long Stream::size()
01295 {
01296   return io ? io->entry->size : 0;
01297 }
01298 
01299 int Stream::getch()
01300 {
01301   return io ? io->getch() : 0;
01302 }
01303 
01304 unsigned long Stream::read( unsigned char* data, unsigned long maxlen )
01305 {
01306   return io ? io->read( data, maxlen ) : 0;
01307 }
01308 
01309 bool Stream::eof()
01310 {
01311   return io ? io->eof : false;
01312 }
01313 
01314 bool Stream::fail()
01315 {
01316   return io ? io->fail : true;
01317 }
KDE Home | KDE Accessibility Home | Description of Access Keys