filters

XRef.cc

00001 //========================================================================
00002 //
00003 // XRef.cc
00004 //
00005 // Copyright 1996-2002 Glyph & Cog, LLC
00006 //
00007 //========================================================================
00008 
00009 #include <aconf.h>
00010 
00011 #ifdef USE_GCC_PRAGMAS
00012 #pragma implementation
00013 #endif
00014 
00015 #include <limits.h>
00016 #include <stdlib.h>
00017 #include <stddef.h>
00018 #include <string.h>
00019 #include <ctype.h>
00020 #include "gmem.h"
00021 #include "Object.h"
00022 #include "Stream.h"
00023 #include "Lexer.h"
00024 #include "Parser.h"
00025 #include "Dict.h"
00026 #ifndef NO_DECRYPTION
00027 #include "Decrypt.h"
00028 #endif
00029 #include "Error.h"
00030 #include "ErrorCodes.h"
00031 #include "XRef.h"
00032 
00033 //------------------------------------------------------------------------
00034 
00035 #define xrefSearchSize 1024 // number of bytes read from the end of the
00036                 //   file when searching for the 'startxref' keyword
00037 
00038 #ifndef NO_DECRYPTION
00039 //------------------------------------------------------------------------
00040 // Permission bits
00041 //------------------------------------------------------------------------
00042 
00043 #define permPrint    (1<<2) // tested by okToPrint()
00044 #define permChange   (1<<3) // tested by okToChange()
00045 #define permCopy     (1<<4) // tested by okToCopy()
00046 #define permNotes    (1<<5) // tested by okToAddNotes()
00047 #define defPermFlags 0xfffc // initial permFlags value set by checkEncrypted()
00048 #endif
00049 
00050 //------------------------------------------------------------------------
00051 // XRef
00052 //------------------------------------------------------------------------
00053 
00054 XRef::XRef(BaseStream *strA, GString *ownerPassword, GString *userPassword) {
00055   Guint pos;
00056   int i;
00057 
00058   ok = gTrue;
00059   errCode = errNone;
00060   size = 0;
00061   entries = NULL;
00062   streamEnds = NULL;
00063   streamEndsLen = 0;
00064 
00065   // read the trailer
00066   str = strA;
00067   start = str->getStart();
00068   pos = readTrailer();
00069 
00070   // if there was a problem with the trailer,
00071   // try to reconstruct the xref table
00072   if (pos == 0) {
00073     if (!(ok = constructXRef())) {
00074       errCode = errDamaged;
00075       return;
00076     }
00077 
00078   // trailer is ok - read the xref table
00079   } else {
00080     if ((unsigned) size >= INT_MAX / sizeof(XRefEntry)) {
00081       error(-1, "Invalid 'size' inside xref table.");
00082       ok = gFalse;
00083       errCode = errDamaged;
00084       return;
00085     }
00086     entries = (XRefEntry *)gmalloc(size * sizeof(XRefEntry));
00087     for (i = 0; i < size; ++i) {
00088       entries[i].offset = 0xffffffff;
00089       entries[i].used = gFalse;
00090     }
00091     while (readXRef(&pos)) ;
00092 
00093     // if there was a problem with the xref table,
00094     // try to reconstruct it
00095     if (!ok) {
00096       gfree(entries);
00097       size = 0;
00098       entries = NULL;
00099       if (!(ok = constructXRef())) {
00100     errCode = errDamaged;
00101     return;
00102       }
00103     }
00104   }
00105 
00106   // now set the trailer dictionary's xref pointer so we can fetch
00107   // indirect objects from it
00108   trailerDict.getDict()->setXRef(this);
00109 
00110   // check for encryption
00111 #ifndef NO_DECRYPTION
00112   encrypted = gFalse;
00113 #endif
00114   if (checkEncrypted(ownerPassword, userPassword)) {
00115     ok = gFalse;
00116     errCode = errEncrypted;
00117     return;
00118   }
00119 }
00120 
00121 XRef::~XRef() {
00122   gfree(entries);
00123   trailerDict.free();
00124   if (streamEnds) {
00125     gfree(streamEnds);
00126   }
00127 }
00128 
00129 // Read startxref position, xref table size, and root.  Returns
00130 // first xref position.
00131 Guint XRef::readTrailer() {
00132   Parser *parser;
00133   Object obj;
00134   char buf[xrefSearchSize+1];
00135   int n;
00136   Guint pos, pos1;
00137   char *p;
00138   int c;
00139   int i;
00140 
00141   // read last xrefSearchSize bytes
00142   str->setPos(xrefSearchSize, -1);
00143   for (n = 0; n < xrefSearchSize; ++n) {
00144     if ((c = str->getChar()) == EOF)
00145       break;
00146     buf[n] = c;
00147   }
00148   buf[n] = '\0';
00149 
00150   // find startxref
00151   for (i = n - 9; i >= 0; --i) {
00152     if (!strncmp(&buf[i], "startxref", 9))
00153       break;
00154   }
00155   if (i < 0)
00156     return 0;
00157   for (p = &buf[i+9]; isspace(*p); ++p) ;
00158   pos = lastXRefPos = strToUnsigned(p);
00159 
00160   // find trailer dict by looking after first xref table
00161   // (NB: we can't just use the trailer dict at the end of the file --
00162   // this won't work for linearized files.)
00163   str->setPos(start + pos);
00164   for (i = 0; i < 4; ++i)
00165     buf[i] = str->getChar();
00166   if (strncmp(buf, "xref", 4))
00167     return 0;
00168   pos1 = pos + 4;
00169   while (1) {
00170     str->setPos(start + pos1);
00171     for (i = 0; i < 35; ++i) {
00172       if ((c = str->getChar()) == EOF)
00173     return 0;
00174       buf[i] = c;
00175     }
00176     if (!strncmp(buf, "trailer", 7))
00177       break;
00178     p = buf;
00179     while (isspace(*p)) ++p;
00180     while ('0' <= *p && *p <= '9') ++p;
00181     while (isspace(*p)) ++p;
00182     n = atoi(p);
00183     while ('0' <= *p && *p <= '9') ++p;
00184     while (isspace(*p)) ++p;
00185     if (p == buf)
00186       return 0;
00187     pos1 += (p - buf) + n * 20;
00188   }
00189   pos1 += 7;
00190 
00191   // read trailer dict
00192   obj.initNull();
00193   parser = new Parser(NULL,
00194          new Lexer(NULL,
00195            str->makeSubStream(start + pos1, gFalse, 0, &obj)));
00196   parser->getObj(&trailerDict);
00197   if (trailerDict.isDict()) {
00198     trailerDict.dictLookupNF("Size", &obj);
00199     if (obj.isInt())
00200       size = obj.getInt();
00201     else
00202       pos = 0;
00203     obj.free();
00204     trailerDict.dictLookupNF("Root", &obj);
00205     if (obj.isRef()) {
00206       rootNum = obj.getRefNum();
00207       rootGen = obj.getRefGen();
00208     } else {
00209       pos = 0;
00210     }
00211     obj.free();
00212   } else {
00213     pos = 0;
00214   }
00215   delete parser;
00216 
00217   // return first xref position
00218   return pos;
00219 }
00220 
00221 // Read an xref table and the prev pointer from the trailer.
00222 GBool XRef::readXRef(Guint *pos) {
00223   Parser *parser;
00224   Object obj, obj2;
00225   char s[20];
00226   GBool more;
00227   int first, newSize, n, i, j;
00228   int c;
00229 
00230   // seek to xref in stream
00231   str->setPos(start + *pos);
00232 
00233   // make sure it's an xref table
00234   while ((c = str->getChar()) != EOF && isspace(c)) ;
00235   s[0] = (char)c;
00236   s[1] = (char)str->getChar();
00237   s[2] = (char)str->getChar();
00238   s[3] = (char)str->getChar();
00239   if (!(s[0] == 'x' && s[1] == 'r' && s[2] == 'e' && s[3] == 'f')) {
00240     goto err2;
00241   }
00242 
00243   // read xref
00244   while (1) {
00245     while ((c = str->lookChar()) != EOF && isspace(c)) {
00246       str->getChar();
00247     }
00248     if (c == 't') {
00249       break;
00250     }
00251     for (i = 0; (c = str->getChar()) != EOF && isdigit(c) && i < 20; ++i) {
00252       s[i] = (char)c;
00253     }
00254     if (i == 0) {
00255       goto err2;
00256     }
00257     s[i] = '\0';
00258     first = atoi(s);
00259     while ((c = str->lookChar()) != EOF && isspace(c)) {
00260       str->getChar();
00261     }
00262     for (i = 0; (c = str->getChar()) != EOF && isdigit(c) && i < 20; ++i) {
00263       s[i] = (char)c;
00264     }
00265     if (i == 0) {
00266       goto err2;
00267     }
00268     s[i] = '\0';
00269     n = atoi(s);
00270     while ((c = str->lookChar()) != EOF && isspace(c)) {
00271       str->getChar();
00272     }
00273     // check for buggy PDF files with an incorrect (too small) xref
00274     // table size
00275     if (first + n > size) {
00276       newSize = size + 256;
00277       if ((unsigned) newSize >= INT_MAX / sizeof(XRefEntry)) {
00278         error(-1, "Invalid 'newSize'");
00279         goto err2;
00280       }
00281       entries = (XRefEntry *)grealloc(entries, newSize * sizeof(XRefEntry));
00282       for (i = size; i < newSize; ++i) {
00283     entries[i].offset = 0xffffffff;
00284     entries[i].used = gFalse;
00285       }
00286       size = newSize;
00287     }
00288     for (i = first; i < first + n; ++i) {
00289       for (j = 0; j < 20; ++j) {
00290     if ((c = str->getChar()) == EOF) {
00291       goto err2;
00292     }
00293     s[j] = (char)c;
00294       }
00295       if (entries[i].offset == 0xffffffff) {
00296     s[10] = '\0';
00297     entries[i].offset = strToUnsigned(s);
00298     s[16] = '\0';
00299     entries[i].gen = atoi(&s[11]);
00300     if (s[17] == 'n') {
00301       entries[i].used = gTrue;
00302     } else if (s[17] == 'f') {
00303       entries[i].used = gFalse;
00304     } else {
00305       goto err2;
00306     }
00307     // PDF files of patents from the IBM Intellectual Property
00308     // Network have a bug: the xref table claims to start at 1
00309     // instead of 0.
00310     if (i == 1 && first == 1 &&
00311         entries[1].offset == 0 && entries[1].gen == 65535 &&
00312         !entries[1].used) {
00313       i = first = 0;
00314       entries[0] = entries[1];
00315       entries[1].offset = 0xffffffff;
00316     }
00317       }
00318     }
00319   }
00320 
00321   // read prev pointer from trailer dictionary
00322   obj.initNull();
00323   parser = new Parser(NULL,
00324          new Lexer(NULL,
00325            str->makeSubStream(str->getPos(), gFalse, 0, &obj)));
00326   parser->getObj(&obj);
00327   if (!obj.isCmd("trailer")) {
00328     goto err1;
00329   }
00330   obj.free();
00331   parser->getObj(&obj);
00332   if (!obj.isDict()) {
00333     goto err1;
00334   }
00335   obj.getDict()->lookupNF("Prev", &obj2);
00336   if (obj2.isInt()) {
00337     *pos = (Guint)obj2.getInt();
00338     more = gTrue;
00339   } else {
00340     more = gFalse;
00341   }
00342   obj.free();
00343   obj2.free();
00344 
00345   delete parser;
00346   return more;
00347 
00348  err1:
00349   obj.free();
00350  err2:
00351   ok = gFalse;
00352   return gFalse;
00353 }
00354 
00355 // Attempt to construct an xref table for a damaged file.
00356 GBool XRef::constructXRef() {
00357   Parser *parser;
00358   Object obj;
00359   char buf[256];
00360   Guint pos;
00361   int num, gen;
00362   int newSize;
00363   int streamEndsSize;
00364   char *p;
00365   int i;
00366   GBool gotRoot;
00367 
00368   error(0, "PDF file is damaged - attempting to reconstruct xref table...");
00369   gotRoot = gFalse;
00370   streamEndsLen = streamEndsSize = 0;
00371 
00372   str->reset();
00373   while (1) {
00374     pos = str->getPos();
00375     if (!str->getLine(buf, 256)) {
00376       break;
00377     }
00378     p = buf;
00379 
00380     // got trailer dictionary
00381     if (!strncmp(p, "trailer", 7)) {
00382       obj.initNull();
00383       parser = new Parser(NULL,
00384          new Lexer(NULL,
00385            str->makeSubStream(start + pos + 7, gFalse, 0, &obj)));
00386       if (!trailerDict.isNone())
00387     trailerDict.free();
00388       parser->getObj(&trailerDict);
00389       if (trailerDict.isDict()) {
00390     trailerDict.dictLookupNF("Root", &obj);
00391     if (obj.isRef()) {
00392       rootNum = obj.getRefNum();
00393       rootGen = obj.getRefGen();
00394       gotRoot = gTrue;
00395     }
00396     obj.free();
00397       } else {
00398     pos = 0;
00399       }
00400       delete parser;
00401 
00402     // look for object
00403     } else if (isdigit(*p)) {
00404       num = atoi(p);
00405       do {
00406     ++p;
00407       } while (*p && isdigit(*p));
00408       if (isspace(*p)) {
00409     do {
00410       ++p;
00411     } while (*p && isspace(*p));
00412     if (isdigit(*p)) {
00413       gen = atoi(p);
00414       do {
00415         ++p;
00416       } while (*p && isdigit(*p));
00417       if (isspace(*p)) {
00418         do {
00419           ++p;
00420         } while (*p && isspace(*p));
00421         if (!strncmp(p, "obj", 3)) {
00422           if (num >= size) {
00423         newSize = (num + 1 + 255) & ~255;
00424             if ((unsigned) newSize >= INT_MAX / sizeof(XRefEntry)) {
00425               error(-1, "Invalid 'obj' parameters.");
00426               return gFalse;
00427             }
00428         entries = (XRefEntry *)
00429                     grealloc(entries, newSize * sizeof(XRefEntry));
00430         for (i = size; i < newSize; ++i) {
00431           entries[i].offset = 0xffffffff;
00432           entries[i].used = gFalse;
00433         }
00434         size = newSize;
00435           }
00436           if (!entries[num].used || gen >= entries[num].gen) {
00437         entries[num].offset = pos - start;
00438         entries[num].gen = gen;
00439         entries[num].used = gTrue;
00440           }
00441         }
00442       }
00443     }
00444       }
00445 
00446     } else if (!strncmp(p, "endstream", 9)) {
00447       if (streamEndsLen == streamEndsSize) {
00448     streamEndsSize += 64;
00449         if ((unsigned) streamEndsSize >= INT_MAX / sizeof(int)) {
00450           error(-1, "Invalid 'endstream' parameter.");
00451           return gFalse;
00452         }
00453 
00454     streamEnds = (Guint *)grealloc(streamEnds,
00455                        streamEndsSize * sizeof(int));
00456       }
00457       streamEnds[streamEndsLen++] = pos;
00458     }
00459   }
00460 
00461   if (gotRoot)
00462     return gTrue;
00463 
00464   error(-1, "Couldn't find trailer dictionary");
00465   return gFalse;
00466 }
00467 
00468 #ifndef NO_DECRYPTION
00469 GBool XRef::checkEncrypted(GString *ownerPassword, GString *userPassword) {
00470   Object encrypt, filterObj, versionObj, revisionObj, lengthObj;
00471   Object ownerKey, userKey, permissions, fileID, fileID1;
00472   GBool encrypted1;
00473   GBool ret;
00474 
00475   ret = gFalse;
00476 
00477   permFlags = defPermFlags;
00478   ownerPasswordOk = gFalse;
00479   trailerDict.dictLookup("Encrypt", &encrypt);
00480   if ((encrypted1 = encrypt.isDict())) {
00481     ret = gTrue;
00482     encrypt.dictLookup("Filter", &filterObj);
00483     if (filterObj.isName("Standard")) {
00484       encrypt.dictLookup("V", &versionObj);
00485       encrypt.dictLookup("R", &revisionObj);
00486       encrypt.dictLookup("Length", &lengthObj);
00487       encrypt.dictLookup("O", &ownerKey);
00488       encrypt.dictLookup("U", &userKey);
00489       encrypt.dictLookup("P", &permissions);
00490       trailerDict.dictLookup("ID", &fileID);
00491       if (versionObj.isInt() &&
00492       revisionObj.isInt() &&
00493       ownerKey.isString() && ownerKey.getString()->getLength() == 32 &&
00494       userKey.isString() && userKey.getString()->getLength() == 32 &&
00495       permissions.isInt() &&
00496       fileID.isArray()) {
00497     encVersion = versionObj.getInt();
00498     encRevision = revisionObj.getInt();
00499     if (lengthObj.isInt()) {
00500       keyLength = lengthObj.getInt() / 8;
00501     } else {
00502       keyLength = 5;
00503     }
00504     if (keyLength < 1) {
00505       keyLength = 1;
00506     }
00507     if (keyLength > 16) {
00508       keyLength = 16;
00509     }
00510     permFlags = permissions.getInt();
00511     if (encVersion >= 1 && encVersion <= 2 &&
00512         encRevision >= 2 && encRevision <= 3) {
00513       fileID.arrayGet(0, &fileID1);
00514       if (fileID1.isString()) {
00515         if (Decrypt::makeFileKey(encVersion, encRevision, keyLength,
00516                      ownerKey.getString(), userKey.getString(),
00517                      permFlags, fileID1.getString(),
00518                      ownerPassword, userPassword, fileKey,
00519                      &ownerPasswordOk)) {
00520           if (ownerPassword && !ownerPasswordOk) {
00521         error(-1, "Incorrect owner password");
00522           }
00523           ret = gFalse;
00524         } else {
00525           error(-1, "Incorrect password");
00526         }
00527       } else {
00528         error(-1, "Weird encryption info");
00529       }
00530       fileID1.free();
00531     } else {
00532       error(-1, "Unsupported version/revision (%d/%d) of Standard security handler",
00533         encVersion, encRevision);
00534     }
00535       } else {
00536     error(-1, "Weird encryption info");
00537       }
00538       fileID.free();
00539       permissions.free();
00540       userKey.free();
00541       ownerKey.free();
00542       lengthObj.free();
00543       revisionObj.free();
00544       versionObj.free();
00545     } else {
00546       error(-1, "Unknown security handler '%s'",
00547         filterObj.isName() ? filterObj.getName() : "???");
00548     }
00549     filterObj.free();
00550   }
00551   encrypt.free();
00552 
00553   // this flag has to be set *after* we read the O/U/P strings
00554   encrypted = encrypted1;
00555 
00556   return ret;
00557 }
00558 #else
00559 GBool XRef::checkEncrypted(GString *ownerPassword, GString *userPassword) {
00560   Object obj;
00561   GBool encrypted;
00562 
00563   trailerDict.dictLookup("Encrypt", &obj);
00564   if ((encrypted = !obj.isNull())) {
00565     error(-1, "PDF file is encrypted and this version of the Xpdf tools");
00566     error(-1, "was built without decryption support.");
00567   }
00568   obj.free();
00569   return encrypted;
00570 }
00571 #endif
00572 
00573 GBool XRef::okToPrint(GBool ignoreOwnerPW) {
00574 #ifndef NO_DECRYPTION
00575   if ((ignoreOwnerPW || !ownerPasswordOk) && !(permFlags & permPrint)) {
00576     return gFalse;
00577   }
00578 #endif
00579   return gTrue;
00580 }
00581 
00582 GBool XRef::okToChange(GBool ignoreOwnerPW) {
00583 #ifndef NO_DECRYPTION
00584   if ((ignoreOwnerPW || !ownerPasswordOk) && !(permFlags & permChange)) {
00585     return gFalse;
00586   }
00587 #endif
00588   return gTrue;
00589 }
00590 
00591 GBool XRef::okToCopy(GBool ignoreOwnerPW) {
00592 #ifndef NO_DECRYPTION
00593   if ((ignoreOwnerPW || !ownerPasswordOk) && !(permFlags & permCopy)) {
00594     return gFalse;
00595   }
00596 #endif
00597   return gTrue;
00598 }
00599 
00600 GBool XRef::okToAddNotes(GBool ignoreOwnerPW) {
00601 #ifndef NO_DECRYPTION
00602   if ((ignoreOwnerPW || !ownerPasswordOk) && !(permFlags & permNotes)) {
00603     return gFalse;
00604   }
00605 #endif
00606   return gTrue;
00607 }
00608 
00609 Object *XRef::fetch(int num, int gen, Object *obj) {
00610   XRefEntry *e;
00611   Parser *parser;
00612   Object obj1, obj2, obj3;
00613 
00614   // check for bogus ref - this can happen in corrupted PDF files
00615   if (num < 0 || num >= size) {
00616     obj->initNull();
00617     return obj;
00618   }
00619 
00620   e = &entries[num];
00621   if (e->gen == gen && e->offset != 0xffffffff) {
00622     obj1.initNull();
00623     parser = new Parser(this,
00624            new Lexer(this,
00625          str->makeSubStream(start + e->offset, gFalse, 0, &obj1)));
00626     parser->getObj(&obj1);
00627     parser->getObj(&obj2);
00628     parser->getObj(&obj3);
00629     if (obj1.isInt() && obj1.getInt() == num &&
00630     obj2.isInt() && obj2.getInt() == gen &&
00631     obj3.isCmd("obj")) {
00632 #ifndef NO_DECRYPTION
00633       parser->getObj(obj, encrypted ? fileKey : (Guchar *)NULL, keyLength,
00634              num, gen);
00635 #else
00636       parser->getObj(obj);
00637 #endif
00638     } else {
00639       obj->initNull();
00640     }
00641     obj1.free();
00642     obj2.free();
00643     obj3.free();
00644     delete parser;
00645   } else {
00646     obj->initNull();
00647   }
00648   return obj;
00649 }
00650 
00651 Object *XRef::getDocInfo(Object *obj) {
00652   return trailerDict.dictLookup("Info", obj);
00653 }
00654 
00655 // Added for the pdftex project.
00656 Object *XRef::getDocInfoNF(Object *obj) {
00657   return trailerDict.dictLookupNF("Info", obj);
00658 }
00659 
00660 GBool XRef::getStreamEnd(Guint streamStart, Guint *streamEnd) {
00661   int a, b, m;
00662 
00663   if (streamEndsLen == 0 ||
00664       streamStart > streamEnds[streamEndsLen - 1]) {
00665     return gFalse;
00666   }
00667 
00668   a = -1;
00669   b = streamEndsLen - 1;
00670   // invariant: streamEnds[a] < streamStart <= streamEnds[b]
00671   while (b - a > 1) {
00672     m = (a + b) / 2;
00673     if (streamStart <= streamEnds[m]) {
00674       b = m;
00675     } else {
00676       a = m;
00677     }
00678   }
00679   *streamEnd = streamEnds[b];
00680   return gTrue;
00681 }
00682 
00683 Guint XRef::strToUnsigned(char *s) {
00684   Guint x;
00685   char *p;
00686   int i;
00687 
00688   x = 0;
00689   for (p = s, i = 0; *p && isdigit(*p) && i < 10; ++p, ++i) {
00690     x = 10 * x + (*p - '0');
00691   }
00692   return x;
00693 }
KDE Home | KDE Accessibility Home | Description of Access Keys