libzypp 8.13.6

MediaBlockList.cc

Go to the documentation of this file.
00001 /*---------------------------------------------------------------------\
00002 |                          ____ _   __ __ ___                          |
00003 |                         |__  / \ / / . \ . \                         |
00004 |                           / / \ V /|  _/  _/                         |
00005 |                          / /__ | | | | | |                           |
00006 |                         /_____||_| |_| |_|                           |
00007 |                                                                      |
00008 \---------------------------------------------------------------------*/
00013 #include <sys/types.h>
00014 #include <stdio.h>
00015 #include <stdlib.h>
00016 #include <string.h>
00017 #include <expat.h>
00018 
00019 #include <vector>
00020 #include <iostream>
00021 #include <fstream>
00022 
00023 #include "zypp/media/MediaBlockList.h"
00024 #include "zypp/base/Logger.h"
00025 #include "zypp/base/String.h"
00026 
00027 using namespace std;
00028 using namespace zypp::base;
00029 
00030 namespace zypp {
00031   namespace media {
00032 
00033 MediaBlockList::MediaBlockList(off_t size)
00034 {
00035   filesize = size;
00036   haveblocks = false;
00037   chksumlen = 0;
00038   chksumpad = 0;
00039   rsumlen = 0;
00040   rsumpad = 0;
00041 }
00042 
00043 size_t
00044 MediaBlockList::addBlock(off_t off, size_t size)
00045 {
00046   haveblocks = true;
00047   blocks.push_back(MediaBlock());
00048   blocks.back().off = off;
00049   blocks.back().size = size;
00050   return blocks.size() - 1;
00051 }
00052 
00053 void
00054 MediaBlockList::setFileChecksum(std::string ctype, int cl, unsigned char *c)
00055 {
00056   if (!cl)
00057     return;
00058   fsumtype = ctype;
00059   fsum.resize(cl);
00060   memcpy(&fsum[0], c, cl);
00061 }
00062 
00063 bool
00064 MediaBlockList::createFileDigest(Digest &digest) const
00065 {
00066   return digest.create(fsumtype);
00067 }
00068 
00069 bool
00070 MediaBlockList::verifyFileDigest(Digest &digest) const
00071 {
00072   if (!haveFileChecksum())
00073     return true;
00074   vector<unsigned char>dig = digest.digestVector();
00075   if (dig.empty() || dig.size() < fsum.size())
00076     return false;
00077   return memcmp(&dig[0], &fsum[0], fsum.size()) ? false : true;
00078 }
00079 
00080 void
00081 MediaBlockList::setChecksum(size_t blkno, std::string cstype, int csl, unsigned char *cs, size_t cspad)
00082 {
00083   if (!csl)
00084     return;
00085   if (!chksumlen)
00086     {
00087       if (blkno)
00088         return;
00089       chksumlen = csl;
00090       chksumtype = cstype;
00091       chksumpad = cspad;
00092     }
00093   if (csl != chksumlen || cstype != chksumtype || cspad != chksumpad || blkno != chksums.size() / chksumlen)
00094     return;
00095   chksums.resize(chksums.size() + csl);
00096   memcpy(&chksums[csl * blkno], cs, csl);
00097 }
00098 
00099 void
00100 MediaBlockList::setRsum(size_t blkno, int rsl, unsigned int rs, size_t rspad)
00101 {
00102   if (!rsl)
00103     return;
00104   if (!rsumlen)
00105     {
00106       if (blkno)
00107         return;
00108       rsumlen = rsl;
00109       rsumpad = rspad;
00110     }
00111   if (rsl != rsumlen || rspad != rsumpad || blkno != rsums.size())
00112     return;
00113   rsums.push_back(rs);
00114 }
00115 
00116 bool
00117 MediaBlockList::createDigest(Digest &digest) const
00118 {
00119   return digest.create(chksumtype);
00120 }
00121 
00122 bool
00123 MediaBlockList::verifyDigest(size_t blkno, Digest &digest) const
00124 {
00125   if (!haveChecksum(blkno))
00126     return true;
00127   size_t size = blocks[blkno].size;
00128   if (!size)
00129     return true;
00130   if (chksumpad > size)
00131     {
00132       char pad[chksumpad - size];
00133       memset(pad, 0, chksumpad - size);
00134       digest.update(pad, chksumpad - size);
00135     }
00136   vector<unsigned char>dig = digest.digestVector();
00137   if (dig.empty() || dig.size() < size_t(chksumlen))
00138     return false;
00139   return memcmp(&dig[0], &chksums[chksumlen * blkno], chksumlen) ? false : true;
00140 }
00141 
00142 unsigned int
00143 MediaBlockList::updateRsum(unsigned int rs, const char* bytes, size_t len) const
00144 {
00145   if (!len)
00146     return rs;
00147   unsigned short s, m;
00148   s = (rs >> 16) & 65535;
00149   m = rs & 65535;
00150   for (; len > 0 ; len--)
00151     {
00152       unsigned short c = (unsigned char)*bytes++;
00153       s += c;
00154       m += s;
00155     }
00156   return (s & 65535) << 16 | (m & 65535);
00157 }
00158 
00159 bool
00160 MediaBlockList::verifyRsum(size_t blkno, unsigned int rs) const
00161 {
00162   if (!haveRsum(blkno))
00163     return true;
00164   size_t size = blocks[blkno].size;
00165   if (!size)
00166     return true;
00167   if (rsumpad > size)
00168     {
00169       unsigned short s, m;
00170       s = (rs >> 16) & 65535;
00171       m = rs & 65535;
00172       m += s * (rsumpad - size);
00173       rs = (s & 65535) << 16 | (m & 65535);
00174     }
00175   switch(rsumlen)
00176     {
00177     case 3:
00178       rs &= 0xffffff;
00179     case 2:
00180       rs &= 0xffff;
00181     case 1:
00182       rs &= 0xff;
00183     default:
00184       break;
00185     }
00186   return rs == rsums[blkno];
00187 }
00188 
00189 bool
00190 MediaBlockList::checkRsum(size_t blkno, const unsigned char *buf, size_t bufl) const
00191 {
00192   if (blkno >= blocks.size() || bufl < blocks[blkno].size)
00193     return false;
00194   unsigned int rs = updateRsum(0, (const char *)buf, blocks[blkno].size);
00195   return verifyRsum(blkno, rs);
00196 }
00197 
00198 bool
00199 MediaBlockList::checkChecksum(size_t blkno, const unsigned char *buf, size_t bufl) const
00200 {
00201   if (blkno >= blocks.size() || bufl < blocks[blkno].size)
00202     return false;
00203   Digest dig;
00204   if (!createDigest(dig))
00205     return false;
00206   dig.update((const char *)buf, blocks[blkno].size);
00207   return verifyDigest(blkno, dig);
00208 }
00209 
00210 // specialized version of checkChecksum that can deal with a "rotated" buffer
00211 bool
00212 MediaBlockList::checkChecksumRotated(size_t blkno, const unsigned char *buf, size_t bufl, size_t start) const
00213 {
00214   if (blkno >= blocks.size() || bufl < blocks[blkno].size)
00215     return false;
00216   if (start == bufl)
00217     start = 0;
00218   Digest dig;
00219   if (!createDigest(dig))
00220     return false;
00221   size_t size = blocks[blkno].size;
00222   size_t len = bufl - start > size ? size : bufl - start;
00223   dig.update((const char *)buf + start, len);
00224   if (size > len)
00225     dig.update((const char *)buf, size - len);
00226   return verifyDigest(blkno, dig);
00227 }
00228 
00229 // write block to the file. can also deal with "rotated" buffers
00230 void
00231 MediaBlockList::writeBlock(size_t blkno, FILE *fp, const unsigned char *buf, size_t bufl, size_t start, vector<bool> &found) const
00232 {
00233   if (blkno >= blocks.size() || bufl < blocks[blkno].size)
00234     return;
00235   off_t off = blocks[blkno].off;
00236   size_t size = blocks[blkno].size;
00237   if (fseeko(fp, off, SEEK_SET))
00238     return;
00239   if (start == bufl)
00240     start = 0;
00241   size_t len = bufl - start > size ? size : bufl - start;
00242   if (fwrite(buf + start, len, 1, fp) != 1)
00243     return;
00244   if (size > len && fwrite(buf, size - len, 1, fp) != 1)
00245     return;
00246   found[blkno] = true;
00247   found[blocks.size()] = true;
00248 }
00249 
// Fill the buffer `bp` with the next `blksize` bytes of input.
//
// The first `pushback` bytes are taken from `pushbackp` (which may be
// `bp` itself, in which case they are already in place), the remainder
// is read from `fp`.  If the file ends early the rest of the buffer is
// zero filled.
//
// Returns the number of valid bytes in the buffer, i.e. the pushed back
// bytes plus the bytes actually read.
static size_t
fetchnext(FILE *fp, unsigned char *bp, size_t blksize, size_t pushback, unsigned char *pushbackp)
{
  size_t filled = 0;

  if (pushback)
    {
      if (pushbackp != bp)
        memmove(bp, pushbackp, pushback);
      filled = pushback;
    }
  // read the remainder from the file; fread stops at end of file or error
  if (filled < blksize)
    filled += fread(bp + filled, 1, blksize - filled, fp);
  // zero whatever could not be filled
  if (filled < blksize)
    memset(bp + filled, 0, blksize - filled);
  return filled;
}
00275 
00276 
00277 void
00278 MediaBlockList::reuseBlocks(FILE *wfp, string filename)
00279 {
00280   FILE *fp;
00281 
00282   if (!chksumlen || (fp = fopen(filename.c_str(), "r")) == 0)
00283     return;
00284   size_t nblks = blocks.size();
00285   vector<bool> found;
00286   found.resize(nblks + 1);
00287   if (rsumlen && !rsums.empty())
00288     {
00289       size_t blksize = blocks[0].size;
00290       if (nblks == 1 && rsumpad && rsumpad > blksize)
00291         blksize = rsumpad;
00292       // create hash of checksums
00293       unsigned int hm = rsums.size() * 2;
00294       while (hm & (hm - 1))
00295         hm &= hm - 1;
00296       hm = hm * 2 - 1;
00297       if (hm < 16383)
00298         hm = 16383;
00299       unsigned int *ht = new unsigned int[hm + 1];
00300       memset(ht, 0, (hm + 1) * sizeof(unsigned int));
00301       for (unsigned int i = 0; i < rsums.size(); i++)
00302         {
00303           if (blocks[i].size != blksize && (i != nblks - 1 || rsumpad != blksize))
00304             continue;
00305           unsigned int r = rsums[i];
00306           unsigned int h = r & hm;
00307           unsigned int hh = 7;
00308           while (ht[h])
00309             h = (h + hh++) & hm;
00310           ht[h] = i + 1;
00311         }
00312 
00313       unsigned char *buf = new unsigned char[blksize];
00314       unsigned char *buf2 = new unsigned char[blksize];
00315       size_t pushback = 0;
00316       unsigned char *pushbackp = 0;
00317       int bshift = 0;
00318       if ((blksize & (blksize - 1)) == 0)
00319         for (bshift = 0; size_t(1 << bshift) != blksize; bshift++)
00320           ;
00321       unsigned short a, b;
00322       a = b = 0;
00323       memset(buf, 0, blksize);
00324       bool eof = 0;
00325       bool init = 1;
00326       int sql = nblks > 1 && chksumlen < 16 ? 2 : 1;
00327       while (!eof)
00328         {
00329           for (size_t i = 0; i < blksize; i++)
00330             {
00331               int c;
00332               if (eof)
00333                 c = 0;
00334               else
00335                 {
00336                    if (pushback)
00337                     {
00338                       c = *pushbackp++;
00339                       pushback--;
00340                     }
00341                   else
00342                     c = getc(fp);
00343                   if (c == EOF)
00344                     {
00345                       eof = true;
00346                       c = 0;
00347                       if (!i || sql == 2)
00348                         break;
00349                     }
00350                 }
00351               int oc = buf[i];
00352               buf[i] = c;
00353               a += c - oc; 
00354               if (bshift)
00355                 b += a - (oc << bshift);
00356               else
00357                 b += a - oc * blksize;
00358               if (init)
00359                 {
00360                   if (size_t(i) != blksize - 1)
00361                     continue;
00362                   init = 0;
00363                 }
00364               unsigned int r;
00365               if (rsumlen == 1)
00366                 r = ((unsigned int)b & 255);
00367               else if (rsumlen == 2)
00368                 r = ((unsigned int)b & 65535);
00369               else if (rsumlen == 3)
00370                 r = ((unsigned int)a & 255) << 16 | ((unsigned int)b & 65535);
00371               else
00372                 r = ((unsigned int)a & 65535) << 16 | ((unsigned int)b & 65535);
00373               unsigned int h = r & hm; 
00374               unsigned int hh = 7;
00375               for (; ht[h]; h = (h + hh++) & hm)
00376                 {
00377                   size_t blkno = ht[h] - 1;
00378                   if (rsums[blkno] != r)
00379                     continue;
00380                   if (found[blkno])
00381                     continue;
00382                   if (sql == 2)
00383                     {
00384                       if (eof || blkno + 1 >= nblks)
00385                         continue;
00386                       pushback = fetchnext(fp, buf2, blksize, pushback, pushbackp);
00387                       pushbackp = buf2;
00388                       if (!pushback)
00389                         continue;
00390                       if (!checkRsum(blkno + 1, buf2, blksize))
00391                         continue;
00392                     }
00393                   if (!checkChecksumRotated(blkno, buf, blksize, i + 1))
00394                     continue;
00395                   if (sql == 2 && !checkChecksum(blkno + 1, buf2, blksize))
00396                     continue;
00397                   writeBlock(blkno, wfp, buf, blksize, i + 1, found);
00398                   if (sql == 2)
00399                     {
00400                       writeBlock(blkno + 1, wfp, buf2, blksize, 0, found);
00401                       pushback = 0;
00402                       blkno++;
00403                     }
00404                   while (!eof)
00405                     {
00406                       blkno++;
00407                       pushback = fetchnext(fp, buf2, blksize, pushback, pushbackp);
00408                       pushbackp = buf2;
00409                       if (!pushback)
00410                         break;
00411                       if (!checkRsum(blkno, buf2, blksize))
00412                         break;
00413                       if (!checkChecksum(blkno, buf2, blksize))
00414                         break;
00415                       writeBlock(blkno, wfp, buf2, blksize, 0, found);
00416                       pushback = 0;
00417                     }
00418                   init = false;
00419                   memset(buf, 0, blksize);
00420                   a = b = 0;
00421                   i = size_t(-1);       // start with 0 on next iteration
00422                   break;
00423                 }
00424             }
00425         }
00426       delete[] buf2;
00427       delete[] buf;
00428       delete[] ht;
00429     }
00430   else if (chksumlen >= 16)
00431     {
00432       // dummy variant, just check the checksums
00433       size_t bufl = 4096;
00434       off_t off = 0;
00435       unsigned char *buf = new unsigned char[bufl];
00436       for (size_t blkno = 0; blkno < blocks.size(); ++blkno)
00437         {
00438           if (off > blocks[blkno].off)
00439             continue;
00440           size_t blksize = blocks[blkno].size;
00441           if (blksize > bufl)
00442             {
00443               delete[] buf;
00444               bufl = blksize;
00445               buf = new unsigned char[bufl];
00446             }
00447           size_t skip = blocks[blkno].off - off;
00448           while (skip)
00449             {
00450               size_t l = skip > bufl ? bufl : skip;
00451               if (fread(buf, l, 1, fp) != 1)
00452                 break;
00453               skip -= l;
00454               off += l;
00455             }
00456           if (fread(buf, blksize, 1, fp) != 1)
00457             break;
00458           if (checkChecksum(blkno, buf, blksize))
00459             writeBlock(blkno, wfp, buf, blksize, 0, found);
00460           off += blksize;
00461         }
00462     }
00463   if (!found[nblks])
00464     return;
00465   // now throw out all of the blocks we found
00466   std::vector<MediaBlock> nblocks;
00467   std::vector<unsigned char> nchksums;
00468   std::vector<unsigned int> nrsums;
00469   
00470   for (size_t blkno = 0; blkno < blocks.size(); ++blkno)
00471     {
00472       if (!found[blkno])
00473         {
00474           // still need it
00475           nblocks.push_back(blocks[blkno]);
00476           if (chksumlen && (blkno + 1) * chksumlen <= chksums.size())
00477             {
00478               nchksums.resize(nblocks.size() * chksumlen);
00479               memcpy(&nchksums[(nblocks.size() - 1) * chksumlen], &chksums[blkno * chksumlen], chksumlen);
00480             }
00481           if (rsumlen && (blkno + 1) <= rsums.size())
00482             nrsums.push_back(rsums[blkno]);
00483         }
00484     }
00485   blocks = nblocks;
00486   chksums = nchksums;
00487   rsums = nrsums;
00488 }
00489 
00490 std::string
00491 MediaBlockList::asString() const
00492 {
00493   std::string s;
00494   size_t i, j;
00495 
00496   if (filesize != off_t(-1))
00497     {
00498       long long size = filesize;
00499       s = zypp::str::form("[ BlockList, file size %lld\n", size);
00500     }
00501   else
00502     s = "[ BlockList, filesize unknown\n";
00503   if (!haveblocks)
00504     s += "  No block information\n";
00505   if (chksumpad)
00506     s += zypp::str::form("  Checksum pad %zd\n", chksumpad);
00507   if (rsumpad)
00508     s += zypp::str::form("  Rsum pad %zd\n", rsumpad);
00509   for (i = 0; i < blocks.size(); ++i)
00510     {
00511       long long off=blocks[i].off;
00512       long long size=blocks[i].size;
00513       s += zypp::str::form("  (%8lld, %8lld)", off, size);
00514       if (chksumlen && chksums.size() >= (i + 1) * chksumlen)
00515         {
00516           s += "  " + chksumtype + ":";
00517           for (j = 0; j < size_t(chksumlen); j++)
00518             s += zypp::str::form("%02hhx", chksums[i * chksumlen + j]);
00519         }
00520       if (rsumlen && rsums.size() > i)
00521         {
00522           s += "  RSUM:";
00523           s += zypp::str::form("%0*x", 2 * rsumlen, rsums[i]);
00524         }
00525       s += "\n";
00526     }
00527   s += "]";
00528   return s;
00529 }
00530 
00531   } // namespace media
00532 } // namespace zypp
00533