libzypp  10.5.0
MediaBlockList.cc
Go to the documentation of this file.
00001 /*---------------------------------------------------------------------\
00002 |                          ____ _   __ __ ___                          |
00003 |                         |__  / \ / / . \ . \                         |
00004 |                           / / \ V /|  _/  _/                         |
00005 |                          / /__ | | | | | |                           |
00006 |                         /_____||_| |_| |_|                           |
00007 |                                                                      |
00008 \---------------------------------------------------------------------*/
00013 #include <sys/types.h>
00014 #include <stdio.h>
00015 #include <stdlib.h>
00016 #include <string.h>
00017 #include <expat.h>
00018 
00019 #include <vector>
00020 #include <iostream>
00021 #include <fstream>
00022 
00023 #include "zypp/media/MediaBlockList.h"
00024 #include "zypp/base/Logger.h"
00025 #include "zypp/base/String.h"
00026 
00027 using namespace std;
00028 using namespace zypp::base;
00029 
00030 namespace zypp {
00031   namespace media {
00032 
00033 MediaBlockList::MediaBlockList(off_t size)
00034 {
00035   filesize = size;
00036   haveblocks = false;
00037   chksumlen = 0;
00038   chksumpad = 0;
00039   rsumlen = 0;
00040   rsumpad = 0;
00041 }
00042 
00043 size_t
00044 MediaBlockList::addBlock(off_t off, size_t size)
00045 {
00046   haveblocks = true;
00047   blocks.push_back(MediaBlock( off, size ));
00048   return blocks.size() - 1;
00049 }
00050 
00051 void
00052 MediaBlockList::setFileChecksum(std::string ctype, int cl, unsigned char *c)
00053 {
00054   if (!cl)
00055     return;
00056   fsumtype = ctype;
00057   fsum.resize(cl);
00058   memcpy(&fsum[0], c, cl);
00059 }
00060 
00061 bool
00062 MediaBlockList::createFileDigest(Digest &digest) const
00063 {
00064   return digest.create(fsumtype);
00065 }
00066 
00067 bool
00068 MediaBlockList::verifyFileDigest(Digest &digest) const
00069 {
00070   if (!haveFileChecksum())
00071     return true;
00072   vector<unsigned char>dig = digest.digestVector();
00073   if (dig.empty() || dig.size() < fsum.size())
00074     return false;
00075   return memcmp(&dig[0], &fsum[0], fsum.size()) ? false : true;
00076 }
00077 
00078 void
00079 MediaBlockList::setChecksum(size_t blkno, std::string cstype, int csl, unsigned char *cs, size_t cspad)
00080 {
00081   if (!csl)
00082     return;
00083   if (!chksumlen)
00084     {
00085       if (blkno)
00086         return;
00087       chksumlen = csl;
00088       chksumtype = cstype;
00089       chksumpad = cspad;
00090     }
00091   if (csl != chksumlen || cstype != chksumtype || cspad != chksumpad || blkno != chksums.size() / chksumlen)
00092     return;
00093   chksums.resize(chksums.size() + csl);
00094   memcpy(&chksums[csl * blkno], cs, csl);
00095 }
00096 
00097 void
00098 MediaBlockList::setRsum(size_t blkno, int rsl, unsigned int rs, size_t rspad)
00099 {
00100   if (!rsl)
00101     return;
00102   if (!rsumlen)
00103     {
00104       if (blkno)
00105         return;
00106       rsumlen = rsl;
00107       rsumpad = rspad;
00108     }
00109   if (rsl != rsumlen || rspad != rsumpad || blkno != rsums.size())
00110     return;
00111   rsums.push_back(rs);
00112 }
00113 
00114 bool
00115 MediaBlockList::createDigest(Digest &digest) const
00116 {
00117   return digest.create(chksumtype);
00118 }
00119 
00120 bool
00121 MediaBlockList::verifyDigest(size_t blkno, Digest &digest) const
00122 {
00123   if (!haveChecksum(blkno))
00124     return true;
00125   size_t size = blocks[blkno].size;
00126   if (!size)
00127     return true;
00128   if (chksumpad > size)
00129     {
00130       char pad[chksumpad - size];
00131       memset(pad, 0, chksumpad - size);
00132       digest.update(pad, chksumpad - size);
00133     }
00134   vector<unsigned char>dig = digest.digestVector();
00135   if (dig.empty() || dig.size() < size_t(chksumlen))
00136     return false;
00137   return memcmp(&dig[0], &chksums[chksumlen * blkno], chksumlen) ? false : true;
00138 }
00139 
00140 unsigned int
00141 MediaBlockList::updateRsum(unsigned int rs, const char* bytes, size_t len) const
00142 {
00143   if (!len)
00144     return rs;
00145   unsigned short s, m;
00146   s = (rs >> 16) & 65535;
00147   m = rs & 65535;
00148   for (; len > 0 ; len--)
00149     {
00150       unsigned short c = (unsigned char)*bytes++;
00151       s += c;
00152       m += s;
00153     }
00154   return (s & 65535) << 16 | (m & 65535);
00155 }
00156 
00157 bool
00158 MediaBlockList::verifyRsum(size_t blkno, unsigned int rs) const
00159 {
00160   if (!haveRsum(blkno))
00161     return true;
00162   size_t size = blocks[blkno].size;
00163   if (!size)
00164     return true;
00165   if (rsumpad > size)
00166     {
00167       unsigned short s, m;
00168       s = (rs >> 16) & 65535;
00169       m = rs & 65535;
00170       m += s * (rsumpad - size);
00171       rs = (s & 65535) << 16 | (m & 65535);
00172     }
00173   switch(rsumlen)
00174     {
00175     case 3:
00176       rs &= 0xffffff;
00177     case 2:
00178       rs &= 0xffff;
00179     case 1:
00180       rs &= 0xff;
00181     default:
00182       break;
00183     }
00184   return rs == rsums[blkno];
00185 }
00186 
00187 bool
00188 MediaBlockList::checkRsum(size_t blkno, const unsigned char *buf, size_t bufl) const
00189 {
00190   if (blkno >= blocks.size() || bufl < blocks[blkno].size)
00191     return false;
00192   unsigned int rs = updateRsum(0, (const char *)buf, blocks[blkno].size);
00193   return verifyRsum(blkno, rs);
00194 }
00195 
00196 bool
00197 MediaBlockList::checkChecksum(size_t blkno, const unsigned char *buf, size_t bufl) const
00198 {
00199   if (blkno >= blocks.size() || bufl < blocks[blkno].size)
00200     return false;
00201   Digest dig;
00202   if (!createDigest(dig))
00203     return false;
00204   dig.update((const char *)buf, blocks[blkno].size);
00205   return verifyDigest(blkno, dig);
00206 }
00207 
00208 // specialized version of checkChecksum that can deal with a "rotated" buffer
00209 bool
00210 MediaBlockList::checkChecksumRotated(size_t blkno, const unsigned char *buf, size_t bufl, size_t start) const
00211 {
00212   if (blkno >= blocks.size() || bufl < blocks[blkno].size)
00213     return false;
00214   if (start == bufl)
00215     start = 0;
00216   Digest dig;
00217   if (!createDigest(dig))
00218     return false;
00219   size_t size = blocks[blkno].size;
00220   size_t len = bufl - start > size ? size : bufl - start;
00221   dig.update((const char *)buf + start, len);
00222   if (size > len)
00223     dig.update((const char *)buf, size - len);
00224   return verifyDigest(blkno, dig);
00225 }
00226 
00227 // write block to the file. can also deal with "rotated" buffers
00228 void
00229 MediaBlockList::writeBlock(size_t blkno, FILE *fp, const unsigned char *buf, size_t bufl, size_t start, vector<bool> &found) const
00230 {
00231   if (blkno >= blocks.size() || bufl < blocks[blkno].size)
00232     return;
00233   off_t off = blocks[blkno].off;
00234   size_t size = blocks[blkno].size;
00235   if (fseeko(fp, off, SEEK_SET))
00236     return;
00237   if (start == bufl)
00238     start = 0;
00239   size_t len = bufl - start > size ? size : bufl - start;
00240   if (fwrite(buf + start, len, 1, fp) != 1)
00241     return;
00242   if (size > len && fwrite(buf, size - len, 1, fp) != 1)
00243     return;
00244   found[blkno] = true;
00245   found[blocks.size()] = true;
00246 }
00247 
/**
 * Fill \a bp with the next \a blksize bytes of the input.
 *
 * The first \a pushback bytes are taken from \a pushbackp (which may
 * point into \a bp itself), the rest is read from \a fp. If the stream
 * ends early, the remainder of \a bp is filled with zeros.
 *
 * \param fp         stream to read from
 * \param bp         destination buffer of at least \a blksize bytes
 * \param blksize    number of bytes wanted
 * \param pushback   number of already-read bytes to reuse; values above
 *                   \a blksize are cut down to \a blksize
 * \param pushbackp  location of the \a pushback bytes (unused if 0 bytes)
 * \return number of real (non-padding) bytes now in \a bp
 */
static size_t
fetchnext(FILE *fp, unsigned char *bp, size_t blksize, size_t pushback, unsigned char *pushbackp)
{
  // more pushback than fits would underflow 'missing' and overrun bp
  if (pushback > blksize)
    pushback = blksize;

  size_t missing = blksize;
  if (pushback)
    {
      if (pushbackp != bp)
        memmove(bp, pushbackp, pushback);
      bp += pushback;
      missing -= pushback;
    }
  if (missing)
    {
      // one bulk read; a short count means EOF or a read error
      const size_t got = fread(bp, 1, missing, fp);
      bp += got;
      missing -= got;
    }
  if (missing)
    memset(bp, 0, missing);
  return blksize - missing;
}
00273 
00274 
00275 void
00276 MediaBlockList::reuseBlocks(FILE *wfp, string filename)
00277 {
00278   FILE *fp;
00279 
00280   if (!chksumlen || (fp = fopen(filename.c_str(), "r")) == 0)
00281     return;
00282   size_t nblks = blocks.size();
00283   vector<bool> found;
00284   found.resize(nblks + 1);
00285   if (rsumlen && !rsums.empty())
00286     {
00287       size_t blksize = blocks[0].size;
00288       if (nblks == 1 && rsumpad && rsumpad > blksize)
00289         blksize = rsumpad;
00290       // create hash of checksums
00291       unsigned int hm = rsums.size() * 2;
00292       while (hm & (hm - 1))
00293         hm &= hm - 1;
00294       hm = hm * 2 - 1;
00295       if (hm < 16383)
00296         hm = 16383;
00297       unsigned int *ht = new unsigned int[hm + 1];
00298       memset(ht, 0, (hm + 1) * sizeof(unsigned int));
00299       for (unsigned int i = 0; i < rsums.size(); i++)
00300         {
00301           if (blocks[i].size != blksize && (i != nblks - 1 || rsumpad != blksize))
00302             continue;
00303           unsigned int r = rsums[i];
00304           unsigned int h = r & hm;
00305           unsigned int hh = 7;
00306           while (ht[h])
00307             h = (h + hh++) & hm;
00308           ht[h] = i + 1;
00309         }
00310 
00311       unsigned char *buf = new unsigned char[blksize];
00312       unsigned char *buf2 = new unsigned char[blksize];
00313       size_t pushback = 0;
00314       unsigned char *pushbackp = 0;
00315       int bshift = 0;
00316       if ((blksize & (blksize - 1)) == 0)
00317         for (bshift = 0; size_t(1 << bshift) != blksize; bshift++)
00318           ;
00319       unsigned short a, b;
00320       a = b = 0;
00321       memset(buf, 0, blksize);
00322       bool eof = 0;
00323       bool init = 1;
00324       int sql = nblks > 1 && chksumlen < 16 ? 2 : 1;
00325       while (!eof)
00326         {
00327           for (size_t i = 0; i < blksize; i++)
00328             {
00329               int c;
00330               if (eof)
00331                 c = 0;
00332               else
00333                 {
00334                    if (pushback)
00335                     {
00336                       c = *pushbackp++;
00337                       pushback--;
00338                     }
00339                   else
00340                     c = getc(fp);
00341                   if (c == EOF)
00342                     {
00343                       eof = true;
00344                       c = 0;
00345                       if (!i || sql == 2)
00346                         break;
00347                     }
00348                 }
00349               int oc = buf[i];
00350               buf[i] = c;
00351               a += c - oc;
00352               if (bshift)
00353                 b += a - (oc << bshift);
00354               else
00355                 b += a - oc * blksize;
00356               if (init)
00357                 {
00358                   if (size_t(i) != blksize - 1)
00359                     continue;
00360                   init = 0;
00361                 }
00362               unsigned int r;
00363               if (rsumlen == 1)
00364                 r = ((unsigned int)b & 255);
00365               else if (rsumlen == 2)
00366                 r = ((unsigned int)b & 65535);
00367               else if (rsumlen == 3)
00368                 r = ((unsigned int)a & 255) << 16 | ((unsigned int)b & 65535);
00369               else
00370                 r = ((unsigned int)a & 65535) << 16 | ((unsigned int)b & 65535);
00371               unsigned int h = r & hm;
00372               unsigned int hh = 7;
00373               for (; ht[h]; h = (h + hh++) & hm)
00374                 {
00375                   size_t blkno = ht[h] - 1;
00376                   if (rsums[blkno] != r)
00377                     continue;
00378                   if (found[blkno])
00379                     continue;
00380                   if (sql == 2)
00381                     {
00382                       if (eof || blkno + 1 >= nblks)
00383                         continue;
00384                       pushback = fetchnext(fp, buf2, blksize, pushback, pushbackp);
00385                       pushbackp = buf2;
00386                       if (!pushback)
00387                         continue;
00388                       if (!checkRsum(blkno + 1, buf2, blksize))
00389                         continue;
00390                     }
00391                   if (!checkChecksumRotated(blkno, buf, blksize, i + 1))
00392                     continue;
00393                   if (sql == 2 && !checkChecksum(blkno + 1, buf2, blksize))
00394                     continue;
00395                   writeBlock(blkno, wfp, buf, blksize, i + 1, found);
00396                   if (sql == 2)
00397                     {
00398                       writeBlock(blkno + 1, wfp, buf2, blksize, 0, found);
00399                       pushback = 0;
00400                       blkno++;
00401                     }
00402                   while (!eof)
00403                     {
00404                       blkno++;
00405                       pushback = fetchnext(fp, buf2, blksize, pushback, pushbackp);
00406                       pushbackp = buf2;
00407                       if (!pushback)
00408                         break;
00409                       if (!checkRsum(blkno, buf2, blksize))
00410                         break;
00411                       if (!checkChecksum(blkno, buf2, blksize))
00412                         break;
00413                       writeBlock(blkno, wfp, buf2, blksize, 0, found);
00414                       pushback = 0;
00415                     }
00416                   init = false;
00417                   memset(buf, 0, blksize);
00418                   a = b = 0;
00419                   i = size_t(-1);       // start with 0 on next iteration
00420                   break;
00421                 }
00422             }
00423         }
00424       delete[] buf2;
00425       delete[] buf;
00426       delete[] ht;
00427     }
00428   else if (chksumlen >= 16)
00429     {
00430       // dummy variant, just check the checksums
00431       size_t bufl = 4096;
00432       off_t off = 0;
00433       unsigned char *buf = new unsigned char[bufl];
00434       for (size_t blkno = 0; blkno < blocks.size(); ++blkno)
00435         {
00436           if (off > blocks[blkno].off)
00437             continue;
00438           size_t blksize = blocks[blkno].size;
00439           if (blksize > bufl)
00440             {
00441               delete[] buf;
00442               bufl = blksize;
00443               buf = new unsigned char[bufl];
00444             }
00445           size_t skip = blocks[blkno].off - off;
00446           while (skip)
00447             {
00448               size_t l = skip > bufl ? bufl : skip;
00449               if (fread(buf, l, 1, fp) != 1)
00450                 break;
00451               skip -= l;
00452               off += l;
00453             }
00454           if (fread(buf, blksize, 1, fp) != 1)
00455             break;
00456           if (checkChecksum(blkno, buf, blksize))
00457             writeBlock(blkno, wfp, buf, blksize, 0, found);
00458           off += blksize;
00459         }
00460     }
00461   if (!found[nblks])
00462     return;
00463   // now throw out all of the blocks we found
00464   std::vector<MediaBlock> nblocks;
00465   std::vector<unsigned char> nchksums;
00466   std::vector<unsigned int> nrsums;
00467 
00468   for (size_t blkno = 0; blkno < blocks.size(); ++blkno)
00469     {
00470       if (!found[blkno])
00471         {
00472           // still need it
00473           nblocks.push_back(blocks[blkno]);
00474           if (chksumlen && (blkno + 1) * chksumlen <= chksums.size())
00475             {
00476               nchksums.resize(nblocks.size() * chksumlen);
00477               memcpy(&nchksums[(nblocks.size() - 1) * chksumlen], &chksums[blkno * chksumlen], chksumlen);
00478             }
00479           if (rsumlen && (blkno + 1) <= rsums.size())
00480             nrsums.push_back(rsums[blkno]);
00481         }
00482     }
00483   blocks = nblocks;
00484   chksums = nchksums;
00485   rsums = nrsums;
00486 }
00487 
00488 std::string
00489 MediaBlockList::asString() const
00490 {
00491   std::string s;
00492   size_t i, j;
00493 
00494   if (filesize != off_t(-1))
00495     {
00496       long long size = filesize;
00497       s = zypp::str::form("[ BlockList, file size %lld\n", size);
00498     }
00499   else
00500     s = "[ BlockList, filesize unknown\n";
00501   if (!haveblocks)
00502     s += "  No block information\n";
00503   if (chksumpad)
00504     s += zypp::str::form("  Checksum pad %zd\n", chksumpad);
00505   if (rsumpad)
00506     s += zypp::str::form("  Rsum pad %zd\n", rsumpad);
00507   for (i = 0; i < blocks.size(); ++i)
00508     {
00509       long long off=blocks[i].off;
00510       long long size=blocks[i].size;
00511       s += zypp::str::form("  (%8lld, %8lld)", off, size);
00512       if (chksumlen && chksums.size() >= (i + 1) * chksumlen)
00513         {
00514           s += "  " + chksumtype + ":";
00515           for (j = 0; j < size_t(chksumlen); j++)
00516             s += zypp::str::form("%02hhx", chksums[i * chksumlen + j]);
00517         }
00518       if (rsumlen && rsums.size() > i)
00519         {
00520           s += "  RSUM:";
00521           s += zypp::str::form("%0*x", 2 * rsumlen, rsums[i]);
00522         }
00523       s += "\n";
00524     }
00525   s += "]";
00526   return s;
00527 }
00528 
00529   } // namespace media
00530 } // namespace zypp
00531