libzypp  13.10.6
MediaBlockList.cc
Go to the documentation of this file.
1 /*---------------------------------------------------------------------\
2 | ____ _ __ __ ___ |
3 | |__ / \ / / . \ . \ |
4 | / / \ V /| _/ _/ |
5 | / /__ | | | | | | |
6 | /_____||_| |_| |_| |
7 | |
8 \---------------------------------------------------------------------*/
13 #include <sys/types.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <expat.h>
18 
19 #include <vector>
20 #include <iostream>
21 #include <fstream>
22 
24 #include "zypp/base/Logger.h"
25 #include "zypp/base/String.h"
26 
27 using namespace std;
28 using namespace zypp::base;
29 
30 namespace zypp {
31  namespace media {
32 
33 MediaBlockList::MediaBlockList(off_t size)
34 {
35  filesize = size;
36  haveblocks = false;
37  chksumlen = 0;
38  chksumpad = 0;
39  rsumlen = 0;
40  rsumpad = 0;
41 }
42 
43 size_t
44 MediaBlockList::addBlock(off_t off, size_t size)
45 {
46  haveblocks = true;
47  blocks.push_back(MediaBlock( off, size ));
48  return blocks.size() - 1;
49 }
50 
51 void
52 MediaBlockList::setFileChecksum(std::string ctype, int cl, unsigned char *c)
53 {
54  if (!cl)
55  return;
56  fsumtype = ctype;
57  fsum.resize(cl);
58  memcpy(&fsum[0], c, cl);
59 }
60 
61 bool
62 MediaBlockList::createFileDigest(Digest &digest) const
63 {
64  return digest.create(fsumtype);
65 }
66 
67 bool
68 MediaBlockList::verifyFileDigest(Digest &digest) const
69 {
70  if (!haveFileChecksum())
71  return true;
72  vector<unsigned char>dig = digest.digestVector();
73  if (dig.empty() || dig.size() < fsum.size())
74  return false;
75  return memcmp(&dig[0], &fsum[0], fsum.size()) ? false : true;
76 }
77 
78 void
79 MediaBlockList::setChecksum(size_t blkno, std::string cstype, int csl, unsigned char *cs, size_t cspad)
80 {
81  if (!csl)
82  return;
83  if (!chksumlen)
84  {
85  if (blkno)
86  return;
87  chksumlen = csl;
88  chksumtype = cstype;
89  chksumpad = cspad;
90  }
91  if (csl != chksumlen || cstype != chksumtype || cspad != chksumpad || blkno != chksums.size() / chksumlen)
92  return;
93  chksums.resize(chksums.size() + csl);
94  memcpy(&chksums[csl * blkno], cs, csl);
95 }
96 
97 void
98 MediaBlockList::setRsum(size_t blkno, int rsl, unsigned int rs, size_t rspad)
99 {
100  if (!rsl)
101  return;
102  if (!rsumlen)
103  {
104  if (blkno)
105  return;
106  rsumlen = rsl;
107  rsumpad = rspad;
108  }
109  if (rsl != rsumlen || rspad != rsumpad || blkno != rsums.size())
110  return;
111  rsums.push_back(rs);
112 }
113 
114 bool
115 MediaBlockList::createDigest(Digest &digest) const
116 {
117  return digest.create(chksumtype);
118 }
119 
120 bool
121 MediaBlockList::verifyDigest(size_t blkno, Digest &digest) const
122 {
123  if (!haveChecksum(blkno))
124  return true;
125  size_t size = blocks[blkno].size;
126  if (!size)
127  return true;
128  if (chksumpad > size)
129  {
130  char pad[chksumpad - size];
131  memset(pad, 0, chksumpad - size);
132  digest.update(pad, chksumpad - size);
133  }
134  vector<unsigned char>dig = digest.digestVector();
135  if (dig.empty() || dig.size() < size_t(chksumlen))
136  return false;
137  return memcmp(&dig[0], &chksums[chksumlen * blkno], chksumlen) ? false : true;
138 }
139 
140 unsigned int
141 MediaBlockList::updateRsum(unsigned int rs, const char* bytes, size_t len) const
142 {
143  if (!len)
144  return rs;
145  unsigned short s, m;
146  s = (rs >> 16) & 65535;
147  m = rs & 65535;
148  for (; len > 0 ; len--)
149  {
150  unsigned short c = (unsigned char)*bytes++;
151  s += c;
152  m += s;
153  }
154  return (s & 65535) << 16 | (m & 65535);
155 }
156 
157 bool
158 MediaBlockList::verifyRsum(size_t blkno, unsigned int rs) const
159 {
160  if (!haveRsum(blkno))
161  return true;
162  size_t size = blocks[blkno].size;
163  if (!size)
164  return true;
165  if (rsumpad > size)
166  {
167  unsigned short s, m;
168  s = (rs >> 16) & 65535;
169  m = rs & 65535;
170  m += s * (rsumpad - size);
171  rs = (s & 65535) << 16 | (m & 65535);
172  }
173  switch(rsumlen)
174  {
175  case 3:
176  rs &= 0xffffff;
177  case 2:
178  rs &= 0xffff;
179  case 1:
180  rs &= 0xff;
181  default:
182  break;
183  }
184  return rs == rsums[blkno];
185 }
186 
187 bool
188 MediaBlockList::checkRsum(size_t blkno, const unsigned char *buf, size_t bufl) const
189 {
190  if (blkno >= blocks.size() || bufl < blocks[blkno].size)
191  return false;
192  unsigned int rs = updateRsum(0, (const char *)buf, blocks[blkno].size);
193  return verifyRsum(blkno, rs);
194 }
195 
196 bool
197 MediaBlockList::checkChecksum(size_t blkno, const unsigned char *buf, size_t bufl) const
198 {
199  if (blkno >= blocks.size() || bufl < blocks[blkno].size)
200  return false;
201  Digest dig;
202  if (!createDigest(dig))
203  return false;
204  dig.update((const char *)buf, blocks[blkno].size);
205  return verifyDigest(blkno, dig);
206 }
207 
208 // specialized version of checkChecksum that can deal with a "rotated" buffer
209 bool
210 MediaBlockList::checkChecksumRotated(size_t blkno, const unsigned char *buf, size_t bufl, size_t start) const
211 {
212  if (blkno >= blocks.size() || bufl < blocks[blkno].size)
213  return false;
214  if (start == bufl)
215  start = 0;
216  Digest dig;
217  if (!createDigest(dig))
218  return false;
219  size_t size = blocks[blkno].size;
220  size_t len = bufl - start > size ? size : bufl - start;
221  dig.update((const char *)buf + start, len);
222  if (size > len)
223  dig.update((const char *)buf, size - len);
224  return verifyDigest(blkno, dig);
225 }
226 
227 // write block to the file. can also deal with "rotated" buffers
228 void
229 MediaBlockList::writeBlock(size_t blkno, FILE *fp, const unsigned char *buf, size_t bufl, size_t start, vector<bool> &found) const
230 {
231  if (blkno >= blocks.size() || bufl < blocks[blkno].size)
232  return;
233  off_t off = blocks[blkno].off;
234  size_t size = blocks[blkno].size;
235  if (fseeko(fp, off, SEEK_SET))
236  return;
237  if (start == bufl)
238  start = 0;
239  size_t len = bufl - start > size ? size : bufl - start;
240  if (fwrite(buf + start, len, 1, fp) != 1)
241  return;
242  if (size > len && fwrite(buf, size - len, 1, fp) != 1)
243  return;
244  found[blkno] = true;
245  found[blocks.size()] = true;
246 }
247 
/**
 * Fill \a bp with \a blksize bytes.
 *
 * The first \a pushback bytes are taken from \a pushbackp (which may be
 * \a bp itself), the rest is read from \a fp.  If the file ends early the
 * remainder of the buffer is zero filled.
 *
 * \return the number of bytes that are real data (pushback plus bytes read)
 */
static size_t
fetchnext(FILE *fp, unsigned char *bp, size_t blksize, size_t pushback, unsigned char *pushbackp)
{
  unsigned char *out = bp;
  size_t missing = blksize;

  if (pushback != 0)
    {
      // regions may overlap, hence memmove
      if (pushbackp != out)
        memmove(out, pushbackp, pushback);
      out += pushback;
      missing -= pushback;
    }

  for (; missing != 0; --missing)
    {
      const int ch = getc(fp);
      if (ch == EOF)
        break;
      *out++ = ch;
    }

  if (missing != 0)
    memset(out, 0, missing);
  return blksize - missing;
}
273 
274 
275 void
276 MediaBlockList::reuseBlocks(FILE *wfp, string filename)
277 {
278  FILE *fp;
279 
280  if (!chksumlen || (fp = fopen(filename.c_str(), "r")) == 0)
281  return;
282  size_t nblks = blocks.size();
283  vector<bool> found;
284  found.resize(nblks + 1);
285  if (rsumlen && !rsums.empty())
286  {
287  size_t blksize = blocks[0].size;
288  if (nblks == 1 && rsumpad && rsumpad > blksize)
289  blksize = rsumpad;
290  // create hash of checksums
291  unsigned int hm = rsums.size() * 2;
292  while (hm & (hm - 1))
293  hm &= hm - 1;
294  hm = hm * 2 - 1;
295  if (hm < 16383)
296  hm = 16383;
297  unsigned int *ht = new unsigned int[hm + 1];
298  memset(ht, 0, (hm + 1) * sizeof(unsigned int));
299  for (unsigned int i = 0; i < rsums.size(); i++)
300  {
301  if (blocks[i].size != blksize && (i != nblks - 1 || rsumpad != blksize))
302  continue;
303  unsigned int r = rsums[i];
304  unsigned int h = r & hm;
305  unsigned int hh = 7;
306  while (ht[h])
307  h = (h + hh++) & hm;
308  ht[h] = i + 1;
309  }
310 
311  unsigned char *buf = new unsigned char[blksize];
312  unsigned char *buf2 = new unsigned char[blksize];
313  size_t pushback = 0;
314  unsigned char *pushbackp = 0;
315  int bshift = 0;
316  if ((blksize & (blksize - 1)) == 0)
317  for (bshift = 0; size_t(1 << bshift) != blksize; bshift++)
318  ;
319  unsigned short a, b;
320  a = b = 0;
321  memset(buf, 0, blksize);
322  bool eof = 0;
323  bool init = 1;
324  int sql = nblks > 1 && chksumlen < 16 ? 2 : 1;
325  while (!eof)
326  {
327  for (size_t i = 0; i < blksize; i++)
328  {
329  int c;
330  if (eof)
331  c = 0;
332  else
333  {
334  if (pushback)
335  {
336  c = *pushbackp++;
337  pushback--;
338  }
339  else
340  c = getc(fp);
341  if (c == EOF)
342  {
343  eof = true;
344  c = 0;
345  if (!i || sql == 2)
346  break;
347  }
348  }
349  int oc = buf[i];
350  buf[i] = c;
351  a += c - oc;
352  if (bshift)
353  b += a - (oc << bshift);
354  else
355  b += a - oc * blksize;
356  if (init)
357  {
358  if (size_t(i) != blksize - 1)
359  continue;
360  init = 0;
361  }
362  unsigned int r;
363  if (rsumlen == 1)
364  r = ((unsigned int)b & 255);
365  else if (rsumlen == 2)
366  r = ((unsigned int)b & 65535);
367  else if (rsumlen == 3)
368  r = ((unsigned int)a & 255) << 16 | ((unsigned int)b & 65535);
369  else
370  r = ((unsigned int)a & 65535) << 16 | ((unsigned int)b & 65535);
371  unsigned int h = r & hm;
372  unsigned int hh = 7;
373  for (; ht[h]; h = (h + hh++) & hm)
374  {
375  size_t blkno = ht[h] - 1;
376  if (rsums[blkno] != r)
377  continue;
378  if (found[blkno])
379  continue;
380  if (sql == 2)
381  {
382  if (eof || blkno + 1 >= nblks)
383  continue;
384  pushback = fetchnext(fp, buf2, blksize, pushback, pushbackp);
385  pushbackp = buf2;
386  if (!pushback)
387  continue;
388  if (!checkRsum(blkno + 1, buf2, blksize))
389  continue;
390  }
391  if (!checkChecksumRotated(blkno, buf, blksize, i + 1))
392  continue;
393  if (sql == 2 && !checkChecksum(blkno + 1, buf2, blksize))
394  continue;
395  writeBlock(blkno, wfp, buf, blksize, i + 1, found);
396  if (sql == 2)
397  {
398  writeBlock(blkno + 1, wfp, buf2, blksize, 0, found);
399  pushback = 0;
400  blkno++;
401  }
402  while (!eof)
403  {
404  blkno++;
405  pushback = fetchnext(fp, buf2, blksize, pushback, pushbackp);
406  pushbackp = buf2;
407  if (!pushback)
408  break;
409  if (!checkRsum(blkno, buf2, blksize))
410  break;
411  if (!checkChecksum(blkno, buf2, blksize))
412  break;
413  writeBlock(blkno, wfp, buf2, blksize, 0, found);
414  pushback = 0;
415  }
416  init = false;
417  memset(buf, 0, blksize);
418  a = b = 0;
419  i = size_t(-1); // start with 0 on next iteration
420  break;
421  }
422  }
423  }
424  delete[] buf2;
425  delete[] buf;
426  delete[] ht;
427  }
428  else if (chksumlen >= 16)
429  {
430  // dummy variant, just check the checksums
431  size_t bufl = 4096;
432  off_t off = 0;
433  unsigned char *buf = new unsigned char[bufl];
434  for (size_t blkno = 0; blkno < blocks.size(); ++blkno)
435  {
436  if (off > blocks[blkno].off)
437  continue;
438  size_t blksize = blocks[blkno].size;
439  if (blksize > bufl)
440  {
441  delete[] buf;
442  bufl = blksize;
443  buf = new unsigned char[bufl];
444  }
445  size_t skip = blocks[blkno].off - off;
446  while (skip)
447  {
448  size_t l = skip > bufl ? bufl : skip;
449  if (fread(buf, l, 1, fp) != 1)
450  break;
451  skip -= l;
452  off += l;
453  }
454  if (fread(buf, blksize, 1, fp) != 1)
455  break;
456  if (checkChecksum(blkno, buf, blksize))
457  writeBlock(blkno, wfp, buf, blksize, 0, found);
458  off += blksize;
459  }
460  }
461  if (!found[nblks])
462  return;
463  // now throw out all of the blocks we found
464  std::vector<MediaBlock> nblocks;
465  std::vector<unsigned char> nchksums;
466  std::vector<unsigned int> nrsums;
467 
468  for (size_t blkno = 0; blkno < blocks.size(); ++blkno)
469  {
470  if (!found[blkno])
471  {
472  // still need it
473  nblocks.push_back(blocks[blkno]);
474  if (chksumlen && (blkno + 1) * chksumlen <= chksums.size())
475  {
476  nchksums.resize(nblocks.size() * chksumlen);
477  memcpy(&nchksums[(nblocks.size() - 1) * chksumlen], &chksums[blkno * chksumlen], chksumlen);
478  }
479  if (rsumlen && (blkno + 1) <= rsums.size())
480  nrsums.push_back(rsums[blkno]);
481  }
482  }
483  blocks = nblocks;
484  chksums = nchksums;
485  rsums = nrsums;
486 }
487 
488 std::string
490 {
491  std::string s;
492  size_t i, j;
493 
494  if (filesize != off_t(-1))
495  {
496  long long size = filesize;
497  s = zypp::str::form("[ BlockList, file size %lld\n", size);
498  }
499  else
500  s = "[ BlockList, filesize unknown\n";
501  if (!haveblocks)
502  s += " No block information\n";
503  if (chksumpad)
504  s += zypp::str::form(" Checksum pad %zd\n", chksumpad);
505  if (rsumpad)
506  s += zypp::str::form(" Rsum pad %zd\n", rsumpad);
507  for (i = 0; i < blocks.size(); ++i)
508  {
509  long long off=blocks[i].off;
510  long long size=blocks[i].size;
511  s += zypp::str::form(" (%8lld, %8lld)", off, size);
512  if (chksumlen && chksums.size() >= (i + 1) * chksumlen)
513  {
514  s += " " + chksumtype + ":";
515  for (j = 0; j < size_t(chksumlen); j++)
516  s += zypp::str::form("%02hhx", chksums[i * chksumlen + j]);
517  }
518  if (rsumlen && rsums.size() > i)
519  {
520  s += " RSUM:";
521  s += zypp::str::form("%0*x", 2 * rsumlen, rsums[i]);
522  }
523  s += "\n";
524  }
525  s += "]";
526  return s;
527 }
528 
529  } // namespace media
530 } // namespace zypp
531 
Compute Message Digests (MD5, SHA1 etc)
Definition: Digest.h:45
std::vector< unsigned char > digestVector()
get vector of unsigned char representation of the digest
Definition: Digest.cc:198
static size_t fetchnext(FILE *fp, unsigned char *bp, size_t blksize, size_t pushback, unsigned char *pushbackp)
a single block from the blocklist, consisting of an offset and a size
bool create(const std::string &name)
initialize creation of a new message digest
Definition: Digest.cc:142
std::string form(const char *format,...)
Printf style construction of std::string.
Definition: String.cc:34
std::string asString(const std::string &t)
Global asString() that works with std::string too.
Definition: String.h:125
bool update(const char *bytes, size_t len)
feed data into digest computation algorithm
Definition: Digest.cc:216