libzypp  17.14.0
MetaLinkParser.cc
Go to the documentation of this file.
1 /*---------------------------------------------------------------------\
2 | ____ _ __ __ ___ |
3 | |__ / \ / / . \ . \ |
4 | / / \ V /| _/ _/ |
5 | / /__ | | | | | | |
6 | /_____||_| |_| |_| |
7 | |
8 \---------------------------------------------------------------------*/
14 #include "zypp/base/Logger.h"
15 
16 #include <sys/types.h>
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 
21 #include <vector>
22 #include <algorithm>
23 #include <iostream>
24 #include <fstream>
25 
26 #include <libxml2/libxml/SAX2.h>
27 
28 using namespace std;
29 using namespace zypp::base;
30 
31 namespace zypp {
32  namespace media {
33 
34 enum state {
53 };
54 
55 struct stateswitch {
56  enum state from;
57  string ename;
58  enum state to;
59  int docontent;
60 };
61 
62 static struct stateswitch stateswitches[] = {
63  { STATE_START, "metalink", STATE_METALINK, 0 },
64  { STATE_METALINK, "files", STATE_FILES, 0 },
65  { STATE_METALINK, "file", STATE_M4FILE, 0 },
66  { STATE_FILES, "file", STATE_FILE, 0 },
67  { STATE_FILE, "size", STATE_SIZE, 1 },
68  { STATE_FILE, "verification", STATE_VERIFICATION, 0 },
69  { STATE_FILE, "resources", STATE_RESOURCES, 0 },
70  { STATE_VERIFICATION, "hash", STATE_HASH, 1 },
71  { STATE_VERIFICATION, "pieces", STATE_PIECES, 0 },
72  { STATE_PIECES, "hash", STATE_PHASH, 1 },
73  { STATE_RESOURCES, "url", STATE_URL, 1 },
74  { STATE_M4FILE, "size", STATE_M4SIZE, 1 },
75  { STATE_M4FILE, "hash", STATE_M4HASH, 1},
76  { STATE_M4FILE, "url", STATE_M4URL, 1},
77  { STATE_M4FILE, "pieces", STATE_M4PIECES, 0},
78  { STATE_M4PIECES, "hash", STATE_M4PHASH, 1 },
79  { NUMSTATES }
80 };
81 
82 struct ml_url {
84  : priority( 0 )
85  {}
86  int priority;
87  string url;
88 };
89 
90 static void XMLCALL startElement(void *userData, const xmlChar *name, const xmlChar **atts);
91 static void XMLCALL endElement(void *userData, const xmlChar *name);
92 static void XMLCALL characterData(void *userData, const xmlChar *s, int len);
93 
96  : parser( nullptr )
97  , depth( 0 )
98  , state( STATE_START )
99  , statedepth( 0 )
100  , content( reinterpret_cast<char *>(malloc(256)) )
101  , lcontent( 0 )
102  , acontent( 256 )
103  , docontent( 0 )
104  , called( 0 )
105  , gotfile( 0 )
106  , size( -1 )
107  , nurls( 0 )
108  , blksize( 0 )
109  , npiece( 0 )
110  , piecel( 0 )
111  , nsha1( 0 )
112  , nzsync( 0 )
113  , chksuml( 0 )
114  {
115  struct stateswitch *sw;
116  int i;
117  memset( swtab, 0, sizeof(swtab) );
118  memset( sbtab, 0, sizeof(sbtab) );
119  for (i = 0, sw = stateswitches; sw->from != NUMSTATES; i++, sw++)
120  {
121  if (!swtab[sw->from])
122  swtab[sw->from] = sw;
123  sbtab[sw->to] = sw->from;
124  }
125 
126  xmlSAXHandler sax;
127  memset(&sax, 0, sizeof(sax));
128  sax.startElement = startElement;
129  sax.endElement = endElement;
130  sax.characters = characterData;
131 
132  // xmlCreatePushParserCtxt internally creates a copy of the xmlSAXHandler, so having it as a local variable is safe
133  parser = xmlCreatePushParserCtxt(&sax, this, NULL, 0, NULL);
134  }
135 
137  {
138  if (parser) {
139  xmlFreeParserCtxt(parser);
140  parser = nullptr;
141  }
142  free(content);
143  }
144 
145  xmlParserCtxtPtr parser;
146  int depth;
147  enum state state;
149  char *content;
150  int lcontent;
151  int acontent;
153  struct stateswitch *swtab[NUMSTATES];
154  enum state sbtab[NUMSTATES];
155 
156  int called;
157  int gotfile;
158  off_t size;
159  vector<struct ml_url> urls;
160  int nurls;
161  size_t blksize;
162 
163  vector<unsigned char> piece;
164  int npiece;
165  int piecel;
166 
167  vector<unsigned char> sha1;
168  int nsha1;
169  vector<unsigned char> zsync;
170  int nzsync;
171 
172  vector<unsigned char> chksum;
173  int chksuml;
174 };
175 
176 static const char *
177 find_attr(const char *txt, const xmlChar **atts)
178 {
179  if(!atts) {
180  return 0;
181  }
182 
183  for (; *atts; atts += 2)
184  {
185  if (!strcmp(reinterpret_cast<const char*>(*atts), txt))
186  return reinterpret_cast<const char*>(atts[1]);
187  }
188  return 0;
189 }
190 
191 static void XMLCALL
192 startElement(void *userData, const xmlChar *name, const xmlChar **atts)
193 {
194  struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
195  struct stateswitch *sw;
196  if (pd->depth != pd->statedepth)
197  {
198  pd->depth++;
199  return;
200  }
201  pd->depth++;
202  if (!pd->swtab[pd->state])
203  return;
204  for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++) /* find name in statetable */
205  if (sw->ename == reinterpret_cast<const char *>(name))
206  break;
207  if (sw->from != pd->state)
208  return;
209  if ((sw->to == STATE_FILE || sw->to == STATE_M4FILE) && pd->gotfile++)
210  return; /* ignore all but the first file */
211  //printf("start depth %d name %s\n", pd->depth, name);
212  pd->state = sw->to;
213  pd->docontent = sw->docontent;
214  pd->statedepth = pd->depth;
215  pd->lcontent = 0;
216  *pd->content = 0;
217  switch(pd->state)
218  {
219  case STATE_URL:
220  case STATE_M4URL:
221  {
222  const char *priority = find_attr("priority", atts);
223  const char *preference = find_attr("preference", atts);
224  int prio;
225  pd->urls.push_back(ml_url());
226  if (priority)
227  prio = atoi(priority);
228  else if (preference)
229  prio = 101 - atoi(preference);
230  else
231  prio = 999999;
232  pd->urls.back().priority = prio;
233  break;
234  }
235  case STATE_PIECES:
236  case STATE_M4PIECES:
237  {
238  const char *type = find_attr("type", atts);
239  const char *length = find_attr("length", atts);
240  size_t blksize;
241 
242  if (!type || !length)
243  {
244  pd->state = pd->sbtab[pd->state];
245  pd->statedepth--;
246  break;
247  }
248  blksize = strtoul(length, 0, 10);
249  if (!blksize || (pd->blksize && pd->blksize != blksize))
250  {
251  pd->state = pd->sbtab[pd->state];
252  pd->statedepth--;
253  break;
254  }
255  pd->blksize = blksize;
256  pd->npiece = 0;
257  pd->piece.clear();
258  if (!strcmp(type, "sha1") || !strcmp(type, "sha-1"))
259  pd->piecel = 20;
260  else if (!strcmp(type, "zsync"))
261  pd->piecel = 4;
262  else
263  {
264  pd->state = pd->sbtab[pd->state];
265  pd->statedepth--;
266  break;
267  }
268  break;
269  }
270  case STATE_HASH:
271  case STATE_M4HASH:
272  {
273  const char *type = find_attr("type", atts);
274  if (!type)
275  type = "?";
276  if ((!strcmp(type, "sha1") || !strcmp(type, "sha-1")) && pd->chksuml < 20)
277  pd->chksuml = 20;
278  else if (!strcmp(type, "sha256") || !strcmp(type, "sha-256"))
279  pd->chksuml = 32;
280  else
281  {
282  pd->state = pd->sbtab[pd->state];
283  pd->statedepth--;
284  pd->docontent = 0;
285  }
286  break;
287  }
288  case STATE_PHASH:
289  case STATE_M4PHASH:
290  {
291  const char *piece = find_attr("piece", atts);
292  if (pd->state == STATE_PHASH && (!piece || atoi(piece) != pd->npiece))
293  {
294  pd->state = pd->sbtab[pd->state];
295  pd->statedepth--;
296  }
297  break;
298  }
299  default:
300  break;
301  }
302 }
303 
/**
 * Decode a string of hexadecimal digits into raw bytes.
 *
 * Reads two digits (upper or lower case) from \a str for each of the
 * \a buflen output bytes. Decoding stops at the first character that is not
 * a hex digit; bytes decoded up to that point have already been written.
 *
 * \param buf     receives the decoded bytes, must hold \a buflen bytes
 * \param str     hex digits; a terminating NUL ends decoding as an error
 * \param buflen  number of bytes to decode
 * \return \a buflen on success, 0 if a non-hex character was encountered
 */
static int
hexstr2bytes(unsigned char *buf, const char *str, int buflen)
{
  // Value of a single hex digit, -1 for any other character.
  const auto nibble = [](char c) -> int {
    if (c >= '0' && c <= '9')
      return c - '0';
    if (c >= 'a' && c <= 'f')
      return c - ('a' - 10);
    if (c >= 'A' && c <= 'F')
      return c - ('A' - 10);
    return -1;
  };

  for (int idx = 0; idx < buflen; ++idx, str += 2)
    {
      const int high = nibble(str[0]);
      if (high < 0)
        return 0;
      buf[idx] = high;

      // Only look at the second digit once the first one was valid, so a
      // terminating NUL is never read past.
      const int low = nibble(str[1]);
      if (low < 0)
        return 0;
      buf[idx] = (buf[idx] << 4) | low;
    }
  return buflen;
}
328 
329 static void XMLCALL
330 endElement(void *userData, const xmlChar *name)
331 {
332  struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
333  // printf("end depth %d-%d name %s\n", pd->depth, pd->statedepth, name);
334  if (pd->depth != pd->statedepth)
335  {
336  pd->depth--;
337  return;
338  }
339  pd->depth--;
340  pd->statedepth--;
341  switch (pd->state)
342  {
343  case STATE_SIZE:
344  case STATE_M4SIZE:
345  pd->size = (off_t)strtoull(pd->content, 0, 10);
346  break;
347  case STATE_HASH:
348  case STATE_M4HASH:
349  pd->chksum.clear();
350  pd->chksum.resize(pd->chksuml, 0);
351  if (strlen(pd->content) != size_t(pd->chksuml) * 2 || !hexstr2bytes(&pd->chksum[0], pd->content, pd->chksuml))
352  {
353  pd->chksum.clear();
354  pd->chksuml = 0;
355  }
356  break;
357  case STATE_PHASH:
358  case STATE_M4PHASH:
359  if (strlen(pd->content) != size_t(pd->piecel) * 2)
360  break;
361  pd->piece.resize(pd->piecel * (pd->npiece + 1), 0);
362  if (!hexstr2bytes(&pd->piece[pd->piecel * pd->npiece], pd->content, pd->piecel))
363  {
364  pd->piece.resize(pd->piecel * pd->npiece, 0);
365  break;
366  }
367  pd->npiece++;
368  break;
369  case STATE_PIECES:
370  case STATE_M4PIECES:
371  if (pd->piecel == 4)
372  {
373  pd->zsync = pd->piece;
374  pd->nzsync = pd->npiece;
375  }
376  else
377  {
378  pd->sha1 = pd->piece;
379  pd->nsha1 = pd->npiece;
380  }
381  pd->piecel = pd->npiece = 0;
382  pd->piece.clear();
383  break;
384  case STATE_URL:
385  case STATE_M4URL:
386  if (*pd->content)
387  {
388  pd->urls[pd->nurls].url = string(pd->content);
389  pd->nurls++;
390  }
391  break;
392  default:
393  break;
394  }
395  pd->state = pd->sbtab[pd->state];
396  pd->docontent = 0;
397 }
398 
399 static void XMLCALL
400 characterData(void *userData, const xmlChar *s, int len)
401 {
402  struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
403  int l;
404  char *c;
405  if (!pd->docontent)
406  return;
407  l = pd->lcontent + len + 1;
408  if (l > pd->acontent)
409  {
410  pd->content = reinterpret_cast<char *>(realloc(pd->content, l + 256));
411  pd->acontent = l + 256;
412  }
413  c = pd->content + pd->lcontent;
414  pd->lcontent += len;
415  while (len-- > 0)
416  *c++ = *s++;
417  *c = 0;
418 }
419 
420 
421 MetaLinkParser::MetaLinkParser()
422  : pd( new ml_parsedata )
423 {}
424 
426 {
427  delete pd;
428 }
429 
430 void
432 {
433  parse(InputStream(filename));
434 }
435 
436 void
438 {
439  char buf[4096];
440  if (!is.stream())
441  ZYPP_THROW(Exception("MetaLinkParser: no such file"));
442  while (is.stream().good())
443  {
444  is.stream().read(buf, sizeof(buf));
445  parseBytes(buf, is.stream().gcount());
446  }
447  parseEnd();
448 }
449 
450 void
451 MetaLinkParser::parseBytes(const char *buf, size_t len)
452 {
453  if (!len)
454  return;
455 
456  if (xmlParseChunk(pd->parser, buf, len, 0)) {
457  ZYPP_THROW(Exception("Parse Error"));
458  }
459 }
460 
461 static bool urlcmp(const ml_url &a, const ml_url &b)
462 {
463  return a.priority < b.priority;
464 }
465 
466 void
468 {
469  if (xmlParseChunk(pd->parser, NULL, 0, 1)) {
470  ZYPP_THROW(Exception("Parse Error"));
471  }
472  if (pd->nurls)
473  stable_sort(pd->urls.begin(), pd->urls.end(), urlcmp);
474 }
475 
476 std::vector<Url>
478 {
479  std::vector<Url> urls;
480  int i;
481  for (i = 0; i < pd->nurls; ++i)
482  urls.push_back(Url(pd->urls[i].url));
483  return urls;
484 }
485 
488 {
489  size_t i;
490  MediaBlockList bl(pd->size);
491  if (pd->chksuml == 20)
492  bl.setFileChecksum("SHA1", pd->chksuml, &pd->chksum[0]);
493  else if (pd->chksuml == 32)
494  bl.setFileChecksum("SHA256", pd->chksuml, &pd->chksum[0]);
495  if (pd->size != off_t(-1) && pd->blksize)
496  {
497  size_t nb = (pd->size + pd->blksize - 1) / pd->blksize;
498  off_t off = 0;
499  size_t size = pd->blksize;
500  for (i = 0; i < nb; i++)
501  {
502  if (i == nb - 1)
503  {
504  size = pd->size % pd->blksize;
505  if (!size)
506  size = pd->blksize;
507  }
508  size_t blkno = bl.addBlock(off, size);
509  if (int(i) < pd->nsha1)
510  {
511  bl.setChecksum(blkno, "SHA1", 20, &pd->sha1[20 * i]);
512  if (int(i) < pd->nzsync)
513  {
514  unsigned char *p = &pd->zsync[4 * i];
515  bl.setRsum(blkno, 4, p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24, pd->blksize);
516  }
517  }
518  off += pd->blksize;
519  }
520  }
521  return bl;
522 }
523 
524  } // namespace media
525 } // namespace zypp
526 
size_t addBlock(off_t off, size_t size)
add a block with offset off and size size to the block list.
vector< unsigned char > piece
struct stateswitch * swtab[NUMSTATES]
static void XMLCALL characterData(void *userData, const xmlChar *s, int len)
#define ZYPP_THROW(EXCPT)
Drops a logline and throws the Exception.
Definition: Exception.h:392
vector< unsigned char > sha1
static void XMLCALL endElement(void *userData, const xmlChar *name)
void parseBytes(const char *bytes, size_t len)
parse a chunk of a file consisting of metalink xml data.
String related utilities and Regular expression matching.
Definition: Arch.h:344
Helper to create and pass std::istream.
Definition: InputStream.h:56
vector< struct ml_url > urls
void parse(const Pathname &filename)
parse a file consisting of metalink xml data
vector< unsigned char > zsync
void parseEnd()
tells the parser that all chunks are now processed
boost::noncopyable NonCopyable
Ensure derived classes cannot be copied.
Definition: NonCopyable.h:26
struct ml_parsedata * pd
void setRsum(size_t blkno, int rsl, unsigned int rs, size_t rspad=0)
set / verify the (weak) rolling checksum over a single block
#define nullptr
Definition: Easy.h:55
static bool urlcmp(const ml_url &a, const ml_url &b)
MediaBlockList getBlockList()
return the block list from the parsed metalink data
static void XMLCALL startElement(void *userData, const xmlChar *name, const xmlChar **atts)
static struct stateswitch stateswitches[]
enum state sbtab[NUMSTATES]
Base class for Exception.
Definition: Exception.h:145
std::vector< Url > getUrls()
return the download urls from the parsed metalink data
static const char * find_attr(const char *txt, const xmlChar **atts)
std::istream & stream() const
The std::istream.
Definition: InputStream.h:93
void setFileChecksum(std::string ctype, int cl, unsigned char *c)
set / verify the checksum over the whole file
Easy-to use interface to the ZYPP dependency resolver.
Definition: CodePitfalls.doc:1
void setChecksum(size_t blkno, std::string cstype, int csl, unsigned char *cs, size_t cspad=0)
set / verify the (strong) checksum over a single block
vector< unsigned char > chksum
#define c2h(c)
Url manipulation class.
Definition: Url.h:87
static int hexstr2bytes(unsigned char *buf, const char *str, int buflen)