libzypp  13.10.6
MetaLinkParser.cc
Go to the documentation of this file.
1 /*---------------------------------------------------------------------\
2 | ____ _ __ __ ___ |
3 | |__ / \ / / . \ . \ |
4 | / / \ V /| _/ _/ |
5 | / /__ | | | | | | |
6 | /_____||_| |_| |_| |
7 | |
8 \---------------------------------------------------------------------*/
14 #include "zypp/base/Logger.h"
15 
16 #include <sys/types.h>
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <expat.h>
21 
22 #include <vector>
23 #include <algorithm>
24 #include <iostream>
25 #include <fstream>
26 
27 using namespace std;
28 using namespace zypp::base;
29 
30 namespace zypp {
31  namespace media {
32 
33 enum state {
52 };
53 
/*
 * One row of the element state machine: while the parser is in state `from`,
 * a start tag named `ename` moves it to state `to`.
 */
54 struct stateswitch {
55  enum state from; // state in which this row applies
56  string ename; // element name that triggers the transition
57  enum state to; // state entered when the element is accepted
58  int docontent; // copied to ml_parsedata::docontent; non-zero makes characterData() buffer the element text
59 };
60 
/*
 * Transition table of the metalink element state machine.
 *
 * Rows with the same `from` state must stay adjacent: the setup loop that
 * fills swtab[] records only the first row of each `from` state, and
 * startElement() scans forward from that row while `from` still matches.
 * The same loop also derives sbtab[] (state -> parent state) from these rows.
 * The final { NUMSTATES } row is the sentinel that terminates both scans.
 *
 * NOTE(review): the STATE_M4* rows accept <file> directly below <metalink>;
 * presumably this is the Metalink 4 (RFC 5854) layout, while the
 * files/verification/resources rows cover the older format -- confirm.
 */
61 static struct stateswitch stateswitches[] = {
62  { STATE_START, "metalink", STATE_METALINK, 0 },
63  { STATE_METALINK, "files", STATE_FILES, 0 },
64  { STATE_METALINK, "file", STATE_M4FILE, 0 },
65  { STATE_FILES, "file", STATE_FILE, 0 },
66  { STATE_FILE, "size", STATE_SIZE, 1 },
67  { STATE_FILE, "verification", STATE_VERIFICATION, 0 },
68  { STATE_FILE, "resources", STATE_RESOURCES, 0 },
69  { STATE_VERIFICATION, "hash", STATE_HASH, 1 },
70  { STATE_VERIFICATION, "pieces", STATE_PIECES, 0 },
71  { STATE_PIECES, "hash", STATE_PHASH, 1 },
72  { STATE_RESOURCES, "url", STATE_URL, 1 },
73  { STATE_M4FILE, "size", STATE_M4SIZE, 1 },
74  { STATE_M4FILE, "hash", STATE_M4HASH, 1},
75  { STATE_M4FILE, "url", STATE_M4URL, 1},
76  { STATE_M4FILE, "pieces", STATE_M4PIECES, 0},
77  { STATE_M4PIECES, "hash", STATE_M4PHASH, 1 },
78  { NUMSTATES } // sentinel: marks the end of the table
79 };
80 
/*
 * One download location collected from a <url> element.
 *
 * NOTE(review): the line carrying the constructor signature is missing from
 * this listing; startElement() constructs entries with `ml_url()`.
 */
81 struct ml_url {
83  : priority( 0 )
84  {}
85  int priority; // sort key; parseEnd() orders entries by ascending value via urlcmp()
86  string url; // text content of the <url> element, filled in by endElement()
87 };
88 
89 static void XMLCALL startElement(void *userData, const char *name, const char **atts);
90 static void XMLCALL endElement(void *userData, const char *name);
91 static void XMLCALL characterData(void *userData, const XML_Char *s, int len);
92 
/*
 * Parser state shared by the three expat callbacks (initialiser list,
 * constructor body, destructor body and data members).
 *
 * NOTE(review): the struct header, the constructor/destructor signature lines
 * and the declarations of `statedepth` and `docontent` are missing from this
 * listing; both members are initialised below and used by the callbacks.
 * NOTE(review): the results of XML_ParserCreate() and malloc() are not
 * checked here, and no copy constructor/assignment is visible although the
 * object owns `parser` and `content` -- confirm instances are never copied.
 */
95  : parser( XML_ParserCreate(NULL) )
96  , depth( 0 )
97  , state( STATE_START )
98  , statedepth( 0 )
99  , content( reinterpret_cast<char *>(malloc(256)) )
100  , lcontent( 0 )
101  , acontent( 256 )
102  , docontent( 0 )
103  , called( 0 )
104  , gotfile( 0 )
105  , size( -1 )
106  , nurls( 0 )
107  , blksize( 0 )
108  , npiece( 0 )
109  , piecel( 0 )
110  , nsha1( 0 )
111  , nzsync( 0 )
112  , chksuml( 0 )
113  {
114  struct stateswitch *sw;
115  int i;
116  memset( swtab, 0, sizeof(swtab) );
117  memset( sbtab, 0, sizeof(sbtab) );
     // Index the transition table: swtab[] gets the first row of every `from`
     // state, sbtab[] maps each target state back to the state it came from.
118  for (i = 0, sw = stateswitches; sw->from != NUMSTATES; i++, sw++)
119  {
120  if (!swtab[sw->from])
121  swtab[sw->from] = sw;
122  sbtab[sw->to] = sw->from;
123  }
     // Route expat's callbacks to the static handlers with this object as user data.
124  XML_SetUserData(parser, this);
125  XML_SetElementHandler(parser, startElement, endElement);
126  XML_SetCharacterDataHandler(parser, characterData);
127  }
128 
     // Destructor body: releases the expat parser and the text buffer.
130  {
131  XML_ParserFree(parser);
132  free(content);
133  }
134 
135  XML_Parser parser; // expat parser handle, created in the initialiser list
136  int depth; // current element nesting; ++ on every start tag, -- on every end tag
137  enum state state; // current state of the element state machine
139  char *content; // malloc'ed, NUL-terminated text of the element being collected
140  int lcontent; // number of characters currently stored in content
141  int acontent; // allocated size of content in bytes
143  struct stateswitch *swtab[NUMSTATES]; // first transition row per state (NULL if the state has none)
144  enum state sbtab[NUMSTATES]; // parent state to return to when a state's element ends
145 
146  int called; // initialised to 0; not referenced anywhere else in the visible code
147  int gotfile; // counts <file> elements seen; only the first one is processed
148  off_t size; // file size parsed from <size>; stays -1 if none was seen
149  vector<struct ml_url> urls; // one entry pushed per <url> start tag
150  int nurls; // number of entries whose url string has been filled in
151  size_t blksize; // piece length taken from the `length` attribute of <pieces>
152 
153  vector<unsigned char> piece; // binary piece hashes of the <pieces> element being read
154  int npiece; // number of hashes stored in piece
155  int piecel; // bytes per piece hash: 20 for sha1, 4 for zsync
156 
157  vector<unsigned char> sha1; // completed sha1 piece hashes (copied from piece)
158  int nsha1; // number of hashes in sha1
159  vector<unsigned char> zsync; // completed zsync piece checksums (copied from piece)
160  int nzsync; // number of checksums in zsync
161 
162  vector<unsigned char> chksum; // binary whole-file checksum
163  int chksuml; // its length in bytes: 20 (sha1), 32 (sha256) or 0 if none
164 };
165 
/*
 * Look up an attribute by name in an expat attribute array.
 *
 * atts is laid out as name, value, name, value, ... and ends with a null
 * name pointer. Returns the value belonging to the first name equal to txt,
 * or a null pointer if no attribute has that name.
 */
static const char *
find_attr(const char *txt, const char **atts)
{
  const char **pair = atts;
  while (*pair)
    {
      if (strcmp(pair[0], txt) == 0)
        return pair[1];
      pair += 2;	/* step over name and value */
    }
  return 0;
}
176 
177 static void XMLCALL
178 startElement(void *userData, const char *name, const char **atts)
179 {
180  struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
181  struct stateswitch *sw;
182  if (pd->depth != pd->statedepth)
183  {
184  pd->depth++;
185  return;
186  }
187  pd->depth++;
188  if (!pd->swtab[pd->state])
189  return;
190  for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++) /* find name in statetable */
191  if (sw->ename == name)
192  break;
193  if (sw->from != pd->state)
194  return;
195  if ((sw->to == STATE_FILE || sw->to == STATE_M4FILE) && pd->gotfile++)
196  return; /* ignore all but the first file */
197  //printf("start depth %d name %s\n", pd->depth, name);
198  pd->state = sw->to;
199  pd->docontent = sw->docontent;
200  pd->statedepth = pd->depth;
201  pd->lcontent = 0;
202  *pd->content = 0;
203  switch(pd->state)
204  {
205  case STATE_URL:
206  case STATE_M4URL:
207  {
208  const char *priority = find_attr("priority", atts);
209  const char *preference = find_attr("preference", atts);
210  int prio;
211  pd->urls.push_back(ml_url());
212  if (priority)
213  prio = atoi(priority);
214  else if (preference)
215  prio = 101 - atoi(preference);
216  else
217  prio = 999999;
218  pd->urls.back().priority = prio;
219  break;
220  }
221  case STATE_PIECES:
222  case STATE_M4PIECES:
223  {
224  const char *type = find_attr("type", atts);
225  const char *length = find_attr("length", atts);
226  size_t blksize;
227 
228  if (!type || !length)
229  {
230  pd->state = pd->sbtab[pd->state];
231  pd->statedepth--;
232  break;
233  }
234  blksize = strtoul(length, 0, 10);
235  if (!blksize || (pd->blksize && pd->blksize != blksize))
236  {
237  pd->state = pd->sbtab[pd->state];
238  pd->statedepth--;
239  break;
240  }
241  pd->blksize = blksize;
242  pd->npiece = 0;
243  pd->piece.clear();
244  if (!strcmp(type, "sha1") || !strcmp(type, "sha-1"))
245  pd->piecel = 20;
246  else if (!strcmp(type, "zsync"))
247  pd->piecel = 4;
248  else
249  {
250  pd->state = pd->sbtab[pd->state];
251  pd->statedepth--;
252  break;
253  }
254  break;
255  }
256  case STATE_HASH:
257  case STATE_M4HASH:
258  {
259  const char *type = find_attr("type", atts);
260  if (!type)
261  type = "?";
262  if ((!strcmp(type, "sha1") || !strcmp(type, "sha-1")) && pd->chksuml < 20)
263  pd->chksuml = 20;
264  else if (!strcmp(type, "sha256") || !strcmp(type, "sha-256"))
265  pd->chksuml = 32;
266  else
267  {
268  pd->state = pd->sbtab[pd->state];
269  pd->statedepth--;
270  pd->docontent = 0;
271  }
272  break;
273  }
274  case STATE_PHASH:
275  case STATE_M4PHASH:
276  {
277  const char *piece = find_attr("piece", atts);
278  if (pd->state == STATE_PHASH && (!piece || atoi(piece) != pd->npiece))
279  {
280  pd->state = pd->sbtab[pd->state];
281  pd->statedepth--;
282  }
283  break;
284  }
285  default:
286  break;
287  }
288 }
289 
/*
 * Value of a single hexadecimal digit (either case), or -1 if c is not one.
 */
static int
hexdigit_value(char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return c - ('a' - 10);
  if (c >= 'A' && c <= 'F')
    return c - ('A' - 10);
  return -1;
}

/*
 * Decode the first 2 * buflen hex digits of str into buflen bytes in buf.
 *
 * Returns buflen on success and 0 as soon as a character that is not a hex
 * digit is met; this includes the terminating NUL of a string that is too
 * short, so reading never continues past the end of str.
 */
static int
hexstr2bytes(unsigned char *buf, const char *str, int buflen)
{
  for (int pos = 0; pos < buflen; ++pos, str += 2)
    {
      const int hi = hexdigit_value(str[0]);
      if (hi < 0)
	return 0;
      buf[pos] = static_cast<unsigned char>(hi);
      /* the second digit is looked at only after the first proved valid */
      const int lo = hexdigit_value(str[1]);
      if (lo < 0)
	return 0;
      buf[pos] = static_cast<unsigned char>((hi << 4) | lo);
    }
  return buflen;
}
314 
315 static void XMLCALL
316 endElement(void *userData, const char *name)
317 {
318  struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
319  // printf("end depth %d-%d name %s\n", pd->depth, pd->statedepth, name);
320  if (pd->depth != pd->statedepth)
321  {
322  pd->depth--;
323  return;
324  }
325  pd->depth--;
326  pd->statedepth--;
327  switch (pd->state)
328  {
329  case STATE_SIZE:
330  case STATE_M4SIZE:
331  pd->size = (off_t)strtoull(pd->content, 0, 10);
332  break;
333  case STATE_HASH:
334  case STATE_M4HASH:
335  pd->chksum.clear();
336  pd->chksum.resize(pd->chksuml, 0);
337  if (strlen(pd->content) != size_t(pd->chksuml) * 2 || !hexstr2bytes(&pd->chksum[0], pd->content, pd->chksuml))
338  {
339  pd->chksum.clear();
340  pd->chksuml = 0;
341  }
342  break;
343  case STATE_PHASH:
344  case STATE_M4PHASH:
345  if (strlen(pd->content) != size_t(pd->piecel) * 2)
346  break;
347  pd->piece.resize(pd->piecel * (pd->npiece + 1), 0);
348  if (!hexstr2bytes(&pd->piece[pd->piecel * pd->npiece], pd->content, pd->piecel))
349  {
350  pd->piece.resize(pd->piecel * pd->npiece, 0);
351  break;
352  }
353  pd->npiece++;
354  break;
355  case STATE_PIECES:
356  case STATE_M4PIECES:
357  if (pd->piecel == 4)
358  {
359  pd->zsync = pd->piece;
360  pd->nzsync = pd->npiece;
361  }
362  else
363  {
364  pd->sha1 = pd->piece;
365  pd->nsha1 = pd->npiece;
366  }
367  pd->piecel = pd->npiece = 0;
368  pd->piece.clear();
369  break;
370  case STATE_URL:
371  case STATE_M4URL:
372  if (*pd->content)
373  {
374  pd->urls[pd->nurls].url = string(pd->content);
375  pd->nurls++;
376  }
377  break;
378  default:
379  break;
380  }
381  pd->state = pd->sbtab[pd->state];
382  pd->docontent = 0;
383 }
384 
385 static void XMLCALL
386 characterData(void *userData, const XML_Char *s, int len)
387 {
388  struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
389  int l;
390  char *c;
391  if (!pd->docontent)
392  return;
393  l = pd->lcontent + len + 1;
394  if (l > pd->acontent)
395  {
396  pd->content = reinterpret_cast<char *>(realloc(pd->content, l + 256));
397  pd->acontent = l + 256;
398  }
399  c = pd->content + pd->lcontent;
400  pd->lcontent += len;
401  while (len-- > 0)
402  *c++ = *s++;
403  *c = 0;
404 }
405 
406 
/*
 * Creates a parser with freshly allocated parse state; the ml_parsedata
 * constructor sets up the expat parser and registers the callbacks.
 */
407 MetaLinkParser::MetaLinkParser()
408  : pd( new ml_parsedata )
409 {}
410 
// NOTE(review): the signature line is missing from this listing; the body
// deletes the parse state allocated by the MetaLinkParser constructor, so this
// is presumably MetaLinkParser::~MetaLinkParser() -- confirm.
412 {
413  delete pd;
414 }
415 
/*
 * Parse the metalink file at filename: wraps the path in an InputStream and
 * forwards to the parse() overload that accepts one.
 */
416 void
417 MetaLinkParser::parse(const Pathname &filename)
418 {
419  parse(InputStream(filename));
420 }
421 
// NOTE(review): the signature line is missing from this listing; the body
// reads from a parameter named `is` and is called with an InputStream by
// parse(const Pathname &) -- confirm the exact signature.
//
// Reads the stream in 4096 byte chunks, hands each chunk to parseBytes() and
// finishes with parseEnd(). Throws if the stream is already in a failed state.
422 void
424 {
425  char buf[4096];
426  if (!is.stream())
427  ZYPP_THROW(Exception("MetaLinkParser: no such file"));
428  while (is.stream().good())
429  {
430  is.stream().read(buf, sizeof(buf));
     // gcount() is what the last read() actually delivered; parseBytes() ignores a zero length
431  parseBytes(buf, is.stream().gcount());
432  }
433  parseEnd();
434 }
435 
436 void
437 MetaLinkParser::parseBytes(const char *buf, size_t len)
438 {
439  if (!len)
440  return;
441  if (XML_Parse(pd->parser, buf, len, 0) == XML_STATUS_ERROR)
442  ZYPP_THROW(Exception("Parse Error"));
443 }
444 
445 static bool urlcmp(const ml_url &a, const ml_url &b)
446 {
447  return a.priority < b.priority;
448 }
449 
// NOTE(review): the signature line is missing from this listing; the stream
// parse() calls parseEnd() after the last chunk, presumably this function.
//
// Tells expat that the document is complete (final XML_Parse() call without
// data) and throws on error. Afterwards the collected urls are ordered by
// ascending priority; stable_sort keeps document order among equal priorities.
450 void
452 {
453  if (XML_Parse(pd->parser, 0, 0, 1) == XML_STATUS_ERROR)
454  ZYPP_THROW(Exception("Parse Error"));
455  if (pd->nurls)
456  stable_sort(pd->urls.begin(), pd->urls.end(), urlcmp);
457 }
458 
// NOTE(review): the signature line is missing from this listing; presumably
// MetaLinkParser::getUrls() -- confirm.
//
// Returns the first pd->nurls collected url strings converted to Url objects,
// in their stored order. Relies on those first nurls entries of pd->urls being
// the ones whose url string has been filled in.
459 std::vector<Url>
461 {
462  std::vector<Url> urls;
463  int i;
464  for (i = 0; i < pd->nurls; ++i)
465  urls.push_back(Url(pd->urls[i].url));
466  return urls;
467 }
468 
// NOTE(review): the return type and signature lines are missing from this
// listing; the body returns a MediaBlockList, presumably
// MetaLinkParser::getBlockList() -- confirm.
//
// Builds the block list for the parsed file: the whole-file checksum if one
// was parsed and, when both the file size and a piece length are known, one
// block per piece with the per-piece checksums attached.
471 {
472  size_t i;
473  MediaBlockList bl(pd->size);
     // NOTE(review): &pd->chksum[0] assumes chksum holds chksuml bytes; chksuml
     // is set at the start tag of <hash> but chksum only at its end tag --
     // confirm this is not reachable after an aborted parse.
474  if (pd->chksuml == 20)
475  bl.setFileChecksum("SHA1", pd->chksuml, &pd->chksum[0]);
476  else if (pd->chksuml == 32)
477  bl.setFileChecksum("SHA256", pd->chksuml, &pd->chksum[0]);
478  if (pd->size != off_t(-1) && pd->blksize)
479  {
     // number of blocks, rounding up for a partial last block
480  size_t nb = (pd->size + pd->blksize - 1) / pd->blksize;
481  off_t off = 0;
482  size_t size = pd->blksize;
483  for (i = 0; i < nb; i++)
484  {
485  if (i == nb - 1)
486  {
     // last block: remainder of the file, or a full block if it divides evenly
487  size = pd->size % pd->blksize;
488  if (!size)
489  size = pd->blksize;
490  }
491  size_t blkno = bl.addBlock(off, size);
492  if (int(i) < pd->nsha1)
493  {
494  bl.setChecksum(blkno, "SHA1", 20, &pd->sha1[20 * i]);
     // the zsync rolling checksum is only attached to blocks that also have a sha1
495  if (int(i) < pd->nzsync)
496  {
497  unsigned char *p = &pd->zsync[4 * i];
     // assemble the four stored bytes with p[0] as the least significant byte
498  bl.setRsum(blkno, 4, p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24, pd->blksize);
499  }
500  }
501  off += pd->blksize;
502  }
503  }
504  return bl;
505 }
506 
507  } // namespace media
508 } // namespace zypp
509 
size_t addBlock(off_t off, size_t size)
add a block with offset off and size size to the block list.
vector< unsigned char > piece
struct stateswitch * swtab[NUMSTATES]
#define ZYPP_THROW(EXCPT)
Drops a logline and throws the Exception.
Definition: Exception.h:320
static void XMLCALL characterData(void *userData, const XML_Char *s, int len)
vector< unsigned char > sha1
void parseBytes(const char *bytes, size_t len)
parse a chunk of a file consisting of metalink xml data.
Helper to create and pass std::istream.
Definition: InputStream.h:56
vector< struct ml_url > urls
void parse(const Pathname &filename)
parse a file consisting of metalink xml data
static const char * find_attr(const char *txt, const char **atts)
vector< unsigned char > zsync
void parseEnd()
tells the parser that all chunks are now processed
static void XMLCALL startElement(void *userData, const char *name, const char **atts)
boost::noncopyable NonCopyable
Ensure derived classes cannot be copied.
Definition: NonCopyable.h:26
static void XMLCALL endElement(void *userData, const char *name)
struct ml_parsedata * pd
void setRsum(size_t blkno, int rsl, unsigned int rs, size_t rspad=0)
set / verify the (weak) rolling checksum over a single block
static bool urlcmp(const ml_url &a, const ml_url &b)
MediaBlockList getBlockList()
return the block list from the parsed metalink data
std::istream & stream() const
The std::istream.
Definition: InputStream.h:93
static struct stateswitch stateswitches[]
enum state sbtab[NUMSTATES]
Base class for Exception.
Definition: Exception.h:143
std::vector< Url > getUrls()
return the download urls from the parsed metalink data
void setFileChecksum(std::string ctype, int cl, unsigned char *c)
set / verify the checksum over the whole file
void setChecksum(size_t blkno, std::string cstype, int csl, unsigned char *cs, size_t cspad=0)
set / verify the (strong) checksum over a single block
vector< unsigned char > chksum
#define c2h(c)
Url manipulation class.
Definition: Url.h:87
static int hexstr2bytes(unsigned char *buf, const char *str, int buflen)