libzypp  17.23.6
MetaLinkParser.cc
Go to the documentation of this file.
1 /*---------------------------------------------------------------------\
2 | ____ _ __ __ ___ |
3 | |__ / \ / / . \ . \ |
4 | / / \ V /| _/ _/ |
5 | / /__ | | | | | | |
6 | /_____||_| |_| |_| |
7 | |
8 \---------------------------------------------------------------------*/
14 #include <zypp/base/Logger.h>
15 
16 #include <sys/types.h>
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 
21 #include <vector>
22 #include <algorithm>
23 #include <iostream>
24 #include <fstream>
25 
26 #include <libxml2/libxml/SAX2.h>
27 
28 using namespace zypp::base;
29 
30 namespace zypp {
31  namespace media {
32 
33 enum state {
52 };
53 
54 struct stateswitch {
55  enum state from;
56  std::string ename;
57  enum state to;
58  int docontent;
59 };
60 
61 static struct stateswitch stateswitches[] = {
62  { STATE_START, "metalink", STATE_METALINK, 0 },
63  { STATE_METALINK, "files", STATE_FILES, 0 },
64  { STATE_METALINK, "file", STATE_M4FILE, 0 },
65  { STATE_FILES, "file", STATE_FILE, 0 },
66  { STATE_FILE, "size", STATE_SIZE, 1 },
67  { STATE_FILE, "verification", STATE_VERIFICATION, 0 },
68  { STATE_FILE, "resources", STATE_RESOURCES, 0 },
69  { STATE_VERIFICATION, "hash", STATE_HASH, 1 },
70  { STATE_VERIFICATION, "pieces", STATE_PIECES, 0 },
71  { STATE_PIECES, "hash", STATE_PHASH, 1 },
72  { STATE_RESOURCES, "url", STATE_URL, 1 },
73  { STATE_M4FILE, "size", STATE_M4SIZE, 1 },
74  { STATE_M4FILE, "hash", STATE_M4HASH, 1},
75  { STATE_M4FILE, "url", STATE_M4URL, 1},
76  { STATE_M4FILE, "pieces", STATE_M4PIECES, 0},
77  { STATE_M4PIECES, "hash", STATE_M4PHASH, 1 },
78  { NUMSTATES }
79 };
80 
81 struct ml_url {
83  : priority( 0 )
84  {}
85  int priority;
86  std::string url;
87 };
88 
89 static void XMLCALL startElement(void *userData, const xmlChar *name, const xmlChar **atts);
90 static void XMLCALL endElement(void *userData, const xmlChar *name);
91 static void XMLCALL characterData(void *userData, const xmlChar *s, int len);
92 
95  : parser( nullptr )
96  , depth( 0 )
97  , state( STATE_START )
98  , statedepth( 0 )
99  , content( reinterpret_cast<char *>(malloc(256)) )
100  , lcontent( 0 )
101  , acontent( 256 )
102  , docontent( 0 )
103  , called( 0 )
104  , gotfile( 0 )
105  , size( -1 )
106  , nurls( 0 )
107  , blksize( 0 )
108  , npiece( 0 )
109  , piecel( 0 )
110  , nsha1( 0 )
111  , nzsync( 0 )
112  , chksuml( 0 )
113  {
114  struct stateswitch *sw;
115  int i;
116  memset( swtab, 0, sizeof(swtab) );
117  memset( sbtab, 0, sizeof(sbtab) );
118  for (i = 0, sw = stateswitches; sw->from != NUMSTATES; i++, sw++)
119  {
120  if (!swtab[sw->from])
121  swtab[sw->from] = sw;
122  sbtab[sw->to] = sw->from;
123  }
124 
125  xmlSAXHandler sax;
126  memset(&sax, 0, sizeof(sax));
127  sax.startElement = startElement;
128  sax.endElement = endElement;
129  sax.characters = characterData;
130 
131  //internally creates a copy of xmlSaxHandler, so having it as local variable is safe
132  parser = xmlCreatePushParserCtxt(&sax, this, NULL, 0, NULL);
133  }
134 
136  {
137  if (parser) {
138  xmlFreeParserCtxt(parser);
139  parser = nullptr;
140  }
141  free(content);
142  }
143 
144  xmlParserCtxtPtr parser;
145  int depth;
146  enum state state;
148  char *content;
149  int lcontent;
150  int acontent;
152  struct stateswitch *swtab[NUMSTATES];
153  enum state sbtab[NUMSTATES];
154 
155  int called;
156  int gotfile;
157  off_t size;
158  std::vector<struct ml_url> urls;
159  int nurls;
160  size_t blksize;
161 
162  std::vector<unsigned char> piece;
163  int npiece;
164  int piecel;
165 
166  std::vector<unsigned char> sha1;
167  int nsha1;
168  std::vector<unsigned char> zsync;
169  int nzsync;
170 
171  std::vector<unsigned char> chksum;
172  int chksuml;
173 };
174 
175 static const char *
176 find_attr(const char *txt, const xmlChar **atts)
177 {
178  if(!atts) {
179  return 0;
180  }
181 
182  for (; *atts; atts += 2)
183  {
184  if (!strcmp(reinterpret_cast<const char*>(*atts), txt))
185  return reinterpret_cast<const char*>(atts[1]);
186  }
187  return 0;
188 }
189 
190 static void XMLCALL
191 startElement(void *userData, const xmlChar *name, const xmlChar **atts)
192 {
193  struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
194  struct stateswitch *sw;
195  if (pd->depth != pd->statedepth)
196  {
197  pd->depth++;
198  return;
199  }
200  pd->depth++;
201  if (!pd->swtab[pd->state])
202  return;
203  for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++) /* find name in statetable */
204  if (sw->ename == reinterpret_cast<const char *>(name))
205  break;
206  if (sw->from != pd->state)
207  return;
208  if ((sw->to == STATE_FILE || sw->to == STATE_M4FILE) && pd->gotfile++)
209  return; /* ignore all but the first file */
210  //printf("start depth %d name %s\n", pd->depth, name);
211  pd->state = sw->to;
212  pd->docontent = sw->docontent;
213  pd->statedepth = pd->depth;
214  pd->lcontent = 0;
215  *pd->content = 0;
216  switch(pd->state)
217  {
218  case STATE_URL:
219  case STATE_M4URL:
220  {
221  const char *priority = find_attr("priority", atts);
222  const char *preference = find_attr("preference", atts);
223  int prio;
224  pd->urls.push_back(ml_url());
225  if (priority)
226  prio = atoi(priority);
227  else if (preference)
228  prio = 101 - atoi(preference);
229  else
230  prio = 999999;
231  pd->urls.back().priority = prio;
232  break;
233  }
234  case STATE_PIECES:
235  case STATE_M4PIECES:
236  {
237  const char *type = find_attr("type", atts);
238  const char *length = find_attr("length", atts);
239  size_t blksize;
240 
241  if (!type || !length)
242  {
243  pd->state = pd->sbtab[pd->state];
244  pd->statedepth--;
245  break;
246  }
247  blksize = strtoul(length, 0, 10);
248  if (!blksize || (pd->blksize && pd->blksize != blksize))
249  {
250  pd->state = pd->sbtab[pd->state];
251  pd->statedepth--;
252  break;
253  }
254  pd->blksize = blksize;
255  pd->npiece = 0;
256  pd->piece.clear();
257  if (!strcmp(type, "sha1") || !strcmp(type, "sha-1"))
258  pd->piecel = 20;
259  else if (!strcmp(type, "zsync"))
260  pd->piecel = 4;
261  else
262  {
263  pd->state = pd->sbtab[pd->state];
264  pd->statedepth--;
265  break;
266  }
267  break;
268  }
269  case STATE_HASH:
270  case STATE_M4HASH:
271  {
272  const char *type = find_attr("type", atts);
273  if (!type)
274  type = "?";
275  if ((!strcmp(type, "sha1") || !strcmp(type, "sha-1")) && pd->chksuml < 20)
276  pd->chksuml = 20;
277  else if (!strcmp(type, "sha256") || !strcmp(type, "sha-256"))
278  pd->chksuml = 32;
279  else
280  {
281  pd->state = pd->sbtab[pd->state];
282  pd->statedepth--;
283  pd->docontent = 0;
284  }
285  break;
286  }
287  case STATE_PHASH:
288  case STATE_M4PHASH:
289  {
290  const char *piece = find_attr("piece", atts);
291  if (pd->state == STATE_PHASH && (!piece || atoi(piece) != pd->npiece))
292  {
293  pd->state = pd->sbtab[pd->state];
294  pd->statedepth--;
295  }
296  break;
297  }
298  default:
299  break;
300  }
301 }
302 
/**
 * Value of a single hexadecimal digit.
 *
 * \return 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', -1 for any other character
 */
static int
hex_digit_value(char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return c - ('a' - 10);
  if (c >= 'A' && c <= 'F')
    return c - ('A' - 10);
  return -1;
}

/**
 * Decode \a buflen bytes from a string of hexadecimal digits.
 *
 * \param buf     receives the decoded bytes
 * \param str     NUL-terminated text; two digits are consumed per byte
 * \param buflen  number of bytes to decode
 * \return \a buflen on success, 0 as soon as a character is not a hex digit
 *         (which includes reaching the end of the string too early)
 *
 * Bytes decoded before a failure, and the high nibble of the failing byte,
 * have already been written to \a buf when 0 is returned.
 */
static int
hexstr2bytes(unsigned char *buf, const char *str, int buflen)
{
  for (int idx = 0; idx < buflen; ++idx, str += 2)
    {
      // Check the first digit before touching the second one, so the
      // terminating NUL is never read past.
      const int high = hex_digit_value(str[0]);
      if (high < 0)
        return 0;
      buf[idx] = high;

      const int low = hex_digit_value(str[1]);
      if (low < 0)
        return 0;
      buf[idx] = (buf[idx] << 4) | low;
    }
  return buflen;
}
327 
328 static void XMLCALL
329 endElement(void *userData, const xmlChar *name)
330 {
331  struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
332  // printf("end depth %d-%d name %s\n", pd->depth, pd->statedepth, name);
333  if (pd->depth != pd->statedepth)
334  {
335  pd->depth--;
336  return;
337  }
338  pd->depth--;
339  pd->statedepth--;
340  switch (pd->state)
341  {
342  case STATE_SIZE:
343  case STATE_M4SIZE:
344  pd->size = (off_t)strtoull(pd->content, 0, 10);
345  break;
346  case STATE_HASH:
347  case STATE_M4HASH:
348  pd->chksum.clear();
349  pd->chksum.resize(pd->chksuml, 0);
350  if (strlen(pd->content) != size_t(pd->chksuml) * 2 || !hexstr2bytes(&pd->chksum[0], pd->content, pd->chksuml))
351  {
352  pd->chksum.clear();
353  pd->chksuml = 0;
354  }
355  break;
356  case STATE_PHASH:
357  case STATE_M4PHASH:
358  if (strlen(pd->content) != size_t(pd->piecel) * 2)
359  break;
360  pd->piece.resize(pd->piecel * (pd->npiece + 1), 0);
361  if (!hexstr2bytes(&pd->piece[pd->piecel * pd->npiece], pd->content, pd->piecel))
362  {
363  pd->piece.resize(pd->piecel * pd->npiece, 0);
364  break;
365  }
366  pd->npiece++;
367  break;
368  case STATE_PIECES:
369  case STATE_M4PIECES:
370  if (pd->piecel == 4)
371  {
372  pd->zsync = pd->piece;
373  pd->nzsync = pd->npiece;
374  }
375  else
376  {
377  pd->sha1 = pd->piece;
378  pd->nsha1 = pd->npiece;
379  }
380  pd->piecel = pd->npiece = 0;
381  pd->piece.clear();
382  break;
383  case STATE_URL:
384  case STATE_M4URL:
385  if (*pd->content)
386  {
387  pd->urls[pd->nurls].url = std::string(pd->content);
388  pd->nurls++;
389  }
390  break;
391  default:
392  break;
393  }
394  pd->state = pd->sbtab[pd->state];
395  pd->docontent = 0;
396 }
397 
398 static void XMLCALL
399 characterData(void *userData, const xmlChar *s, int len)
400 {
401  struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
402  int l;
403  char *c;
404  if (!pd->docontent)
405  return;
406  l = pd->lcontent + len + 1;
407  if (l > pd->acontent)
408  {
409  pd->content = reinterpret_cast<char *>(realloc(pd->content, l + 256));
410  pd->acontent = l + 256;
411  }
412  c = pd->content + pd->lcontent;
413  pd->lcontent += len;
414  while (len-- > 0)
415  *c++ = *s++;
416  *c = 0;
417 }
418 
419 
420 MetaLinkParser::MetaLinkParser()
421  : pd( new ml_parsedata )
422 {}
423 
425 {
426  delete pd;
427 }
428 
429 void
431 {
432  parse(InputStream(filename));
433 }
434 
435 void
437 {
438  char buf[4096];
439  if (!is.stream())
440  ZYPP_THROW(Exception("MetaLinkParser: no such file"));
441  while (is.stream().good())
442  {
443  is.stream().read(buf, sizeof(buf));
444  parseBytes(buf, is.stream().gcount());
445  }
446  parseEnd();
447 }
448 
449 void
450 MetaLinkParser::parseBytes(const char *buf, size_t len)
451 {
452  if (!len)
453  return;
454 
455  if (xmlParseChunk(pd->parser, buf, len, 0)) {
456  ZYPP_THROW(Exception("Parse Error"));
457  }
458 }
459 
460 static bool urlcmp(const ml_url &a, const ml_url &b)
461 {
462  return a.priority < b.priority;
463 }
464 
465 void
467 {
468  if (xmlParseChunk(pd->parser, NULL, 0, 1)) {
469  ZYPP_THROW(Exception("Parse Error"));
470  }
471  if (pd->nurls)
472  stable_sort(pd->urls.begin(), pd->urls.end(), urlcmp);
473 }
474 
475 std::vector<Url>
477 {
478  std::vector<Url> urls;
479  int i;
480  for (i = 0; i < pd->nurls; ++i)
481  urls.push_back(Url(pd->urls[i].url));
482  return urls;
483 }
484 
487 {
488  size_t i;
489  MediaBlockList bl(pd->size);
490  if (pd->chksuml == 20)
491  bl.setFileChecksum("SHA1", pd->chksuml, &pd->chksum[0]);
492  else if (pd->chksuml == 32)
493  bl.setFileChecksum("SHA256", pd->chksuml, &pd->chksum[0]);
494  if (pd->size != off_t(-1) && pd->blksize)
495  {
496  size_t nb = (pd->size + pd->blksize - 1) / pd->blksize;
497  off_t off = 0;
498  size_t size = pd->blksize;
499  for (i = 0; i < nb; i++)
500  {
501  if (i == nb - 1)
502  {
503  size = pd->size % pd->blksize;
504  if (!size)
505  size = pd->blksize;
506  }
507  size_t blkno = bl.addBlock(off, size);
508  if (int(i) < pd->nsha1)
509  {
510  bl.setChecksum(blkno, "SHA1", 20, &pd->sha1[20 * i]);
511  if (int(i) < pd->nzsync)
512  {
513  unsigned char *p = &pd->zsync[4 * i];
514  bl.setRsum(blkno, 4, p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24, pd->blksize);
515  }
516  }
517  off += pd->blksize;
518  }
519  }
520  return bl;
521 }
522 
523  } // namespace media
524 } // namespace zypp
525 
c2h
#define c2h(c)
zypp::media::stateswitch::from
enum state from
Definition: MetaLinkParser.cc:55
zypp::media::ml_parsedata::swtab
struct stateswitch * swtab[NUMSTATES]
Definition: MetaLinkParser.cc:152
zypp::media::hexstr2bytes
static int hexstr2bytes(unsigned char *buf, const char *str, int buflen)
Definition: MetaLinkParser.cc:304
zypp::media::ml_url::url
std::string url
Definition: MetaLinkParser.cc:86
zypp::media::STATE_M4HASH
Definition: MetaLinkParser.cc:43
zypp::media::STATE_M4PIECES
Definition: MetaLinkParser.cc:45
zypp::media::ml_parsedata::blksize
size_t blksize
Definition: MetaLinkParser.cc:160
zypp::media::STATE_PIECES
Definition: MetaLinkParser.cc:44
zypp::media::ml_url::ml_url
ml_url()
Definition: MetaLinkParser.cc:82
zypp::media::characterData
static void XMLCALL characterData(void *userData, const xmlChar *s, int len)
Definition: MetaLinkParser.cc:399
zypp::media::ml_url
Definition: MetaLinkParser.cc:81
zypp::media::ml_url::priority
int priority
Definition: MetaLinkParser.cc:85
zypp::media::STATE_METALINK
Definition: MetaLinkParser.cc:35
zypp::media::STATE_START
Definition: MetaLinkParser.cc:34
zypp::media::ml_parsedata::called
int called
Definition: MetaLinkParser.cc:155
zypp::Exception
Base class for Exception.
Definition: Exception.h:145
zypp::media::endElement
static void XMLCALL endElement(void *userData, const xmlChar *name)
Definition: MetaLinkParser.cc:329
zypp::media::ml_parsedata::zsync
std::vector< unsigned char > zsync
Definition: MetaLinkParser.cc:168
zypp::media::MediaBlockList
Definition: MediaBlockList.h:35
zypp::media::ml_parsedata::lcontent
int lcontent
Definition: MetaLinkParser.cc:149
zypp::media::ml_parsedata::statedepth
int statedepth
Definition: MetaLinkParser.cc:147
ZYPP_THROW
#define ZYPP_THROW(EXCPT)
Drops a logline and throws the Exception.
Definition: Exception.h:392
zypp::media::ml_parsedata::nsha1
int nsha1
Definition: MetaLinkParser.cc:167
zypp::media::ml_parsedata::acontent
int acontent
Definition: MetaLinkParser.cc:150
zypp::media::STATE_SIZE
Definition: MetaLinkParser.cc:39
zypp::media::ml_parsedata::parser
xmlParserCtxtPtr parser
Definition: MetaLinkParser.cc:144
zypp::media::MetaLinkParser::parseEnd
void parseEnd()
tells the parser that all chunks are now processed
Definition: MetaLinkParser.cc:466
nullptr
#define nullptr
Definition: Easy.h:55
zypp::media::STATE_VERIFICATION
Definition: MetaLinkParser.cc:41
zypp::media::ml_parsedata::sha1
std::vector< unsigned char > sha1
Definition: MetaLinkParser.cc:166
zypp::media::ml_parsedata::nurls
int nurls
Definition: MetaLinkParser.cc:159
zypp::media::MediaBlockList::setChecksum
void setChecksum(size_t blkno, std::string cstype, int csl, unsigned char *cs, size_t cspad=0)
set / verify the (strong) checksum over a single block
Definition: MediaBlockList.cc:82
zypp::media::MetaLinkParser::getBlockList
MediaBlockList getBlockList()
return the block list from the parsed metalink data
Definition: MetaLinkParser.cc:486
zypp::media::ml_parsedata::chksum
std::vector< unsigned char > chksum
Definition: MetaLinkParser.cc:171
zypp::media::STATE_M4SIZE
Definition: MetaLinkParser.cc:40
zypp::media::stateswitch::docontent
int docontent
Definition: MetaLinkParser.cc:58
zypp::media::ml_parsedata::urls
std::vector< struct ml_url > urls
Definition: MetaLinkParser.cc:158
zypp::media::ml_parsedata::gotfile
int gotfile
Definition: MetaLinkParser.cc:156
zypp::media::state
state
Definition: MetaLinkParser.cc:33
zypp::media::MetaLinkParser::getUrls
std::vector< Url > getUrls()
return the download urls from the parsed metalink data
Definition: MetaLinkParser.cc:476
zypp::media::MediaBlockList::setRsum
void setRsum(size_t blkno, int rsl, unsigned int rs, size_t rspad=0)
set / verify the (weak) rolling checksum over a single block
Definition: MediaBlockList.cc:101
zypp::media::NUMSTATES
Definition: MetaLinkParser.cc:51
zypp::media::ml_parsedata::sbtab
enum state sbtab[NUMSTATES]
Definition: MetaLinkParser.cc:153
zypp::base::NonCopyable
boost::noncopyable NonCopyable
Ensure derived classes cannot be copied.
Definition: NonCopyable.h:26
Logger.h
zypp::media::MediaBlockList::setFileChecksum
void setFileChecksum(std::string ctype, int cl, unsigned char *c)
set / verify the checksum over the whole file
Definition: MediaBlockList.cc:50
zypp::media::ml_parsedata::docontent
int docontent
Definition: MetaLinkParser.cc:151
zypp::media::urlcmp
static bool urlcmp(const ml_url &a, const ml_url &b)
Definition: MetaLinkParser.cc:460
zypp::InputStream
Helper to create and pass std::istream.
Definition: InputStream.h:56
zypp::media::STATE_URL
Definition: MetaLinkParser.cc:49
zypp
Easy-to use interface to the ZYPP dependency resolver.
Definition: CodePitfalls.doc:1
zypp::media::STATE_M4URL
Definition: MetaLinkParser.cc:50
zypp::media::ml_parsedata::nzsync
int nzsync
Definition: MetaLinkParser.cc:169
zypp::media::ml_parsedata::~ml_parsedata
~ml_parsedata()
Definition: MetaLinkParser.cc:135
zypp::InputStream::stream
std::istream & stream() const
The std::istream.
Definition: InputStream.h:93
zypp::media::stateswitches
static struct stateswitch stateswitches[]
Definition: MetaLinkParser.cc:61
zypp::media::MetaLinkParser::parse
void parse(const Pathname &filename)
parse a file consisting of metalink xml data
Definition: MetaLinkParser.cc:430
zypp::media::stateswitch
Definition: MetaLinkParser.cc:54
zypp::media::STATE_RESOURCES
Definition: MetaLinkParser.cc:48
zypp::media::STATE_HASH
Definition: MetaLinkParser.cc:42
zypp::media::STATE_M4FILE
Definition: MetaLinkParser.cc:38
zypp::media::ml_parsedata::ml_parsedata
ml_parsedata()
Definition: MetaLinkParser.cc:94
zypp::media::ml_parsedata::chksuml
int chksuml
Definition: MetaLinkParser.cc:172
zypp::media::ml_parsedata::size
off_t size
Definition: MetaLinkParser.cc:157
zypp::media::find_attr
static const char * find_attr(const char *txt, const xmlChar **atts)
Definition: MetaLinkParser.cc:176
zypp::media::stateswitch::ename
std::string ename
Definition: MetaLinkParser.cc:56
zypp::media::ml_parsedata
Definition: MetaLinkParser.cc:93
zypp::media::startElement
static void XMLCALL startElement(void *userData, const xmlChar *name, const xmlChar **atts)
Definition: MetaLinkParser.cc:191
zypp::media::ml_parsedata::state
enum state state
Definition: MetaLinkParser.cc:146
zypp::media::ml_parsedata::piecel
int piecel
Definition: MetaLinkParser.cc:164
zypp::media::MediaBlockList::addBlock
size_t addBlock(off_t off, size_t size)
add a block with offset off and size size to the block list.
Definition: MediaBlockList.cc:42
MetaLinkParser.h
zypp::filesystem::Pathname
Pathname.
Definition: Pathname.h:44
zypp::media::stateswitch::to
enum state to
Definition: MetaLinkParser.cc:57
zypp::base
Definition: DrunkenBishop.cc:24
zypp::media::MetaLinkParser::pd
struct ml_parsedata * pd
Definition: MetaLinkParser.h:66
zypp::media::STATE_PHASH
Definition: MetaLinkParser.cc:46
zypp::media::MetaLinkParser::parseBytes
void parseBytes(const char *bytes, size_t len)
parse a chunk of a file consisting of metalink xml data.
Definition: MetaLinkParser.cc:450
zypp::media::ml_parsedata::piece
std::vector< unsigned char > piece
Definition: MetaLinkParser.cc:162
zypp::media::STATE_FILES
Definition: MetaLinkParser.cc:36
str
String related utilities and Regular expression matching.
zypp::media::MetaLinkParser::~MetaLinkParser
~MetaLinkParser()
Definition: MetaLinkParser.cc:424
zypp::media::ml_parsedata::npiece
int npiece
Definition: MetaLinkParser.cc:163
zypp::media::STATE_FILE
Definition: MetaLinkParser.cc:37
zypp::Url
Url manipulation class.
Definition: Url.h:87
zypp::media::STATE_M4PHASH
Definition: MetaLinkParser.cc:47
zypp::media::ml_parsedata::depth
int depth
Definition: MetaLinkParser.cc:145
zypp::media::ml_parsedata::content
char * content
Definition: MetaLinkParser.cc:148