libzypp 17.31.7
metalinkparser.cc
Go to the documentation of this file.
1/*---------------------------------------------------------------------\
2| ____ _ __ __ ___ |
3| |__ / \ / / . \ . \ |
4| / / \ V /| _/ _/ |
5| / /__ | | | | | | |
6| /_____||_| |_| |_| |
7| |
8\---------------------------------------------------------------------*/
13#include "metalinkparser.h"
14#include <zypp-core/base/Logger.h>
15#include <zypp-core/ByteArray.h>
16#include <zypp-core/AutoDispose.h>
17
18#include <stack>
19#include <vector>
20#include <algorithm>
21
22#include <libxml2/libxml/SAX2.h>
23
24using namespace zypp::base;
25
26namespace zypp::media {
46 };
47
48 struct transition {
49 std::string elementName; //< Name of the element for the transition to trigger
50 ParserState transitionTo; //< The state we go into when the element name in \a elementName is encountered
51 int docontent; //< Store the content of the element in the \a content member
52 };
53
59 const std::unordered_map<ParserState, std::vector<transition> > & transitions () {
60 static std::unordered_map<ParserState, std::vector<transition> > map {
61 { STATE_START, {
62 { "metalink", STATE_METALINK, 0},
63 }
64 },
66 { "files", STATE_FILES, 0 },
67 { "file", STATE_M4FILE, 0 },
68 }
69 },
70 { STATE_FILES, {
71 { "file", STATE_FILE, 0},
72 }
73 },
74 { STATE_FILE, {
75 { "size", STATE_SIZE, 1 },
76 { "verification", STATE_VERIFICATION, 0 },
77 { "resources", STATE_RESOURCES, 0 },
78 }
79 },
81 { "hash", STATE_HASH, 1 },
82 { "pieces", STATE_PIECES, 0 },
83 }
84 },
85 { STATE_PIECES, {
86 { "hash", STATE_PHASH, 1 },
87 }
88 },
90 { "url", STATE_URL, 1 },
91 }
92 },
93 { STATE_M4FILE, {
94 { "size", STATE_M4SIZE, 1 },
95 { "hash", STATE_M4HASH, 1},
96 { "url", STATE_M4URL, 1},
97 { "pieces", STATE_M4PIECES, 0},
98 }
99 },
100 { STATE_M4PIECES, {
101 { "hash", STATE_M4PHASH, 1 },
102 }
103 },
104 };
105
106 return map;
107 }
108
109static void XMLCALL startElement(void *userData, const xmlChar *name, const xmlChar **atts);
110static void XMLCALL endElement(void *userData, const xmlChar *name);
111static void XMLCALL characterData(void *userData, const xmlChar *s, int len);
112
115 : parser( nullptr )
116 , state( STATE_START )
117 , depth( 0 )
118 , statedepth( 0 )
119 , docontent( 0 )
120 , gotfile( 0 )
121 , size( -1 )
122 , blksize( 0 )
123 , piecel( 0 )
124 , chksuml( 0 )
125 {
126 content.reserve( 256 );
127
128 xmlSAXHandler sax;
129 memset(&sax, 0, sizeof(sax));
130 sax.startElement = startElement;
131 sax.endElement = endElement;
132 sax.characters = characterData;
133
134 // internally creates a copy of the xmlSAXHandler, so having it as a local variable is safe
135 parser = AutoDispose<xmlParserCtxtPtr>( xmlCreatePushParserCtxt(&sax, this, NULL, 0, NULL), xmlFreeParserCtxt );
136 }
137
138 void doTransition ( const transition &t ) {
139 parentStates.push( state );
143 content.clear();
144 }
145
146 void popState () {
147 state = parentStates.top();
148 statedepth--;
149 parentStates.pop();
150
151 }
152
154
155 ParserState state; //< current state as defined in \ref stateswitch
156 std::stack<ParserState> parentStates;
157
158 int depth; //< current element depth of traversing the document elements
159
166
167 std::string content; //< content of the current element
168 int docontent; //< should the content of the current elem be parsed
169
171 off_t size;
172 std::vector<MetalinkMirror> urls;
173 size_t blksize;
174
175 std::vector<UByteArray> piece;
177
178 std::vector<UByteArray> sha1;
179 std::vector<UByteArray> zsync;
180
183};
184
189static const char *
190find_attr(const char *txt, const xmlChar **atts)
191{
192 if(!atts) {
193 return nullptr;
194 }
195
196 for (; *atts; atts += 2)
197 {
198 if (!strcmp(reinterpret_cast<const char*>(*atts), txt))
199 return reinterpret_cast<const char*>(atts[1]);
200 }
201 return nullptr;
202}
203
204static void XMLCALL
205startElement(void *userData, const xmlChar *name, const xmlChar **atts)
206{
207 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
208
209 // if the current element depth does not match the expected depth for the current state we
210 // ignore the element and just increase the depth
211 if (pd->depth != pd->statedepth) {
212 pd->depth++;
213 return;
214 }
215 pd->depth++;
216
217 const auto &trMap = transitions();
218 const auto currStateTrs = trMap.find( pd->state );
219 if ( currStateTrs == trMap.end() )
220 return;
221
222 // check if the current element name is part of our transitions
223 auto foundTr = std::find_if( currStateTrs->second.begin(), currStateTrs->second.end(), [name]( const auto &tr ){
224 return tr.elementName == reinterpret_cast<const char *>(name);
225 });
226
227 if ( foundTr == currStateTrs->second.end() ) {
228 // we found no possible transition, ignore
229 return;
230 }
231
232 if ( ( foundTr->transitionTo == STATE_FILE || foundTr->transitionTo == STATE_M4FILE ) && pd->gotfile++)
233 return; /* ignore all but the first file */
234
235 // advance the state machine and prepare variables for the new state
236 pd->doTransition( *foundTr );
237
238 switch(pd->state)
239 {
240 case STATE_URL:
241 case STATE_M4URL:
242 {
243 const char *priority = find_attr("priority", atts);
244 const char *preference = find_attr("preference", atts);
245 const char *maxconnections = find_attr("maxconnections", atts);
246 int prio;
247 auto &mirr = pd->urls.emplace_back();
248 if (priority)
249 prio = str::strtonum<int>(priority);
250 else if (preference)
251 prio = 101 - str::strtonum<int>(preference);
252 else
253 prio = 999999;
254 mirr.priority = prio;
255
256 if ( maxconnections )
257 mirr.maxConnections = str::strtonum<int>( maxconnections );
258
259 break;
260 }
261 case STATE_PIECES:
262 case STATE_M4PIECES:
263 {
264 const char *type = find_attr("type", atts);
265 const char *length = find_attr("length", atts);
266 size_t blksize;
267
268 if (!type || !length)
269 {
270 pd->popState();
271 break;
272 }
273 blksize = str::strtonum<unsigned long>(length);
274 if (!blksize || (pd->blksize && pd->blksize != blksize))
275 {
276 pd->popState();
277 break;
278 }
279 pd->blksize = blksize;
280 pd->piece.clear();
281 if (!strcmp(type, "sha1") || !strcmp(type, "sha-1"))
282 pd->piecel = 20;
283 else if (!strcmp(type, "zsync"))
284 pd->piecel = 4;
285 else
286 {
287 pd->popState();
288 break;
289 }
290 break;
291 }
292 case STATE_HASH:
293 case STATE_M4HASH:
294 {
295 const char *type = find_attr("type", atts);
296 if (!type)
297 type = "?";
298 if ((!strcmp(type, "sha1") || !strcmp(type, "sha-1")) && pd->chksuml < 20)
299 pd->chksuml = 20;
300 else if (!strcmp(type, "sha256") || !strcmp(type, "sha-256"))
301 pd->chksuml = 32;
302 else
303 {
304 pd->popState();
305 pd->docontent = 0;
306 }
307 break;
308 }
309 case STATE_PHASH:
310 case STATE_M4PHASH:
311 {
312 const char *piece = find_attr("piece", atts);
313 if ( pd->state == STATE_PHASH && (!piece || str::strtonum<uint>(piece) != pd->piece.size()) )
314 {
315 pd->popState();
316 }
317 break;
318 }
319 default:
320 break;
321 }
322}
323
325{
326 return Digest::hexStringToUByteArray( str );
327}
328
329static void XMLCALL
330endElement(void *userData, const xmlChar *)
331{
332 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
333 //printf("end depth %d-%d name %s\n", pd->depth, pd->statedepth, name);
334 if (pd->depth != pd->statedepth)
335 {
336 pd->depth--;
337 return;
338 }
339 switch (pd->state)
340 {
341 case STATE_SIZE:
342 case STATE_M4SIZE:
343 pd->size = (off_t)str::strtonum<off_t>(pd->content); //strtoull(pd->content, 0, 10);
344 break;
345 case STATE_HASH:
346 case STATE_M4HASH:
347 pd->chksum.clear();
348 pd->chksum = hexstr2bytes( pd->content );
349 if ( pd->content.length() != size_t(pd->chksuml) * 2 || !pd->chksum.size() )
350 {
351 pd->chksum.clear();
352 pd->chksuml = 0;
353 }
354 break;
355 case STATE_PHASH:
356 case STATE_M4PHASH: {
357 if ( pd->content.length() != size_t(pd->piecel) * 2 )
358 break;
359 UByteArray pieceHash = hexstr2bytes( pd->content );
360 if ( !pieceHash.size() )
361 pieceHash.resize( pd->piecel, 0 );
362 pd->piece.push_back( pieceHash );
363 break;
364 }
365 case STATE_PIECES:
366 case STATE_M4PIECES:
367 if (pd->piecel == 4)
368 pd->zsync = pd->piece;
369 else
370 pd->sha1 = pd->piece;
371
372 pd->piecel = 0;
373 pd->piece.clear();
374 break;
375 case STATE_URL:
376 case STATE_M4URL:
377 if ( pd->content.length() )
378 pd->urls.back().url = std::string(pd->content);
379 else
380 // without a actual URL the mirror is useless
381 pd->urls.pop_back();
382 break;
383 default:
384 break;
385 }
386
387 pd->depth--;
388 pd->popState();
389 pd->docontent = 0;
390}
391
392static void XMLCALL
393characterData(void *userData, const xmlChar *s, int len)
394{
395 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
396 if (!pd->docontent)
397 return;
398
399 if ( pd->content.length() + len + 1 > pd->content.capacity() )
400 pd->content.reserve( pd->content.capacity() + 256 );
401 pd->content.append( s, s+len );
402}
403
404
406 : pd( new ml_parsedata )
407{}
408
410{
411 delete pd;
412}
413
414void
416{
417 MIL << "Begin parse " << filename << std::endl;
418 parse(InputStream(filename));
419 MIL << "End parse " << filename << std::endl;
420}
421
422void
424{
425 char buf[4096];
426 if (!is.stream())
427 ZYPP_THROW(Exception("MetaLinkParser: no such file"));
428 while (is.stream().good())
429 {
430 is.stream().read(buf, sizeof(buf));
431 parseBytes(buf, is.stream().gcount());
432 }
433 parseEnd();
434}
435
436void
437MetaLinkParser::parseBytes(const char *buf, size_t len)
438{
439 if (!len)
440 return;
441
442 if (xmlParseChunk(pd->parser, buf, len, 0)) {
443 ZYPP_THROW(Exception("Parse Error"));
444 }
445}
446
447void
449{
450 if (xmlParseChunk(pd->parser, NULL, 0, 1)) {
451 ZYPP_THROW(Exception("Parse Error"));
452 }
453 if (pd->urls.size() ) {
454 stable_sort(pd->urls.begin(), pd->urls.end(), []( const auto &a, const auto &b ){
455 return a.priority < b.priority;
456 });
457 }
458}
459
460std::vector<Url>
462{
463 std::vector<Url> urls;
464 for ( const auto &mirr : pd->urls )
465 urls.push_back( mirr.url );
466 return urls;
467}
468
469const std::vector<MetalinkMirror> &MetaLinkParser::getMirrors() const
470{
471 return pd->urls;
472}
473
475{
477 if (pd->chksuml == 20)
478 bl.setFileChecksum("SHA1", pd->chksuml, pd->chksum.data() );
479 else if (pd->chksuml == 32)
480 bl.setFileChecksum("SHA256", pd->chksuml, pd->chksum.data());
481 if (pd->size != off_t(-1) && pd->blksize)
482 {
483 size_t nb = (pd->size + pd->blksize - 1) / pd->blksize;
484 off_t off = 0;
485 size_t size = pd->blksize;
486 for ( size_t i = 0; i < nb; i++ )
487 {
488 if (i == nb - 1)
489 {
490 size = pd->size % pd->blksize;
491 if (!size)
492 size = pd->blksize;
493 }
494 size_t blkno = bl.addBlock(off, size);
495 if ( i < pd->sha1.size())
496 {
497 bl.setChecksum(blkno, "SHA1", 20, pd->sha1[i].data());
498 if ( i < pd->zsync.size())
499 {
500 unsigned char *p = pd->zsync[i].data();
501 bl.setRsum(blkno, 4, p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24, pd->blksize);
502 }
503 }
504 off += pd->blksize;
505 }
506 }
507 return bl;
508}
509
510const std::vector<UByteArray> &MetaLinkParser::getZsyncBlockHashes() const
511{
512 return pd->zsync;
513}
514
515const std::vector<UByteArray> &MetaLinkParser::getSHA1BlockHashes() const
516{
517 return pd->sha1;
518}
519
520} // namespace zypp::media
Base class for Exception.
Definition: Exception.h:146
Helper to create and pass std::istream.
Definition: inputstream.h:57
std::istream & stream() const
The std::istream.
Definition: inputstream.h:93
void setRsum(size_t blkno, int rsl, unsigned int rs, size_t rspad=0)
set / verify the (weak) rolling checksum over a single block
void setFileChecksum(std::string ctype, int cl, unsigned char *c)
set / verify the checksum over the whole file
size_t addBlock(off_t off, size_t size)
add a block with offset off and size size to the block list.
void setChecksum(size_t blkno, std::string cstype, int csl, unsigned char *cs, size_t cspad=0)
set / verify the (strong) checksum over a single block
void parseEnd()
tells the parser that all chunks are now processed
struct ml_parsedata * pd
void parse(const Pathname &filename)
parse a file consisting of metalink xml data
MediaBlockList getBlockList() const
return the block list from the parsed metalink data
void parseBytes(const char *bytes, size_t len)
parse a chunk of a file consisting of metalink xml data.
const std::vector< UByteArray > & getSHA1BlockHashes() const
const std::vector< UByteArray > & getZsyncBlockHashes() const
const std::vector< MetalinkMirror > & getMirrors() const
return the mirrors from the parsed metalink data
std::vector< Url > getUrls() const
return the download urls from the parsed metalink data
unsigned short a
unsigned short b
String related utilities and Regular expression matching.
boost::noncopyable NonCopyable
Ensure derived classes cannot be copied.
Definition: NonCopyable.h:26
static void XMLCALL characterData(void *userData, const xmlChar *s, int len)
static const char * find_attr(const char *txt, const xmlChar **atts)
Look up a xml attribute in the passed array atts.
static void XMLCALL endElement(void *userData, const xmlChar *name)
static void XMLCALL startElement(void *userData, const xmlChar *name, const xmlChar **atts)
UByteArray hexstr2bytes(std::string str)
const std::unordered_map< ParserState, std::vector< transition > > & transitions()
std::vector< UByteArray > zsync
AutoDispose< xmlParserCtxtPtr > parser
void doTransition(const transition &t)
std::vector< MetalinkMirror > urls
std::vector< UByteArray > piece
std::stack< ParserState > parentStates
std::vector< UByteArray > sha1
#define nullptr
Definition: Easy.h:55
#define ZYPP_THROW(EXCPT)
Drops a logline and throws the Exception.
Definition: Exception.h:428
#define MIL
Definition: Logger.h:96