libzypp 17.31.23
metalinkparser.cc
Go to the documentation of this file.
1/*---------------------------------------------------------------------\
2| ____ _ __ __ ___ |
3| |__ / \ / / . \ . \ |
4| / / \ V /| _/ _/ |
5| / /__ | | | | | | |
6| /_____||_| |_| |_| |
7| |
8\---------------------------------------------------------------------*/
13#include "metalinkparser.h"
14#include <zypp-core/base/Logger.h>
15#include <zypp-core/ByteArray.h>
16#include <zypp-core/AutoDispose.h>
17
18#include <stack>
19#include <vector>
20#include <algorithm>
21
22#include <libxml2/libxml/SAX2.h>
23
24using namespace zypp::base;
25
26namespace zypp::env
27{
29 inline bool ZYPP_METALINK_DEBUG()
30 {
31 static bool val = [](){
32 const char * env = getenv("ZYPP_METALINK_DEBUG");
33 return( env && zypp::str::strToBool( env, true ) );
34 }();
35 return val;
36 }
37}
38
39namespace zypp::media {
59 };
60
61 struct transition {
62 std::string elementName; //< Name of the element for the transition to trigger
63 ParserState transitionTo; //< The state we go into when the element name in \a elementName is encountered
64 int docontent; //< Store the content of the element in the \a content member
65 };
66
72 const std::unordered_map<ParserState, std::vector<transition> > & transitions () {
73 static std::unordered_map<ParserState, std::vector<transition> > map {
74 { STATE_START, {
75 { "metalink", STATE_METALINK, 0},
76 }
77 },
79 { "files", STATE_FILES, 0 },
80 { "file", STATE_M4FILE, 0 },
81 }
82 },
83 { STATE_FILES, {
84 { "file", STATE_FILE, 0},
85 }
86 },
87 { STATE_FILE, {
88 { "size", STATE_SIZE, 1 },
89 { "verification", STATE_VERIFICATION, 0 },
90 { "resources", STATE_RESOURCES, 0 },
91 }
92 },
94 { "hash", STATE_HASH, 1 },
95 { "pieces", STATE_PIECES, 0 },
96 }
97 },
98 { STATE_PIECES, {
99 { "hash", STATE_PHASH, 1 },
100 }
101 },
102 { STATE_RESOURCES, {
103 { "url", STATE_URL, 1 },
104 }
105 },
106 { STATE_M4FILE, {
107 { "size", STATE_M4SIZE, 1 },
108 { "hash", STATE_M4HASH, 1},
109 { "url", STATE_M4URL, 1},
110 { "pieces", STATE_M4PIECES, 0},
111 }
112 },
113 { STATE_M4PIECES, {
114 { "hash", STATE_M4PHASH, 1 },
115 }
116 },
117 };
118
119 return map;
120 }
121
// Forward declarations of the SAX callbacks. They are installed into the
// xmlSAXHandler (startElement / endElement / characters) before their
// definitions appear further down in this file.
static void XMLCALL startElement(void *userData, const xmlChar *name, const xmlChar **atts);
static void XMLCALL endElement(void *userData, const xmlChar *name);
static void XMLCALL characterData(void *userData, const xmlChar *s, int len);
125
128 : parser( nullptr )
129 , state( STATE_START )
130 , depth( 0 )
131 , statedepth( 0 )
132 , docontent( 0 )
133 , gotfile( 0 )
134 , size( -1 )
135 , blksize( 0 )
136 , piecel( 0 )
137 , chksuml( 0 )
138 {
139 content.reserve( 256 );
140
141 xmlSAXHandler sax;
142 memset(&sax, 0, sizeof(sax));
143 sax.startElement = startElement;
144 sax.endElement = endElement;
145 sax.characters = characterData;
146
 //internally creates a copy of xmlSaxHandler, so having it as a local variable is safe
148 parser = AutoDispose<xmlParserCtxtPtr>( xmlCreatePushParserCtxt(&sax, this, NULL, 0, NULL), xmlFreeParserCtxt );
149 }
150
151 void doTransition ( const transition &t ) {
152 parentStates.push( state );
156 content.clear();
157 }
158
159 void popState () {
160 state = parentStates.top();
161 statedepth--;
162 parentStates.pop();
163
164 }
165
167
168 ParserState state; //< current state as defined in \ref stateswitch
169 std::stack<ParserState> parentStates;
170
171 int depth; //< current element depth of traversing the document elements
172
179
180 std::string content; //< content of the current element
181 int docontent; //< should the content of the current elem be parsed
182
184 off_t size;
185 std::vector<MetalinkMirror> urls;
186 size_t blksize;
187
188 std::vector<UByteArray> piece;
190
191 std::vector<UByteArray> sha1;
192 std::vector<UByteArray> zsync;
193
196};
197
202static const char *
203find_attr(const char *txt, const xmlChar **atts)
204{
205 if(!atts) {
206 return nullptr;
207 }
208
209 for (; *atts; atts += 2)
210 {
211 if (!strcmp(reinterpret_cast<const char*>(*atts), txt))
212 return reinterpret_cast<const char*>(atts[1]);
213 }
214 return nullptr;
215}
216
217static void XMLCALL
218startElement(void *userData, const xmlChar *name, const xmlChar **atts)
219{
220 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
221
222 // if the current element depth does not match the expected depth for the current state we
223 // ignore the element and just increase the depth
224 if (pd->depth != pd->statedepth) {
225 pd->depth++;
226 return;
227 }
228 pd->depth++;
229
230 const auto &trMap = transitions();
231 const auto currStateTrs = trMap.find( pd->state );
232 if ( currStateTrs == trMap.end() )
233 return;
234
235 // check if the current element name is part of our transitions
236 auto foundTr = std::find_if( currStateTrs->second.begin(), currStateTrs->second.end(), [name]( const auto &tr ){
237 return tr.elementName == reinterpret_cast<const char *>(name);
238 });
239
240 if ( foundTr == currStateTrs->second.end() ) {
241 // we found no possible transition, ignore
242 return;
243 }
244
245 if ( ( foundTr->transitionTo == STATE_FILE || foundTr->transitionTo == STATE_M4FILE ) && pd->gotfile++)
246 return; /* ignore all but the first file */
247
248 // advance the state machine and prepare variables for the new state
249 pd->doTransition( *foundTr );
250
251 switch(pd->state)
252 {
253 case STATE_URL:
254 case STATE_M4URL:
255 {
256 const char *priority = find_attr("priority", atts);
257 const char *preference = find_attr("preference", atts);
258 const char *maxconnections = find_attr("maxconnections", atts);
259 int prio;
260 auto &mirr = pd->urls.emplace_back();
261 if (priority)
262 prio = str::strtonum<int>(priority);
263 else if (preference)
264 prio = 101 - str::strtonum<int>(preference);
265 else
266 prio = 999999;
267 mirr.priority = prio;
268
269 if ( maxconnections )
270 mirr.maxConnections = str::strtonum<int>( maxconnections );
271
272 break;
273 }
274 case STATE_PIECES:
275 case STATE_M4PIECES:
276 {
277 const char *type = find_attr("type", atts);
278 const char *length = find_attr("length", atts);
279 size_t blksize;
280
281 if (!type || !length)
282 {
283 pd->popState();
284 break;
285 }
286 blksize = str::strtonum<unsigned long>(length);
287 if (!blksize || (pd->blksize && pd->blksize != blksize))
288 {
289 pd->popState();
290 break;
291 }
292 pd->blksize = blksize;
293 pd->piece.clear();
294 if (!strcmp(type, "sha1") || !strcmp(type, "sha-1"))
295 pd->piecel = 20;
296 else if (!strcmp(type, "zsync"))
297 pd->piecel = 4;
298 else
299 {
300 pd->popState();
301 break;
302 }
303 break;
304 }
305 case STATE_HASH:
306 case STATE_M4HASH:
307 {
308 const char *type = find_attr("type", atts);
309 if (!type)
310 type = "?";
311 if ((!strcmp(type, "sha1") || !strcmp(type, "sha-1")) && pd->chksuml < 20)
312 pd->chksuml = 20;
313 else if (!strcmp(type, "sha256") || !strcmp(type, "sha-256"))
314 pd->chksuml = 32;
315 else
316 {
317 pd->popState();
318 pd->docontent = 0;
319 }
320 break;
321 }
322 case STATE_PHASH:
323 case STATE_M4PHASH:
324 {
325 const char *piece = find_attr("piece", atts);
326 if ( pd->state == STATE_PHASH && (!piece || str::strtonum<uint>(piece) != pd->piece.size()) )
327 {
328 pd->popState();
329 }
330 break;
331 }
332 default:
333 break;
334 }
335}
336
338{
339 return Digest::hexStringToUByteArray( str );
340}
341
342static void XMLCALL
343endElement(void *userData, const xmlChar *)
344{
345 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
346 //printf("end depth %d-%d name %s\n", pd->depth, pd->statedepth, name);
347 if (pd->depth != pd->statedepth)
348 {
349 pd->depth--;
350 return;
351 }
352 switch (pd->state)
353 {
354 case STATE_SIZE:
355 case STATE_M4SIZE:
356 pd->size = (off_t)str::strtonum<off_t>(pd->content); //strtoull(pd->content, 0, 10);
357 break;
358 case STATE_HASH:
359 case STATE_M4HASH:
360 pd->chksum.clear();
361 pd->chksum = hexstr2bytes( pd->content );
362 if ( pd->content.length() != size_t(pd->chksuml) * 2 || !pd->chksum.size() )
363 {
364 pd->chksum.clear();
365 pd->chksuml = 0;
366 }
367 break;
368 case STATE_PHASH:
369 case STATE_M4PHASH: {
370 if ( pd->content.length() != size_t(pd->piecel) * 2 )
371 break;
372 UByteArray pieceHash = hexstr2bytes( pd->content );
373 if ( !pieceHash.size() )
374 pieceHash.resize( pd->piecel, 0 );
375 pd->piece.push_back( pieceHash );
376 break;
377 }
378 case STATE_PIECES:
379 case STATE_M4PIECES:
380 if (pd->piecel == 4)
381 pd->zsync = pd->piece;
382 else
383 pd->sha1 = pd->piece;
384
385 pd->piecel = 0;
386 pd->piece.clear();
387 break;
388 case STATE_URL:
389 case STATE_M4URL:
390 if ( pd->content.length() )
391 pd->urls.back().url = std::string(pd->content);
392 else
393 // without a actual URL the mirror is useless
394 pd->urls.pop_back();
395 break;
396 default:
397 break;
398 }
399
400 pd->depth--;
401 pd->popState();
402 pd->docontent = 0;
403}
404
405static void XMLCALL
406characterData(void *userData, const xmlChar *s, int len)
407{
408 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
409 if (!pd->docontent)
410 return;
411
412 if ( pd->content.length() + len + 1 > pd->content.capacity() )
413 pd->content.reserve( pd->content.capacity() + 256 );
414 pd->content.append( s, s+len );
415}
416
417
419 : pd( new ml_parsedata )
420{}
421
423{
424 delete pd;
425}
426
427void
429{
430 parse(InputStream(filename));
431}
432
433void
435{
436 char buf[4096];
437 if (!is.stream())
438 ZYPP_THROW(Exception("MetaLinkParser: no such file"));
439 while (is.stream().good())
440 {
441 is.stream().read(buf, sizeof(buf));
442 parseBytes(buf, is.stream().gcount());
443 }
444 parseEnd();
445 MIL << "Parsed " << pd->urls.size() << " mirrors from " << is.path() << std::endl;
446 if ( env::ZYPP_METALINK_DEBUG() ) {
447 for ( const auto &mirr : pd->urls )
448 DBG << "- " << mirr.priority << " " << mirr.url << std::endl;
449 }
450}
451
452void
453MetaLinkParser::parseBytes(const char *buf, size_t len)
454{
455 if (!len)
456 return;
457
458 if (xmlParseChunk(pd->parser, buf, len, 0)) {
459 ZYPP_THROW(Exception("Parse Error"));
460 }
461}
462
463void
465{
466 if (xmlParseChunk(pd->parser, NULL, 0, 1)) {
467 ZYPP_THROW(Exception("Parse Error"));
468 }
469 if (pd->urls.size() ) {
470 stable_sort(pd->urls.begin(), pd->urls.end(), []( const auto &a, const auto &b ){
471 return a.priority < b.priority;
472 });
473 }
474}
475
476std::vector<Url>
478{
479 std::vector<Url> urls;
480 for ( const auto &mirr : pd->urls )
481 urls.push_back( mirr.url );
482 return urls;
483}
484
485const std::vector<MetalinkMirror> &MetaLinkParser::getMirrors() const
486{
487 return pd->urls;
488}
489
491{
493 if (pd->chksuml == 20)
494 bl.setFileChecksum("SHA1", pd->chksuml, pd->chksum.data() );
495 else if (pd->chksuml == 32)
496 bl.setFileChecksum("SHA256", pd->chksuml, pd->chksum.data());
497 if (pd->size != off_t(-1) && pd->blksize)
498 {
499 size_t nb = (pd->size + pd->blksize - 1) / pd->blksize;
500 off_t off = 0;
501 size_t size = pd->blksize;
502 for ( size_t i = 0; i < nb; i++ )
503 {
504 if (i == nb - 1)
505 {
506 size = pd->size % pd->blksize;
507 if (!size)
508 size = pd->blksize;
509 }
510 size_t blkno = bl.addBlock(off, size);
511 if ( i < pd->sha1.size())
512 {
513 bl.setChecksum(blkno, "SHA1", 20, pd->sha1[i].data());
514 if ( i < pd->zsync.size())
515 {
516 unsigned char *p = pd->zsync[i].data();
517 bl.setRsum(blkno, 4, p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24, pd->blksize);
518 }
519 }
520 off += pd->blksize;
521 }
522 }
523 return bl;
524}
525
526const std::vector<UByteArray> &MetaLinkParser::getZsyncBlockHashes() const
527{
528 return pd->zsync;
529}
530
531const std::vector<UByteArray> &MetaLinkParser::getSHA1BlockHashes() const
532{
533 return pd->sha1;
534}
535
536} // namespace zypp::media
Reference counted access to a Tp object calling a custom Dispose function when the last AutoDispose h...
Definition: AutoDispose.h:94
Base class for Exception.
Definition: Exception.h:146
Helper to create and pass std::istream.
Definition: inputstream.h:57
std::istream & stream() const
The std::istream.
Definition: inputstream.h:93
const Pathname & path() const
Path to the input file or empty if no file.
Definition: inputstream.h:111
void setRsum(size_t blkno, int rsl, unsigned int rs, size_t rspad=0)
set / verify the (weak) rolling checksum over a single block
void setFileChecksum(std::string ctype, int cl, unsigned char *c)
set / verify the checksum over the whole file
size_t addBlock(off_t off, size_t size)
add a block with offset off and size size to the block list.
void setChecksum(size_t blkno, std::string cstype, int csl, unsigned char *cs, size_t cspad=0)
set / verify the (strong) checksum over a single block
void parseEnd()
tells the parser that all chunks are now processed
struct ml_parsedata * pd
void parse(const Pathname &filename)
parse a file consisting of metalink xml data
MediaBlockList getBlockList() const
return the block list from the parsed metalink data
void parseBytes(const char *bytes, size_t len)
parse a chunk of a file consisting of metalink xml data.
const std::vector< UByteArray > & getSHA1BlockHashes() const
const std::vector< UByteArray > & getZsyncBlockHashes() const
const std::vector< MetalinkMirror > & getMirrors() const
return the mirrors from the parsed metalink data
std::vector< Url > getUrls() const
return the download urls from the parsed metalink data
unsigned short a
unsigned short b
String related utilities and Regular expression matching.
boost::noncopyable NonCopyable
Ensure derived classes cannot be copied.
Definition: NonCopyable.h:26
Definition: Env.h:23
bool ZYPP_METALINK_DEBUG()
Hack to circumvent the currently poor --root support.
static void XMLCALL characterData(void *userData, const xmlChar *s, int len)
static const char * find_attr(const char *txt, const xmlChar **atts)
Look up an XML attribute in the passed array atts.
static void XMLCALL endElement(void *userData, const xmlChar *name)
static void XMLCALL startElement(void *userData, const xmlChar *name, const xmlChar **atts)
UByteArray hexstr2bytes(std::string str)
const std::unordered_map< ParserState, std::vector< transition > > & transitions()
bool strToBool(const C_Str &str, bool default_r)
Parse str into a bool depending on the default value.
Definition: String.h:429
std::vector< UByteArray > zsync
AutoDispose< xmlParserCtxtPtr > parser
void doTransition(const transition &t)
std::vector< MetalinkMirror > urls
std::vector< UByteArray > piece
std::stack< ParserState > parentStates
std::vector< UByteArray > sha1
#define nullptr
Definition: Easy.h:55
#define ZYPP_THROW(EXCPT)
Drops a logline and throws the Exception.
Definition: Exception.h:428
#define DBG
Definition: Logger.h:95
#define MIL
Definition: Logger.h:96