libzypp
10.5.0
|
00001 /*---------------------------------------------------------------------\ 00002 | ____ _ __ __ ___ | 00003 | |__ / \ / / . \ . \ | 00004 | / / \ V /| _/ _/ | 00005 | / /__ | | | | | | | 00006 | /_____||_| |_| |_| | 00007 | | 00008 \---------------------------------------------------------------------*/ 00013 #include "zypp/media/MetaLinkParser.h" 00014 #include "zypp/base/Logger.h" 00015 00016 #include <sys/types.h> 00017 #include <stdio.h> 00018 #include <stdlib.h> 00019 #include <string.h> 00020 #include <expat.h> 00021 00022 #include <vector> 00023 #include <algorithm> 00024 #include <iostream> 00025 #include <fstream> 00026 00027 using namespace std; 00028 using namespace zypp::base; 00029 00030 namespace zypp { 00031 namespace media { 00032 00033 enum state { 00034 STATE_START, 00035 STATE_METALINK, 00036 STATE_FILES, 00037 STATE_FILE, 00038 STATE_M4FILE, 00039 STATE_SIZE, 00040 STATE_M4SIZE, 00041 STATE_VERIFICATION, 00042 STATE_HASH, 00043 STATE_M4HASH, 00044 STATE_PIECES, 00045 STATE_M4PIECES, 00046 STATE_PHASH, 00047 STATE_M4PHASH, 00048 STATE_RESOURCES, 00049 STATE_URL, 00050 STATE_M4URL, 00051 NUMSTATES 00052 }; 00053 00054 struct stateswitch { 00055 enum state from; 00056 string ename; 00057 enum state to; 00058 int docontent; 00059 }; 00060 00061 static struct stateswitch stateswitches[] = { 00062 { STATE_START, "metalink", STATE_METALINK, 0 }, 00063 { STATE_METALINK, "files", STATE_FILES, 0 }, 00064 { STATE_METALINK, "file", STATE_M4FILE, 0 }, 00065 { STATE_FILES, "file", STATE_FILE, 0 }, 00066 { STATE_FILE, "size", STATE_SIZE, 1 }, 00067 { STATE_FILE, "verification", STATE_VERIFICATION, 0 }, 00068 { STATE_FILE, "resources", STATE_RESOURCES, 0 }, 00069 { STATE_VERIFICATION, "hash", STATE_HASH, 1 }, 00070 { STATE_VERIFICATION, "pieces", STATE_PIECES, 0 }, 00071 { STATE_PIECES, "hash", STATE_PHASH, 1 }, 00072 { STATE_RESOURCES, "url", STATE_URL, 1 }, 00073 { STATE_M4FILE, "size", STATE_M4SIZE, 1 }, 00074 { STATE_M4FILE, "hash", STATE_M4HASH, 1}, 00075 { STATE_M4FILE, "url", STATE_M4URL, 1}, 00076 { STATE_M4FILE, "pieces", STATE_M4PIECES, 0}, 00077 { STATE_M4PIECES, "hash", STATE_M4PHASH, 1 }, 00078 { NUMSTATES } 00079 }; 00080 00081 struct ml_url { 00082 ml_url() 00083 : priority( 0 ) 00084 {} 00085 int priority; 00086 string url; 00087 }; 00088 00089 static void XMLCALL startElement(void *userData, const char *name, const char **atts); 00090 static void XMLCALL endElement(void *userData, const char *name); 00091 static void XMLCALL characterData(void *userData, const XML_Char *s, int len); 00092 00093 struct ml_parsedata : private zypp::base::NonCopyable { 00094 ml_parsedata() 00095 : parser( XML_ParserCreate(NULL) ) 00096 , depth( 0 ) 00097 , state( STATE_START ) 00098 , statedepth( 0 ) 00099 , content( reinterpret_cast<char *>(malloc(256)) ) 00100 , lcontent( 0 ) 00101 , acontent( 256 ) 00102 , docontent( 0 ) 00103 , called( 0 ) 00104 , gotfile( 0 ) 00105 , size( -1 ) 00106 , nurls( 0 ) 00107 , blksize( 0 ) 00108 , npiece( 0 ) 00109 , piecel( 0 ) 00110 , nsha1( 0 ) 00111 , nzsync( 0 ) 00112 , chksuml( 0 ) 00113 { 00114 struct stateswitch *sw; 00115 int i; 00116 memset( swtab, 0, sizeof(swtab) ); 00117 memset( sbtab, 0, sizeof(sbtab) ); 00118 for (i = 0, sw = stateswitches; sw->from != NUMSTATES; i++, sw++) 00119 { 00120 if (!swtab[sw->from]) 00121 swtab[sw->from] = sw; 00122 sbtab[sw->to] = sw->from; 00123 } 00124 XML_SetUserData(parser, this); 00125 XML_SetElementHandler(parser, startElement, endElement); 00126 XML_SetCharacterDataHandler(parser, characterData); 00127 } 00128 00129 ~ml_parsedata() 00130 { 00131 XML_ParserFree(parser); 00132 free(content); 00133 } 00134 00135 XML_Parser parser; 00136 int depth; 00137 enum state state; 00138 int statedepth; 00139 char *content; 00140 int lcontent; 00141 int acontent; 00142 int docontent; 00143 struct stateswitch *swtab[NUMSTATES]; 00144 enum state sbtab[NUMSTATES]; 00145 00146 int called; 00147 int gotfile; 00148 off_t size; 00149 vector<struct ml_url> urls; 00150 int nurls; 00151 size_t blksize; 00152 00153 vector<unsigned char> piece; 00154 int npiece; 00155 int piecel; 00156 00157 vector<unsigned char> sha1; 00158 int nsha1; 00159 vector<unsigned char> zsync; 00160 int nzsync; 00161 00162 vector<unsigned char> chksum; 00163 int chksuml; 00164 }; 00165 00166 static const char * 00167 find_attr(const char *txt, const char **atts) 00168 { 00169 for (; *atts; atts += 2) 00170 { 00171 if (!strcmp(*atts, txt)) 00172 return atts[1]; 00173 } 00174 return 0; 00175 } 00176 00177 static void XMLCALL 00178 startElement(void *userData, const char *name, const char **atts) 00179 { 00180 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData); 00181 struct stateswitch *sw; 00182 if (pd->depth != pd->statedepth) 00183 { 00184 pd->depth++; 00185 return; 00186 } 00187 pd->depth++; 00188 if (!pd->swtab[pd->state]) 00189 return; 00190 for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++) /* find name in statetable */ 00191 if (sw->ename == name) 00192 break; 00193 if (sw->from != pd->state) 00194 return; 00195 if ((sw->to == STATE_FILE || sw->to == STATE_M4FILE) && pd->gotfile++) 00196 return; /* ignore all but the first file */ 00197 //printf("start depth %d name %s\n", pd->depth, name); 00198 pd->state = sw->to; 00199 pd->docontent = sw->docontent; 00200 pd->statedepth = pd->depth; 00201 pd->lcontent = 0; 00202 *pd->content = 0; 00203 switch(pd->state) 00204 { 00205 case STATE_URL: 00206 case STATE_M4URL: 00207 { 00208 const char *priority = find_attr("priority", atts); 00209 const char *preference = find_attr("preference", atts); 00210 int prio; 00211 pd->urls.push_back(ml_url()); 00212 if (priority) 00213 prio = atoi(priority); 00214 else if (preference) 00215 prio = 101 - atoi(preference); 00216 else 00217 prio = 999999; 00218 pd->urls.back().priority = prio; 00219 break; 00220 } 00221 case STATE_PIECES: 00222 case STATE_M4PIECES: 00223 { 00224 const char *type = find_attr("type", atts); 00225 const char *length = find_attr("length", atts); 00226 size_t blksize; 00227 00228 if (!type || !length) 00229 { 00230 pd->state = pd->sbtab[pd->state]; 00231 pd->statedepth--; 00232 break; 00233 } 00234 blksize = strtoul(length, 0, 10); 00235 if (!blksize || (pd->blksize && pd->blksize != blksize)) 00236 { 00237 pd->state = pd->sbtab[pd->state]; 00238 pd->statedepth--; 00239 break; 00240 } 00241 pd->blksize = blksize; 00242 pd->npiece = 0; 00243 pd->piece.clear(); 00244 if (!strcmp(type, "sha1") || !strcmp(type, "sha-1")) 00245 pd->piecel = 20; 00246 else if (!strcmp(type, "zsync")) 00247 pd->piecel = 4; 00248 else 00249 { 00250 pd->state = pd->sbtab[pd->state]; 00251 pd->statedepth--; 00252 break; 00253 } 00254 break; 00255 } 00256 case STATE_HASH: 00257 case STATE_M4HASH: 00258 { 00259 const char *type = find_attr("type", atts); 00260 if (!type) 00261 type = "?"; 00262 if ((!strcmp(type, "sha1") || !strcmp(type, "sha-1")) && pd->chksuml < 20) 00263 pd->chksuml = 20; 00264 else if (!strcmp(type, "sha256") || !strcmp(type, "sha-256")) 00265 pd->chksuml = 32; 00266 else 00267 { 00268 pd->state = pd->sbtab[pd->state]; 00269 pd->statedepth--; 00270 pd->docontent = 0; 00271 } 00272 break; 00273 } 00274 case STATE_PHASH: 00275 case STATE_M4PHASH: 00276 { 00277 const char *piece = find_attr("piece", atts); 00278 if (pd->state == STATE_PHASH && (!piece || atoi(piece) != pd->npiece)) 00279 { 00280 pd->state = pd->sbtab[pd->state]; 00281 pd->statedepth--; 00282 } 00283 break; 00284 } 00285 default: 00286 break; 00287 } 00288 } 00289 00290 static int 00291 hexstr2bytes(unsigned char *buf, const char *str, int buflen) 00292 { 00293 int i; 00294 for (i = 0; i < buflen; i++) 00295 { 00296 #define c2h(c) (((c)>='0' && (c)<='9') ? ((c)-'0') \ 00297 : ((c)>='a' && (c)<='f') ? ((c)-('a'-10)) \ 00298 : ((c)>='A' && (c)<='F') ? ((c)-('A'-10)) \ 00299 : -1) 00300 int v = c2h(*str); 00301 str++; 00302 if (v < 0) 00303 return 0; 00304 buf[i] = v; 00305 v = c2h(*str); 00306 str++; 00307 if (v < 0) 00308 return 0; 00309 buf[i] = (buf[i] << 4) | v; 00310 #undef c2h 00311 } 00312 return buflen; 00313 } 00314 00315 static void XMLCALL 00316 endElement(void *userData, const char *name) 00317 { 00318 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData); 00319 // printf("end depth %d-%d name %s\n", pd->depth, pd->statedepth, name); 00320 if (pd->depth != pd->statedepth) 00321 { 00322 pd->depth--; 00323 return; 00324 } 00325 pd->depth--; 00326 pd->statedepth--; 00327 switch (pd->state) 00328 { 00329 case STATE_SIZE: 00330 case STATE_M4SIZE: 00331 pd->size = (off_t)strtoull(pd->content, 0, 10); 00332 break; 00333 case STATE_HASH: 00334 case STATE_M4HASH: 00335 pd->chksum.clear(); 00336 pd->chksum.resize(pd->chksuml, 0); 00337 if (strlen(pd->content) != size_t(pd->chksuml) * 2 || !hexstr2bytes(&pd->chksum[0], pd->content, pd->chksuml)) 00338 { 00339 pd->chksum.clear(); 00340 pd->chksuml = 0; 00341 } 00342 break; 00343 case STATE_PHASH: 00344 case STATE_M4PHASH: 00345 if (strlen(pd->content) != size_t(pd->piecel) * 2) 00346 break; 00347 pd->piece.resize(pd->piecel * (pd->npiece + 1), 0); 00348 if (!hexstr2bytes(&pd->piece[pd->piecel * pd->npiece], pd->content, pd->piecel)) 00349 { 00350 pd->piece.resize(pd->piecel * pd->npiece, 0); 00351 break; 00352 } 00353 pd->npiece++; 00354 break; 00355 case STATE_PIECES: 00356 case STATE_M4PIECES: 00357 if (pd->piecel == 4) 00358 { 00359 pd->zsync = pd->piece; 00360 pd->nzsync = pd->npiece; 00361 } 00362 else 00363 { 00364 pd->sha1 = pd->piece; 00365 pd->nsha1 = pd->npiece; 00366 } 00367 pd->piecel = pd->npiece = 0; 00368 pd->piece.clear(); 00369 break; 00370 case STATE_URL: 00371 case STATE_M4URL: 00372 if (*pd->content) 00373 { 00374 pd->urls[pd->nurls].url = string(pd->content); 00375 pd->nurls++; 00376 } 00377 break; 00378 default: 00379 break; 00380 } 00381 pd->state = pd->sbtab[pd->state]; 00382 pd->docontent = 0; 00383 } 00384 00385 static void XMLCALL 00386 characterData(void *userData, const XML_Char *s, int len) 00387 { 00388 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData); 00389 int l; 00390 char *c; 00391 if (!pd->docontent) 00392 return; 00393 l = pd->lcontent + len + 1; 00394 if (l > pd->acontent) 00395 { 00396 pd->content = reinterpret_cast<char *>(realloc(pd->content, l + 256)); 00397 pd->acontent = l + 256; 00398 } 00399 c = pd->content + pd->lcontent; 00400 pd->lcontent += len; 00401 while (len-- > 0) 00402 *c++ = *s++; 00403 *c = 0; 00404 } 00405 00406 00407 MetaLinkParser::MetaLinkParser() 00408 : pd( new ml_parsedata ) 00409 {} 00410 00411 MetaLinkParser::~MetaLinkParser() 00412 { 00413 delete pd; 00414 } 00415 00416 void 00417 MetaLinkParser::parse(const Pathname &filename) 00418 { 00419 parse(InputStream(filename)); 00420 } 00421 00422 void 00423 MetaLinkParser::parse(const InputStream &is) 00424 { 00425 char buf[4096]; 00426 if (!is.stream()) 00427 ZYPP_THROW(Exception("MetaLinkParser: no such file")); 00428 while (is.stream().good()) 00429 { 00430 is.stream().read(buf, sizeof(buf)); 00431 parseBytes(buf, is.stream().gcount()); 00432 } 00433 parseEnd(); 00434 } 00435 00436 void 00437 MetaLinkParser::parseBytes(const char *buf, size_t len) 00438 { 00439 if (!len) 00440 return; 00441 if (XML_Parse(pd->parser, buf, len, 0) == XML_STATUS_ERROR) 00442 ZYPP_THROW(Exception("Parse Error")); 00443 } 00444 00445 static bool urlcmp(const ml_url &a, const ml_url &b) 00446 { 00447 return a.priority < b.priority; 00448 } 00449 00450 void 00451 MetaLinkParser::parseEnd() 00452 { 00453 if (XML_Parse(pd->parser, 0, 0, 1) == XML_STATUS_ERROR) 00454 ZYPP_THROW(Exception("Parse Error")); 00455 if (pd->nurls) 00456 stable_sort(pd->urls.begin(), pd->urls.end(), urlcmp); 00457 } 00458 00459 std::vector<Url> 00460 MetaLinkParser::getUrls() 00461 { 00462 std::vector<Url> urls; 00463 int i; 00464 for (i = 0; i < pd->nurls; ++i) 00465 urls.push_back(Url(pd->urls[i].url)); 00466 return urls; 00467 } 00468 00469 MediaBlockList 00470 MetaLinkParser::getBlockList() 00471 { 00472 size_t i; 00473 MediaBlockList bl(pd->size); 00474 if (pd->chksuml == 20) 00475 bl.setFileChecksum("SHA1", pd->chksuml, &pd->chksum[0]); 00476 else if (pd->chksuml == 32) 00477 bl.setFileChecksum("SHA256", pd->chksuml, &pd->chksum[0]); 00478 if (pd->size != off_t(-1) && pd->blksize) 00479 { 00480 size_t nb = (pd->size + pd->blksize - 1) / pd->blksize; 00481 off_t off = 0; 00482 size_t size = pd->blksize; 00483 for (i = 0; i < nb; i++) 00484 { 00485 if (i == nb - 1) 00486 { 00487 size = pd->size % pd->blksize; 00488 if (!size) 00489 size = pd->blksize; 00490 } 00491 size_t blkno = bl.addBlock(off, size); 00492 if (int(i) < pd->nsha1) 00493 { 00494 bl.setChecksum(blkno, "SHA1", 20, &pd->sha1[20 * i]); 00495 if (int(i) < pd->nzsync) 00496 { 00497 unsigned char *p = &pd->zsync[4 * i]; 00498 bl.setRsum(blkno, 4, p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24, pd->blksize); 00499 } 00500 } 00501 off += pd->blksize; 00502 } 00503 } 00504 return bl; 00505 } 00506 00507 } // namespace media 00508 } // namespace zypp 00509