libzypp 8.13.6
|
00001 /*---------------------------------------------------------------------\ 00002 | ____ _ __ __ ___ | 00003 | |__ / \ / / . \ . \ | 00004 | / / \ V /| _/ _/ | 00005 | / /__ | | | | | | | 00006 | /_____||_| |_| |_| | 00007 | | 00008 \---------------------------------------------------------------------*/ 00013 #include "zypp/media/MetaLinkParser.h" 00014 #include "zypp/base/Logger.h" 00015 00016 #include <sys/types.h> 00017 #include <stdio.h> 00018 #include <stdlib.h> 00019 #include <string.h> 00020 #include <expat.h> 00021 00022 #include <vector> 00023 #include <algorithm> 00024 #include <iostream> 00025 #include <fstream> 00026 00027 using namespace std; 00028 using namespace zypp::base; 00029 00030 namespace zypp { 00031 namespace media { 00032 00033 enum state { 00034 STATE_START, 00035 STATE_METALINK, 00036 STATE_FILES, 00037 STATE_FILE, 00038 STATE_M4FILE, 00039 STATE_SIZE, 00040 STATE_M4SIZE, 00041 STATE_VERIFICATION, 00042 STATE_HASH, 00043 STATE_M4HASH, 00044 STATE_PIECES, 00045 STATE_M4PIECES, 00046 STATE_PHASH, 00047 STATE_M4PHASH, 00048 STATE_RESOURCES, 00049 STATE_URL, 00050 STATE_M4URL, 00051 NUMSTATES 00052 }; 00053 00054 struct stateswitch { 00055 enum state from; 00056 string ename; 00057 enum state to; 00058 int docontent; 00059 }; 00060 00061 static struct stateswitch stateswitches[] = { 00062 { STATE_START, "metalink", STATE_METALINK, 0 }, 00063 { STATE_METALINK, "files", STATE_FILES, 0 }, 00064 { STATE_METALINK, "file", STATE_M4FILE, 0 }, 00065 { STATE_FILES, "file", STATE_FILE, 0 }, 00066 { STATE_FILE, "size", STATE_SIZE, 1 }, 00067 { STATE_FILE, "verification", STATE_VERIFICATION, 0 }, 00068 { STATE_FILE, "resources", STATE_RESOURCES, 0 }, 00069 { STATE_VERIFICATION, "hash", STATE_HASH, 1 }, 00070 { STATE_VERIFICATION, "pieces", STATE_PIECES, 0 }, 00071 { STATE_PIECES, "hash", STATE_PHASH, 1 }, 00072 { STATE_RESOURCES, "url", STATE_URL, 1 }, 00073 { STATE_M4FILE, "size", STATE_M4SIZE, 1 }, 00074 { STATE_M4FILE, "hash", STATE_M4HASH, 1}, 00075 { STATE_M4FILE, "url", STATE_M4URL, 1}, 00076 { STATE_M4FILE, "pieces", STATE_M4PIECES, 0}, 00077 { STATE_M4PIECES, "hash", STATE_M4PHASH, 1 }, 00078 { NUMSTATES } 00079 }; 00080 00081 struct ml_url { 00082 int priority; 00083 string url; 00084 }; 00085 00086 struct ml_parsedata { 00087 XML_Parser parser; 00088 int depth; 00089 enum state state; 00090 int statedepth; 00091 char *content; 00092 int lcontent; 00093 int acontent; 00094 int docontent; 00095 struct stateswitch *swtab[NUMSTATES]; 00096 enum state sbtab[NUMSTATES]; 00097 00098 int called; 00099 int gotfile; 00100 off_t size; 00101 vector<struct ml_url> urls; 00102 int nurls; 00103 size_t blksize; 00104 00105 vector<unsigned char> piece; 00106 int npiece; 00107 int piecel; 00108 00109 vector<unsigned char> sha1; 00110 int nsha1; 00111 vector<unsigned char> zsync; 00112 int nzsync; 00113 00114 vector<unsigned char> chksum; 00115 int chksuml; 00116 }; 00117 00118 static const char * 00119 find_attr(const char *txt, const char **atts) 00120 { 00121 for (; *atts; atts += 2) 00122 { 00123 if (!strcmp(*atts, txt)) 00124 return atts[1]; 00125 } 00126 return 0; 00127 } 00128 00129 static void XMLCALL 00130 startElement(void *userData, const char *name, const char **atts) 00131 { 00132 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData); 00133 struct stateswitch *sw; 00134 if (pd->depth != pd->statedepth) 00135 { 00136 pd->depth++; 00137 return; 00138 } 00139 pd->depth++; 00140 if (!pd->swtab[pd->state]) 00141 return; 00142 for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++) /* find name in statetable */ 00143 if (sw->ename == name) 00144 break; 00145 if (sw->from != pd->state) 00146 return; 00147 if ((sw->to == STATE_FILE || sw->to == STATE_M4FILE) && pd->gotfile++) 00148 return; /* ignore all but the first file */ 00149 //printf("start depth %d name %s\n", pd->depth, name); 00150 pd->state = sw->to; 00151 pd->docontent = sw->docontent; 00152 pd->statedepth = pd->depth; 00153 pd->lcontent = 0; 00154 *pd->content = 0; 00155 switch(pd->state) 00156 { 00157 case STATE_URL: 00158 case STATE_M4URL: 00159 { 00160 const char *priority = find_attr("priority", atts); 00161 const char *preference = find_attr("preference", atts); 00162 int prio; 00163 pd->urls.push_back(ml_url()); 00164 if (priority) 00165 prio = atoi(priority); 00166 else if (preference) 00167 prio = 101 - atoi(preference); 00168 else 00169 prio = 999999; 00170 pd->urls.back().priority = prio; 00171 break; 00172 } 00173 case STATE_PIECES: 00174 case STATE_M4PIECES: 00175 { 00176 const char *type = find_attr("type", atts); 00177 const char *length = find_attr("length", atts); 00178 size_t blksize; 00179 00180 if (!type || !length) 00181 { 00182 pd->state = pd->sbtab[pd->state]; 00183 pd->statedepth--; 00184 break; 00185 } 00186 blksize = strtoul(length, 0, 10); 00187 if (!blksize || (pd->blksize && pd->blksize != blksize)) 00188 { 00189 pd->state = pd->sbtab[pd->state]; 00190 pd->statedepth--; 00191 break; 00192 } 00193 pd->blksize = blksize; 00194 pd->npiece = 0; 00195 pd->piece.clear(); 00196 if (!strcmp(type, "sha1") || !strcmp(type, "sha-1")) 00197 pd->piecel = 20; 00198 else if (!strcmp(type, "zsync")) 00199 pd->piecel = 4; 00200 else 00201 { 00202 pd->state = pd->sbtab[pd->state]; 00203 pd->statedepth--; 00204 break; 00205 } 00206 break; 00207 } 00208 case STATE_HASH: 00209 case STATE_M4HASH: 00210 { 00211 const char *type = find_attr("type", atts); 00212 if (!type) 00213 type = "?"; 00214 if ((!strcmp(type, "sha1") || !strcmp(type, "sha-1")) && pd->chksuml < 20) 00215 pd->chksuml = 20; 00216 else if (!strcmp(type, "sha256") || !strcmp(type, "sha-256")) 00217 pd->chksuml = 32; 00218 else 00219 { 00220 pd->state = pd->sbtab[pd->state]; 00221 pd->statedepth--; 00222 pd->docontent = 0; 00223 } 00224 break; 00225 } 00226 case STATE_PHASH: 00227 case STATE_M4PHASH: 00228 { 00229 const char *piece = find_attr("piece", atts); 00230 if (pd->state == STATE_PHASH && (!piece || atoi(piece) != pd->npiece)) 00231 { 00232 pd->state = pd->sbtab[pd->state]; 00233 pd->statedepth--; 00234 } 00235 break; 00236 } 00237 default: 00238 break; 00239 } 00240 } 00241 00242 static int 00243 hexstr2bytes(unsigned char *buf, const char *str, int buflen) 00244 { 00245 int i; 00246 for (i = 0; i < buflen; i++) 00247 { 00248 #define c2h(c) (((c)>='0' && (c)<='9') ? ((c)-'0') \ 00249 : ((c)>='a' && (c)<='f') ? ((c)-('a'-10)) \ 00250 : ((c)>='A' && (c)<='F') ? ((c)-('A'-10)) \ 00251 : -1) 00252 int v = c2h(*str); 00253 str++; 00254 if (v < 0) 00255 return 0; 00256 buf[i] = v; 00257 v = c2h(*str); 00258 str++; 00259 if (v < 0) 00260 return 0; 00261 buf[i] = (buf[i] << 4) | v; 00262 #undef c2h 00263 } 00264 return buflen; 00265 } 00266 00267 static void XMLCALL 00268 endElement(void *userData, const char *name) 00269 { 00270 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData); 00271 // printf("end depth %d-%d name %s\n", pd->depth, pd->statedepth, name); 00272 if (pd->depth != pd->statedepth) 00273 { 00274 pd->depth--; 00275 return; 00276 } 00277 pd->depth--; 00278 pd->statedepth--; 00279 switch (pd->state) 00280 { 00281 case STATE_SIZE: 00282 case STATE_M4SIZE: 00283 pd->size = (off_t)strtoull(pd->content, 0, 10); 00284 break; 00285 case STATE_HASH: 00286 case STATE_M4HASH: 00287 pd->chksum.clear(); 00288 pd->chksum.resize(pd->chksuml, 0); 00289 if (strlen(pd->content) != size_t(pd->chksuml) * 2 || !hexstr2bytes(&pd->chksum[0], pd->content, pd->chksuml)) 00290 { 00291 pd->chksum.clear(); 00292 pd->chksuml = 0; 00293 } 00294 break; 00295 case STATE_PHASH: 00296 case STATE_M4PHASH: 00297 if (strlen(pd->content) != size_t(pd->piecel) * 2) 00298 break; 00299 pd->piece.resize(pd->piecel * (pd->npiece + 1), 0); 00300 if (!hexstr2bytes(&pd->piece[pd->piecel * pd->npiece], pd->content, pd->piecel)) 00301 { 00302 pd->piece.resize(pd->piecel * pd->npiece, 0); 00303 break; 00304 } 00305 pd->npiece++; 00306 break; 00307 case STATE_PIECES: 00308 case STATE_M4PIECES: 00309 if (pd->piecel == 4) 00310 { 00311 pd->zsync = pd->piece; 00312 pd->nzsync = pd->npiece; 00313 } 00314 else 00315 { 00316 pd->sha1 = pd->piece; 00317 pd->nsha1 = pd->npiece; 00318 } 00319 pd->piecel = pd->npiece = 0; 00320 pd->piece.clear(); 00321 break; 00322 case STATE_URL: 00323 case STATE_M4URL: 00324 if (*pd->content) 00325 { 00326 pd->urls[pd->nurls].url = string(pd->content); 00327 pd->nurls++; 00328 } 00329 break; 00330 default: 00331 break; 00332 } 00333 pd->state = pd->sbtab[pd->state]; 00334 pd->docontent = 0; 00335 } 00336 00337 static void XMLCALL 00338 characterData(void *userData, const XML_Char *s, int len) 00339 { 00340 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData); 00341 int l; 00342 char *c; 00343 if (!pd->docontent) 00344 return; 00345 l = pd->lcontent + len + 1; 00346 if (l > pd->acontent) 00347 { 00348 pd->content = reinterpret_cast<char *>(realloc(pd->content, l + 256)); 00349 pd->acontent = l + 256; 00350 } 00351 c = pd->content + pd->lcontent; 00352 pd->lcontent += len; 00353 while (len-- > 0) 00354 *c++ = *s++; 00355 *c = 0; 00356 } 00357 00358 00359 MetaLinkParser::MetaLinkParser() 00360 { 00361 struct stateswitch *sw; 00362 int i; 00363 00364 pd = new ml_parsedata(); 00365 pd->size = off_t(-1); 00366 for (i = 0, sw = stateswitches; sw->from != NUMSTATES; i++, sw++) 00367 { 00368 if (!pd->swtab[sw->from]) 00369 pd->swtab[sw->from] = sw; 00370 pd->sbtab[sw->to] = sw->from; 00371 } 00372 pd->content = reinterpret_cast<char *>(malloc(256)); 00373 pd->acontent = 256; 00374 pd->lcontent = 0; 00375 pd->parser = XML_ParserCreate(NULL); 00376 XML_SetUserData(pd->parser, pd); 00377 XML_SetElementHandler(pd->parser, startElement, endElement); 00378 XML_SetCharacterDataHandler(pd->parser, characterData); 00379 } 00380 00381 MetaLinkParser::~MetaLinkParser() 00382 { 00383 XML_ParserFree(pd->parser); 00384 free(pd->content); 00385 delete pd; 00386 } 00387 00388 void 00389 MetaLinkParser::parse(const Pathname &filename) 00390 { 00391 parse(InputStream(filename)); 00392 } 00393 00394 void 00395 MetaLinkParser::parse(const InputStream &is) 00396 { 00397 char buf[4096]; 00398 if (!is.stream()) 00399 ZYPP_THROW(Exception("MetaLinkParser: no such file")); 00400 while (is.stream().good()) 00401 { 00402 is.stream().read(buf, sizeof(buf)); 00403 parseBytes(buf, is.stream().gcount()); 00404 } 00405 parseEnd(); 00406 } 00407 00408 void 00409 MetaLinkParser::parseBytes(const char *buf, size_t len) 00410 { 00411 if (!len) 00412 return; 00413 if (XML_Parse(pd->parser, buf, len, 0) == XML_STATUS_ERROR) 00414 ZYPP_THROW(Exception("Parse Error")); 00415 } 00416 00417 static bool urlcmp(const ml_url &a, const ml_url &b) 00418 { 00419 return a.priority < b.priority; 00420 } 00421 00422 void 00423 MetaLinkParser::parseEnd() 00424 { 00425 if (XML_Parse(pd->parser, 0, 0, 1) == XML_STATUS_ERROR) 00426 ZYPP_THROW(Exception("Parse Error")); 00427 if (pd->nurls) 00428 stable_sort(pd->urls.begin(), pd->urls.end(), urlcmp); 00429 } 00430 00431 std::vector<Url> 00432 MetaLinkParser::getUrls() 00433 { 00434 std::vector<Url> urls; 00435 int i; 00436 for (i = 0; i < pd->nurls; ++i) 00437 urls.push_back(Url(pd->urls[i].url)); 00438 return urls; 00439 } 00440 00441 MediaBlockList 00442 MetaLinkParser::getBlockList() 00443 { 00444 size_t i; 00445 MediaBlockList bl(pd->size); 00446 if (pd->chksuml == 20) 00447 bl.setFileChecksum("SHA1", pd->chksuml, &pd->chksum[0]); 00448 else if (pd->chksuml == 32) 00449 bl.setFileChecksum("SHA256", pd->chksuml, &pd->chksum[0]); 00450 if (pd->size != off_t(-1) && pd->blksize) 00451 { 00452 size_t nb = (pd->size + pd->blksize - 1) / pd->blksize; 00453 off_t off = 0; 00454 size_t size = pd->blksize; 00455 for (i = 0; i < nb; i++) 00456 { 00457 if (i == nb - 1) 00458 { 00459 size = pd->size % pd->blksize; 00460 if (!size) 00461 size = pd->blksize; 00462 } 00463 size_t blkno = bl.addBlock(off, size); 00464 if (int(i) < pd->nsha1) 00465 { 00466 bl.setChecksum(blkno, "SHA1", 20, &pd->sha1[20 * i]); 00467 if (int(i) < pd->nzsync) 00468 { 00469 unsigned char *p = &pd->zsync[4 * i]; 00470 bl.setRsum(blkno, 4, p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24, pd->blksize); 00471 } 00472 } 00473 off += pd->blksize; 00474 } 00475 } 00476 return bl; 00477 } 00478 00479 } // namespace media 00480 } // namespace zypp 00481