00001
00002
00003
00004
00005
00006
00007
00008
00013 #include "zypp/media/MetaLinkParser.h"
00014 #include "zypp/base/Logger.h"
00015
00016 #include <sys/types.h>
00017 #include <stdio.h>
00018 #include <stdlib.h>
00019 #include <string.h>
00020 #include <expat.h>
00021
00022 #include <vector>
00023 #include <algorithm>
00024 #include <iostream>
00025 #include <fstream>
00026
00027 using namespace std;
00028 using namespace zypp::base;
00029
00030 namespace zypp {
00031 namespace media {
00032
/**
 * States of the element-driven state machine used while parsing a
 * metalink document.  Each value names the element the parser is
 * currently inside of (see the transition table below for the element
 * names).
 *
 * The STATE_M4* values are the counterparts reached through a <file>
 * element that is a direct child of <metalink> instead of <files>
 * (presumably the Metalink 4 / RFC 5854 layout -- TODO confirm).
 *
 * NUMSTATES is not a real state: it sizes the per-state lookup arrays
 * and marks the end of the transition table.
 */
enum state {
  STATE_START,
  STATE_METALINK,
  STATE_FILES,
  STATE_FILE,
  STATE_M4FILE,
  STATE_SIZE,
  STATE_M4SIZE,
  STATE_VERIFICATION,
  STATE_HASH,
  STATE_M4HASH,
  STATE_PIECES,
  STATE_M4PIECES,
  STATE_PHASH,
  STATE_M4PHASH,
  STATE_RESOURCES,
  STATE_URL,
  STATE_M4URL,
  NUMSTATES
};
00053
/**
 * One transition of the parser state machine: when an element called
 * 'ename' opens while the parser is in state 'from', it moves to
 * state 'to'.
 */
struct stateswitch {
  enum state from;      // state the transition starts in
  string ename;         // element name that triggers the transition
  enum state to;        // state entered for the duration of the element
  int docontent;        // non-zero: collect the element's character data
};
00060
/**
 * Transition table of the parser.
 *
 * Entries sharing the same 'from' state must stay adjacent: the
 * ml_parsedata constructor remembers only the first entry per state
 * and startElement() scans forward from there while 'from' still
 * matches.  The final { NUMSTATES } entry is the terminator both of
 * those loops stop at.
 */
static struct stateswitch stateswitches[] = {
  { STATE_START, "metalink", STATE_METALINK, 0 },
  { STATE_METALINK, "files", STATE_FILES, 0 },
  { STATE_METALINK, "file", STATE_M4FILE, 0 },
  { STATE_FILES, "file", STATE_FILE, 0 },
  { STATE_FILE, "size", STATE_SIZE, 1 },
  { STATE_FILE, "verification", STATE_VERIFICATION, 0 },
  { STATE_FILE, "resources", STATE_RESOURCES, 0 },
  { STATE_VERIFICATION, "hash", STATE_HASH, 1 },
  { STATE_VERIFICATION, "pieces", STATE_PIECES, 0 },
  { STATE_PIECES, "hash", STATE_PHASH, 1 },
  { STATE_RESOURCES, "url", STATE_URL, 1 },
  { STATE_M4FILE, "size", STATE_M4SIZE, 1 },
  { STATE_M4FILE, "hash", STATE_M4HASH, 1},
  { STATE_M4FILE, "url", STATE_M4URL, 1},
  { STATE_M4FILE, "pieces", STATE_M4PIECES, 0},
  { STATE_M4PIECES, "hash", STATE_M4PHASH, 1 },
  { NUMSTATES }
};
00080
/**
 * A download location read from a <url> element together with its
 * sort key.  Entries are ordered by ascending 'priority' once parsing
 * has finished, so a smaller value means the URL is listed earlier.
 */
struct ml_url {
  ml_url()
    : priority( 0 )
  {}
  int priority;   // sort key, smaller sorts first
  string url;     // text content of the <url> element
};
00088
// expat callbacks; declared ahead of ml_parsedata because its
// constructor registers them with the parser.
static void XMLCALL startElement(void *userData, const char *name, const char **atts);
static void XMLCALL endElement(void *userData, const char *name);
static void XMLCALL characterData(void *userData, const XML_Char *s, int len);
00092
00093 struct ml_parsedata : private zypp::base::NonCopyable {
00094 ml_parsedata()
00095 : parser( XML_ParserCreate(NULL) )
00096 , depth( 0 )
00097 , state( STATE_START )
00098 , statedepth( 0 )
00099 , content( reinterpret_cast<char *>(malloc(256)) )
00100 , lcontent( 0 )
00101 , acontent( 256 )
00102 , docontent( 0 )
00103 , called( 0 )
00104 , gotfile( 0 )
00105 , size( -1 )
00106 , nurls( 0 )
00107 , blksize( 0 )
00108 , npiece( 0 )
00109 , piecel( 0 )
00110 , nsha1( 0 )
00111 , nzsync( 0 )
00112 , chksuml( 0 )
00113 {
00114 struct stateswitch *sw;
00115 int i;
00116 memset( swtab, 0, sizeof(swtab) );
00117 memset( sbtab, 0, sizeof(sbtab) );
00118 for (i = 0, sw = stateswitches; sw->from != NUMSTATES; i++, sw++)
00119 {
00120 if (!swtab[sw->from])
00121 swtab[sw->from] = sw;
00122 sbtab[sw->to] = sw->from;
00123 }
00124 XML_SetUserData(parser, this);
00125 XML_SetElementHandler(parser, startElement, endElement);
00126 XML_SetCharacterDataHandler(parser, characterData);
00127 }
00128
00129 ~ml_parsedata()
00130 {
00131 XML_ParserFree(parser);
00132 free(content);
00133 }
00134
00135 XML_Parser parser;
00136 int depth;
00137 enum state state;
00138 int statedepth;
00139 char *content;
00140 int lcontent;
00141 int acontent;
00142 int docontent;
00143 struct stateswitch *swtab[NUMSTATES];
00144 enum state sbtab[NUMSTATES];
00145
00146 int called;
00147 int gotfile;
00148 off_t size;
00149 vector<struct ml_url> urls;
00150 int nurls;
00151 size_t blksize;
00152
00153 vector<unsigned char> piece;
00154 int npiece;
00155 int piecel;
00156
00157 vector<unsigned char> sha1;
00158 int nsha1;
00159 vector<unsigned char> zsync;
00160 int nzsync;
00161
00162 vector<unsigned char> chksum;
00163 int chksuml;
00164 };
00165
/**
 * Look up an attribute in the name/value array expat passes to a
 * start-element handler.
 *
 * txt:  attribute name to search for (exact, case-sensitive match)
 * atts: alternating name, value, name, value, ... terminated by a
 *       null name pointer
 *
 * Returns the value belonging to the first matching name, or 0 if the
 * attribute is not present.
 */
static const char *
find_attr(const char *txt, const char **atts)
{
  const char **cur = atts;
  while (*cur)
  {
    if (strcmp(cur[0], txt) == 0)
      return cur[1];
    cur += 2;   // skip over this name/value pair
  }
  return 0;
}
00176
00177 static void XMLCALL
00178 startElement(void *userData, const char *name, const char **atts)
00179 {
00180 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
00181 struct stateswitch *sw;
00182 if (pd->depth != pd->statedepth)
00183 {
00184 pd->depth++;
00185 return;
00186 }
00187 pd->depth++;
00188 if (!pd->swtab[pd->state])
00189 return;
00190 for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++)
00191 if (sw->ename == name)
00192 break;
00193 if (sw->from != pd->state)
00194 return;
00195 if ((sw->to == STATE_FILE || sw->to == STATE_M4FILE) && pd->gotfile++)
00196 return;
00197
00198 pd->state = sw->to;
00199 pd->docontent = sw->docontent;
00200 pd->statedepth = pd->depth;
00201 pd->lcontent = 0;
00202 *pd->content = 0;
00203 switch(pd->state)
00204 {
00205 case STATE_URL:
00206 case STATE_M4URL:
00207 {
00208 const char *priority = find_attr("priority", atts);
00209 const char *preference = find_attr("preference", atts);
00210 int prio;
00211 pd->urls.push_back(ml_url());
00212 if (priority)
00213 prio = atoi(priority);
00214 else if (preference)
00215 prio = 101 - atoi(preference);
00216 else
00217 prio = 999999;
00218 pd->urls.back().priority = prio;
00219 break;
00220 }
00221 case STATE_PIECES:
00222 case STATE_M4PIECES:
00223 {
00224 const char *type = find_attr("type", atts);
00225 const char *length = find_attr("length", atts);
00226 size_t blksize;
00227
00228 if (!type || !length)
00229 {
00230 pd->state = pd->sbtab[pd->state];
00231 pd->statedepth--;
00232 break;
00233 }
00234 blksize = strtoul(length, 0, 10);
00235 if (!blksize || (pd->blksize && pd->blksize != blksize))
00236 {
00237 pd->state = pd->sbtab[pd->state];
00238 pd->statedepth--;
00239 break;
00240 }
00241 pd->blksize = blksize;
00242 pd->npiece = 0;
00243 pd->piece.clear();
00244 if (!strcmp(type, "sha1") || !strcmp(type, "sha-1"))
00245 pd->piecel = 20;
00246 else if (!strcmp(type, "zsync"))
00247 pd->piecel = 4;
00248 else
00249 {
00250 pd->state = pd->sbtab[pd->state];
00251 pd->statedepth--;
00252 break;
00253 }
00254 break;
00255 }
00256 case STATE_HASH:
00257 case STATE_M4HASH:
00258 {
00259 const char *type = find_attr("type", atts);
00260 if (!type)
00261 type = "?";
00262 if ((!strcmp(type, "sha1") || !strcmp(type, "sha-1")) && pd->chksuml < 20)
00263 pd->chksuml = 20;
00264 else if (!strcmp(type, "sha256") || !strcmp(type, "sha-256"))
00265 pd->chksuml = 32;
00266 else
00267 {
00268 pd->state = pd->sbtab[pd->state];
00269 pd->statedepth--;
00270 pd->docontent = 0;
00271 }
00272 break;
00273 }
00274 case STATE_PHASH:
00275 case STATE_M4PHASH:
00276 {
00277 const char *piece = find_attr("piece", atts);
00278 if (pd->state == STATE_PHASH && (!piece || atoi(piece) != pd->npiece))
00279 {
00280 pd->state = pd->sbtab[pd->state];
00281 pd->statedepth--;
00282 }
00283 break;
00284 }
00285 default:
00286 break;
00287 }
00288 }
00289
/** Value of one hexadecimal digit (either case), or -1 if 'c' is not one. */
static int
hexdigit_value(char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return c - 'a' + 10;
  if (c >= 'A' && c <= 'F')
    return c - 'A' + 10;
  return -1;
}

/**
 * Decode 2 * buflen hexadecimal digits from 'str' into 'buf'.
 *
 * buf:    receives 'buflen' bytes
 * str:    NUL-terminated text; decoding stops at the first character
 *         that is not a hex digit (the terminator included)
 * buflen: number of bytes to produce
 *
 * Returns 'buflen' on success and 0 as soon as an invalid character is
 * met; in that case 'buf' is left partially written.
 */
static int
hexstr2bytes(unsigned char *buf, const char *str, int buflen)
{
  for (int pos = 0; pos < buflen; ++pos, str += 2)
  {
    const int hi = hexdigit_value(str[0]);
    if (hi < 0)
      return 0;
    buf[pos] = hi;

    // str[1] is only looked at once str[0] is known not to be the terminator
    const int lo = hexdigit_value(str[1]);
    if (lo < 0)
      return 0;
    buf[pos] = (buf[pos] << 4) | lo;
  }
  return buflen;
}
00314
00315 static void XMLCALL
00316 endElement(void *userData, const char *name)
00317 {
00318 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
00319
00320 if (pd->depth != pd->statedepth)
00321 {
00322 pd->depth--;
00323 return;
00324 }
00325 pd->depth--;
00326 pd->statedepth--;
00327 switch (pd->state)
00328 {
00329 case STATE_SIZE:
00330 case STATE_M4SIZE:
00331 pd->size = (off_t)strtoull(pd->content, 0, 10);
00332 break;
00333 case STATE_HASH:
00334 case STATE_M4HASH:
00335 pd->chksum.clear();
00336 pd->chksum.resize(pd->chksuml, 0);
00337 if (strlen(pd->content) != size_t(pd->chksuml) * 2 || !hexstr2bytes(&pd->chksum[0], pd->content, pd->chksuml))
00338 {
00339 pd->chksum.clear();
00340 pd->chksuml = 0;
00341 }
00342 break;
00343 case STATE_PHASH:
00344 case STATE_M4PHASH:
00345 if (strlen(pd->content) != size_t(pd->piecel) * 2)
00346 break;
00347 pd->piece.resize(pd->piecel * (pd->npiece + 1), 0);
00348 if (!hexstr2bytes(&pd->piece[pd->piecel * pd->npiece], pd->content, pd->piecel))
00349 {
00350 pd->piece.resize(pd->piecel * pd->npiece, 0);
00351 break;
00352 }
00353 pd->npiece++;
00354 break;
00355 case STATE_PIECES:
00356 case STATE_M4PIECES:
00357 if (pd->piecel == 4)
00358 {
00359 pd->zsync = pd->piece;
00360 pd->nzsync = pd->npiece;
00361 }
00362 else
00363 {
00364 pd->sha1 = pd->piece;
00365 pd->nsha1 = pd->npiece;
00366 }
00367 pd->piecel = pd->npiece = 0;
00368 pd->piece.clear();
00369 break;
00370 case STATE_URL:
00371 case STATE_M4URL:
00372 if (*pd->content)
00373 {
00374 pd->urls[pd->nurls].url = string(pd->content);
00375 pd->nurls++;
00376 }
00377 break;
00378 default:
00379 break;
00380 }
00381 pd->state = pd->sbtab[pd->state];
00382 pd->docontent = 0;
00383 }
00384
00385 static void XMLCALL
00386 characterData(void *userData, const XML_Char *s, int len)
00387 {
00388 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
00389 int l;
00390 char *c;
00391 if (!pd->docontent)
00392 return;
00393 l = pd->lcontent + len + 1;
00394 if (l > pd->acontent)
00395 {
00396 pd->content = reinterpret_cast<char *>(realloc(pd->content, l + 256));
00397 pd->acontent = l + 256;
00398 }
00399 c = pd->content + pd->lcontent;
00400 pd->lcontent += len;
00401 while (len-- > 0)
00402 *c++ = *s++;
00403 *c = 0;
00404 }
00405
00406
/**
 * Creates a parser with fresh, empty parse state.  The ml_parsedata
 * constructor sets up the expat parser and its callbacks.
 */
MetaLinkParser::MetaLinkParser()
  : pd( new ml_parsedata )
{}
00410
/** Destroys the parse state, which in turn frees the expat parser. */
MetaLinkParser::~MetaLinkParser()
{
  delete pd;
}
00415
00416 void
00417 MetaLinkParser::parse(const Pathname &filename)
00418 {
00419 parse(InputStream(filename));
00420 }
00421
00422 void
00423 MetaLinkParser::parse(const InputStream &is)
00424 {
00425 char buf[4096];
00426 if (!is.stream())
00427 ZYPP_THROW(Exception("MetaLinkParser: no such file"));
00428 while (is.stream().good())
00429 {
00430 is.stream().read(buf, sizeof(buf));
00431 parseBytes(buf, is.stream().gcount());
00432 }
00433 parseEnd();
00434 }
00435
00436 void
00437 MetaLinkParser::parseBytes(const char *buf, size_t len)
00438 {
00439 if (!len)
00440 return;
00441 if (XML_Parse(pd->parser, buf, len, 0) == XML_STATUS_ERROR)
00442 ZYPP_THROW(Exception("Parse Error"));
00443 }
00444
00445 static bool urlcmp(const ml_url &a, const ml_url &b)
00446 {
00447 return a.priority < b.priority;
00448 }
00449
00450 void
00451 MetaLinkParser::parseEnd()
00452 {
00453 if (XML_Parse(pd->parser, 0, 0, 1) == XML_STATUS_ERROR)
00454 ZYPP_THROW(Exception("Parse Error"));
00455 if (pd->nurls)
00456 stable_sort(pd->urls.begin(), pd->urls.end(), urlcmp);
00457 }
00458
00459 std::vector<Url>
00460 MetaLinkParser::getUrls()
00461 {
00462 std::vector<Url> urls;
00463 int i;
00464 for (i = 0; i < pd->nurls; ++i)
00465 urls.push_back(Url(pd->urls[i].url));
00466 return urls;
00467 }
00468
00469 MediaBlockList
00470 MetaLinkParser::getBlockList()
00471 {
00472 size_t i;
00473 MediaBlockList bl(pd->size);
00474 if (pd->chksuml == 20)
00475 bl.setFileChecksum("SHA1", pd->chksuml, &pd->chksum[0]);
00476 else if (pd->chksuml == 32)
00477 bl.setFileChecksum("SHA256", pd->chksuml, &pd->chksum[0]);
00478 if (pd->size != off_t(-1) && pd->blksize)
00479 {
00480 size_t nb = (pd->size + pd->blksize - 1) / pd->blksize;
00481 off_t off = 0;
00482 size_t size = pd->blksize;
00483 for (i = 0; i < nb; i++)
00484 {
00485 if (i == nb - 1)
00486 {
00487 size = pd->size % pd->blksize;
00488 if (!size)
00489 size = pd->blksize;
00490 }
00491 size_t blkno = bl.addBlock(off, size);
00492 if (int(i) < pd->nsha1)
00493 {
00494 bl.setChecksum(blkno, "SHA1", 20, &pd->sha1[20 * i]);
00495 if (int(i) < pd->nzsync)
00496 {
00497 unsigned char *p = &pd->zsync[4 * i];
00498 bl.setRsum(blkno, 4, p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24, pd->blksize);
00499 }
00500 }
00501 off += pd->blksize;
00502 }
00503 }
00504 return bl;
00505 }
00506
00507 }
00508 }
00509