libzypp 8.13.6

MetaLinkParser.cc

Go to the documentation of this file.
00001 /*---------------------------------------------------------------------\
00002 |                          ____ _   __ __ ___                          |
00003 |                         |__  / \ / / . \ . \                         |
00004 |                           / / \ V /|  _/  _/                         |
00005 |                          / /__ | | | | | |                           |
00006 |                         /_____||_| |_| |_|                           |
00007 |                                                                      |
00008 \---------------------------------------------------------------------*/
00013 #include "zypp/media/MetaLinkParser.h"
00014 #include "zypp/base/Logger.h"
00015 
00016 #include <sys/types.h>
00017 #include <stdio.h>
00018 #include <stdlib.h>
00019 #include <string.h>
00020 #include <expat.h>
00021 
00022 #include <vector>
00023 #include <algorithm>
00024 #include <iostream>
00025 #include <fstream>
00026 
00027 using namespace std;
00028 using namespace zypp::base;
00029 
00030 namespace zypp {
00031   namespace media {
00032 
/* States of the metalink XML state machine (see stateswitches[] for the
 * element that leads into each of them).  The M4 variants are the ones
 * reached when <file> sits directly below <metalink>. */
enum state {
  STATE_START = 0,      /* initial state, waiting for <metalink> */
  STATE_METALINK,       /* inside <metalink> */
  STATE_FILES,          /* inside <files> */
  STATE_FILE,           /* inside <files>/<file> */
  STATE_M4FILE,         /* inside <metalink>/<file> */
  STATE_SIZE,           /* <size> below STATE_FILE */
  STATE_M4SIZE,         /* <size> below STATE_M4FILE */
  STATE_VERIFICATION,   /* inside <verification> */
  STATE_HASH,           /* <hash> below STATE_VERIFICATION */
  STATE_M4HASH,         /* <hash> below STATE_M4FILE */
  STATE_PIECES,         /* <pieces> below STATE_VERIFICATION */
  STATE_M4PIECES,       /* <pieces> below STATE_M4FILE */
  STATE_PHASH,          /* <hash> below STATE_PIECES */
  STATE_M4PHASH,        /* <hash> below STATE_M4PIECES */
  STATE_RESOURCES,      /* inside <resources> */
  STATE_URL,            /* <url> below STATE_RESOURCES */
  STATE_M4URL,          /* <url> below STATE_M4FILE */
  NUMSTATES             /* number of states; also marks the end of stateswitches[] */
};
00053 
00054 struct stateswitch {
00055   enum state from;
00056   string ename;
00057   enum state to;
00058   int docontent;
00059 };
00060 
00061 static struct stateswitch stateswitches[] = {
00062   { STATE_START,        "metalink",     STATE_METALINK, 0 },
00063   { STATE_METALINK,     "files",        STATE_FILES, 0 },
00064   { STATE_METALINK,     "file",         STATE_M4FILE, 0 },
00065   { STATE_FILES,        "file",         STATE_FILE, 0 },
00066   { STATE_FILE,         "size",         STATE_SIZE, 1 },
00067   { STATE_FILE,         "verification", STATE_VERIFICATION, 0 },
00068   { STATE_FILE,         "resources",    STATE_RESOURCES, 0 },
00069   { STATE_VERIFICATION, "hash",         STATE_HASH, 1 },
00070   { STATE_VERIFICATION, "pieces",       STATE_PIECES, 0 },
00071   { STATE_PIECES,       "hash",         STATE_PHASH, 1 },
00072   { STATE_RESOURCES,    "url",          STATE_URL, 1 },
00073   { STATE_M4FILE,       "size",         STATE_M4SIZE, 1 },
00074   { STATE_M4FILE,       "hash",         STATE_M4HASH, 1},
00075   { STATE_M4FILE,       "url",          STATE_M4URL, 1},
00076   { STATE_M4FILE,       "pieces",       STATE_M4PIECES, 0},
00077   { STATE_M4PIECES,     "hash",         STATE_M4PHASH, 1 },
00078   { NUMSTATES }
00079 };
00080 
/* A download location together with its sort key. */
struct ml_url {
  int priority;         /* sort key, smaller values are sorted first */
  string url;           /* text content of the <url> element */
};
00085 
00086 struct ml_parsedata {
00087   XML_Parser parser;
00088   int depth;
00089   enum state state;
00090   int statedepth;
00091   char *content;
00092   int lcontent;
00093   int acontent;
00094   int docontent;
00095   struct stateswitch *swtab[NUMSTATES];
00096   enum state sbtab[NUMSTATES];
00097 
00098   int called;
00099   int gotfile;
00100   off_t size;
00101   vector<struct ml_url> urls;
00102   int nurls;
00103   size_t blksize;
00104 
00105   vector<unsigned char> piece;
00106   int npiece;
00107   int piecel;
00108 
00109   vector<unsigned char> sha1;
00110   int nsha1;
00111   vector<unsigned char> zsync;
00112   int nzsync;
00113 
00114   vector<unsigned char> chksum;
00115   int chksuml;
00116 };
00117 
/* Look up attribute `txt` in an expat attribute array, i.e. a NULL
 * terminated list of name/value pointer pairs.
 * Returns the value of the first match or 0 if there is none. */
static const char *
find_attr(const char *txt, const char **atts)
{
  const char **cur = atts;
  while (*cur)
    {
      if (strcmp(cur[0], txt) == 0)
        return cur[1];
      cur += 2;         /* skip name and value */
    }
  return 0;
}
00128 
00129 static void XMLCALL
00130 startElement(void *userData, const char *name, const char **atts)
00131 {
00132   struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
00133   struct stateswitch *sw;
00134   if (pd->depth != pd->statedepth)
00135     {
00136       pd->depth++;
00137       return;
00138     }
00139   pd->depth++;
00140   if (!pd->swtab[pd->state])
00141     return;
00142   for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++)  /* find name in statetable */
00143     if (sw->ename == name)
00144       break;
00145   if (sw->from != pd->state)
00146     return;
00147   if ((sw->to == STATE_FILE || sw->to == STATE_M4FILE) && pd->gotfile++)
00148     return;     /* ignore all but the first file */
00149   //printf("start depth %d name %s\n", pd->depth, name);
00150   pd->state = sw->to;
00151   pd->docontent = sw->docontent;
00152   pd->statedepth = pd->depth;
00153   pd->lcontent = 0;
00154   *pd->content = 0;
00155   switch(pd->state)
00156     {
00157     case STATE_URL:
00158     case STATE_M4URL:
00159       {
00160         const char *priority = find_attr("priority", atts);
00161         const char *preference = find_attr("preference", atts);
00162         int prio;
00163         pd->urls.push_back(ml_url());
00164         if (priority)
00165           prio = atoi(priority);
00166         else if (preference)
00167           prio = 101 - atoi(preference);
00168         else
00169           prio = 999999;
00170         pd->urls.back().priority = prio;
00171         break;
00172       }
00173     case STATE_PIECES:
00174     case STATE_M4PIECES:
00175       {
00176         const char *type = find_attr("type", atts);
00177         const char *length = find_attr("length", atts);
00178         size_t blksize;
00179 
00180         if (!type || !length)
00181           {
00182             pd->state = pd->sbtab[pd->state];
00183             pd->statedepth--;
00184             break;
00185           }
00186         blksize = strtoul(length, 0, 10);
00187         if (!blksize || (pd->blksize && pd->blksize != blksize))
00188           {
00189             pd->state = pd->sbtab[pd->state];
00190             pd->statedepth--;
00191             break;
00192           }
00193         pd->blksize = blksize;
00194         pd->npiece = 0;
00195         pd->piece.clear();
00196         if (!strcmp(type, "sha1") || !strcmp(type, "sha-1"))
00197           pd->piecel = 20;
00198         else if (!strcmp(type, "zsync"))
00199           pd->piecel = 4;
00200         else
00201           {
00202             pd->state = pd->sbtab[pd->state];
00203             pd->statedepth--;
00204             break;
00205           }
00206         break;
00207       }
00208     case STATE_HASH:
00209     case STATE_M4HASH:
00210       {
00211         const char *type = find_attr("type", atts);
00212         if (!type)
00213           type = "?";
00214         if ((!strcmp(type, "sha1") || !strcmp(type, "sha-1")) && pd->chksuml < 20)
00215           pd->chksuml = 20;
00216         else if (!strcmp(type, "sha256") || !strcmp(type, "sha-256"))
00217           pd->chksuml = 32;
00218         else
00219           {
00220             pd->state = pd->sbtab[pd->state];
00221             pd->statedepth--;
00222             pd->docontent = 0;
00223           }
00224         break;
00225       }
00226     case STATE_PHASH:
00227     case STATE_M4PHASH:
00228       {
00229         const char *piece = find_attr("piece", atts);
00230         if (pd->state == STATE_PHASH && (!piece || atoi(piece) != pd->npiece))
00231           {
00232             pd->state = pd->sbtab[pd->state];
00233             pd->statedepth--;
00234           }
00235         break;
00236       }
00237     default:
00238       break;
00239     }
00240 }
00241 
/* Value of the hex digit `c`, or -1 if `c` is not a hex digit. */
static int
hexnibble(char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return c - ('a' - 10);
  if (c >= 'A' && c <= 'F')
    return c - ('A' - 10);
  return -1;
}

/* Convert the first 2 * buflen hex digits of `str` into `buflen` bytes
 * stored in `buf`.
 * Returns buflen on success and 0 as soon as a non hex character (this
 * includes the terminating NUL) is found; `buf` may be partially
 * written in that case. */
static int
hexstr2bytes(unsigned char *buf, const char *str, int buflen)
{
  for (int pos = 0; pos < buflen; ++pos, str += 2)
    {
      const int hi = hexnibble(str[0]);
      if (hi < 0)
        return 0;
      buf[pos] = hi;
      /* str[1] is only looked at once str[0] is known not to be the NUL */
      const int lo = hexnibble(str[1]);
      if (lo < 0)
        return 0;
      buf[pos] = (buf[pos] << 4) | lo;
    }
  return buflen;
}
00266 
00267 static void XMLCALL
00268 endElement(void *userData, const char *name)
00269 {
00270   struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
00271   // printf("end depth %d-%d name %s\n", pd->depth, pd->statedepth, name);
00272   if (pd->depth != pd->statedepth)
00273     {
00274       pd->depth--;
00275       return;
00276     }
00277   pd->depth--;
00278   pd->statedepth--;
00279   switch (pd->state)
00280     {
00281     case STATE_SIZE:
00282     case STATE_M4SIZE:
00283       pd->size = (off_t)strtoull(pd->content, 0, 10);
00284       break;
00285     case STATE_HASH:
00286     case STATE_M4HASH:
00287       pd->chksum.clear();
00288       pd->chksum.resize(pd->chksuml, 0);
00289       if (strlen(pd->content) != size_t(pd->chksuml) * 2 || !hexstr2bytes(&pd->chksum[0], pd->content, pd->chksuml))
00290         {
00291           pd->chksum.clear();
00292           pd->chksuml = 0;
00293         }
00294       break;
00295     case STATE_PHASH:
00296     case STATE_M4PHASH:
00297       if (strlen(pd->content) != size_t(pd->piecel) * 2)
00298         break;
00299       pd->piece.resize(pd->piecel * (pd->npiece + 1), 0);
00300       if (!hexstr2bytes(&pd->piece[pd->piecel * pd->npiece], pd->content, pd->piecel))
00301         {
00302           pd->piece.resize(pd->piecel * pd->npiece, 0);
00303           break;
00304         }
00305       pd->npiece++;
00306       break;
00307     case STATE_PIECES:
00308     case STATE_M4PIECES:
00309       if (pd->piecel == 4)
00310         {
00311           pd->zsync = pd->piece;
00312           pd->nzsync = pd->npiece;
00313         }
00314       else
00315         {
00316           pd->sha1 = pd->piece;
00317           pd->nsha1 = pd->npiece;
00318         }
00319       pd->piecel = pd->npiece = 0;
00320       pd->piece.clear();
00321       break;
00322     case STATE_URL:
00323     case STATE_M4URL:
00324       if (*pd->content)
00325         {
00326           pd->urls[pd->nurls].url = string(pd->content);
00327           pd->nurls++;
00328         }
00329       break;
00330     default:
00331       break;
00332     }
00333   pd->state = pd->sbtab[pd->state];
00334   pd->docontent = 0;
00335 }
00336 
00337 static void XMLCALL
00338 characterData(void *userData, const XML_Char *s, int len)
00339 {
00340   struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
00341   int l;
00342   char *c;
00343   if (!pd->docontent)
00344     return;
00345   l = pd->lcontent + len + 1;
00346   if (l > pd->acontent)
00347     {
00348       pd->content = reinterpret_cast<char *>(realloc(pd->content, l + 256));
00349       pd->acontent = l + 256;
00350     }
00351   c = pd->content + pd->lcontent;
00352   pd->lcontent += len;
00353   while (len-- > 0)
00354     *c++ = *s++;
00355   *c = 0;
00356 }
00357 
00358 
00359 MetaLinkParser::MetaLinkParser()
00360 {
00361   struct stateswitch *sw;
00362   int i;
00363  
00364   pd = new ml_parsedata();
00365   pd->size = off_t(-1);
00366   for (i = 0, sw = stateswitches; sw->from != NUMSTATES; i++, sw++)
00367     {
00368       if (!pd->swtab[sw->from])
00369         pd->swtab[sw->from] = sw;
00370       pd->sbtab[sw->to] = sw->from;
00371     }
00372   pd->content = reinterpret_cast<char *>(malloc(256));
00373   pd->acontent = 256;
00374   pd->lcontent = 0;
00375   pd->parser = XML_ParserCreate(NULL);
00376   XML_SetUserData(pd->parser, pd);
00377   XML_SetElementHandler(pd->parser, startElement, endElement);
00378   XML_SetCharacterDataHandler(pd->parser, characterData);
00379 }
00380 
00381 MetaLinkParser::~MetaLinkParser()
00382 {
00383   XML_ParserFree(pd->parser);
00384   free(pd->content);
00385   delete pd;
00386 }
00387 
00388 void
00389 MetaLinkParser::parse(const Pathname &filename)
00390 {
00391   parse(InputStream(filename));
00392 }
00393 
00394 void
00395 MetaLinkParser::parse(const InputStream &is)
00396 {
00397   char buf[4096];
00398   if (!is.stream())
00399     ZYPP_THROW(Exception("MetaLinkParser: no such file"));
00400   while (is.stream().good())
00401     {
00402       is.stream().read(buf, sizeof(buf));
00403       parseBytes(buf, is.stream().gcount());
00404     }
00405   parseEnd();
00406 }
00407 
00408 void
00409 MetaLinkParser::parseBytes(const char *buf, size_t len)
00410 {
00411   if (!len)
00412     return;
00413   if (XML_Parse(pd->parser, buf, len, 0) == XML_STATUS_ERROR)
00414     ZYPP_THROW(Exception("Parse Error"));
00415 }
00416 
00417 static bool urlcmp(const ml_url &a, const ml_url &b)
00418 {
00419   return a.priority < b.priority;
00420 }
00421 
00422 void
00423 MetaLinkParser::parseEnd()
00424 {
00425   if (XML_Parse(pd->parser, 0, 0, 1) == XML_STATUS_ERROR)
00426     ZYPP_THROW(Exception("Parse Error"));
00427   if (pd->nurls)
00428     stable_sort(pd->urls.begin(), pd->urls.end(), urlcmp);
00429 }
00430 
00431 std::vector<Url>
00432 MetaLinkParser::getUrls()
00433 {
00434   std::vector<Url> urls;
00435   int i;
00436   for (i = 0; i < pd->nurls; ++i)
00437     urls.push_back(Url(pd->urls[i].url));
00438   return urls;
00439 }
00440 
00441 MediaBlockList
00442 MetaLinkParser::getBlockList()
00443 {
00444   size_t i;
00445   MediaBlockList bl(pd->size);
00446   if (pd->chksuml == 20)
00447     bl.setFileChecksum("SHA1", pd->chksuml, &pd->chksum[0]);
00448   else if (pd->chksuml == 32)
00449     bl.setFileChecksum("SHA256", pd->chksuml, &pd->chksum[0]);
00450   if (pd->size != off_t(-1) && pd->blksize)
00451     {
00452       size_t nb = (pd->size + pd->blksize - 1) / pd->blksize;
00453       off_t off = 0;
00454       size_t size = pd->blksize;
00455       for (i = 0; i < nb; i++)
00456         {
00457           if (i == nb - 1)
00458             {
00459               size = pd->size % pd->blksize;
00460               if (!size)
00461                 size = pd->blksize;
00462             }
00463           size_t blkno = bl.addBlock(off, size);
00464           if (int(i) < pd->nsha1)
00465             {
00466               bl.setChecksum(blkno, "SHA1", 20, &pd->sha1[20 * i]);
00467               if (int(i) < pd->nzsync)
00468                 {
00469                   unsigned char *p = &pd->zsync[4 * i];
00470                   bl.setRsum(blkno, 4, p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24, pd->blksize);
00471                 }
00472             }
00473           off += pd->blksize;
00474         }
00475     }
00476   return bl;
00477 }
00478 
00479   } // namespace media
00480 } // namespace zypp
00481