libzypp  10.5.0
MetaLinkParser.cc
Go to the documentation of this file.
00001 /*---------------------------------------------------------------------\
00002 |                          ____ _   __ __ ___                          |
00003 |                         |__  / \ / / . \ . \                         |
00004 |                           / / \ V /|  _/  _/                         |
00005 |                          / /__ | | | | | |                           |
00006 |                         /_____||_| |_| |_|                           |
00007 |                                                                      |
00008 \---------------------------------------------------------------------*/
00013 #include "zypp/media/MetaLinkParser.h"
00014 #include "zypp/base/Logger.h"
00015 
00016 #include <sys/types.h>
00017 #include <stdio.h>
00018 #include <stdlib.h>
00019 #include <string.h>
00020 #include <expat.h>
00021 
00022 #include <vector>
00023 #include <algorithm>
00024 #include <iostream>
00025 #include <fstream>
00026 
00027 using namespace std;
00028 using namespace zypp::base;
00029 
00030 namespace zypp {
00031   namespace media {
00032 
/* Parser states, one per XML element the metalink parser tracks.
 * The M4-prefixed states are the ones reached from a <file> element that
 * sits directly below <metalink> (without a <files> wrapper); presumably
 * that is the Metalink 4 layout -- NOTE(review): confirm against the spec. */
enum state {
  STATE_START,          /* initial state, before <metalink> was seen */
  STATE_METALINK,       /* inside <metalink> */
  STATE_FILES,          /* inside <metalink><files> */
  STATE_FILE,           /* inside <files><file> */
  STATE_M4FILE,         /* inside <metalink><file> */
  STATE_SIZE,           /* inside <file><size> */
  STATE_M4SIZE,         /* <size> below an M4 file */
  STATE_VERIFICATION,   /* inside <file><verification> */
  STATE_HASH,           /* inside <verification><hash> */
  STATE_M4HASH,         /* <hash> directly below an M4 file */
  STATE_PIECES,         /* inside <verification><pieces> */
  STATE_M4PIECES,       /* <pieces> directly below an M4 file */
  STATE_PHASH,          /* inside <pieces><hash> */
  STATE_M4PHASH,        /* <hash> below M4 <pieces> */
  STATE_RESOURCES,      /* inside <file><resources> */
  STATE_URL,            /* inside <resources><url> */
  STATE_M4URL,          /* <url> directly below an M4 file */
  NUMSTATES             /* number of states; also terminates the switch table */
};
00053 
/* One row of the state transition table: while in state `from`, an opening
 * element called `ename` moves the parser to state `to`. */
struct stateswitch {
  enum state from;      /* state the parser must be in */
  string ename;         /* element name that triggers the transition */
  enum state to;        /* state entered for that element */
  int docontent;        /* non-zero: collect the element's character data */
};
00060 
/* State transition table, terminated by a row whose `from` is NUMSTATES.
 * Rows sharing the same `from` state must stay adjacent: startElement()
 * starts at the first row of the current state and scans forward only while
 * `from` still matches. */
static struct stateswitch stateswitches[] = {
  { STATE_START,        "metalink",     STATE_METALINK, 0 },
  { STATE_METALINK,     "files",        STATE_FILES, 0 },
  { STATE_METALINK,     "file",         STATE_M4FILE, 0 },
  { STATE_FILES,        "file",         STATE_FILE, 0 },
  { STATE_FILE,         "size",         STATE_SIZE, 1 },
  { STATE_FILE,         "verification", STATE_VERIFICATION, 0 },
  { STATE_FILE,         "resources",    STATE_RESOURCES, 0 },
  { STATE_VERIFICATION, "hash",         STATE_HASH, 1 },
  { STATE_VERIFICATION, "pieces",       STATE_PIECES, 0 },
  { STATE_PIECES,       "hash",         STATE_PHASH, 1 },
  { STATE_RESOURCES,    "url",          STATE_URL, 1 },
  { STATE_M4FILE,       "size",         STATE_M4SIZE, 1 },
  { STATE_M4FILE,       "hash",         STATE_M4HASH, 1},
  { STATE_M4FILE,       "url",          STATE_M4URL, 1},
  { STATE_M4FILE,       "pieces",       STATE_M4PIECES, 0},
  { STATE_M4PIECES,     "hash",         STATE_M4PHASH, 1 },
  { NUMSTATES }         /* terminator */
};
00080 
/* One download location from the metalink document.  parseEnd() orders the
 * collected entries by ascending `priority`. */
struct ml_url {
  ml_url()
    : priority( 0 )
  {}
  int priority;         /* sort key; smaller values sort first */
  string url;           /* text content of the <url> element */
};
00088 
/* expat callbacks; declared up front because the ml_parsedata constructor
 * registers them with the parser before their definitions appear. */
static void XMLCALL startElement(void *userData, const char *name, const char **atts);
static void XMLCALL endElement(void *userData, const char *name);
static void XMLCALL characterData(void *userData, const XML_Char *s, int len);
00092 
00093 struct ml_parsedata : private zypp::base::NonCopyable {
00094   ml_parsedata()
00095     : parser( XML_ParserCreate(NULL) )
00096     , depth( 0 )
00097     , state( STATE_START )
00098     , statedepth( 0 )
00099     , content( reinterpret_cast<char *>(malloc(256)) )
00100     , lcontent( 0 )
00101     , acontent( 256 )
00102     , docontent( 0 )
00103     , called( 0 )
00104     , gotfile( 0 )
00105     , size( -1 )
00106     , nurls( 0 )
00107     , blksize( 0 )
00108     , npiece( 0 )
00109     , piecel( 0 )
00110     , nsha1( 0 )
00111     , nzsync( 0 )
00112     , chksuml( 0 )
00113   {
00114     struct stateswitch *sw;
00115     int i;
00116     memset( swtab, 0, sizeof(swtab) );
00117     memset( sbtab, 0, sizeof(sbtab) );
00118     for (i = 0, sw = stateswitches; sw->from != NUMSTATES; i++, sw++)
00119     {
00120       if (!swtab[sw->from])
00121         swtab[sw->from] = sw;
00122       sbtab[sw->to] = sw->from;
00123     }
00124     XML_SetUserData(parser, this);
00125     XML_SetElementHandler(parser, startElement, endElement);
00126     XML_SetCharacterDataHandler(parser, characterData);
00127   }
00128 
00129   ~ml_parsedata()
00130   {
00131     XML_ParserFree(parser);
00132     free(content);
00133   }
00134 
00135   XML_Parser parser;
00136   int depth;
00137   enum state state;
00138   int statedepth;
00139   char *content;
00140   int lcontent;
00141   int acontent;
00142   int docontent;
00143   struct stateswitch *swtab[NUMSTATES];
00144   enum state sbtab[NUMSTATES];
00145 
00146   int called;
00147   int gotfile;
00148   off_t size;
00149   vector<struct ml_url> urls;
00150   int nurls;
00151   size_t blksize;
00152 
00153   vector<unsigned char> piece;
00154   int npiece;
00155   int piecel;
00156 
00157   vector<unsigned char> sha1;
00158   int nsha1;
00159   vector<unsigned char> zsync;
00160   int nzsync;
00161 
00162   vector<unsigned char> chksum;
00163   int chksuml;
00164 };
00165 
/* Looks up attribute `txt` in an expat attribute array, i.e. a
 * NULL-terminated sequence of name/value pointer pairs.
 * Returns the value belonging to the first matching name, or 0 when the
 * attribute is not present. */
static const char *
find_attr(const char *txt, const char **atts)
{
  const char **pair = atts;
  while (*pair)
    {
      if (strcmp(pair[0], txt) == 0)
        return pair[1];
      pair += 2;        /* step over name and value */
    }
  return 0;
}
00176 
00177 static void XMLCALL
00178 startElement(void *userData, const char *name, const char **atts)
00179 {
00180   struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
00181   struct stateswitch *sw;
00182   if (pd->depth != pd->statedepth)
00183     {
00184       pd->depth++;
00185       return;
00186     }
00187   pd->depth++;
00188   if (!pd->swtab[pd->state])
00189     return;
00190   for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++)  /* find name in statetable */
00191     if (sw->ename == name)
00192       break;
00193   if (sw->from != pd->state)
00194     return;
00195   if ((sw->to == STATE_FILE || sw->to == STATE_M4FILE) && pd->gotfile++)
00196     return;     /* ignore all but the first file */
00197   //printf("start depth %d name %s\n", pd->depth, name);
00198   pd->state = sw->to;
00199   pd->docontent = sw->docontent;
00200   pd->statedepth = pd->depth;
00201   pd->lcontent = 0;
00202   *pd->content = 0;
00203   switch(pd->state)
00204     {
00205     case STATE_URL:
00206     case STATE_M4URL:
00207       {
00208         const char *priority = find_attr("priority", atts);
00209         const char *preference = find_attr("preference", atts);
00210         int prio;
00211         pd->urls.push_back(ml_url());
00212         if (priority)
00213           prio = atoi(priority);
00214         else if (preference)
00215           prio = 101 - atoi(preference);
00216         else
00217           prio = 999999;
00218         pd->urls.back().priority = prio;
00219         break;
00220       }
00221     case STATE_PIECES:
00222     case STATE_M4PIECES:
00223       {
00224         const char *type = find_attr("type", atts);
00225         const char *length = find_attr("length", atts);
00226         size_t blksize;
00227 
00228         if (!type || !length)
00229           {
00230             pd->state = pd->sbtab[pd->state];
00231             pd->statedepth--;
00232             break;
00233           }
00234         blksize = strtoul(length, 0, 10);
00235         if (!blksize || (pd->blksize && pd->blksize != blksize))
00236           {
00237             pd->state = pd->sbtab[pd->state];
00238             pd->statedepth--;
00239             break;
00240           }
00241         pd->blksize = blksize;
00242         pd->npiece = 0;
00243         pd->piece.clear();
00244         if (!strcmp(type, "sha1") || !strcmp(type, "sha-1"))
00245           pd->piecel = 20;
00246         else if (!strcmp(type, "zsync"))
00247           pd->piecel = 4;
00248         else
00249           {
00250             pd->state = pd->sbtab[pd->state];
00251             pd->statedepth--;
00252             break;
00253           }
00254         break;
00255       }
00256     case STATE_HASH:
00257     case STATE_M4HASH:
00258       {
00259         const char *type = find_attr("type", atts);
00260         if (!type)
00261           type = "?";
00262         if ((!strcmp(type, "sha1") || !strcmp(type, "sha-1")) && pd->chksuml < 20)
00263           pd->chksuml = 20;
00264         else if (!strcmp(type, "sha256") || !strcmp(type, "sha-256"))
00265           pd->chksuml = 32;
00266         else
00267           {
00268             pd->state = pd->sbtab[pd->state];
00269             pd->statedepth--;
00270             pd->docontent = 0;
00271           }
00272         break;
00273       }
00274     case STATE_PHASH:
00275     case STATE_M4PHASH:
00276       {
00277         const char *piece = find_attr("piece", atts);
00278         if (pd->state == STATE_PHASH && (!piece || atoi(piece) != pd->npiece))
00279           {
00280             pd->state = pd->sbtab[pd->state];
00281             pd->statedepth--;
00282           }
00283         break;
00284       }
00285     default:
00286       break;
00287     }
00288 }
00289 
/* Value of a single hexadecimal digit (either case), or -1 if `c` is not a
 * hex digit. */
static int
hexNibble(char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return c - ('a' - 10);
  if (c >= 'A' && c <= 'F')
    return c - ('A' - 10);
  return -1;
}

/* Decodes 2 * buflen hex digits from `str` into `buf`.
 * Returns buflen on success and 0 as soon as a non-hex character (which
 * includes the terminating NUL of a too short string) is met; in that case
 * `buf` holds whatever was decoded up to that point. */
static int
hexstr2bytes(unsigned char *buf, const char *str, int buflen)
{
  const char *in = str;
  for (int pos = 0; pos < buflen; ++pos, in += 2)
    {
      const int hi = hexNibble(in[0]);
      if (hi < 0)
        return 0;
      buf[pos] = hi;
      /* only look at the second digit once the first one is known to be
       * valid, so we never read behind the terminating NUL */
      const int lo = hexNibble(in[1]);
      if (lo < 0)
        return 0;
      buf[pos] = (buf[pos] << 4) | lo;
    }
  return buflen;
}
00314 
00315 static void XMLCALL
00316 endElement(void *userData, const char *name)
00317 {
00318   struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
00319   // printf("end depth %d-%d name %s\n", pd->depth, pd->statedepth, name);
00320   if (pd->depth != pd->statedepth)
00321     {
00322       pd->depth--;
00323       return;
00324     }
00325   pd->depth--;
00326   pd->statedepth--;
00327   switch (pd->state)
00328     {
00329     case STATE_SIZE:
00330     case STATE_M4SIZE:
00331       pd->size = (off_t)strtoull(pd->content, 0, 10);
00332       break;
00333     case STATE_HASH:
00334     case STATE_M4HASH:
00335       pd->chksum.clear();
00336       pd->chksum.resize(pd->chksuml, 0);
00337       if (strlen(pd->content) != size_t(pd->chksuml) * 2 || !hexstr2bytes(&pd->chksum[0], pd->content, pd->chksuml))
00338         {
00339           pd->chksum.clear();
00340           pd->chksuml = 0;
00341         }
00342       break;
00343     case STATE_PHASH:
00344     case STATE_M4PHASH:
00345       if (strlen(pd->content) != size_t(pd->piecel) * 2)
00346         break;
00347       pd->piece.resize(pd->piecel * (pd->npiece + 1), 0);
00348       if (!hexstr2bytes(&pd->piece[pd->piecel * pd->npiece], pd->content, pd->piecel))
00349         {
00350           pd->piece.resize(pd->piecel * pd->npiece, 0);
00351           break;
00352         }
00353       pd->npiece++;
00354       break;
00355     case STATE_PIECES:
00356     case STATE_M4PIECES:
00357       if (pd->piecel == 4)
00358         {
00359           pd->zsync = pd->piece;
00360           pd->nzsync = pd->npiece;
00361         }
00362       else
00363         {
00364           pd->sha1 = pd->piece;
00365           pd->nsha1 = pd->npiece;
00366         }
00367       pd->piecel = pd->npiece = 0;
00368       pd->piece.clear();
00369       break;
00370     case STATE_URL:
00371     case STATE_M4URL:
00372       if (*pd->content)
00373         {
00374           pd->urls[pd->nurls].url = string(pd->content);
00375           pd->nurls++;
00376         }
00377       break;
00378     default:
00379       break;
00380     }
00381   pd->state = pd->sbtab[pd->state];
00382   pd->docontent = 0;
00383 }
00384 
00385 static void XMLCALL
00386 characterData(void *userData, const XML_Char *s, int len)
00387 {
00388   struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
00389   int l;
00390   char *c;
00391   if (!pd->docontent)
00392     return;
00393   l = pd->lcontent + len + 1;
00394   if (l > pd->acontent)
00395     {
00396       pd->content = reinterpret_cast<char *>(realloc(pd->content, l + 256));
00397       pd->acontent = l + 256;
00398     }
00399   c = pd->content + pd->lcontent;
00400   pd->lcontent += len;
00401   while (len-- > 0)
00402     *c++ = *s++;
00403   *c = 0;
00404 }
00405 
00406 
/* Creates a parser with a fresh, heap-allocated parse state. */
MetaLinkParser::MetaLinkParser()
  : pd( new ml_parsedata )
{}
00410 
/* Releases the parse state allocated by the constructor. */
MetaLinkParser::~MetaLinkParser()
{
  delete pd;
}
00415 
00416 void
00417 MetaLinkParser::parse(const Pathname &filename)
00418 {
00419   parse(InputStream(filename));
00420 }
00421 
00422 void
00423 MetaLinkParser::parse(const InputStream &is)
00424 {
00425   char buf[4096];
00426   if (!is.stream())
00427     ZYPP_THROW(Exception("MetaLinkParser: no such file"));
00428   while (is.stream().good())
00429     {
00430       is.stream().read(buf, sizeof(buf));
00431       parseBytes(buf, is.stream().gcount());
00432     }
00433   parseEnd();
00434 }
00435 
00436 void
00437 MetaLinkParser::parseBytes(const char *buf, size_t len)
00438 {
00439   if (!len)
00440     return;
00441   if (XML_Parse(pd->parser, buf, len, 0) == XML_STATUS_ERROR)
00442     ZYPP_THROW(Exception("Parse Error"));
00443 }
00444 
00445 static bool urlcmp(const ml_url &a, const ml_url &b)
00446 {
00447   return a.priority < b.priority;
00448 }
00449 
00450 void
00451 MetaLinkParser::parseEnd()
00452 {
00453   if (XML_Parse(pd->parser, 0, 0, 1) == XML_STATUS_ERROR)
00454     ZYPP_THROW(Exception("Parse Error"));
00455   if (pd->nurls)
00456     stable_sort(pd->urls.begin(), pd->urls.end(), urlcmp);
00457 }
00458 
00459 std::vector<Url>
00460 MetaLinkParser::getUrls()
00461 {
00462   std::vector<Url> urls;
00463   int i;
00464   for (i = 0; i < pd->nurls; ++i)
00465     urls.push_back(Url(pd->urls[i].url));
00466   return urls;
00467 }
00468 
00469 MediaBlockList
00470 MetaLinkParser::getBlockList()
00471 {
00472   size_t i;
00473   MediaBlockList bl(pd->size);
00474   if (pd->chksuml == 20)
00475     bl.setFileChecksum("SHA1", pd->chksuml, &pd->chksum[0]);
00476   else if (pd->chksuml == 32)
00477     bl.setFileChecksum("SHA256", pd->chksuml, &pd->chksum[0]);
00478   if (pd->size != off_t(-1) && pd->blksize)
00479     {
00480       size_t nb = (pd->size + pd->blksize - 1) / pd->blksize;
00481       off_t off = 0;
00482       size_t size = pd->blksize;
00483       for (i = 0; i < nb; i++)
00484         {
00485           if (i == nb - 1)
00486             {
00487               size = pd->size % pd->blksize;
00488               if (!size)
00489                 size = pd->blksize;
00490             }
00491           size_t blkno = bl.addBlock(off, size);
00492           if (int(i) < pd->nsha1)
00493             {
00494               bl.setChecksum(blkno, "SHA1", 20, &pd->sha1[20 * i]);
00495               if (int(i) < pd->nzsync)
00496                 {
00497                   unsigned char *p = &pd->zsync[4 * i];
00498                   bl.setRsum(blkno, 4, p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24, pd->blksize);
00499                 }
00500             }
00501           off += pd->blksize;
00502         }
00503     }
00504   return bl;
00505 }
00506 
00507   } // namespace media
00508 } // namespace zypp
00509