libzypp  17.30.0
MediaMultiCurl.cc
Go to the documentation of this file.
1 /*---------------------------------------------------------------------\
2 | ____ _ __ __ ___ |
3 | |__ / \ / / . \ . \ |
4 | / / \ V /| _/ _/ |
5 | / /__ | | | | | | |
6 | /_____||_| |_| |_| |
7 | |
8 \---------------------------------------------------------------------*/
13 #include <ctype.h>
14 #include <sys/types.h>
15 #include <signal.h>
16 #include <sys/wait.h>
17 #include <netdb.h>
18 #include <arpa/inet.h>
19 
20 #include <vector>
21 #include <iostream>
22 #include <algorithm>
23 
24 
25 #include <zypp/ZConfig.h>
26 #include <zypp/base/Logger.h>
28 #include <zypp-curl/parser/MetaLinkParser>
29 #include <zypp/ManagedFile.h>
30 #include <zypp-curl/private/curlhelper_p.h>
31 #include <zypp-curl/auth/CurlAuthData>
32 
33 using std::endl;
34 using namespace zypp::base;
35 
36 #undef CURLVERSION_AT_LEAST
37 #define CURLVERSION_AT_LEAST(M,N,O) LIBCURL_VERSION_NUM >= ((((M)<<8)+(N))<<8)+(O)
38 
39 namespace zypp {
40  namespace media {
41 
42 
44 
45 
46 class multifetchrequest;
47 
// Hack: we derive from MediaCurl just to get the storage space for
// settings, url, curlerrors and the like
// NOTE(review): the class head line ("class multifetchworker : public
// MediaCurl { ...") and a few member declarations (e.g. the back pointer
// to the request and the digest) are elided in this doxygen extract --
// confirm against the repository.
  friend class multifetchrequest;

public:
  multifetchworker(int no, multifetchrequest &request, const Url &url);
  void nextjob();             // take the next pending chunk (or steal one)
  void run();                 // submit the easy handle for the current block
  bool checkChecksum();       // verify the digest of the block just fetched
  bool recheckChecksum();     // re-read the block from the file and verify again
  void disableCompetition();  // put competing workers on this block into DISCARD

  void checkdns();                          // asynchronous DNS pre-check via fork()
  void adddnsfd(fd_set &rset, int &maxfd);  // add the DNS pipe to a select() set
  void dnsevent(fd_set &rset);              // handle completion of the DNS child

  int _workerno;              // ordinal of this worker within the request

  int _state;                 // one of the WORKER_* states below
  bool _competing;            // true if more than one worker writes this block

  size_t _blkno;              // index of the current block
  off_t _blkstart;            // file offset of the current block
  size_t _blksize;            // size of the current block

  double _blkstarttime;       // time the current block transfer started
  size_t _blkreceived;        // bytes received for the current block
  off_t _received;            // total bytes received by this worker

  double _avgspeed;           // smoothed transfer speed of this worker
  double _maxspeed;           // per-worker rate limit set by rate control

  double _sleepuntil;         // wakeup time while in WORKER_SLEEP

private:
  void stealjob();            // compete for a block another worker is fetching

  size_t writefunction(void *ptr, size_t size);
  static size_t _writefunction(void *ptr, size_t size, size_t nmemb, void *stream);

  size_t headerfunction(char *ptr, size_t size);
  static size_t _headerfunction(void *ptr, size_t size, size_t nmemb, void *stream);

  int _pass;                  // steal generation; -1 means "do not steal this block"
  std::string _urlbuf;        // owned string backing CURLOPT_URL
  off_t _off;                 // current write offset within the file
  size_t _size;               // bytes still expected for the current block

  pid_t _pid;                 // pid of the forked DNS checker child (0 if none)
  int _dnspipe;               // read end of the DNS child's pipe (-1 if none)
};
105 
// Worker state machine (kept as plain #defines to match the file style).
#define WORKER_STARTING 0   // constructed, not yet running
#define WORKER_LOOKUP 1     // waiting for the forked DNS pre-check
#define WORKER_FETCH 2      // actively downloading its block
#define WORKER_DISCARD 3    // block no longer needed, data is thrown away
#define WORKER_DONE 4       // idle, block finished
#define WORKER_SLEEP 5      // deliberately paused (too slow / rate control)
#define WORKER_BROKEN 6     // permanent failure, worker is out of the game
113 
114 
115 
117 public:
118  multifetchrequest(const MediaMultiCurl *context, const Pathname &filename, const Url &baseurl, CURLM *multi, FILE *fp, callback::SendReport<DownloadProgressReport> *report, MediaBlockList *blklist, off_t filesize);
120 
121  void run(std::vector<Url> &urllist);
122 
123 protected:
124  friend class multifetchworker;
125 
129 
130  FILE *_fp;
132  MediaBlockList *_blklist;
133  off_t _filesize;
134 
135  CURLM *_multi;
136 
137  std::list<multifetchworker *> _workers;
138  bool _stealing;
140 
141  size_t _blkno;
142  off_t _blkoff;
147  bool _finished;
148  off_t _totalsize;
151 
152  double _starttime;
154 
157  double _periodavg;
158 
159 public:
160  double _timeout;
162  double _maxspeed;
164 };
165 
// presumably the minimum number of mirrors that makes a multi-fetch
// worthwhile -- not referenced in this extract, TODO confirm usage
constexpr auto MIN_REQ_MIRRS = 4;
constexpr auto BLKSIZE = 131072;  // default chunk size (128 KiB) when no block list
constexpr auto MAXURLS = 10;      // hard cap on concurrently used mirror URLs
169 
171 
// Return the current wall-clock time in seconds (microsecond resolution),
// or 0 when gettimeofday() fails.
static double
currentTime()
{
  struct timeval tv;
  if (gettimeofday(&tv, NULL) != 0)
    return 0;
  double seconds = tv.tv_sec;
  seconds += tv.tv_usec / 1000000.;
  return seconds;
}
180 
// libcurl write callback (instance side): store up to _size bytes of the
// current block at file offset _off, updating progress counters and the
// running digest. Returning a value different from the incoming size makes
// libcurl abort the transfer; "size ? 0 : 1" aborts while still returning
// a mismatch even when size == 0.
size_t
multifetchworker::writefunction(void *ptr, size_t size)
{
  size_t len, cnt;
  if (_state == WORKER_BROKEN)
    return size ? 0 : 1;              // broken worker: force libcurl to abort

  double now = currentTime();

  len = size > _size ? _size : size;  // never accept more than the block needs
  if (!len)
    {
      // kill this job?
      return size;
    }

  if (_blkstart && _off == _blkstart)
    {
      // make sure that the server replied with "partial content"
      // for http requests
      char *effurl;
      (void)curl_easy_getinfo(_curl, CURLINFO_EFFECTIVE_URL, &effurl);
      if (effurl && !strncasecmp(effurl, "http", 4))
        {
          long statuscode = 0;
          (void)curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &statuscode);
          if (statuscode != 206)
            return size ? 0 : 1;      // server ignored the range request: abort
        }
    }

  _blkreceived += len;
  _received += len;

  _request->_lastprogress = now;      // record activity for the timeout check

  if (_state == WORKER_DISCARD || !_request->_fp)
    {
      // block is no longer needed
      // still calculate the checksum so that we can throw out bad servers
      if (_request->_blklist)
        _dig.update((const char *)ptr, len);
      _off += len;
      _size -= len;
      return size;
    }
  if (fseeko(_request->_fp, _off, SEEK_SET))
    return size ? 0 : 1;              // cannot position in the target file: abort
  cnt = fwrite(ptr, 1, len, _request->_fp);
  if (cnt > 0)
    {
      _request->_fetchedsize += cnt;
      if (_request->_blklist)
        _dig.update((const char *)ptr, cnt);
      _off += cnt;
      _size -= cnt;
      if (cnt == len)
        return size;                  // full write, all good
    }
  return cnt;                         // short write: libcurl aborts the transfer
}
242 
243 size_t
244 multifetchworker::_writefunction(void *ptr, size_t size, size_t nmemb, void *stream)
245 {
246  multifetchworker *me = reinterpret_cast<multifetchworker *>(stream);
247  return me->writefunction(ptr, size * nmemb);
248 }
249 
250 size_t
251 multifetchworker::headerfunction(char *p, size_t size)
252 {
253  size_t l = size;
254  if (l > 9 && !strncasecmp(p, "Location:", 9))
255  {
256  std::string line(p + 9, l - 9);
257  if (line[l - 10] == '\r')
258  line.erase(l - 10, 1);
259  XXX << "#" << _workerno << ": redirecting to" << line << endl;
260  return size;
261  }
262  if (l <= 14 || l >= 128 || strncasecmp(p, "Content-Range:", 14) != 0)
263  return size;
264  p += 14;
265  l -= 14;
266  while (l && (*p == ' ' || *p == '\t'))
267  p++, l--;
268  if (l < 6 || strncasecmp(p, "bytes", 5))
269  return size;
270  p += 5;
271  l -= 5;
272  char buf[128];
273  memcpy(buf, p, l);
274  buf[l] = 0;
275  unsigned long long start, off, filesize;
276  if (sscanf(buf, "%llu-%llu/%llu", &start, &off, &filesize) != 3)
277  return size;
278  if (_request->_filesize == (off_t)-1)
279  {
280  WAR << "#" << _workerno << ": setting request filesize to " << filesize << endl;
281  _request->_filesize = filesize;
282  if (_request->_totalsize == 0 && !_request->_blklist)
283  _request->_totalsize = filesize;
284  }
285  if (_request->_filesize != (off_t)filesize)
286  {
287  XXX << "#" << _workerno << ": filesize mismatch" << endl;
289  strncpy(_curlError, "filesize mismatch", CURL_ERROR_SIZE);
290  }
291  return size;
292 }
293 
294 size_t
295 multifetchworker::_headerfunction(void *ptr, size_t size, size_t nmemb, void *stream)
296 {
297  multifetchworker *me = reinterpret_cast<multifetchworker *>(stream);
298  return me->headerfunction((char *)ptr, size * nmemb);
299 }
300 
// Set up a worker for mirror 'url': prepare (or reuse) a curl easy handle,
// install the write/header callbacks, and copy authentication data when the
// mirror is the same host as the main request. On any failure the error
// text is recorded in _curlError and construction bails out early.
// NOTE(review): several lines are elided in this doxygen extract (the
// initial _state assignment, the _maxspeed setup, the easy-pool lookup and
// the WORKER_BROKEN transitions on the error paths) -- confirm against the
// repository.
multifetchworker::multifetchworker(int no, multifetchrequest &request, const Url &url)
: MediaCurl(url, Pathname())
{
  _workerno = no;
  _request = &request;
  _competing = false;
  _off = _blkstart = 0;
  _size = _blksize = 0;
  _pass = 0;
  _blkno = 0;
  _pid = 0;           // no DNS checker child yet
  _dnspipe = -1;      // no DNS pipe yet
  _blkreceived = 0;
  _received = 0;
  _blkstarttime = 0;
  _avgspeed = 0;
  _sleepuntil = 0;
  _noendrange = false;

  Url curlUrl( clearQueryString(url) );
  _urlbuf = curlUrl.asString();  // must outlive the easy handle (CURLOPT_URL)
  if (_curl)
    XXX << "reused worker from pool" << endl;
  if (!_curl && !(_curl = curl_easy_init()))
    {
      strncpy(_curlError, "curl_easy_init failed", CURL_ERROR_SIZE);
      return;
    }
  try
    {
      setupEasy();
    }
  catch (Exception &ex)
    {
      curl_easy_cleanup(_curl);
      _curl = 0;
      strncpy(_curlError, "curl_easy_setopt failed", CURL_ERROR_SIZE);
      return;
    }
  // stash the worker in the handle so the multi loop can find it again
  curl_easy_setopt(_curl, CURLOPT_PRIVATE, this);
  curl_easy_setopt(_curl, CURLOPT_URL, _urlbuf.c_str());
  curl_easy_setopt(_curl, CURLOPT_WRITEFUNCTION, &_writefunction);
  curl_easy_setopt(_curl, CURLOPT_WRITEDATA, this);
  // only parse response headers while we still need to learn the file size
  if (_request->_filesize == off_t(-1) || !_request->_blklist || !_request->_blklist->haveChecksum(0))
    {
      curl_easy_setopt(_curl, CURLOPT_HEADERFUNCTION, &_headerfunction);
      curl_easy_setopt(_curl, CURLOPT_HEADERDATA, this);
    }
  // if this is the same host copy authorization
  // (the host check is also what curl does when doing a redirect)
  // (note also that unauthorized exceptions are thrown with the request host)
  if (url.getHost() == _request->_context->_url.getHost())
    {
      _settings.setUsername(_request->_context->_settings.username());
      _settings.setPassword(_request->_context->_settings.password());
      _settings.setAuthType(_request->_context->_settings.authType());
      if ( _settings.userPassword().size() )
        {
          curl_easy_setopt(_curl, CURLOPT_USERPWD, _settings.userPassword().c_str());
          std::string use_auth = _settings.authType();
          if (use_auth.empty())
            use_auth = "digest,basic"; // our default
          long auth = CurlAuthData::auth_type_str2long(use_auth);
          if( auth != CURLAUTH_NONE)
            {
              XXX << "#" << _workerno << ": Enabling HTTP authentication methods: " << use_auth
                  << " (CURLOPT_HTTPAUTH=" << auth << ")" << std::endl;
              curl_easy_setopt(_curl, CURLOPT_HTTPAUTH, auth);
            }
        }
    }
  checkdns();  // kick off the asynchronous DNS pre-check
}
379 
// Worker teardown: detach or scrub the easy handle, reap a still-running
// DNS child and close its pipe, then disconnect the media.
// NOTE(review): the destructor head line, a state guard before
// curl_multi_remove_handle, and the easy-pool handover line are elided in
// this doxygen extract -- confirm against the repository.
{
  if (_curl)
    {
      curl_multi_remove_handle(_request->_multi, _curl);
      if (_state == WORKER_DONE || _state == WORKER_SLEEP)
        {
          // handle finished cleanly: clear our callbacks and the speed cap
          // so the handle can be reused later
#if CURLVERSION_AT_LEAST(7,15,5)
          curl_easy_setopt(_curl, CURLOPT_MAX_RECV_SPEED_LARGE, (curl_off_t)0);
#endif
          curl_easy_setopt(_curl, CURLOPT_PRIVATE, (void *)0);
          curl_easy_setopt(_curl, CURLOPT_WRITEFUNCTION, (void *)0);
          curl_easy_setopt(_curl, CURLOPT_WRITEDATA, (void *)0);
          curl_easy_setopt(_curl, CURLOPT_HEADERFUNCTION, (void *)0);
          curl_easy_setopt(_curl, CURLOPT_HEADERDATA, (void *)0);
        }
      else
        curl_easy_cleanup(_curl);  // mid-transfer handles are not reusable
      _curl = 0;
    }
  if (_pid)
    {
      // the DNS checker child is still around: kill and reap it
      kill(_pid, SIGKILL);
      int status;
      while (waitpid(_pid, &status, 0) == -1)
        if (errno != EINTR)
          break;
      _pid = 0;
    }
  if (_dnspipe != -1)
    {
      close(_dnspipe);
      _dnspipe = -1;
    }
  // the destructor in MediaCurl doesn't call disconnect() if
  // the media is not attached, so we do it here manually
  disconnectFrom();
}
420 
// Return true iff the environment variable 'name' is set to a non-empty
// value. Takes a const reference (instead of by value) so the string is
// not copied on every call -- callers are unaffected.
static inline bool env_isset(const std::string &name)
{
  const char *s = getenv(name.c_str());
  return s && *s;
}
426 
void
// NOTE(review): the function head line ("multifetchworker::checkdns()"),
// the WORKER_BROKEN transitions on the error paths and the final
// WORKER_LOOKUP transition are elided in this doxygen extract.
// Pre-check that the mirror's hostname resolves before letting curl use it.
// The lookup runs in a forked child so it cannot block the multi loop;
// completion is signalled by EOF on a pipe watched via select().
{
  std::string host = _url.getHost();

  if (host.empty())
    return;

  if (_request->_context->isDNSok(host))
    return;  // this host was already verified during this request

  // no need to do dns checking for numeric hosts
  char addrbuf[128];
  if (inet_pton(AF_INET, host.c_str(), addrbuf) == 1)
    return;
  if (inet_pton(AF_INET6, host.c_str(), addrbuf) == 1)
    return;

  // no need to do dns checking if we use a proxy
  if (!_settings.proxy().empty())
    return;
  if (env_isset("all_proxy") || env_isset("ALL_PROXY"))
    return;
  std::string schemeproxy = _url.getScheme() + "_proxy";
  if (env_isset(schemeproxy))
    return;
  if (schemeproxy != "http_proxy")
    {
      // also honor the upper-case variant, e.g. FTP_PROXY
      std::transform(schemeproxy.begin(), schemeproxy.end(), schemeproxy.begin(), ::toupper);
      if (env_isset(schemeproxy))
        return;
    }

  XXX << "checking DNS lookup of " << host << endl;
  int pipefds[2];
  if (pipe(pipefds))
    {
      strncpy(_curlError, "DNS pipe creation failed", CURL_ERROR_SIZE);
      return;
    }
  _pid = fork();
  if (_pid == pid_t(-1))
    {
      close(pipefds[0]);
      close(pipefds[1]);
      _pid = 0;
      strncpy(_curlError, "DNS checker fork failed", CURL_ERROR_SIZE);
      return;
    }
  else if (_pid == 0)
    {
      // child: do the blocking getaddrinfo() and report via exit status
      close(pipefds[0]);
      // XXX: close all other file descriptors
      struct addrinfo *ai, aihints;
      memset(&aihints, 0, sizeof(aihints));
      aihints.ai_family = PF_UNSPEC;
      // restrict to IPv4 when the machine has no IPv6 support at all
      int tstsock = socket(PF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0);
      if (tstsock == -1)
        aihints.ai_family = PF_INET;
      else
        close(tstsock);
      aihints.ai_socktype = SOCK_STREAM;
      aihints.ai_flags = AI_CANONNAME;
      unsigned int connecttimeout = _request->_connect_timeout;
      if (connecttimeout)
        alarm(connecttimeout);  // hard upper bound for the lookup
      signal(SIGALRM, SIG_DFL);
      if (getaddrinfo(host.c_str(), NULL, &aihints, &ai))
        _exit(1);
      _exit(0);
    }
  // parent: keep the read end; EOF on it means the child has finished
  close(pipefds[1]);
  _dnspipe = pipefds[0];
}
504 
505 void
506 multifetchworker::adddnsfd(fd_set &rset, int &maxfd)
507 {
508  if (_state != WORKER_LOOKUP)
509  return;
510  FD_SET(_dnspipe, &rset);
511  if (maxfd < _dnspipe)
512  maxfd = _dnspipe;
513 }
514 
void
// NOTE(review): the function head line ("multifetchworker::dnsevent(fd_set
// &rset)"), the WORKER_BROKEN transitions on the failure paths and the
// setDNSok bookkeeping line are elided in this doxygen extract.
// Called from the select() loop when the DNS child's pipe fired: reap the
// child and either fail this worker or hand it its first job.
{

  if (_state != WORKER_LOOKUP || !FD_ISSET(_dnspipe, &rset))
    return;
  int status;
  while (waitpid(_pid, &status, 0) == -1)
    {
      if (errno != EINTR)
        return;
    }
  _pid = 0;
  if (_dnspipe != -1)
    {
      close(_dnspipe);
      _dnspipe = -1;
    }
  if (!WIFEXITED(status))
    {
      // child died from a signal (e.g. the alarm) -> lookup failed
      strncpy(_curlError, "DNS lookup failed", CURL_ERROR_SIZE);
      return;
    }
  int exitcode = WEXITSTATUS(status);
  XXX << "#" << _workerno << ": DNS lookup returned " << exitcode << endl;
  if (exitcode != 0)
    {
      strncpy(_curlError, "DNS lookup failed", CURL_ERROR_SIZE);
      return;
    }
  nextjob();  // lookup succeeded: start fetching
}
552 
553 bool
555 {
556  // XXX << "checkChecksum block " << _blkno << endl;
557  if (!_blksize || !_request->_blklist)
558  return true;
559  return _request->_blklist->verifyDigest(_blkno, _dig);
560 }
561 
562 bool
564 {
565  // XXX << "recheckChecksum block " << _blkno << endl;
566  if (!_request->_fp || !_blksize || !_request->_blklist)
567  return true;
568  if (fseeko(_request->_fp, _blkstart, SEEK_SET))
569  return false;
570  char buf[4096];
571  size_t l = _blksize;
572  _request->_blklist->createDigest(_dig); // resets digest
573  while (l)
574  {
575  size_t cnt = l > sizeof(buf) ? sizeof(buf) : l;
576  if (fread(buf, cnt, 1, _request->_fp) != 1)
577  return false;
578  _dig.update(buf, cnt);
579  l -= cnt;
580  }
581  return _request->_blklist->verifyDigest(_blkno, _dig);
582 }
583 
584 
void
// NOTE(review): the function head line ("multifetchworker::stealjob()"),
// the no-candidate logging, one _avgspeed assignment and the WORKER_SLEEP
// transition near the end are elided in this doxygen extract.
// No unassigned chunks remain: pick the most promising block currently
// being fetched by another worker and download it in parallel, or briefly
// sleep when the current owner is clearly going to finish first.
{
  if (!_request->_stealing)
    {
      XXX << "start stealing!" << endl;
      _request->_stealing = true;
    }
  multifetchworker *best = 0;
  std::list<multifetchworker *>::iterator workeriter = _request->_workers.begin();
  double now = 0;
  for (; workeriter != _request->_workers.end(); ++workeriter)
    {
      multifetchworker *worker = *workeriter;
      if (worker == this)
        continue;
      if (worker->_pass == -1)
        continue; // do not steal!
      if (worker->_state == WORKER_DISCARD || worker->_state == WORKER_DONE || worker->_state == WORKER_SLEEP || !worker->_blksize)
        continue; // do not steal finished jobs
      if (!worker->_avgspeed && worker->_blkreceived)
        {
          // no average yet: derive one from the current block's progress
          if (!now)
            now = currentTime();
          if (now > worker->_blkstarttime)
            worker->_avgspeed = worker->_blkreceived / (now - worker->_blkstarttime);
        }
      if (!best || best->_pass > worker->_pass)
        {
          best = worker;  // prefer blocks that have been stolen least often
          continue;
        }
      if (best->_pass < worker->_pass)
        continue;
      // if it is the same block, we want to know the best worker, otherwise the worst
      if (worker->_blkstart == best->_blkstart)
        {
          if ((worker->_blksize - worker->_blkreceived) * best->_avgspeed < (best->_blksize - best->_blkreceived) * worker->_avgspeed)
            best = worker;
        }
      else
        {
          if ((worker->_blksize - worker->_blkreceived) * best->_avgspeed > (best->_blksize - best->_blkreceived) * worker->_avgspeed)
            best = worker;
        }
    }
  if (!best)
    {
      // nothing left to steal from anyone: the download is complete
      _request->_finished = true;
      return;
    }
  // do not sleep twice
  if (_state != WORKER_SLEEP)
    {
      if (!_avgspeed && _blkreceived)
        {
          if (!now)
            now = currentTime();
          if (now > _blkstarttime)
            // NOTE(review): the _avgspeed assignment on this path is elided
            // in this extract
        }

      // lets see if we should sleep a bit
      XXX << "me #" << _workerno << ": " << _avgspeed << ", size " << best->_blksize << endl;
      XXX << "best #" << best->_workerno << ": " << best->_avgspeed << ", size " << (best->_blksize - best->_blkreceived) << endl;
      if (_avgspeed && best->_avgspeed && best->_blksize - best->_blkreceived > 0 &&
          (best->_blksize - best->_blkreceived) * _avgspeed < best->_blksize * best->_avgspeed)
        {
          if (!now)
            now = currentTime();
          // sleep roughly twice the owner's expected remaining time, capped at 1s
          double sl = (best->_blksize - best->_blkreceived) / best->_avgspeed * 2;
          if (sl > 1)
            sl = 1;
          XXX << "#" << _workerno << ": going to sleep for " << sl * 1000 << " ms" << endl;
          _sleepuntil = now + sl;
          return;
        }
    }

  // compete with 'best' on its block
  _competing = true;
  best->_competing = true;
  _blkstart = best->_blkstart;
  _blksize = best->_blksize;
  best->_pass++;
  _pass = best->_pass;
  _blkno = best->_blkno;
  run();
}
677 
678 void
680 {
681  std::list<multifetchworker *>::iterator workeriter = _request->_workers.begin();
682  for (; workeriter != _request->_workers.end(); ++workeriter)
683  {
684  multifetchworker *worker = *workeriter;
685  if (worker == this)
686  continue;
687  if (worker->_blkstart == _blkstart)
688  {
689  if (worker->_state == WORKER_FETCH)
690  worker->_state = WORKER_DISCARD;
691  worker->_pass = -1; /* do not steal this one, we already have it */
692  }
693  }
694 }
695 
696 
void
// NOTE(review): the function head line ("multifetchworker::nextjob()"), an
// end-of-file guard, a size-clamp line and the final block bookkeeping
// (recording _blkstart/_blkno and advancing the request's _blkoff) are
// elided in this doxygen extract.
// Assign the next unfetched chunk of the file to this worker: either the
// next BLKSIZE slice (no block list) or the next metalink block; once
// everything has been handed out, fall back to stealing.
{
  _noendrange = false;
  if (_request->_stealing)
    {
      stealjob();  // all chunks handed out already
      return;
    }

  MediaBlockList *blklist = _request->_blklist;
  if (!blklist)
    {
      _blksize = BLKSIZE;
      if (_request->_filesize != off_t(-1))
        {
          // NOTE(review): the end-of-file guard condition here is elided
          {
            stealjob();
            return;
          }
          // NOTE(review): a remaining-size assignment here is elided
          if (_blksize > BLKSIZE)
            _blksize = BLKSIZE;
        }
    }
  else
    {
      MediaBlock blk = blklist->getBlock(_request->_blkno);
      while (_request->_blkoff >= (off_t)(blk.off + blk.size))
        {
          // the current block is fully assigned: advance to the next one
          if (++_request->_blkno == blklist->numBlocks())
            {
              stealjob();  // past the last block
              return;
            }
          blk = blklist->getBlock(_request->_blkno);
          _request->_blkoff = blk.off;
        }
      _blksize = blk.off + blk.size - _request->_blkoff;
      if (_blksize > BLKSIZE && !blklist->haveChecksum(_request->_blkno))
        _blksize = BLKSIZE;  // no checksum available: keep chunks small
    }
  run();
}
745 
void
// NOTE(review): the function head line ("multifetchworker::run()") and the
// WORKER_BROKEN transitions on the error paths are elided in this doxygen
// extract.
// Program the easy handle with the byte range of the current block and add
// it to the multi handle so the transfer starts on the next perform.
{
  char rangebuf[128];

  if (_state == WORKER_BROKEN || _state == WORKER_DONE)
    return; // just in case...
  // rangebuf comfortably fits two 64-bit decimals; snprintf would still be
  // the more defensive choice here
  if (_noendrange)
    sprintf(rangebuf, "%llu-", (unsigned long long)_blkstart);
  else
    sprintf(rangebuf, "%llu-%llu", (unsigned long long)_blkstart, (unsigned long long)_blkstart + _blksize - 1);
  XXX << "#" << _workerno << ": BLK " << _blkno << ":" << rangebuf << " " << _url << endl;
  // an open-ended range starting at 0 means "whole file": no Range header
  if (curl_easy_setopt(_curl, CURLOPT_RANGE, !_noendrange || _blkstart != 0 ? rangebuf : (char *)0) != CURLE_OK)
    {
      strncpy(_curlError, "curl_easy_setopt range failed", CURL_ERROR_SIZE);
      return;
    }
  if (curl_multi_add_handle(_request->_multi, _curl) != CURLM_OK)
    {
      strncpy(_curlError, "curl_multi_add_handle failed", CURL_ERROR_SIZE);
      return;
    }
  _request->_havenewjob = true;  // tell the select loop to re-run multi_perform
  _off = _blkstart;
  _size = _blksize;
  if (_request->_blklist)
    _request->_blklist->createDigest(_dig); // resets digest

  double now = currentTime();
  _blkstarttime = now;
  _blkreceived = 0;
}
783 
784 
786 
787 
// Collect all request-wide state for a multi-mirror download. Most
// tunables (_timeout, _maxspeed, _maxworkers, ...) are zeroed here and
// filled in by the caller before run().
// NOTE(review): one initializer line (around the _starttime /
// _lastprogress setup) is elided in this doxygen extract.
multifetchrequest::multifetchrequest(const MediaMultiCurl *context, const Pathname &filename, const Url &baseurl, CURLM *multi, FILE *fp, callback::SendReport<DownloadProgressReport> *report, MediaBlockList *blklist, off_t filesize) : _context(context), _filename(filename), _baseurl(baseurl)
{
  _fp = fp;
  _report = report;
  _blklist = blklist;
  _filesize = filesize;
  _multi = multi;
  _stealing = false;
  _havenewjob = false;
  _blkno = 0;
  if (_blklist)
    _blkoff = _blklist->getBlock(0).off;  // first block need not start at 0
  else
    _blkoff = 0;
  _activeworkers = 0;
  _lookupworkers = 0;
  _sleepworkers = 0;
  _minsleepuntil = 0;
  _finished = false;
  _fetchedsize = 0;
  _fetchedgoodsize = 0;
  _totalsize = 0;
  _lastperiodfetched = 0;
  _periodavg = 0;
  _timeout = 0;
  _connect_timeout = 0;
  _maxspeed = 0;
  _maxworkers = 0;
  if (blklist)
    {
      // total size is the sum of all block sizes
      for (size_t blkno = 0; blkno < blklist->numBlocks(); blkno++)
        {
          MediaBlock blk = blklist->getBlock(blkno);
          _totalsize += blk.size;
        }
    }
  else if (filesize != off_t(-1))
    _totalsize = filesize;
}
828 
830 {
831  for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
832  {
833  multifetchworker *worker = *workeriter;
834  *workeriter = NULL;
835  delete worker;
836  }
837  _workers.clear();
838 }
839 
// Main download loop: spawn one worker per mirror (bounded by _maxworkers
// and MAXURLS), multiplex their transfers via select() + curl_multi_perform,
// collect finished transfers, reschedule workers, put slow ones to sleep,
// enforce the rate limit and report progress. Throws on select() failure,
// curl failure, user abort, filesize overrun, or the overall timeout.
// NOTE(review): the periodavg assignment lines inside the "update
// periodavg" branch are elided in this doxygen extract.
void
multifetchrequest::run(std::vector<Url> &urllist)
{
  int workerno = 0;
  std::vector<Url>::iterator urliter = urllist.begin();
  for (;;)
    {
      fd_set rset, wset, xset;
      int maxfd, nqueue;

      if (_finished)
        {
          XXX << "finished!" << endl;
          break;
        }

      // spawn phase: add at most one new worker per loop iteration while
      // mirror URLs and worker slots remain
      if ((int)_activeworkers < _maxworkers && urliter != urllist.end() && _workers.size() < MAXURLS)
        {
          // spawn another worker!
          multifetchworker *worker = new multifetchworker(workerno++, *this, *urliter);
          _workers.push_back(worker);
          if (worker->_state != WORKER_BROKEN)
            {
              _activeworkers++;
              if (worker->_state != WORKER_LOOKUP)
                {
                  worker->nextjob();
                }
              else
                _lookupworkers++;
            }
          ++urliter;
          continue;
        }
      if (!_activeworkers)
        {
          WAR << "No more active workers!" << endl;
          // show the first worker error we find
          for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
            {
              if ((*workeriter)->_state != WORKER_BROKEN)
                continue;
              ZYPP_THROW(MediaCurlException(_baseurl, "Server error", (*workeriter)->_curlError));
            }
          break;
        }

      FD_ZERO(&rset);
      FD_ZERO(&wset);
      FD_ZERO(&xset);

      curl_multi_fdset(_multi, &rset, &wset, &xset, &maxfd);

      // also watch the DNS pipes of workers still in the lookup phase
      if (_lookupworkers)
        for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
          (*workeriter)->adddnsfd(rset, maxfd);

      timeval tv;
      // if we added a new job we have to call multi_perform once
      // to make it show up in the fd set. do not sleep in this case.
      tv.tv_sec = 0;
      tv.tv_usec = _havenewjob ? 0 : 200000;
      if (_sleepworkers && !_havenewjob)
        {
          // shorten the select() timeout to the earliest sleeper's wakeup
          if (_minsleepuntil == 0)
            {
              for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
                {
                  multifetchworker *worker = *workeriter;
                  if (worker->_state != WORKER_SLEEP)
                    continue;
                  if (!_minsleepuntil || _minsleepuntil > worker->_sleepuntil)
                    _minsleepuntil = worker->_sleepuntil;
                }
            }
          double sl = _minsleepuntil - currentTime();
          if (sl < 0)
            {
              sl = 0;
              _minsleepuntil = 0;
            }
          if (sl < .2)
            tv.tv_usec = sl * 1000000;
        }
      int r = select(maxfd + 1, &rset, &wset, &xset, &tv);
      if (r == -1 && errno != EINTR)
        ZYPP_THROW(MediaCurlException(_baseurl, "select() failed", "unknown error"));
      if (r != 0 && _lookupworkers)
        for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
          {
            multifetchworker *worker = *workeriter;
            if (worker->_state != WORKER_LOOKUP)
              continue;
            (*workeriter)->dnsevent(rset);
            if (worker->_state != WORKER_LOOKUP)
              _lookupworkers--;
          }
      _havenewjob = false;

      // run curl
      for (;;)
        {
          CURLMcode mcode;
          int tasks;
          mcode = curl_multi_perform(_multi, &tasks);
          if (mcode == CURLM_CALL_MULTI_PERFORM)
            continue;
          if (mcode != CURLM_OK)
            ZYPP_THROW(MediaCurlException(_baseurl, "curl_multi_perform", "unknown error"));
          break;
        }

      double now = currentTime();

      // update periodavg
      // NOTE(review): the assignment lines of this if/else are elided in
      // this doxygen extract
      if (now > _lastperiodstart + .5)
        {
          if (!_periodavg)
          else
          _lastperiodstart = now;
        }

      // wake up sleepers
      if (_sleepworkers)
        {
          for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
            {
              multifetchworker *worker = *workeriter;
              if (worker->_state != WORKER_SLEEP)
                continue;
              if (worker->_sleepuntil > now)
                continue;
              if (_minsleepuntil == worker->_sleepuntil)
                _minsleepuntil = 0;
              XXX << "#" << worker->_workerno << ": sleep done, wake up" << endl;
              _sleepworkers--;
              // nextjob changes the state
              worker->nextjob();
            }
        }

      // collect all curl results, reschedule new jobs
      CURLMsg *msg;
      while ((msg = curl_multi_info_read(_multi, &nqueue)) != 0)
        {
          if (msg->msg != CURLMSG_DONE)
            continue;
          CURL *easy = msg->easy_handle;
          CURLcode cc = msg->data.result;
          multifetchworker *worker;
          if (curl_easy_getinfo(easy, CURLINFO_PRIVATE, &worker) != CURLE_OK)
            ZYPP_THROW(MediaCurlException(_baseurl, "curl_easy_getinfo", "unknown error"));
          if (worker->_blkreceived && now > worker->_blkstarttime)
            {
              // fold the block's speed into the worker's running average
              if (worker->_avgspeed)
                worker->_avgspeed = (worker->_avgspeed + worker->_blkreceived / (now - worker->_blkstarttime)) / 2;
              else
                worker->_avgspeed = worker->_blkreceived / (now - worker->_blkstarttime);
            }
          XXX << "#" << worker->_workerno << ": BLK " << worker->_blkno << " done code " << cc << " speed " << worker->_avgspeed << endl;
          curl_multi_remove_handle(_multi, easy);
          if (cc == CURLE_HTTP_RETURNED_ERROR)
            {
              long statuscode = 0;
              (void)curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &statuscode);
              XXX << "HTTP status " << statuscode << endl;
              if (statuscode == 416 && !_blklist) /* Range error */
                {
                  if (_filesize == off_t(-1))
                    {
                      // size still unknown: retry once with an open-ended range
                      if (!worker->_noendrange)
                        {
                          XXX << "#" << worker->_workerno << ": retrying with no end range" << endl;
                          worker->_noendrange = true;
                          worker->run();
                          continue;
                        }
                      worker->_noendrange = false;
                      worker->stealjob();
                      continue;
                    }
                  if (worker->_blkstart >= _filesize)
                    {
                      // we asked for a block past EOF: just move on
                      worker->nextjob();
                      continue;
                    }
                }
            }
          if (cc == 0)
            {
              if (!worker->checkChecksum())
                {
                  WAR << "#" << worker->_workerno << ": checksum error, disable worker" << endl;
                  worker->_state = WORKER_BROKEN;
                  strncpy(worker->_curlError, "checksum error", CURL_ERROR_SIZE);
                  _activeworkers--;
                  continue;
                }
              if (worker->_state == WORKER_FETCH)
                {
                  if (worker->_competing)
                    {
                      worker->disableCompetition();
                      // multiple workers wrote into this block. We already know that our
                      // data was correct, but maybe some other worker overwrote our data
                      // with something broken. Thus we have to re-check the block.
                      if (!worker->recheckChecksum())
                        {
                          XXX << "#" << worker->_workerno << ": recheck checksum error, refetch block" << endl;
                          // re-fetch! No need to worry about the bad workers,
                          // they will now be set to DISCARD. At the end of their block
                          // they will notice that they wrote bad data and go into BROKEN.
                          worker->run();
                          continue;
                        }
                    }
                  _fetchedgoodsize += worker->_blksize;
                }

              // make bad workers sleep a little
              double maxavg = 0;
              int maxworkerno = 0;
              int numbetter = 0;
              for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
                {
                  multifetchworker *oworker = *workeriter;
                  if (oworker->_state == WORKER_BROKEN)
                    continue;
                  if (oworker->_avgspeed > maxavg)
                    {
                      maxavg = oworker->_avgspeed;
                      maxworkerno = oworker->_workerno;
                    }
                  if (oworker->_avgspeed > worker->_avgspeed)
                    numbetter++;
                }
              if (maxavg && !_stealing)
                {
                  // the slower we are relative to the fastest worker, the
                  // longer we pause before taking the next block
                  double ratio = worker->_avgspeed / maxavg;
                  ratio = 1 - ratio;
                  if (numbetter < 3) // don't sleep that much if we're in the top two
                    ratio = ratio * ratio;
                  if (ratio > .01)
                    {
                      XXX << "#" << worker->_workerno << ": too slow ("<< ratio << ", " << worker->_avgspeed << ", #" << maxworkerno << ": " << maxavg << "), going to sleep for " << ratio * 1000 << " ms" << endl;
                      worker->_sleepuntil = now + ratio;
                      worker->_state = WORKER_SLEEP;
                      _sleepworkers++;
                      continue;
                    }
                }

              // do rate control (if requested)
              // should use periodavg, but that's not what libcurl does
              if (_maxspeed && now > _starttime)
                {
                  double avg = _fetchedsize / (now - _starttime);
                  avg = worker->_maxspeed * _maxspeed / avg;  // new per-worker budget
                  if (avg < _maxspeed / _maxworkers)
                    avg = _maxspeed / _maxworkers;
                  if (avg > _maxspeed)
                    avg = _maxspeed;
                  if (avg < 1024)
                    avg = 1024;
                  worker->_maxspeed = avg;
#if CURLVERSION_AT_LEAST(7,15,5)
                  curl_easy_setopt(worker->_curl, CURLOPT_MAX_RECV_SPEED_LARGE, (curl_off_t)(avg));
#endif
                }

              worker->nextjob();
            }
          else
            {
              // transfer failed: give up on this worker
              worker->_state = WORKER_BROKEN;
              _activeworkers--;
              if (!_activeworkers && !(urliter != urllist.end() && _workers.size() < MAXURLS))
                {
                  // end of workers reached! goodbye!
                  worker->evaluateCurlCode(Pathname(), cc, false);
                }
            }

          if ( _filesize > 0 && _fetchedgoodsize > _filesize ) {
            ZYPP_THROW(MediaFileSizeExceededException(_baseurl, _filesize));
          }
        }

      // send report
      if (_report)
        {
          int percent = _totalsize ? (100 * (_fetchedgoodsize + _fetchedsize)) / (_totalsize + _fetchedsize) : 0;

          double avg = 0;
          if (now > _starttime)
            avg = _fetchedsize / (now - _starttime);
          if (!(*(_report))->progress(percent, _baseurl, avg, _lastperiodstart == _starttime ? avg : _periodavg))
            ZYPP_THROW(MediaCurlException(_baseurl, "User abort", "cancelled"));
        }

      if (_timeout && now - _lastprogress > _timeout)
        break;  // no activity for too long: fall through to the throw below
    }

  if (!_finished)
    ZYPP_THROW(MediaTimeoutException(_baseurl));

  // print some download stats
  WAR << "overall result" << endl;
  for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
    {
      multifetchworker *worker = *workeriter;
      WAR << "#" << worker->_workerno << ": state: " << worker->_state << " received: " << worker->_received << " url: " << worker->_url << endl;
    }
}
1158 
1159 
1161 
1162 
// Construct the multi-curl media handler; the curl multi handle (and, per
// setupEasy(), the metalink header list) are created lazily.
// NOTE(review): one initializer line (presumably resetting the metalink
// header list) is elided in this doxygen extract.
MediaMultiCurl::MediaMultiCurl(const Url &url_r, const Pathname & attach_point_hint_r)
    : MediaCurl(url_r, attach_point_hint_r)
{
  MIL << "MediaMultiCurl::MediaMultiCurl(" << url_r << ", " << attach_point_hint_r << ")" << endl;
  _multi = 0;
}
1170 
// Release the metalink header list, the multi handle and all pooled easy
// handles.
// NOTE(review): the destructor head line and the guard/reset lines around
// the header-list cleanup are elided in this doxygen extract.
{
  {
    curl_slist_free_all(_customHeadersMetalink);
  }
  if (_multi)
    {
      curl_multi_cleanup(_multi);
      _multi = 0;
    }
  std::map<std::string, CURL *>::iterator it;
  for (it = _easypool.begin(); it != _easypool.end(); it++)
    {
      CURL *easy = it->second;
      if (easy)
        {
          curl_easy_cleanup(easy);
          it->second = NULL;  // keep the pool map consistent after cleanup
        }
    }
}
1194 
// Rebuild _customHeadersMetalink: copy every custom header and append an
// Accept header that additionally announces metalink support.
// NOTE(review): the function head line, the base-class setupEasy() call and
// the guard/reset around the list cleanup are elided in this doxygen
// extract.
{

  {
    curl_slist_free_all(_customHeadersMetalink);
  }
  struct curl_slist *sl = _customHeaders;
  for (; sl; sl = sl->next)
    _customHeadersMetalink = curl_slist_append(_customHeadersMetalink, sl->data);
  _customHeadersMetalink = curl_slist_append(_customHeadersMetalink, "Accept: */*, application/metalink+xml, application/metalink4+xml");
}
1209 
// Peek at the first bytes of an open file and decide whether they look
// like a metalink XML document: optional whitespace, an optional
// "<?xml ...>" prolog, then a "<metalink" root tag (case-insensitive).
// Reads via pread so the caller's file offset is left untouched.
// Returns false on read errors.
static bool looks_like_metalink_fd(int fd)
{
  char buf[256];
  ssize_t l;  // pread returns ssize_t; was int (harmless truncation, but wrong type)
  // always read from offset 0, retrying on EINTR
  while ((l = pread(fd, buf, sizeof(buf) - 1, (off_t)0)) == -1 && errno == EINTR)
    ;
  if (l == -1)
    return false;  // was "return 0" in a bool function
  buf[l] = 0;
  char *p = buf;
  // skip leading whitespace
  while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n')
    p++;
  // skip an optional XML prolog and the whitespace following it
  if (!strncasecmp(p, "<?xml", 5))
    {
      while (*p && *p != '>')
        p++;
      if (*p == '>')
        p++;
      while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n')
        p++;
    }
  return !strncasecmp(p, "<metalink", 9);  // redundant "? true : false" removed
}
1234 
1235 static bool looks_like_metalink(const Pathname & file)
1236 {
1237  int fd;
1238  if ((fd = open(file.asString().c_str(), O_RDONLY|O_CLOEXEC)) == -1)
1239  return false;
1240  bool ret = looks_like_metalink_fd(fd);
1241  close(fd);
1242  DBG << "looks_like_metalink(" << file << "): " << ret << endl;
1243  return ret;
1244 }
1245 
1246 // here we try to suppress all progress coming from a metalink download
1247 // bsc#1021291: Nevertheless send alive trigger (without stats), so UIs
1248 // are able to abort a hanging metalink download via callback response.
// Progress callback installed while the initial request is in flight:
// it decides per callback whether the payload is a metalink description
// (suppress progress, only send alive triggers) or the real file
// (hand over to MediaCurl::progressCallback permanently).
int MediaMultiCurl::progressCallback( void *clientp, double dltotal, double dlnow, double ultotal, double ulnow)
{
  CURL *_curl = MediaCurl::progressCallback_getcurl(clientp);
  if (!_curl)
    return MediaCurl::aliveCallback(clientp, dltotal, dlnow, ultotal, ulnow);

  // bsc#408814: Don't report any sizes before we don't have data on disk. Data reported
  // due to redirection etc. are not interesting, but may disturb filesize checks.
  FILE *fp = 0;
  if ( curl_easy_getinfo( _curl, CURLINFO_PRIVATE, &fp ) != CURLE_OK || !fp )
    return MediaCurl::aliveCallback( clientp, dltotal, dlnow, ultotal, ulnow );
  if ( ftell( fp ) == 0 )
    return MediaCurl::aliveCallback( clientp, dltotal, 0.0, ultotal, ulnow );

  // (no longer needed due to the filesize check above?)
  // work around curl bug that gives us old data
  long httpReturnCode = 0;
  if (curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &httpReturnCode ) != CURLE_OK || httpReturnCode == 0)
    return MediaCurl::aliveCallback(clientp, dltotal, dlnow, ultotal, ulnow);

  // First detection attempt: the Content-Type header
  char *ptr = NULL;
  bool ismetalink = false;
  if (curl_easy_getinfo(_curl, CURLINFO_CONTENT_TYPE, &ptr) == CURLE_OK && ptr)
  {
    std::string ct = std::string(ptr);
    if (ct.find("application/metalink+xml") == 0 || ct.find("application/metalink4+xml") == 0)
      ismetalink = true;
  }
  if (!ismetalink && dlnow < 256)
  {
    // can't tell yet from so little data, keep sending alive triggers only
    return MediaCurl::aliveCallback(clientp, dltotal, dlnow, ultotal, ulnow);
  }
  if (!ismetalink)
  {
    // second attempt: sniff the bytes already written to disk
    fflush(fp);
    ismetalink = looks_like_metalink_fd(fileno(fp));
    DBG << "looks_like_metalink_fd: " << ismetalink << endl;
  }
  if (ismetalink)
  {
    // this is a metalink file change the expected filesize
    // NOTE(review): this listing drops the line that resets the expected
    // filesize here — confirm against the full source.
    // we're downloading the metalink file. Just trigger aliveCallbacks
    curl_easy_setopt(_curl, CURLOPT_PROGRESSFUNCTION, &MediaCurl::aliveCallback);
    return MediaCurl::aliveCallback(clientp, dltotal, dlnow, ultotal, ulnow);
  }
  // definitely a plain payload: switch this transfer to the real
  // progress reporting for all further callbacks
  curl_easy_setopt(_curl, CURLOPT_PROGRESSFUNCTION, &MediaCurl::progressCallback);
  return MediaCurl::progressCallback(clientp, dltotal, dlnow, ultotal, ulnow);
}
1299 
// Download srcFile into target, transparently upgrading to a segmented
// multi-mirror download when the server answers with a metalink
// description instead of the file itself.
//
// \param srcFile  resource to fetch (filename, expected size, deltafile)
// \param target   final destination path (written atomically via a temp file)
// \param report   progress report channel
// \param options  e.g. OPTION_NO_IFMODSINCE, OPTION_NO_REPORT_START
// \throws MediaSystemException, MediaWriteException and the exceptions
//         propagated from MediaCurl / multifetch.
void MediaMultiCurl::doGetFileCopy( const OnMediaLocation &srcFile , const Pathname & target, callback::SendReport<DownloadProgressReport> & report, RequestOptions options ) const
{
  Pathname dest = target.absolutename();
  if( assert_dir( dest.dirname() ) )
  {
    DBG << "assert_dir " << dest.dirname() << " failed" << endl;
    ZYPP_THROW( MediaSystemException(getFileUrl(srcFile.filename()), "System error on " + dest.dirname().asString()) );
  }

  // Download into a mkostemp-created sibling of target; destNew's
  // dispose function unlinks it unless we disarm it after the rename.
  ManagedFile destNew { target.extend( ".new.zypp.XXXXXX" ) };
  AutoFILE file;
  {
    AutoFREE<char> buf { ::strdup( (*destNew).c_str() ) };
    if( ! buf )
    {
      ERR << "out of memory for temp file name" << endl;
      ZYPP_THROW(MediaSystemException(getFileUrl(srcFile.filename()), "out of memory for temp file name"));
    }

    AutoFD tmp_fd { ::mkostemp( buf, O_CLOEXEC ) };
    if( tmp_fd == -1 )
    {
      ERR << "mkstemp failed for file '" << destNew << "'" << endl;
      ZYPP_THROW(MediaWriteException(destNew));
    }
    destNew = ManagedFile( (*buf), filesystem::unlink );

    file = ::fdopen( tmp_fd, "we" );
    if ( ! file )
    {
      ERR << "fopen failed for file '" << destNew << "'" << endl;
      ZYPP_THROW(MediaWriteException(destNew));
    }
    tmp_fd.resetDispose(); // don't close it here! ::fdopen moved ownership to file
  }

  DBG << "dest: " << dest << endl;
  DBG << "temp: " << destNew << endl;

  // set IFMODSINCE time condition (no download if not modified)
  if( PathInfo(target).isExist() && !(options & OPTION_NO_IFMODSINCE) )
  {
    curl_easy_setopt(_curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
    curl_easy_setopt(_curl, CURLOPT_TIMEVALUE, (long)PathInfo(target).mtime());
  }
  else
  {
    curl_easy_setopt(_curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_NONE);
    curl_easy_setopt(_curl, CURLOPT_TIMEVALUE, 0L);
  }
  // change header to include Accept: metalink
  curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, _customHeadersMetalink);
  // change to our own progress function
  curl_easy_setopt(_curl, CURLOPT_PROGRESSFUNCTION, &progressCallback);
  curl_easy_setopt(_curl, CURLOPT_PRIVATE, (*file) ); // important to pass the FILE* explicitly (passing through varargs)
  try
  {
    MediaCurl::doGetFileCopyFile( srcFile, dest, file, report, options );
  }
  catch (Exception &ex)
  {
    // restore the easy handle to its normal state before rethrowing
    curl_easy_setopt(_curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_NONE);
    curl_easy_setopt(_curl, CURLOPT_TIMEVALUE, 0L);
    curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, _customHeaders);
    curl_easy_setopt(_curl, CURLOPT_PRIVATE, (void *)0);
    ZYPP_RETHROW(ex);
  }
  // same restore on the success path
  curl_easy_setopt(_curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_NONE);
  curl_easy_setopt(_curl, CURLOPT_TIMEVALUE, 0L);
  curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, _customHeaders);
  curl_easy_setopt(_curl, CURLOPT_PRIVATE, (void *)0);
  long httpReturnCode = 0;
  CURLcode infoRet = curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &httpReturnCode);
  if (infoRet == CURLE_OK)
  {
    DBG << "HTTP response: " + str::numstring(httpReturnCode) << endl;
    if ( httpReturnCode == 304
         || ( httpReturnCode == 213 && _url.getScheme() == "ftp" ) ) // not modified
    {
      DBG << "not modified: " << PathInfo(dest) << endl;
      return;
    }
  }
  else
  {
    WAR << "Could not get the response code." << endl;
  }

  // Figure out whether we received a metalink description instead of the
  // payload: first via Content-Type, then by sniffing the file on disk.
  bool ismetalink = false;

  char *ptr = NULL;
  if (curl_easy_getinfo(_curl, CURLINFO_CONTENT_TYPE, &ptr) == CURLE_OK && ptr)
  {
    std::string ct = std::string(ptr);
    if (ct.find("application/metalink+xml") == 0 || ct.find("application/metalink4+xml") == 0)
      ismetalink = true;
  }

  if (!ismetalink)
  {
    // some proxies do not store the content type, so also look at the file to find
    // out if we received a metalink (bnc#649925)
    fflush(file);
    if (looks_like_metalink(destNew))
      ismetalink = true;
  }

  if (ismetalink)
  {
    bool userabort = false;
    Pathname failedFile = ZConfig::instance().repoCachePath() / "MultiCurl.failed";
    file = nullptr; // explicitly close destNew before the parser reads it.
    try
    {
      MetaLinkParser mlp;
      mlp.parse(destNew);
      MediaBlockList bl = mlp.getBlockList();

      /*
       * github issue libzypp#277 Multicurl backend breaks with MirrorCache and Metalink with unknown filesize.
       * Fall back to a normal download if we have no knowledge about the filesize we want to download.
       */
      if ( !bl.haveFilesize() && ! srcFile.downloadSize() ) {
        XXX << "No filesize in metalink file and no expected filesize, aborting multicurl." << std::endl;
        ZYPP_THROW( MediaException("Multicurl requires filesize but none was provided.") );
      }

      std::vector<Url> urls = mlp.getUrls();
      /*
       * bsc#1191609 In certain locations we do not receive a suitable number of metalink mirrors, and might even
       * download chunks serially from one and the same server. In those cases we need to fall back to a normal download.
       */
      if ( urls.size() < MIN_REQ_MIRRS ) {
        ZYPP_THROW( MediaException("Multicurl enabled but not enough mirrors provided") );
      }

      XXX << bl << endl;
      // reopen the temp file: it held the metalink XML, now it receives
      // the actual payload assembled block by block
      file = fopen((*destNew).c_str(), "w+e");
      if (!file)
        ZYPP_THROW(MediaWriteException(destNew));
      // salvage verified blocks from an older copy, a previously failed
      // attempt and a caller-provided delta file
      if (PathInfo(target).isExist())
      {
        XXX << "reusing blocks from file " << target << endl;
        bl.reuseBlocks(file, target.asString());
        XXX << bl << endl;
      }
      if (bl.haveChecksum(1) && PathInfo(failedFile).isExist())
      {
        XXX << "reusing blocks from file " << failedFile << endl;
        bl.reuseBlocks(file, failedFile.asString());
        XXX << bl << endl;
        filesystem::unlink(failedFile);
      }
      Pathname df = srcFile.deltafile();
      if (!df.empty())
      {
        XXX << "reusing blocks from file " << df << endl;
        bl.reuseBlocks(file, df.asString());
        XXX << bl << endl;
      }
      try
      {
        multifetch(srcFile.filename(), file, &urls, &report, &bl, srcFile.downloadSize());
      }
      catch (MediaCurlException &ex)
      {
        userabort = ex.errstr() == "User abort";
        ZYPP_RETHROW(ex);
      }
    }
    catch (MediaFileSizeExceededException &ex) {
      ZYPP_RETHROW(ex);
    }
    catch (Exception &ex)
    {
      // something went wrong. fall back to normal download
      file = nullptr; // explicitly close destNew before moving it
      // keep partial data so a later attempt can reuse its blocks
      // NOTE(review): "63336" looks like a typo for 65536 — confirm intent.
      if (PathInfo(destNew).size() >= 63336)
      {
        ::unlink(failedFile.asString().c_str());
        filesystem::hardlinkCopy(destNew, failedFile);
      }
      if (userabort)
      {
        ZYPP_RETHROW(ex);
      }
      file = fopen((*destNew).c_str(), "w+e");
      if (!file)
        ZYPP_THROW(MediaWriteException(destNew));
      MediaCurl::doGetFileCopyFile(srcFile, dest, file, report, options | OPTION_NO_REPORT_START);
    }
  }

  if (::fchmod( ::fileno(file), filesystem::applyUmaskTo( 0644 )))
  {
    ERR << "Failed to chmod file " << destNew << endl;
  }

  file.resetDispose(); // we're going to close it manually here
  if (::fclose(file))
  {
    filesystem::unlink(destNew);
    ERR << "Fclose failed for file '" << destNew << "'" << endl;
    ZYPP_THROW(MediaWriteException(destNew));
  }

  // atomically move the finished download into place
  if ( rename( destNew, dest ) != 0 )
  {
    ERR << "Rename failed" << endl;
    ZYPP_THROW(MediaWriteException(dest));
  }
  destNew.resetDispose(); // no more need to unlink it

  DBG << "done: " << PathInfo(dest) << endl;
}
1515 
1516 void MediaMultiCurl::multifetch(const Pathname & filename, FILE *fp, std::vector<Url> *urllist, callback::SendReport<DownloadProgressReport> *report, MediaBlockList *blklist, off_t filesize) const
1517 {
1518  Url baseurl(getFileUrl(filename));
1519  if (blklist && filesize == off_t(-1) && blklist->haveFilesize())
1520  filesize = blklist->getFilesize();
1521  if (blklist && !blklist->haveBlocks() && filesize != 0)
1522  blklist = 0;
1523  if (blklist && (filesize == 0 || !blklist->numBlocks()))
1524  {
1525  checkFileDigest(baseurl, fp, blklist);
1526  return;
1527  }
1528  if (filesize == 0)
1529  return;
1530  if (!_multi)
1531  {
1532  _multi = curl_multi_init();
1533  if (!_multi)
1534  ZYPP_THROW(MediaCurlInitException(baseurl));
1535  }
1536 
1537  multifetchrequest req(this, filename, baseurl, _multi, fp, report, blklist, filesize);
1538  req._timeout = _settings.timeout();
1539  req._connect_timeout = _settings.connectTimeout();
1540  req._maxspeed = _settings.maxDownloadSpeed();
1541  req._maxworkers = _settings.maxConcurrentConnections();
1542  if (req._maxworkers > MAXURLS)
1543  req._maxworkers = MAXURLS;
1544  if (req._maxworkers <= 0)
1545  req._maxworkers = 1;
1546  std::vector<Url> myurllist;
1547  for (std::vector<Url>::iterator urliter = urllist->begin(); urliter != urllist->end(); ++urliter)
1548  {
1549  try
1550  {
1551  std::string scheme = urliter->getScheme();
1552  if (scheme == "http" || scheme == "https" || scheme == "ftp" || scheme == "tftp")
1553  {
1554  checkProtocol(*urliter);
1555  myurllist.push_back(internal::propagateQueryParams(*urliter, _url));
1556  }
1557  }
1558  catch (...)
1559  {
1560  }
1561  }
1562  if (!myurllist.size())
1563  myurllist.push_back(baseurl);
1564  req.run(myurllist);
1565  checkFileDigest(baseurl, fp, blklist);
1566 }
1567 
1568 void MediaMultiCurl::checkFileDigest(Url &url, FILE *fp, MediaBlockList *blklist) const
1569 {
1570  if (!blklist || !blklist->haveFileChecksum())
1571  return;
1572  if (fseeko(fp, off_t(0), SEEK_SET))
1573  ZYPP_THROW(MediaCurlException(url, "fseeko", "seek error"));
1574  Digest dig;
1575  blklist->createFileDigest(dig);
1576  char buf[4096];
1577  size_t l;
1578  while ((l = fread(buf, 1, sizeof(buf), fp)) > 0)
1579  dig.update(buf, l);
1580  if (!blklist->verifyFileDigest(dig))
1581  ZYPP_THROW(MediaCurlException(url, "file verification failed", "checksum error"));
1582 }
1583 
1584 bool MediaMultiCurl::isDNSok(const std::string &host) const
1585 {
1586  return _dnsok.find(host) == _dnsok.end() ? false : true;
1587 }
1588 
// Remember that DNS resolution for \a host succeeded, so later workers
// can skip the DNS probe for it (see isDNSok()).
void MediaMultiCurl::setDNSok(const std::string &host) const
{
  _dnsok.insert(host);
}
1593 
1594 CURL *MediaMultiCurl::fromEasyPool(const std::string &host) const
1595 {
1596  if (_easypool.find(host) == _easypool.end())
1597  return 0;
1598  CURL *ret = _easypool[host];
1599  _easypool.erase(host);
1600  return ret;
1601 }
1602 
1603 void MediaMultiCurl::toEasyPool(const std::string &host, CURL *easy) const
1604 {
1605  CURL *oldeasy = _easypool[host];
1606  _easypool[host] = easy;
1607  if (oldeasy)
1608  curl_easy_cleanup(oldeasy);
1609 }
1610 
1611  } // namespace media
1612 } // namespace zypp
1613 
std::optional< KeyManagerCtx > _context
Definition: KeyRing.cc:157
#define WORKER_STARTING
#define WORKER_DISCARD
#define WORKER_DONE
#define WORKER_LOOKUP
#define WORKER_FETCH
#define WORKER_SLEEP
#define WORKER_BROKEN
AutoDispose< void * > _state
Reference counted access to a Tp object calling a custom Dispose function when the last AutoDispose h...
Definition: AutoDispose.h:94
void resetDispose()
Set no dispose function.
Definition: AutoDispose.h:176
Store and operate with byte count.
Definition: ByteCount.h:31
static const Unit MB
1000^2 Byte
Definition: ByteCount.h:60
std::string asString(unsigned field_width_r=0, unsigned unit_width_r=1) const
Auto selected Unit and precision.
Definition: ByteCount.h:133
Compute Message Digests (MD5, SHA1 etc)
Definition: Digest.h:36
bool update(const char *bytes, size_t len)
feed data into digest computation algorithm
Definition: Digest.cc:248
Base class for Exception.
Definition: Exception.h:146
Describes a resource file located on a medium.
const ByteCount & downloadSize() const
The size of the resource on the server.
const Pathname & filename() const
The path to the resource on the medium.
const Pathname & deltafile() const
The existing deltafile that can be used to reduce download size ( zchunk or metalink )
Url manipulation class.
Definition: Url.h:92
std::string getScheme() const
Returns the scheme name of the URL.
Definition: Url.cc:533
std::string asString() const
Returns a default string representation of the Url object.
Definition: Url.cc:497
std::string getHost(EEncoding eflag=zypp::url::E_DECODED) const
Returns the hostname or IP from the URL authority.
Definition: Url.cc:588
Pathname repoCachePath() const
Path where the caches are kept (/var/cache/zypp)
Definition: ZConfig.cc:940
static ZConfig & instance()
Singleton ctor.
Definition: ZConfig.cc:823
Wrapper class for ::stat/::lstat.
Definition: PathInfo.h:221
Pathname extend(const std::string &r) const
Append string r to the last component of the path.
Definition: Pathname.h:170
Pathname dirname() const
Return all but the last component od this path.
Definition: Pathname.h:124
const std::string & asString() const
String representation.
Definition: Pathname.h:91
Pathname absolutename() const
Return this path, adding a leading '/' if relative.
Definition: Pathname.h:139
Implementation class for FTP, HTTP and HTTPS MediaHandler.
Definition: MediaCurl.h:32
virtual void setupEasy()
initializes the curl easy handle with the data from the url
Definition: MediaCurl.cc:308
Url getFileUrl(const Pathname &filename) const
concatenate the attach url and the filename to a complete download url
Definition: MediaCurl.cc:633
@ OPTION_NO_IFMODSINCE
to not add a IFMODSINCE header if target exists
Definition: MediaCurl.h:43
@ OPTION_NO_REPORT_START
do not send a start ProgressReport
Definition: MediaCurl.h:45
static void resetExpectedFileSize(void *clientp, const ByteCount &expectedFileSize)
MediaMultiCurl needs to reset the expected filesize in case a metalink file is downloaded otherwise t...
Definition: MediaCurl.cc:1388
static int aliveCallback(void *clientp, double dltotal, double dlnow, double ultotal, double ulnow)
Definition: MediaCurl.cc:1331
static int progressCallback(void *clientp, double dltotal, double dlnow, double ultotal, double ulnow)
Callback reporting download progress.
Definition: MediaCurl.cc:1345
Url clearQueryString(const Url &url) const
Definition: MediaCurl.cc:271
char _curlError[CURL_ERROR_SIZE]
Definition: MediaCurl.h:174
void doGetFileCopyFile(const OnMediaLocation &srcFile, const Pathname &dest, FILE *file, callback::SendReport< DownloadProgressReport > &report, RequestOptions options=OPTION_NONE) const
Definition: MediaCurl.cc:1175
void checkProtocol(const Url &url) const
check the url is supported by the curl library
Definition: MediaCurl.cc:283
void evaluateCurlCode(const zypp::Pathname &filename, CURLcode code, bool timeout) const
Evaluates a curl return code and throws the right MediaException filename Filename being downloaded c...
Definition: MediaCurl.cc:738
static CURL * progressCallback_getcurl(void *clientp)
Definition: MediaCurl.cc:1361
virtual void disconnectFrom() override
Definition: MediaCurl.cc:611
curl_slist * _customHeaders
Definition: MediaCurl.h:175
Url url() const
Url used.
Definition: MediaHandler.h:503
const Url _url
Url to handle.
Definition: MediaHandler.h:113
virtual void setupEasy() override
initializes the curl easy handle with the data from the url
std::map< std::string, CURL * > _easypool
curl_slist * _customHeadersMetalink
void multifetch(const Pathname &filename, FILE *fp, std::vector< Url > *urllist, callback::SendReport< DownloadProgressReport > *report=0, MediaBlockList *blklist=0, off_t filesize=off_t(-1)) const
static int progressCallback(void *clientp, double dltotal, double dlnow, double ultotal, double ulnow)
void setDNSok(const std::string &host) const
MediaMultiCurl(const Url &url_r, const Pathname &attach_point_hint_r)
std::set< std::string > _dnsok
bool isDNSok(const std::string &host) const
CURL * fromEasyPool(const std::string &host) const
virtual void doGetFileCopy(const OnMediaLocation &srcFile, const Pathname &targetFilename, callback::SendReport< DownloadProgressReport > &_report, RequestOptions options=OPTION_NONE) const override
void checkFileDigest(Url &url, FILE *fp, MediaBlockList *blklist) const
void toEasyPool(const std::string &host, CURL *easy) const
callback::SendReport< DownloadProgressReport > * _report
void run(std::vector< Url > &urllist)
std::list< multifetchworker * > _workers
multifetchrequest(const MediaMultiCurl *context, const Pathname &filename, const Url &baseurl, CURLM *multi, FILE *fp, callback::SendReport< DownloadProgressReport > *report, MediaBlockList *blklist, off_t filesize)
const MediaMultiCurl * _context
size_t headerfunction(char *ptr, size_t size)
multifetchrequest * _request
size_t writefunction(void *ptr, size_t size)
static size_t _writefunction(void *ptr, size_t size, size_t nmemb, void *stream)
static size_t _headerfunction(void *ptr, size_t size, size_t nmemb, void *stream)
void adddnsfd(fd_set &rset, int &maxfd)
mode_t applyUmaskTo(mode_t mode_r)
Modify mode_r according to the current umask ( mode_r & ~getUmask() ).
Definition: PathInfo.h:789
ByteCount df(const Pathname &path_r)
Report free disk space on a mounted file system.
Definition: PathInfo.cc:1155
int unlink(const Pathname &path)
Like 'unlink'.
Definition: PathInfo.cc:700
int rename(const Pathname &oldpath, const Pathname &newpath)
Like 'rename'.
Definition: PathInfo.cc:742
int assert_dir(const Pathname &path, unsigned mode)
Like 'mkdir -p'.
Definition: PathInfo.cc:319
int hardlinkCopy(const Pathname &oldpath, const Pathname &newpath)
Create newpath as hardlink or copy of oldpath.
Definition: PathInfo.cc:883
static bool looks_like_metalink(const Pathname &file)
static bool looks_like_metalink_fd(int fd)
constexpr auto MAXURLS
static bool env_isset(std::string name)
static double currentTime()
constexpr auto BLKSIZE
constexpr auto MIN_REQ_MIRRS
std::string numstring(char n, int w=0)
Definition: String.h:289
Easy-to use interface to the ZYPP dependency resolver.
Definition: CodePitfalls.doc:2
AutoDispose< const Pathname > ManagedFile
A Pathname plus associated cleanup code to be executed when path is no longer needed.
Definition: ManagedFile.h:27
AutoDispose<int> calling ::close
Definition: AutoDispose.h:297
AutoDispose<FILE*> calling ::fclose
Definition: AutoDispose.h:308
#define ZYPP_RETHROW(EXCPT)
Drops a logline and rethrows, updating the CodeLocation.
Definition: Exception.h:430
#define ZYPP_THROW(EXCPT)
Drops a logline and throws the Exception.
Definition: Exception.h:418
#define DBG
Definition: Logger.h:95
#define MIL
Definition: Logger.h:96
#define ERR
Definition: Logger.h:98
#define WAR
Definition: Logger.h:97
#define XXX
Definition: Logger.h:94