libzypp  17.14.0
MediaMultiCurl.cc
Go to the documentation of this file.
1 /*---------------------------------------------------------------------\
2 | ____ _ __ __ ___ |
3 | |__ / \ / / . \ . \ |
4 | / / \ V /| _/ _/ |
5 | / /__ | | | | | | |
6 | /_____||_| |_| |_| |
7 | |
8 \---------------------------------------------------------------------*/
13 #include <ctype.h>
14 #include <sys/types.h>
15 #include <signal.h>
16 #include <sys/wait.h>
17 #include <netdb.h>
18 #include <arpa/inet.h>
19 
20 #include <vector>
21 #include <iostream>
22 #include <algorithm>
23 
24 
25 #include "zypp/ZConfig.h"
26 #include "zypp/base/Logger.h"
29 #include "zypp/ManagedFile.h"
30 
31 using namespace std;
32 using namespace zypp::base;
33 
34 #undef CURLVERSION_AT_LEAST
35 #define CURLVERSION_AT_LEAST(M,N,O) LIBCURL_VERSION_NUM >= ((((M)<<8)+(N))<<8)+(O)
36 
37 namespace zypp {
38  namespace media {
39 
40 
42 
43 
44 class multifetchrequest;
45 
46 // Hack: we derive from MediaCurl just to get the storage space for
47 // settings, url, curlerrors and the like
48 
50  friend class multifetchrequest;
51 
52 public:
53  multifetchworker(int no, multifetchrequest &request, const Url &url);
55  void nextjob();
56  void run();
57  bool checkChecksum();
58  bool recheckChecksum();
59  void disableCompetition();
60 
61  void checkdns();
62  void adddnsfd(fd_set &rset, int &maxfd);
63  void dnsevent(fd_set &rset);
64 
65  int _workerno;
66 
67  int _state;
68  bool _competing;
69 
70  size_t _blkno;
71  off_t _blkstart;
72  size_t _blksize;
74 
75  double _blkstarttime;
76  size_t _blkreceived;
77  off_t _received;
78 
79  double _avgspeed;
80  double _maxspeed;
81 
82  double _sleepuntil;
83 
84 private:
85  void stealjob();
86 
87  size_t writefunction(void *ptr, size_t size);
88  static size_t _writefunction(void *ptr, size_t size, size_t nmemb, void *stream);
89 
90  size_t headerfunction(char *ptr, size_t size);
91  static size_t _headerfunction(void *ptr, size_t size, size_t nmemb, void *stream);
92 
94  int _pass;
95  string _urlbuf;
96  off_t _off;
97  size_t _size;
99 
100  pid_t _pid;
101  int _dnspipe;
102 };
103 
// Worker life-cycle states (stored in multifetchworker::_state):
#define WORKER_STARTING 0   // constructed, no job scheduled yet
#define WORKER_LOOKUP 1     // asynchronous DNS pre-check in progress (see checkdns/dnsevent)
#define WORKER_FETCH 2      // actively downloading its block
#define WORKER_DISCARD 3    // block no longer needed; data still read (and digested) but thrown away
#define WORKER_DONE 4       // finished, idle; its easy handle may be pooled
#define WORKER_SLEEP 5      // deliberately idle until _sleepuntil (back-off for slow/competing workers)
#define WORKER_BROKEN 6     // permanent failure; excluded from scheduling, error kept in _curlError
111 
112 
113 
115 public:
116  multifetchrequest(const MediaMultiCurl *context, const Pathname &filename, const Url &baseurl, CURLM *multi, FILE *fp, callback::SendReport<DownloadProgressReport> *report, MediaBlockList *blklist, off_t filesize);
118 
119  void run(std::vector<Url> &urllist);
120 
121 protected:
122  friend class multifetchworker;
123 
127 
128  FILE *_fp;
131  off_t _filesize;
132 
133  CURLM *_multi;
134 
135  std::list<multifetchworker *> _workers;
136  bool _stealing;
138 
139  size_t _blkno;
140  off_t _blkoff;
145  bool _finished;
146  off_t _totalsize;
149 
150  double _starttime;
152 
155  double _periodavg;
156 
157 public:
158  double _timeout;
160  double _maxspeed;
162 };
163 
#define BLKSIZE 131072      // fetch granularity (128 kB): default block size, and cap for unchecksummed blocks
#define MAXURLS 10          // upper bound on the number of mirror workers spawned in multifetchrequest::run()
166 
167 
169 
// Wall-clock time in seconds with microsecond resolution; returns 0 if
// gettimeofday() fails.
// NOTE(review): the function-name line is absent from this listing
// (orig. 171; presumably "currentTime()", which the rest of the file calls).
static double
{
  struct timeval tv;
  if (gettimeofday(&tv, NULL))
    return 0;
  return tv.tv_sec + tv.tv_usec / 1000000.;
}
178 
// Body of the libcurl write callback (invoked via the static _writefunction
// trampoline).  Returning 'size' tells curl the data was fully consumed;
// returning any other value aborts the transfer.  The "size ? 0 : 1" idiom
// signals an abort while still reporting success for a zero-byte call.
size_t
multifetchworker::writefunction(void *ptr, size_t size)
{
  size_t len, cnt;
  if (_state == WORKER_BROKEN)
    return size ? 0 : 1;        // broken worker: abort the transfer

  double now = currentTime();

  len = size > _size ? _size : size;    // never consume past the end of our block
  if (!len)
    {
      // kill this job?
      return size;
    }

  if (_blkstart && _off == _blkstart)
    {
      // make sure that the server replied with "partial content"
      // for http requests
      char *effurl;
      (void)curl_easy_getinfo(_curl, CURLINFO_EFFECTIVE_URL, &effurl);
      if (effurl && !strncasecmp(effurl, "http", 4))
        {
          long statuscode = 0;
          (void)curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &statuscode);
          if (statuscode != 206)
            return size ? 0 : 1;        // server ignored our Range header: abort
        }
    }

  _blkreceived += len;
  _received += len;

  _request->_lastprogress = now;        // record activity for the global timeout check

  if (_state == WORKER_DISCARD || !_request->_fp)
    {
      // block is no longer needed
      // still calculate the checksum so that we can throw out bad servers
      if (_request->_blklist)
        _dig.update((const char *)ptr, len);
      _off += len;
      _size -= len;
      return size;
    }
  if (fseeko(_request->_fp, _off, SEEK_SET))
    return size ? 0 : 1;        // cannot seek in the output file: abort
  cnt = fwrite(ptr, 1, len, _request->_fp);
  if (cnt > 0)
    {
      _request->_fetchedsize += cnt;
      if (_request->_blklist)
        _dig.update((const char *)ptr, cnt);
      _off += cnt;
      _size -= cnt;
      if (cnt == len)
        return size;    // everything written
    }
  return cnt;   // short write: cnt != size makes curl abort the transfer
}
240 
241 size_t
242 multifetchworker::_writefunction(void *ptr, size_t size, size_t nmemb, void *stream)
243 {
244  multifetchworker *me = reinterpret_cast<multifetchworker *>(stream);
245  return me->writefunction(ptr, size * nmemb);
246 }
247 
// Body of the libcurl header callback (via the static _headerfunction
// trampoline).  Logs "Location:" redirects and parses
// "Content-Range: bytes START-END/FILESIZE" to learn the total file size
// when it is not yet known; a size disagreeing with an already-known size
// marks this mirror as bad.
// NOTE(review): one line is missing from this listing (orig. 286, inside the
// mismatch branch — presumably a state change; verify against upstream).
size_t
multifetchworker::headerfunction(char *p, size_t size)
{
  size_t l = size;
  if (l > 9 && !strncasecmp(p, "Location:", 9))
    {
      string line(p + 9, l - 9);
      if (line[l - 10] == '\r')
        line.erase(l - 10, 1);
      XXX << "#" << _workerno << ": redirecting to" << line << endl;
      return size;
    }
  // only "Content-Range:" headers of plausible length are of interest below
  if (l <= 14 || l >= 128 || strncasecmp(p, "Content-Range:", 14) != 0)
    return size;
  p += 14;
  l -= 14;
  while (l && (*p == ' ' || *p == '\t'))
    p++, l--;
  if (l < 6 || strncasecmp(p, "bytes", 5))
    return size;
  p += 5;
  l -= 5;
  char buf[128];
  memcpy(buf, p, l);    // l < 128 - 14, so this fits with room for the NUL
  buf[l] = 0;
  unsigned long long start, off, filesize;
  if (sscanf(buf, "%llu-%llu/%llu", &start, &off, &filesize) != 3)
    return size;
  if (_request->_filesize == (off_t)-1)
    {
      // file size was unknown so far: adopt the server-reported total
      WAR << "#" << _workerno << ": setting request filesize to " << filesize << endl;
      _request->_filesize = filesize;
      if (_request->_totalsize == 0 && !_request->_blklist)
        _request->_totalsize = filesize;
    }
  if (_request->_filesize != (off_t)filesize)
    {
      // this mirror disagrees about the file size
      XXX << "#" << _workerno << ": filesize mismatch" << endl;
      // (statement missing in this listing, orig. 286)
      strncpy(_curlError, "filesize mismatch", CURL_ERROR_SIZE);
    }
  return size;
}
291 
292 size_t
293 multifetchworker::_headerfunction(void *ptr, size_t size, size_t nmemb, void *stream)
294 {
295  multifetchworker *me = reinterpret_cast<multifetchworker *>(stream);
296  return me->headerfunction((char *)ptr, size * nmemb);
297 }
298 
// Construct worker #no fetching from 'url' on behalf of 'request'.  Derives
// from MediaCurl purely to reuse its storage (settings, url, curl handle,
// error buffer).  On any setup failure the error text is left in _curlError
// and the constructor returns early.
// NOTE(review): several lines are missing from this listing (orig. 304, 317,
// 322, 327, 339, 356-359); the comments below describe only the visible code.
multifetchworker::multifetchworker(int no, multifetchrequest &request, const Url &url)
: MediaCurl(url, Pathname())
{
  _workerno = no;
  _request = &request;
  _competing = false;
  _off = _blkstart = 0;
  _size = _blksize = 0;
  _pass = 0;
  _blkno = 0;
  _pid = 0;             // pid of a forked DNS checker child, 0 = none
  _dnspipe = -1;        // read end of the DNS checker pipe, -1 = none
  _blkreceived = 0;
  _received = 0;
  _blkstarttime = 0;
  _avgspeed = 0;
  _sleepuntil = 0;
  _noendrange = false;

  Url curlUrl( clearQueryString(url) );
  _urlbuf = curlUrl.asString();
  if (_curl)
    XXX << "reused worker from pool" << endl;
  if (!_curl && !(_curl = curl_easy_init()))
    {
      strncpy(_curlError, "curl_easy_init failed", CURL_ERROR_SIZE);
      return;
    }
  try
    {
      setupEasy();
    }
  catch (Exception &ex)
    {
      curl_easy_cleanup(_curl);
      _curl = 0;
      strncpy(_curlError, "curl_easy_setopt failed", CURL_ERROR_SIZE);
      return;
    }
  curl_easy_setopt(_curl, CURLOPT_PRIVATE, this);
  curl_easy_setopt(_curl, CURLOPT_URL, _urlbuf.c_str());
  curl_easy_setopt(_curl, CURLOPT_WRITEFUNCTION, &_writefunction);
  curl_easy_setopt(_curl, CURLOPT_WRITEDATA, this);
  // the header callback is only needed while the file size is still unknown
  // or the first block has no checksum to verify against
  if (_request->_filesize == off_t(-1) || !_request->_blklist || !_request->_blklist->haveChecksum(0))
    {
      curl_easy_setopt(_curl, CURLOPT_HEADERFUNCTION, &_headerfunction);
      curl_easy_setopt(_curl, CURLOPT_HEADERDATA, this);
    }
  // if this is the same host copy authorization
  // (the host check is also what curl does when doing a redirect)
  // (note also that unauthorized exceptions are thrown with the request host)
  if (url.getHost() == _request->_context->_url.getHost())
    {
      if ( _settings.userPassword().size() )
        {
          curl_easy_setopt(_curl, CURLOPT_USERPWD, _settings.userPassword().c_str());
          string use_auth = _settings.authType();
          if (use_auth.empty())
            use_auth = "digest,basic"; // our default
          long auth = CurlAuthData::auth_type_str2long(use_auth);
          if( auth != CURLAUTH_NONE)
            {
              XXX << "#" << _workerno << ": Enabling HTTP authentication methods: " << use_auth
                  << " (CURLOPT_HTTPAUTH=" << auth << ")" << std::endl;
              curl_easy_setopt(_curl, CURLOPT_HTTPAUTH, auth);
            }
        }
    }
  checkdns();   // kick off the asynchronous DNS pre-check
}
377 
// Worker destructor: detaches the easy handle from the multi handle and
// either strips it for reuse (clean finish) or destroys it, reaps a
// still-running DNS checker child, closes the DNS pipe, and disconnects.
// NOTE(review): the signature line is missing from this listing (orig. 378),
// as is one line inside the DONE/SLEEP branch (orig. 394 — presumably
// returning the handle to a pool; verify against upstream).
{
  if (_curl)
    {
      curl_multi_remove_handle(_request->_multi, _curl);
      if (_state == WORKER_DONE || _state == WORKER_SLEEP)
        {
          // handle finished cleanly: clear our callbacks and speed limit so
          // it is safe to reuse
#if CURLVERSION_AT_LEAST(7,15,5)
          curl_easy_setopt(_curl, CURLOPT_MAX_RECV_SPEED_LARGE, (curl_off_t)0);
#endif
          curl_easy_setopt(_curl, CURLOPT_PRIVATE, (void *)0);
          curl_easy_setopt(_curl, CURLOPT_WRITEFUNCTION, (void *)0);
          curl_easy_setopt(_curl, CURLOPT_WRITEDATA, (void *)0);
          curl_easy_setopt(_curl, CURLOPT_HEADERFUNCTION, (void *)0);
          curl_easy_setopt(_curl, CURLOPT_HEADERDATA, (void *)0);
        }
      else
        curl_easy_cleanup(_curl);
      _curl = 0;
    }
  if (_pid)
    {
      // a DNS checker child is still running: kill and reap it
      kill(_pid, SIGKILL);
      int status;
      while (waitpid(_pid, &status, 0) == -1)
        if (errno != EINTR)
          break;
      _pid = 0;
    }
  if (_dnspipe != -1)
    {
      close(_dnspipe);
      _dnspipe = -1;
    }
  // the destructor in MediaCurl doesn't call disconnect() if
  // the media is not attached, so we do it here manually
  disconnectFrom();
}
418 
// Return true iff the environment variable 'name' is set to a non-empty
// value.  The name is taken by const reference to avoid copying the string
// on every call, and the redundant "? true : false" is dropped.
static inline bool env_isset(const std::string &name)
{
  const char *s = getenv(name.c_str());
  return s && *s;
}
424 
// Asynchronous DNS pre-check: fork a child that resolves the worker's host,
// so an unresolvable mirror cannot stall the whole multi-download.  The
// child's exit status (0 = resolvable) is collected later in dnsevent();
// the read end of the pipe (_dnspipe) becomes readable when the child exits.
// The check is skipped for hosts already verified (isDNSok), numeric
// addresses, and proxied connections.
// NOTE(review): the signature line (orig. 426) and a few statements
// (orig. 462, 472, 500 — presumably error-state and WORKER_LOOKUP state
// transitions) are missing from this listing.
void
{
  string host = _url.getHost();

  if (host.empty())
    return;

  if (_request->_context->isDNSok(host))
    return;

  // no need to do dns checking for numeric hosts
  char addrbuf[128];
  if (inet_pton(AF_INET, host.c_str(), addrbuf) == 1)
    return;
  if (inet_pton(AF_INET6, host.c_str(), addrbuf) == 1)
    return;

  // no need to do dns checking if we use a proxy
  if (!_settings.proxy().empty())
    return;
  if (env_isset("all_proxy") || env_isset("ALL_PROXY"))
    return;
  string schemeproxy = _url.getScheme() + "_proxy";
  if (env_isset(schemeproxy))
    return;
  if (schemeproxy != "http_proxy")
    {
      // also honor the upper-case variant, e.g. FTP_PROXY
      std::transform(schemeproxy.begin(), schemeproxy.end(), schemeproxy.begin(), ::toupper);
      if (env_isset(schemeproxy))
        return;
    }

  XXX << "checking DNS lookup of " << host << endl;
  int pipefds[2];
  if (pipe(pipefds))
    {
      strncpy(_curlError, "DNS pipe creation failed", CURL_ERROR_SIZE);
      return;
    }
  _pid = fork();
  if (_pid == pid_t(-1))
    {
      close(pipefds[0]);
      close(pipefds[1]);
      _pid = 0;
      strncpy(_curlError, "DNS checker fork failed", CURL_ERROR_SIZE);
      return;
    }
  else if (_pid == 0)
    {
      // child: perform the lookup; the exit status carries the result
      close(pipefds[0]);
      // XXX: close all other file descriptors
      struct addrinfo *ai, aihints;
      memset(&aihints, 0, sizeof(aihints));
      aihints.ai_family = PF_UNSPEC;
      // probe for IPv6 support; without it restrict the lookup to IPv4
      int tstsock = socket(PF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0);
      if (tstsock == -1)
        aihints.ai_family = PF_INET;
      else
        close(tstsock);
      aihints.ai_socktype = SOCK_STREAM;
      aihints.ai_flags = AI_CANONNAME;
      unsigned int connecttimeout = _request->_connect_timeout;
      if (connecttimeout)
        alarm(connecttimeout);  // don't let the lookup hang forever
      signal(SIGALRM, SIG_DFL);
      if (getaddrinfo(host.c_str(), NULL, &aihints, &ai))
        _exit(1);
      _exit(0);
    }
  // parent: keep the read end; select() on it signals the child's exit
  close(pipefds[1]);
  _dnspipe = pipefds[0];
}
502 
503 void
504 multifetchworker::adddnsfd(fd_set &rset, int &maxfd)
505 {
506  if (_state != WORKER_LOOKUP)
507  return;
508  FD_SET(_dnspipe, &rset);
509  if (maxfd < _dnspipe)
510  maxfd = _dnspipe;
511 }
512 
// Handle a select() event on the DNS checker pipe: reap the child and turn
// its exit status into either a failed worker or the next download job.
// NOTE(review): the signature line (orig. 514) and several statements
// (orig. 533, 535, 542, 544, 547 — presumably state transitions and pipe
// cleanup) are missing from this listing.
void
{

  if (_state != WORKER_LOOKUP || !FD_ISSET(_dnspipe, &rset))
    return;
  int status;
  while (waitpid(_pid, &status, 0) == -1)
    {
      if (errno != EINTR)
        return;
    }
  _pid = 0;
  if (_dnspipe != -1)
    {
      close(_dnspipe);
      _dnspipe = -1;
    }
  if (!WIFEXITED(status))
    {
      // child did not exit normally (e.g. killed by its alarm): lookup failed
      strncpy(_curlError, "DNS lookup failed", CURL_ERROR_SIZE);
      return;
    }
  int exitcode = WEXITSTATUS(status);
  XXX << "#" << _workerno << ": DNS lookup returned " << exitcode << endl;
  if (exitcode != 0)
    {
      strncpy(_curlError, "DNS lookup failed", CURL_ERROR_SIZE);
      return;
    }
  nextjob();    // host resolves: schedule the first block
}
550 
// Verify the digest of the block just fetched against the block list;
// trivially true when there is no block or no block list to check against.
// NOTE(review): truncated in this listing — the signature line (orig. 552)
// and the final verification/return statement (orig. 557) are missing.
bool
{
  // XXX << "checkChecksum block " << _blkno << endl;
  if (!_blksize || !_request->_blklist)
    return true;
}
559 
// Re-read this worker's block from the output file and recompute its digest.
// Needed after competing workers may have overwritten the same region on
// disk (see the recheck in multifetchrequest::run()).
// NOTE(review): the signature line (orig. 561) and the final
// verification/return statement (orig. 579) are missing from this listing.
bool
{
  // XXX << "recheckChecksum block " << _blkno << endl;
  if (!_request->_fp || !_blksize || !_request->_blklist)
    return true;
  if (fseeko(_request->_fp, _blkstart, SEEK_SET))
    return false;
  char buf[4096];
  size_t l = _blksize;
  _request->_blklist->createDigest(_dig); // resets digest
  while (l)
    {
      // digest the block in buffer-sized chunks
      size_t cnt = l > sizeof(buf) ? sizeof(buf) : l;
      if (fread(buf, cnt, 1, _request->_fp) != 1)
        return false;
      _dig.update(buf, cnt);
      l -= cnt;
    }
}
581 
582 
// No unassigned blocks are left: pick another worker's in-flight block and
// compete for it (whoever finishes first wins; losers go to DISCARD via
// disableCompetition()).  The victim is chosen by lowest pass count, then by
// estimated remaining time.  If the current owner will clearly finish first,
// this worker goes to sleep instead of competing.
// NOTE(review): the signature line (orig. 584) and several statements
// (orig. 631-632, 644, 660-661) are missing from this listing; gaps are
// marked inline.
void
{
  if (!_request->_stealing)
    {
      XXX << "start stealing!" << endl;
      _request->_stealing = true;
    }
  multifetchworker *best = 0;
  std::list<multifetchworker *>::iterator workeriter = _request->_workers.begin();
  double now = 0;
  for (; workeriter != _request->_workers.end(); ++workeriter)
    {
      multifetchworker *worker = *workeriter;
      if (worker == this)
        continue;
      if (worker->_pass == -1)
        continue; // do not steal!
      if (worker->_state == WORKER_DISCARD || worker->_state == WORKER_DONE || worker->_state == WORKER_SLEEP || !worker->_blksize)
        continue; // do not steal finished jobs
      if (!worker->_avgspeed && worker->_blkreceived)
        {
          // lazily estimate a speed for workers that have none yet
          if (!now)
            now = currentTime();
          if (now > worker->_blkstarttime)
            worker->_avgspeed = worker->_blkreceived / (now - worker->_blkstarttime);
        }
      // prefer the candidate with the lowest pass count...
      if (!best || best->_pass > worker->_pass)
        {
          best = worker;
          continue;
        }
      if (best->_pass < worker->_pass)
        continue;
      // if it is the same block, we want to know the best worker, otherwise the worst
      if (worker->_blkstart == best->_blkstart)
        {
          // compare estimated remaining times: (size left) / speed, cross-multiplied
          if ((worker->_blksize - worker->_blkreceived) * best->_avgspeed < (best->_blksize - best->_blkreceived) * worker->_avgspeed)
            best = worker;
        }
      else
        {
          if ((worker->_blksize - worker->_blkreceived) * best->_avgspeed > (best->_blksize - best->_blkreceived) * worker->_avgspeed)
            best = worker;
        }
    }
  if (!best)
    {
      // nothing left to steal: the whole download is complete
      _request->_finished = true;
      return;
    }
  // do not sleep twice
  if (_state != WORKER_SLEEP)
    {
      if (!_avgspeed && _blkreceived)
        {
          if (!now)
            now = currentTime();
          if (now > _blkstarttime)
          // (statement missing in this listing, orig. 644 — presumably
          // computes _avgspeed like the loop above)
        }

      // lets see if we should sleep a bit
      XXX << "me #" << _workerno << ": " << _avgspeed << ", size " << best->_blksize << endl;
      XXX << "best #" << best->_workerno << ": " << best->_avgspeed << ", size " << (best->_blksize - best->_blkreceived) << endl;
      if (_avgspeed && best->_avgspeed && best->_blksize - best->_blkreceived > 0 &&
          (best->_blksize - best->_blkreceived) * _avgspeed < best->_blksize * best->_avgspeed)
        {
          // the owner will finish before we could: back off instead
          if (!now)
            now = currentTime();
          double sl = (best->_blksize - best->_blkreceived) / best->_avgspeed * 2;
          if (sl > 1)
            sl = 1;     // cap the nap at one second
          XXX << "#" << _workerno << ": going to sleep for " << sl * 1000 << " ms" << endl;
          _sleepuntil = now + sl;
          // (statements missing in this listing, orig. 660-661 — presumably
          // sets WORKER_SLEEP and bumps the sleeper count)
          return;
        }
    }

  // compete for the chosen block
  _competing = true;
  best->_competing = true;
  _blkstart = best->_blkstart;
  _blksize = best->_blksize;
  best->_pass++;
  _pass = best->_pass;
  _blkno = best->_blkno;
  run();
}
675 
// Our copy of the block verified OK: tell every other worker fetching the
// same block to discard its data, and mark the block unstealable (_pass = -1)
// so no one re-fetches what we already have.
// NOTE(review): the signature line is missing from this listing (orig. 677).
void
{
  std::list<multifetchworker *>::iterator workeriter = _request->_workers.begin();
  for (; workeriter != _request->_workers.end(); ++workeriter)
    {
      multifetchworker *worker = *workeriter;
      if (worker == this)
        continue;
      if (worker->_blkstart == _blkstart)
        {
          if (worker->_state == WORKER_FETCH)
            worker->_state = WORKER_DISCARD;
          worker->_pass = -1; /* do not steal this one, we already have it */
        }
    }
}
693 
694 
// Pick the next block for this worker: advance through the block list (or
// carve a plain file into BLKSIZE chunks), or switch to stealing once all
// regular blocks are handed out, then start the transfer via run().
// NOTE(review): the signature line (orig. 696) and several statements
// (orig. 710-711, 716, 738-740) are missing from this listing — the visible
// braces in the no-blocklist branch are therefore unbalanced; gaps are
// marked inline.
void
{
  _noendrange = false;
  if (_request->_stealing)
    {
      stealjob();
      return;
    }

  MediaBlockList *blklist = _request->_blklist;
  if (!blklist)
    {
      // no block list: fixed-size BLKSIZE chunks
      _blksize = BLKSIZE;
      if (_request->_filesize != off_t(-1))
      // (lines missing in this listing, orig. 710-711 — presumably the brace
      // and an end-of-file check guarding the stealjob below)
      {
        stealjob();
        return;
      }
      // (statement missing in this listing, orig. 716 — presumably clamps
      // _blksize to the remaining file size)
      if (_blksize > BLKSIZE)
        _blksize = BLKSIZE;
      }
    }
  else
    {
      // advance to the block containing the current offset
      MediaBlock blk = blklist->getBlock(_request->_blkno);
      while (_request->_blkoff >= (off_t)(blk.off + blk.size))
        {
          if (++_request->_blkno == blklist->numBlocks())
            {
              stealjob();       // past the last block
              return;
            }
          blk = blklist->getBlock(_request->_blkno);
          _request->_blkoff = blk.off;
        }
      _blksize = blk.off + blk.size - _request->_blkoff;
      // without a checksum a large block may be fetched in BLKSIZE pieces
      if (_blksize > BLKSIZE && !blklist->haveChecksum(_request->_blkno))
        _blksize = BLKSIZE;
    }
  // (lines missing in this listing, orig. 738-740)
  run();
}
743 
// Submit this worker's current block [_blkstart, _blkstart + _blksize) to
// the curl multi handle as a ranged HTTP request and reset the per-block
// progress counters.
// NOTE(review): the signature line (orig. 745) and the error-path statements
// (orig. 757-759, 765-766) plus orig. 775 are missing from this listing;
// gaps are marked inline.
void
{
  char rangebuf[128];

  if (_state == WORKER_BROKEN || _state == WORKER_DONE)
    return; // just in case...
  if (_noendrange)
    sprintf(rangebuf, "%llu-", (unsigned long long)_blkstart);
  else
    sprintf(rangebuf, "%llu-%llu", (unsigned long long)_blkstart, (unsigned long long)_blkstart + _blksize - 1);
  XXX << "#" << _workerno << ": BLK " << _blkno << ":" << rangebuf << " " << _url << endl;
  // an open-ended range starting at 0 needs no Range header at all
  if (curl_easy_setopt(_curl, CURLOPT_RANGE, !_noendrange || _blkstart != 0 ? rangebuf : (char *)0) != CURLE_OK)
    {
      // (statements missing in this listing, orig. 757-759)
      strncpy(_curlError, "curl_easy_setopt range failed", CURL_ERROR_SIZE);
      return;
    }
  if (curl_multi_add_handle(_request->_multi, _curl) != CURLM_OK)
    {
      // (statements missing in this listing, orig. 765-766)
      strncpy(_curlError, "curl_multi_add_handle failed", CURL_ERROR_SIZE);
      return;
    }
  _request->_havenewjob = true;         // main loop must re-run multi_perform
  _off = _blkstart;
  _size = _blksize;
  if (_request->_blklist)
    _request->_blklist->createDigest(_dig); // resets digest
  // (statement missing in this listing, orig. 775)

  double now = currentTime();
  _blkstarttime = now;
  _blkreceived = 0;
}
781 
782 
784 
785 
// Set up a multi-fetch request for 'filename' from 'baseurl', writing to
// 'fp'.  'blklist' optionally provides block offsets/checksums; 'filesize'
// may be (off_t)-1 when unknown.  _totalsize becomes the sum of all block
// sizes, or the plain file size when no block list is given.
// NOTE(review): two lines are missing from this listing (orig. 797 — the
// branch taken when a block list exists, presumably initializing _blkoff
// from the first block — and orig. 808).
multifetchrequest::multifetchrequest(const MediaMultiCurl *context, const Pathname &filename, const Url &baseurl, CURLM *multi, FILE *fp, callback::SendReport<DownloadProgressReport> *report, MediaBlockList *blklist, off_t filesize) : _context(context), _filename(filename), _baseurl(baseurl)
{
  _fp = fp;
  _report = report;
  _blklist = blklist;
  _filesize = filesize;
  _multi = multi;
  _stealing = false;
  _havenewjob = false;
  _blkno = 0;
  if (_blklist)
    // (statement missing in this listing, orig. 797)
  else
    _blkoff = 0;
  _activeworkers = 0;
  _lookupworkers = 0;
  _sleepworkers = 0;
  _minsleepuntil = 0;
  _finished = false;
  _fetchedsize = 0;
  _fetchedgoodsize = 0;
  _totalsize = 0;
  // (statement missing in this listing, orig. 808)
  _lastperiodfetched = 0;
  _periodavg = 0;
  _timeout = 0;
  _connect_timeout = 0;
  _maxspeed = 0;
  _maxworkers = 0;
  if (blklist)
    {
      // total size = sum of all block sizes in the list
      for (size_t blkno = 0; blkno < blklist->numBlocks(); blkno++)
        {
          MediaBlock blk = blklist->getBlock(blkno);
          _totalsize += blk.size;
        }
    }
  else if (filesize != off_t(-1))
    _totalsize = filesize;
}
826 
// Destructor: delete every worker and clear the list.
// NOTE(review): the signature line is missing from this listing (orig. 827).
{
  for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
    {
      multifetchworker *worker = *workeriter;
      *workeriter = NULL;       // null the slot before deleting, defensively
      delete worker;
    }
  _workers.clear();
}
837 
// Main scheduling loop.  Spawns up to _maxworkers workers over the mirror
// list (capped at MAXURLS), multiplexes DNS pre-checks and curl transfers
// via select(), collects finished transfers, verifies checksums, handles
// block stealing/competition, throttles slow workers, applies the optional
// rate limit, reports progress, and enforces the overall timeout.
// NOTE(review): a few statements are missing from this listing
// (orig. 956-959, 1125, 1146) — gaps are marked inline.
void
multifetchrequest::run(std::vector<Url> &urllist)
{
  int workerno = 0;
  std::vector<Url>::iterator urliter = urllist.begin();
  for (;;)
    {
      fd_set rset, wset, xset;
      int maxfd, nqueue;

      if (_finished)
        {
          XXX << "finished!" << endl;
          break;
        }

      if ((int)_activeworkers < _maxworkers && urliter != urllist.end() && _workers.size() < MAXURLS)
        {
          // spawn another worker!
          multifetchworker *worker = new multifetchworker(workerno++, *this, *urliter);
          _workers.push_back(worker);
          if (worker->_state != WORKER_BROKEN)
            {
              _activeworkers++;
              if (worker->_state != WORKER_LOOKUP)
                {
                  worker->nextjob();
                }
              else
                _lookupworkers++;
            }
          ++urliter;
          continue;
        }
      if (!_activeworkers)
        {
          WAR << "No more active workers!" << endl;
          // show the first worker error we find
          for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
            {
              if ((*workeriter)->_state != WORKER_BROKEN)
                continue;
              ZYPP_THROW(MediaCurlException(_baseurl, "Server error", (*workeriter)->_curlError));
            }
          break;
        }

      FD_ZERO(&rset);
      FD_ZERO(&wset);
      FD_ZERO(&xset);

      curl_multi_fdset(_multi, &rset, &wset, &xset, &maxfd);

      // also watch the DNS checker pipes of workers still in lookup
      if (_lookupworkers)
        for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
          (*workeriter)->adddnsfd(rset, maxfd);

      timeval tv;
      // if we added a new job we have to call multi_perform once
      // to make it show up in the fd set. do not sleep in this case.
      tv.tv_sec = 0;
      tv.tv_usec = _havenewjob ? 0 : 200000;
      if (_sleepworkers && !_havenewjob)
        {
          // shorten the select() timeout so sleeping workers wake on time
          if (_minsleepuntil == 0)
            {
              for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
                {
                  multifetchworker *worker = *workeriter;
                  if (worker->_state != WORKER_SLEEP)
                    continue;
                  if (!_minsleepuntil || _minsleepuntil > worker->_sleepuntil)
                    _minsleepuntil = worker->_sleepuntil;
                }
            }
          double sl = _minsleepuntil - currentTime();
          if (sl < 0)
            {
              sl = 0;
              _minsleepuntil = 0;
            }
          if (sl < .2)
            tv.tv_usec = sl * 1000000;
        }
      int r = select(maxfd + 1, &rset, &wset, &xset, &tv);
      if (r == -1 && errno != EINTR)
        ZYPP_THROW(MediaCurlException(_baseurl, "select() failed", "unknown error"));
      // dispatch DNS pipe events before touching curl
      if (r != 0 && _lookupworkers)
        for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
          {
            multifetchworker *worker = *workeriter;
            if (worker->_state != WORKER_LOOKUP)
              continue;
            (*workeriter)->dnsevent(rset);
            if (worker->_state != WORKER_LOOKUP)
              _lookupworkers--;
          }
      _havenewjob = false;

      // run curl
      for (;;)
        {
          CURLMcode mcode;
          int tasks;
          mcode = curl_multi_perform(_multi, &tasks);
          if (mcode == CURLM_CALL_MULTI_PERFORM)
            continue;
          if (mcode != CURLM_OK)
            ZYPP_THROW(MediaCurlException(_baseurl, "curl_multi_perform", "unknown error"));
          break;
        }

      double now = currentTime();

      // update periodavg
      if (now > _lastperiodstart + .5)
        {
          if (!_periodavg)
          // (statements missing in this listing, orig. 956-959 — presumably
          // the initial/averaged period-speed computation)
          else
          _lastperiodstart = now;
        }

      // wake up sleepers
      if (_sleepworkers)
        {
          for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
            {
              multifetchworker *worker = *workeriter;
              if (worker->_state != WORKER_SLEEP)
                continue;
              if (worker->_sleepuntil > now)
                continue;
              if (_minsleepuntil == worker->_sleepuntil)
                _minsleepuntil = 0;
              XXX << "#" << worker->_workerno << ": sleep done, wake up" << endl;
              _sleepworkers--;
              // nextjob changes the state
              worker->nextjob();
            }
        }

      // collect all curl results, reschedule new jobs
      CURLMsg *msg;
      while ((msg = curl_multi_info_read(_multi, &nqueue)) != 0)
        {
          if (msg->msg != CURLMSG_DONE)
            continue;
          CURL *easy = msg->easy_handle;
          CURLcode cc = msg->data.result;
          multifetchworker *worker;
          if (curl_easy_getinfo(easy, CURLINFO_PRIVATE, &worker) != CURLE_OK)
            ZYPP_THROW(MediaCurlException(_baseurl, "curl_easy_getinfo", "unknown error"));
          // fold this block's throughput into the worker's average speed
          if (worker->_blkreceived && now > worker->_blkstarttime)
            {
              if (worker->_avgspeed)
                worker->_avgspeed = (worker->_avgspeed + worker->_blkreceived / (now - worker->_blkstarttime)) / 2;
              else
                worker->_avgspeed = worker->_blkreceived / (now - worker->_blkstarttime);
            }
          XXX << "#" << worker->_workerno << ": BLK " << worker->_blkno << " done code " << cc << " speed " << worker->_avgspeed << endl;
          curl_multi_remove_handle(_multi, easy);
          if (cc == CURLE_HTTP_RETURNED_ERROR)
            {
              long statuscode = 0;
              (void)curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &statuscode);
              XXX << "HTTP status " << statuscode << endl;
              if (statuscode == 416 && !_blklist) /* Range error */
                {
                  if (_filesize == off_t(-1))
                    {
                      if (!worker->_noendrange)
                        {
                          // first retry the same block with an open-ended range
                          XXX << "#" << worker->_workerno << ": retrying with no end range" << endl;
                          worker->_noendrange = true;
                          worker->run();
                          continue;
                        }
                      worker->_noendrange = false;
                      worker->stealjob();
                      continue;
                    }
                  if (worker->_blkstart >= _filesize)
                    {
                      // requested a block past the end of the file: move on
                      worker->nextjob();
                      continue;
                    }
                }
            }
          if (cc == 0)
            {
              if (!worker->checkChecksum())
                {
                  WAR << "#" << worker->_workerno << ": checksum error, disable worker" << endl;
                  worker->_state = WORKER_BROKEN;
                  strncpy(worker->_curlError, "checksum error", CURL_ERROR_SIZE);
                  _activeworkers--;
                  continue;
                }
              if (worker->_state == WORKER_FETCH)
                {
                  if (worker->_competing)
                    {
                      worker->disableCompetition();
                      // multiple workers wrote into this block. We already know that our
                      // data was correct, but maybe some other worker overwrote our data
                      // with something broken. Thus we have to re-check the block.
                      if (!worker->recheckChecksum())
                        {
                          XXX << "#" << worker->_workerno << ": recheck checksum error, refetch block" << endl;
                          // re-fetch! No need to worry about the bad workers,
                          // they will now be set to DISCARD. At the end of their block
                          // they will notice that they wrote bad data and go into BROKEN.
                          worker->run();
                          continue;
                        }
                    }
                  _fetchedgoodsize += worker->_blksize;
                }

              // make bad workers sleep a little
              double maxavg = 0;
              int maxworkerno = 0;
              int numbetter = 0;
              for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
                {
                  multifetchworker *oworker = *workeriter;
                  if (oworker->_state == WORKER_BROKEN)
                    continue;
                  if (oworker->_avgspeed > maxavg)
                    {
                      maxavg = oworker->_avgspeed;
                      maxworkerno = oworker->_workerno;
                    }
                  if (oworker->_avgspeed > worker->_avgspeed)
                    numbetter++;
                }
              if (maxavg && !_stealing)
                {
                  // sleep time grows with the gap to the fastest worker
                  double ratio = worker->_avgspeed / maxavg;
                  ratio = 1 - ratio;
                  if (numbetter < 3) // don't sleep that much if we're in the top two
                    ratio = ratio * ratio;
                  if (ratio > .01)
                    {
                      XXX << "#" << worker->_workerno << ": too slow ("<< ratio << ", " << worker->_avgspeed << ", #" << maxworkerno << ": " << maxavg << "), going to sleep for " << ratio * 1000 << " ms" << endl;
                      worker->_sleepuntil = now + ratio;
                      worker->_state = WORKER_SLEEP;
                      _sleepworkers++;
                      continue;
                    }
                }

              // do rate control (if requested)
              // should use periodavg, but that's not what libcurl does
              if (_maxspeed && now > _starttime)
                {
                  double avg = _fetchedsize / (now - _starttime);
                  avg = worker->_maxspeed * _maxspeed / avg;
                  if (avg < _maxspeed / _maxworkers)
                    avg = _maxspeed / _maxworkers;
                  if (avg > _maxspeed)
                    avg = _maxspeed;
                  if (avg < 1024)
                    avg = 1024;
                  worker->_maxspeed = avg;
#if CURLVERSION_AT_LEAST(7,15,5)
                  curl_easy_setopt(worker->_curl, CURLOPT_MAX_RECV_SPEED_LARGE, (curl_off_t)(avg));
#endif
                }

              worker->nextjob();
            }
          else
            {
              worker->_state = WORKER_BROKEN;
              _activeworkers--;
              if (!_activeworkers && !(urliter != urllist.end() && _workers.size() < MAXURLS))
                {
                  // end of workers reached! goodbye!
                  worker->evaluateCurlCode(Pathname(), cc, false);
                }
            }

          if ( _filesize > 0 && _fetchedgoodsize > _filesize ) {
            // (statement missing in this listing, orig. 1125 — presumably
            // raises an error for over-long downloads)
          }
        }

      // send report
      if (_report)
        {
          int percent = _totalsize ? (100 * (_fetchedgoodsize + _fetchedsize)) / (_totalsize + _fetchedsize) : 0;

          double avg = 0;
          if (now > _starttime)
            avg = _fetchedsize / (now - _starttime);
          if (!(*(_report))->progress(percent, _baseurl, avg, _lastperiodstart == _starttime ? avg : _periodavg))
            ZYPP_THROW(MediaCurlException(_baseurl, "User abort", "cancelled"));
        }

      if (_timeout && now - _lastprogress > _timeout)
        break;
    }

  if (!_finished)
    // (statement missing in this listing, orig. 1146 — presumably throws a
    // timeout exception)

  // print some download stats
  WAR << "overall result" << endl;
  for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
    {
      multifetchworker *worker = *workeriter;
      WAR << "#" << worker->_workerno << ": state: " << worker->_state << " received: " << worker->_received << " url: " << worker->_url << endl;
    }
}
1156 
1157 
1159 
1160 
// Construct the multi-curl media handler; the CURLM handle is created lazily.
// NOTE(review): one line is missing from this listing (orig. 1166 —
// presumably initializing _customHeadersMetalink).
MediaMultiCurl::MediaMultiCurl(const Url &url_r, const Pathname & attach_point_hint_r)
  : MediaCurl(url_r, attach_point_hint_r)
{
  MIL << "MediaMultiCurl::MediaMultiCurl(" << url_r << ", " << attach_point_hint_r << ")" << endl;
  _multi = 0;
}
1168 
// Destructor: free the metalink header list, the CURLM handle, and every
// pooled easy handle.
// NOTE(review): the signature line (orig. 1169) and two lines (orig. 1171,
// 1174 — presumably the guard and reset around the slist free) are missing
// from this listing.
{
  // (guard missing in this listing, orig. 1171)
  {
    curl_slist_free_all(_customHeadersMetalink);
  }
  if (_multi)
    {
      curl_multi_cleanup(_multi);
      _multi = 0;
    }
  std::map<std::string, CURL *>::iterator it;
  for (it = _easypool.begin(); it != _easypool.end(); it++)
    {
      CURL *easy = it->second;
      if (easy)
        {
          curl_easy_cleanup(easy);
          it->second = NULL;
        }
    }
}
1192 
// Rebuild the metalink-aware header list: copy the regular custom headers
// and append an Accept header advertising the metalink MIME types.
// NOTE(review): the signature line (orig. 1193) and three lines (orig. 1195,
// 1197, 1200 — presumably the base-class call and the guard/reset around the
// slist free) are missing from this listing.
{

  // (guard missing in this listing, orig. 1197)
  {
    curl_slist_free_all(_customHeadersMetalink);
  }
  struct curl_slist *sl = _customHeaders;
  for (; sl; sl = sl->next)
    _customHeadersMetalink = curl_slist_append(_customHeadersMetalink, sl->data);
  _customHeadersMetalink = curl_slist_append(_customHeadersMetalink, "Accept: */*, application/metalink+xml, application/metalink4+xml");
}
1207 
// Heuristic: peek at the first bytes of an open file descriptor (without
// moving its file offset) and decide whether the content looks like a
// metalink XML document.  Fixes the type-mismatched "return 0;" in a bool
// function and the redundant "? true : false".
//
// @param fd  readable, seekable file descriptor
// @return    true if the content starts with "<metalink" (optionally after
//            whitespace and an "<?xml ...?>" declaration), false otherwise
//            or on read error
static bool looks_like_metalink_fd(int fd)
{
  char buf[256], *p;
  int l;
  // pread() leaves the descriptor's offset untouched; retry on EINTR
  while ((l = pread(fd, buf, sizeof(buf) - 1, (off_t)0)) == -1 && errno == EINTR)
    ;
  if (l == -1)
    return false;
  buf[l] = 0;
  p = buf;
  // skip leading whitespace
  while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n')
    p++;
  // skip an optional "<?xml ...?>" declaration plus trailing whitespace
  if (!strncasecmp(p, "<?xml", 5))
    {
      while (*p && *p != '>')
        p++;
      if (*p == '>')
        p++;
      while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n')
        p++;
    }
  return !strncasecmp(p, "<metalink", 9);
}
1232 
1233 static bool looks_like_metalink(const Pathname & file)
1234 {
1235  int fd;
1236  if ((fd = open(file.asString().c_str(), O_RDONLY|O_CLOEXEC)) == -1)
1237  return false;
1238  bool ret = looks_like_metalink_fd(fd);
1239  close(fd);
1240  DBG << "looks_like_metalink(" << file << "): " << ret << endl;
1241  return ret;
1242 }
1243 
1244 // here we try to suppress all progress coming from a metalink download
1245 // bsc#1021291: Nevertheless send alive trigger (without stats), so UIs
1246 // are able to abort a hanging metalink download via callback response.
1247 int MediaMultiCurl::progressCallback( void *clientp, double dltotal, double dlnow, double ultotal, double ulnow)
1248 {
1249  CURL *_curl = MediaCurl::progressCallback_getcurl(clientp);
1250  if (!_curl)
1251  return MediaCurl::aliveCallback(clientp, dltotal, dlnow, ultotal, ulnow);
1252 
1253  // bsc#408814: Don't report any sizes before we don't have data on disk. Data reported
1254  // due to redirection etc. are not interesting, but may disturb filesize checks.
1255  FILE *fp = 0;
1256  if ( curl_easy_getinfo( _curl, CURLINFO_PRIVATE, &fp ) != CURLE_OK || !fp )
1257  return MediaCurl::aliveCallback( clientp, dltotal, dlnow, ultotal, ulnow );
1258  if ( ftell( fp ) == 0 )
1259  return MediaCurl::aliveCallback( clientp, dltotal, 0.0, ultotal, ulnow );
1260 
1261  // (no longer needed due to the filesize check above?)
1262  // work around curl bug that gives us old data
1263  long httpReturnCode = 0;
1264  if (curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &httpReturnCode ) != CURLE_OK || httpReturnCode == 0)
1265  return MediaCurl::aliveCallback(clientp, dltotal, dlnow, ultotal, ulnow);
1266 
1267  char *ptr = NULL;
1268  bool ismetalink = false;
1269  if (curl_easy_getinfo(_curl, CURLINFO_CONTENT_TYPE, &ptr) == CURLE_OK && ptr)
1270  {
1271  string ct = string(ptr);
1272  if (ct.find("application/metalink+xml") == 0 || ct.find("application/metalink4+xml") == 0)
1273  ismetalink = true;
1274  }
1275  if (!ismetalink && dlnow < 256)
1276  {
1277  // can't tell yet, ...
1278  return MediaCurl::aliveCallback(clientp, dltotal, dlnow, ultotal, ulnow);
1279  }
1280  if (!ismetalink)
1281  {
1282  fflush(fp);
1283  ismetalink = looks_like_metalink_fd(fileno(fp));
1284  DBG << "looks_like_metalink_fd: " << ismetalink << endl;
1285  }
1286  if (ismetalink)
1287  {
1288  // this is a metalink file change the expected filesize
1290  // we're downloading the metalink file. Just trigger aliveCallbacks
1291  curl_easy_setopt(_curl, CURLOPT_PROGRESSFUNCTION, &MediaCurl::aliveCallback);
1292  return MediaCurl::aliveCallback(clientp, dltotal, dlnow, ultotal, ulnow);
1293  }
1294  curl_easy_setopt(_curl, CURLOPT_PROGRESSFUNCTION, &MediaCurl::progressCallback);
1295  return MediaCurl::progressCallback(clientp, dltotal, dlnow, ultotal, ulnow);
1296 }
1297 
1298 void MediaMultiCurl::doGetFileCopy( const Pathname & filename , const Pathname & target, callback::SendReport<DownloadProgressReport> & report, const ByteCount &expectedFileSize_r, RequestOptions options ) const
1299 {
1300  Pathname dest = target.absolutename();
1301  if( assert_dir( dest.dirname() ) )
1302  {
1303  DBG << "assert_dir " << dest.dirname() << " failed" << endl;
1304  ZYPP_THROW( MediaSystemException(getFileUrl(filename), "System error on " + dest.dirname().asString()) );
1305  }
1306 
1307  ManagedFile destNew { target.extend( ".new.zypp.XXXXXX" ) };
1308  AutoFILE file;
1309  {
1310  AutoFREE<char> buf { ::strdup( (*destNew).c_str() ) };
1311  if( ! buf )
1312  {
1313  ERR << "out of memory for temp file name" << endl;
1314  ZYPP_THROW(MediaSystemException(getFileUrl(filename), "out of memory for temp file name"));
1315  }
1316 
1317  AutoFD tmp_fd { ::mkostemp( buf, O_CLOEXEC ) };
1318  if( tmp_fd == -1 )
1319  {
1320  ERR << "mkstemp failed for file '" << destNew << "'" << endl;
1321  ZYPP_THROW(MediaWriteException(destNew));
1322  }
1323  destNew = ManagedFile( (*buf), filesystem::unlink );
1324 
1325  file = ::fdopen( tmp_fd, "we" );
1326  if ( ! file )
1327  {
1328  ERR << "fopen failed for file '" << destNew << "'" << endl;
1329  ZYPP_THROW(MediaWriteException(destNew));
1330  }
1331  tmp_fd.resetDispose(); // don't close it here! ::fdopen moved ownership to file
1332  }
1333 
1334  DBG << "dest: " << dest << endl;
1335  DBG << "temp: " << destNew << endl;
1336 
1337  // set IFMODSINCE time condition (no download if not modified)
1338  if( PathInfo(target).isExist() && !(options & OPTION_NO_IFMODSINCE) )
1339  {
1340  curl_easy_setopt(_curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
1341  curl_easy_setopt(_curl, CURLOPT_TIMEVALUE, (long)PathInfo(target).mtime());
1342  }
1343  else
1344  {
1345  curl_easy_setopt(_curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_NONE);
1346  curl_easy_setopt(_curl, CURLOPT_TIMEVALUE, 0L);
1347  }
1348  // change header to include Accept: metalink
1349  curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, _customHeadersMetalink);
1350  // change to our own progress funcion
1351  curl_easy_setopt(_curl, CURLOPT_PROGRESSFUNCTION, &progressCallback);
1352  curl_easy_setopt(_curl, CURLOPT_PRIVATE, (*file) ); // important to pass the FILE* explicitly (passing through varargs)
1353  try
1354  {
1355  MediaCurl::doGetFileCopyFile(filename, dest, file, report, expectedFileSize_r, options);
1356  }
1357  catch (Exception &ex)
1358  {
1359  curl_easy_setopt(_curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_NONE);
1360  curl_easy_setopt(_curl, CURLOPT_TIMEVALUE, 0L);
1361  curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, _customHeaders);
1362  curl_easy_setopt(_curl, CURLOPT_PRIVATE, (void *)0);
1363  ZYPP_RETHROW(ex);
1364  }
1365  curl_easy_setopt(_curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_NONE);
1366  curl_easy_setopt(_curl, CURLOPT_TIMEVALUE, 0L);
1367  curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, _customHeaders);
1368  curl_easy_setopt(_curl, CURLOPT_PRIVATE, (void *)0);
1369  long httpReturnCode = 0;
1370  CURLcode infoRet = curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &httpReturnCode);
1371  if (infoRet == CURLE_OK)
1372  {
1373  DBG << "HTTP response: " + str::numstring(httpReturnCode) << endl;
1374  if ( httpReturnCode == 304
1375  || ( httpReturnCode == 213 && _url.getScheme() == "ftp" ) ) // not modified
1376  {
1377  DBG << "not modified: " << PathInfo(dest) << endl;
1378  return;
1379  }
1380  }
1381  else
1382  {
1383  WAR << "Could not get the reponse code." << endl;
1384  }
1385 
1386  bool ismetalink = false;
1387 
1388  char *ptr = NULL;
1389  if (curl_easy_getinfo(_curl, CURLINFO_CONTENT_TYPE, &ptr) == CURLE_OK && ptr)
1390  {
1391  string ct = string(ptr);
1392  if (ct.find("application/metalink+xml") == 0 || ct.find("application/metalink4+xml") == 0)
1393  ismetalink = true;
1394  }
1395 
1396  if (!ismetalink)
1397  {
1398  // some proxies do not store the content type, so also look at the file to find
1399  // out if we received a metalink (bnc#649925)
1400  fflush(file);
1401  if (looks_like_metalink(destNew))
1402  ismetalink = true;
1403  }
1404 
1405  if (ismetalink)
1406  {
1407  bool userabort = false;
1408  Pathname failedFile = ZConfig::instance().repoCachePath() / "MultiCurl.failed";
1409  file = nullptr; // explicitly close destNew before the parser reads it.
1410  try
1411  {
1412  MetaLinkParser mlp;
1413  mlp.parse(destNew);
1414  MediaBlockList bl = mlp.getBlockList();
1415  vector<Url> urls = mlp.getUrls();
1416  XXX << bl << endl;
1417  file = fopen((*destNew).c_str(), "w+e");
1418  if (!file)
1419  ZYPP_THROW(MediaWriteException(destNew));
1420  if (PathInfo(target).isExist())
1421  {
1422  XXX << "reusing blocks from file " << target << endl;
1423  bl.reuseBlocks(file, target.asString());
1424  XXX << bl << endl;
1425  }
1426  if (bl.haveChecksum(1) && PathInfo(failedFile).isExist())
1427  {
1428  XXX << "reusing blocks from file " << failedFile << endl;
1429  bl.reuseBlocks(file, failedFile.asString());
1430  XXX << bl << endl;
1431  filesystem::unlink(failedFile);
1432  }
1433  Pathname df = deltafile();
1434  if (!df.empty())
1435  {
1436  XXX << "reusing blocks from file " << df << endl;
1437  bl.reuseBlocks(file, df.asString());
1438  XXX << bl << endl;
1439  }
1440  try
1441  {
1442  multifetch(filename, file, &urls, &report, &bl, expectedFileSize_r);
1443  }
1444  catch (MediaCurlException &ex)
1445  {
1446  userabort = ex.errstr() == "User abort";
1447  ZYPP_RETHROW(ex);
1448  }
1449  }
1450  catch (MediaFileSizeExceededException &ex) {
1451  ZYPP_RETHROW(ex);
1452  }
1453  catch (Exception &ex)
1454  {
1455  // something went wrong. fall back to normal download
1456  file = nullptr; // explicitly close destNew before moving it
1457  if (PathInfo(destNew).size() >= 63336)
1458  {
1459  ::unlink(failedFile.asString().c_str());
1460  filesystem::hardlinkCopy(destNew, failedFile);
1461  }
1462  if (userabort)
1463  {
1464  ZYPP_RETHROW(ex);
1465  }
1466  file = fopen((*destNew).c_str(), "w+e");
1467  if (!file)
1468  ZYPP_THROW(MediaWriteException(destNew));
1469  MediaCurl::doGetFileCopyFile(filename, dest, file, report, expectedFileSize_r, options | OPTION_NO_REPORT_START);
1470  }
1471  }
1472 
1473  if (::fchmod( ::fileno(file), filesystem::applyUmaskTo( 0644 )))
1474  {
1475  ERR << "Failed to chmod file " << destNew << endl;
1476  }
1477 
1478  file.resetDispose(); // we're going to close it manually here
1479  if (::fclose(file))
1480  {
1481  filesystem::unlink(destNew);
1482  ERR << "Fclose failed for file '" << destNew << "'" << endl;
1483  ZYPP_THROW(MediaWriteException(destNew));
1484  }
1485 
1486  if ( rename( destNew, dest ) != 0 )
1487  {
1488  ERR << "Rename failed" << endl;
1490  }
1491  destNew.resetDispose(); // no more need to unlink it
1492 
1493  DBG << "done: " << PathInfo(dest) << endl;
1494 }
1495 
1497 namespace {
1498  // bsc#933839: propagate proxy settings passed in the repo URL
1499  inline Url propagateQueryParams( Url url_r, const Url & template_r )
1500  {
1501  for ( std::string param : { "proxy", "proxyport", "proxyuser", "proxypass"} )
1502  {
1503  const std::string & value( template_r.getQueryParam( param ) );
1504  if ( ! value.empty() )
1505  url_r.setQueryParam( param, value );
1506  }
1507  return url_r;
1508  }
1509 }
1511 
1512 void MediaMultiCurl::multifetch(const Pathname & filename, FILE *fp, std::vector<Url> *urllist, callback::SendReport<DownloadProgressReport> *report, MediaBlockList *blklist, off_t filesize) const
1513 {
1514  Url baseurl(getFileUrl(filename));
1515  if (blklist && filesize == off_t(-1) && blklist->haveFilesize())
1516  filesize = blklist->getFilesize();
1517  if (blklist && !blklist->haveBlocks() && filesize != 0)
1518  blklist = 0;
1519  if (blklist && (filesize == 0 || !blklist->numBlocks()))
1520  {
1521  checkFileDigest(baseurl, fp, blklist);
1522  return;
1523  }
1524  if (filesize == 0)
1525  return;
1526  if (!_multi)
1527  {
1528  _multi = curl_multi_init();
1529  if (!_multi)
1531  }
1532 
1533  multifetchrequest req(this, filename, baseurl, _multi, fp, report, blklist, filesize);
1534  req._timeout = _settings.timeout();
1538  if (req._maxworkers > MAXURLS)
1539  req._maxworkers = MAXURLS;
1540  if (req._maxworkers <= 0)
1541  req._maxworkers = 1;
1542  std::vector<Url> myurllist;
1543  for (std::vector<Url>::iterator urliter = urllist->begin(); urliter != urllist->end(); ++urliter)
1544  {
1545  try
1546  {
1547  string scheme = urliter->getScheme();
1548  if (scheme == "http" || scheme == "https" || scheme == "ftp" || scheme == "tftp")
1549  {
1550  checkProtocol(*urliter);
1551  myurllist.push_back(propagateQueryParams(*urliter, _url));
1552  }
1553  }
1554  catch (...)
1555  {
1556  }
1557  }
1558  if (!myurllist.size())
1559  myurllist.push_back(baseurl);
1560  req.run(myurllist);
1561  checkFileDigest(baseurl, fp, blklist);
1562 }
1563 
1564 void MediaMultiCurl::checkFileDigest(Url &url, FILE *fp, MediaBlockList *blklist) const
1565 {
1566  if (!blklist || !blklist->haveFileChecksum())
1567  return;
1568  if (fseeko(fp, off_t(0), SEEK_SET))
1569  ZYPP_THROW(MediaCurlException(url, "fseeko", "seek error"));
1570  Digest dig;
1571  blklist->createFileDigest(dig);
1572  char buf[4096];
1573  size_t l;
1574  while ((l = fread(buf, 1, sizeof(buf), fp)) > 0)
1575  dig.update(buf, l);
1576  if (!blklist->verifyFileDigest(dig))
1577  ZYPP_THROW(MediaCurlException(url, "file verification failed", "checksum error"));
1578 }
1579 
1580 bool MediaMultiCurl::isDNSok(const string &host) const
1581 {
1582  return _dnsok.find(host) == _dnsok.end() ? false : true;
1583 }
1584 
1585 void MediaMultiCurl::setDNSok(const string &host) const
1586 {
1587  _dnsok.insert(host);
1588 }
1589 
1590 CURL *MediaMultiCurl::fromEasyPool(const string &host) const
1591 {
1592  if (_easypool.find(host) == _easypool.end())
1593  return 0;
1594  CURL *ret = _easypool[host];
1595  _easypool.erase(host);
1596  return ret;
1597 }
1598 
1599 void MediaMultiCurl::toEasyPool(const std::string &host, CURL *easy) const
1600 {
1601  CURL *oldeasy = _easypool[host];
1602  _easypool[host] = easy;
1603  if (oldeasy)
1604  curl_easy_cleanup(oldeasy);
1605 }
1606 
1607  } // namespace media
1608 } // namespace zypp
1609 
std::string getScheme() const
Returns the scheme name of the URL.
Definition: Url.cc:528
long timeout() const
transfer timeout
std::string authType() const
get the allowed authentication types
int assert_dir(const Pathname &path, unsigned mode)
Like 'mkdir -p'.
Definition: PathInfo.cc:320
std::string password() const
auth password
#define MIL
Definition: Logger.h:79
#define WORKER_DISCARD
static size_t _headerfunction(void *ptr, size_t size, size_t nmemb, void *stream)
void setQueryParam(const std::string &param, const std::string &value)
Set or add value for the specified query parameter.
Definition: Url.cc:833
std::set< std::string > _dnsok
#define ZYPP_THROW(EXCPT)
Drops a logline and throws the Exception.
Definition: Exception.h:392
size_t writefunction(void *ptr, size_t size)
static ZConfig & instance()
Singleton ctor.
Definition: Resolver.cc:127
void checkProtocol(const Url &url) const
check the url is supported by the curl library
Definition: MediaCurl.cc:612
Implementation class for FTP, HTTP and HTTPS MediaHandler.
Definition: MediaCurl.h:32
Compute Message Digests (MD5, SHA1 etc)
Definition: Digest.h:45
#define WORKER_SLEEP
Store and operate with byte count.
Definition: ByteCount.h:30
long maxDownloadSpeed() const
Maximum download speed (bytes per second)
std::string proxy() const
proxy host
Pathname deltafile() const
to not add a IFMODSINCE header if target exists
Definition: MediaCurl.h:44
void reuseBlocks(FILE *wfp, std::string filename)
scan a file for blocks from our blocklist.
void run(std::vector< Url > &urllist)
static int progressCallback(void *clientp, double dltotal, double dlnow, double ultotal, double ulnow)
Callback reporting download progress.
Definition: MediaCurl.cc:1656
Pathname extend(const std::string &r) const
Append string r to the last component of the path.
Definition: Pathname.h:169
bool createDigest(Digest &digest) const
std::map< std::string, CURL * > _easypool
callback::SendReport< DownloadProgressReport > * _report
#define WORKER_STARTING
static int aliveCallback(void *clientp, double dltotal, double dlnow, double ultotal, double ulnow)
Callback sending just an alive trigger to the UI, without stats (e.g.
Definition: MediaCurl.cc:1642
static double currentTime()
Definition: Arch.h:344
std::string username() const
auth username
AutoDispose< const Pathname > ManagedFile
A Pathname plus associated cleanup code to be executed when path is no longer needed.
Definition: ManagedFile.h:27
Url url
Definition: MediaCurl.cc:180
#define XXX
Definition: Logger.h:77
static const Unit MB
1000^2 Byte
Definition: ByteCount.h:60
virtual void setupEasy()
initializes the curl easy handle with the data from the url
Definition: MediaCurl.cc:637
void toEasyPool(const std::string &host, CURL *easy) const
AutoDispose<int> calling ::close
Definition: AutoDispose.h:203
void parse(const Pathname &filename)
parse a file consisting of metalink xml data
static size_t _writefunction(void *ptr, size_t size, size_t nmemb, void *stream)
#define ERR
Definition: Logger.h:81
Url getFileUrl(const Pathname &filename) const
concatenate the attach url and the filename to a complete download url
Definition: MediaCurl.cc:947
multifetchrequest(const MediaMultiCurl *context, const Pathname &filename, const Url &baseurl, CURLM *multi, FILE *fp, callback::SendReport< DownloadProgressReport > *report, MediaBlockList *blklist, off_t filesize)
void setPassword(const std::string &password)
sets the auth password
void setUsername(const std::string &username)
sets the auth username
#define WORKER_FETCH
static bool env_isset(string name)
bool haveBlocks() const
do we have a blocklist describing the file? set to true when addBlock() is called
static void resetExpectedFileSize(void *clientp, const ByteCount &expectedFileSize)
MediaMultiCurl needs to reset the expected filesize in case a metalink file is downloaded otherwise t...
Definition: MediaCurl.cc:1699
std::list< multifetchworker * > _workers
#define ZYPP_RETHROW(EXCPT)
Drops a logline and rethrows, updating the CodeLocation.
Definition: Exception.h:400
const MediaMultiCurl * _context
virtual void setupEasy() override
initializes the curl easy handle with the data from the url
std::string asString() const
Returns a default string representation of the Url object.
Definition: Url.cc:492
std::string getQueryParam(const std::string &param, EEncoding eflag=zypp::url::E_DECODED) const
Return the value for the specified query parameter.
Definition: Url.cc:655
bool verifyFileDigest(Digest &digest) const
Url clearQueryString(const Url &url) const
Definition: MediaCurl.cc:574
void setAuthType(const std::string &authtype)
set the allowed authentication types
CURL * fromEasyPool(const std::string &host) const
int unlink(const Pathname &path)
Like 'unlink'.
Definition: PathInfo.cc:653
multifetchrequest * _request
static bool looks_like_metalink_fd(int fd)
const Url _url
Url to handle.
Definition: MediaHandler.h:110
bool isDNSok(const std::string &host) const
const std::string & asString() const
String representation.
Definition: Pathname.h:90
int rename(const Pathname &oldpath, const Pathname &newpath)
Like 'rename'.
Definition: PathInfo.cc:695
bool isExist() const
Return whether valid stat info exists.
Definition: PathInfo.h:281
void evaluateCurlCode(const zypp::Pathname &filename, CURLcode code, bool timeout) const
Evaluates a curl return code and throws the right MediaException filename Filename being downloaded c...
Definition: MediaCurl.cc:1049
Pathname repoCachePath() const
Path where the caches are kept (/var/cache/zypp)
Definition: ZConfig.cc:909
long connectTimeout() const
connection timeout
Pathname dirname() const
Return all but the last component od this path.
Definition: Pathname.h:123
do not send a start ProgressReport
Definition: MediaCurl.h:46
#define WAR
Definition: Logger.h:80
TransferSettings _settings
Definition: MediaCurl.h:179
bool verifyDigest(size_t blkno, Digest &digest) const
#define MAXURLS
int hardlinkCopy(const Pathname &oldpath, const Pathname &newpath)
Create newpath as hardlink or copy of oldpath.
Definition: PathInfo.cc:836
size_t headerfunction(char *ptr, size_t size)
a single block from the blocklist, consisting of an offset and a size
bool createFileDigest(Digest &digest) const
virtual void doGetFileCopy(const Pathname &srcFilename, const Pathname &targetFilename, callback::SendReport< DownloadProgressReport > &_report, const ByteCount &expectedFileSize_r, RequestOptions options=OPTION_NONE) const override
void setDNSok(const std::string &host) const
size_t numBlocks() const
return the number of blocks in the blocklist
void multifetch(const Pathname &filename, FILE *fp, std::vector< Url > *urllist, callback::SendReport< DownloadProgressReport > *report=0, MediaBlockList *blklist=0, off_t filesize=off_t(-1)) const
std::string numstring(char n, int w=0)
Definition: String.h:288
MediaBlockList getBlockList()
return the block list from the parsed metalink data
std::string asString(unsigned field_width_r=0, unsigned unit_width_r=1) const
Auto selected Unit and precision.
Definition: ByteCount.h:133
void resetDispose()
Set no dispose function.
Definition: AutoDispose.h:162
curl_slist * _customHeaders
Definition: MediaCurl.h:178
static bool looks_like_metalink(const Pathname &file)
void doGetFileCopyFile(const Pathname &srcFilename, const Pathname &dest, FILE *file, callback::SendReport< DownloadProgressReport > &_report, const ByteCount &expectedFileSize_r, RequestOptions options=OPTION_NONE) const
Definition: MediaCurl.cc:1486
MediaMultiCurl(const Url &url_r, const Pathname &attach_point_hint_r)
Pathname absolutename() const
Return this path, adding a leading '/' if relative.
Definition: Pathname.h:138
Base class for Exception.
Definition: Exception.h:145
void checkFileDigest(Url &url, FILE *fp, MediaBlockList *blklist) const
std::vector< Url > getUrls()
return the download urls from the parsed metalink data
Url url() const
Url used.
Definition: MediaHandler.h:507
#define BLKSIZE
callback::SendReport< DownloadProgressReport > * report
Definition: MediaCurl.cc:184
long maxConcurrentConnections() const
Maximum number of concurrent connections for a single transfer.
std::string getHost(EEncoding eflag=zypp::url::E_DECODED) const
Returns the hostname or IP from the URL authority.
Definition: Url.cc:583
curl_slist * _customHeadersMetalink
virtual void disconnectFrom() override
Definition: MediaCurl.cc:925
static CURL * progressCallback_getcurl(void *clientp)
Definition: MediaCurl.cc:1672
static long auth_type_str2long(std::string &auth_type_str)
Converts a string of comma separated list of authentication type names into a long of ORed CURLAUTH_* ...
bool haveChecksum(size_t blkno) const
#define WORKER_LOOKUP
Reference counted access to a Tp object calling a custom Dispose function when the last AutoDispose h...
Definition: AutoDispose.h:92
Wrapper class for ::stat/::lstat.
Definition: PathInfo.h:220
AutoDispose<FILE*> calling ::fclose
Definition: AutoDispose.h:214
AutoDispose< void * > _state
void adddnsfd(fd_set &rset, int &maxfd)
#define WORKER_BROKEN
mode_t applyUmaskTo(mode_t mode_r)
Modify mode_r according to the current umask ( mode_r & ~getUmask() ).
Definition: PathInfo.h:809
std::string userPassword() const
returns the user and password as a user:pass string
static int progressCallback(void *clientp, double dltotal, double dlnow, double ultotal, double ulnow)
Easy-to use interface to the ZYPP dependency resolver.
Definition: CodePitfalls.doc:1
#define WORKER_DONE
char _curlError[CURL_ERROR_SIZE]
Definition: MediaCurl.h:177
bool update(const char *bytes, size_t len)
feed data into digest computation algorithm
Definition: Digest.cc:230
Url manipulation class.
Definition: Url.h:87
#define DBG
Definition: Logger.h:78
MediaBlock getBlock(size_t blkno) const
return the offset/size of a block with number blkno
ByteCount df(const Pathname &path_r)
Report free disk space on a mounted file system.
Definition: PathInfo.cc:1105