changelog shortlog graph tags branches changeset files revisions annotate raw help

Mercurial > core / rust/lib/dl/src/lib.rs

changeset 698: 96958d3eb5b0
parent: 966f92770ddf
author: Richard Westhaver <ellis@rwest.io>
date: Fri, 04 Oct 2024 22:04:59 -0400
permissions: -rw-r--r--
description: fixes
1 //! Easy file downloading
2 #![deny(rust_2018_idioms)]
3 
4 use anyhow::{Context, Result};
5 use std::path::Path;
6 use url::Url;
7 mod errors;
8 pub use crate::errors::*;
9 
10 /// User agent header value for HTTP request.
///
/// Built at compile time by concatenating the literal `cc-install/` with the
/// `CARGO_PKG_VERSION` of this crate. Both backends in this file pass it to
/// their HTTP client (`useragent` for curl, `user_agent` for reqwest).
11 const USER_AGENT: &str = concat!("cc-install/", env!("CARGO_PKG_VERSION"));
12 
/// HTTP client implementation used to perform a download.
///
/// `download_with_backend` dispatches on this value: `Curl` goes to
/// `curl::download`, `Reqwest` goes to `reqwest_be::download` together with
/// the chosen [`TlsBackend`].
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Backend {
    /// Download through the `curl` module.
    Curl,
    /// Download through the `reqwest_be` module using the given TLS backend.
    Reqwest(TlsBackend),
}

/// TLS configuration selected for the reqwest backend.
///
/// Each variant maps to one lazily-built reqwest client; requesting a variant
/// whose cargo feature is disabled yields `DownloadError::BackendUnavailable`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum TlsBackend {
    /// Client built with `use_rustls_tls()` (feature `reqwest-rustls-tls`).
    Rustls,
    /// Client built without an explicit TLS choice (feature
    /// `reqwest-default-tls`).
    Default,
}
24 
/// Progress notification passed to the download callback.
///
/// The borrowed payload of [`Event::DownloadDataReceived`] is only valid for
/// the duration of the callback invocation.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Event<'a> {
    /// An existing partial file is about to be replayed to the callback
    /// before the transfer continues.
    ResumingPartialDownload,
    /// Received the Content-Length of the to-be downloaded data.
    DownloadContentLengthReceived(u64),
    /// Received some data.
    DownloadDataReceived(&'a [u8]),
}
33 
34 fn download_with_backend(
35  backend: Backend,
36  url: &Url,
37  resume_from: u64,
38  callback: &dyn Fn(Event<'_>) -> Result<()>,
39 ) -> Result<()> {
40  match backend {
41  Backend::Curl => curl::download(url, resume_from, callback),
42  Backend::Reqwest(tls) => {
43  reqwest_be::download(url, resume_from, callback, tls)
44  }
45  }
46 }
47 
48 type DownloadCallback<'a> = &'a dyn Fn(Event<'_>) -> Result<()>;
49 
50 pub fn download_to_path_with_backend(
51  backend: Backend,
52  url: &Url,
53  path: &Path,
54  resume_from_partial: bool,
55  callback: Option<DownloadCallback<'_>>,
56 ) -> Result<()> {
57  use std::{
58  cell::RefCell,
59  fs::{remove_file, OpenOptions},
60  io::{Read, Seek, SeekFrom, Write},
61  };
62 
63  || -> Result<()> {
64  let (file, resume_from) = if resume_from_partial {
65  let possible_partial = OpenOptions::new().read(true).open(path);
66 
67  let downloaded_so_far = if let Ok(mut partial) = possible_partial {
68  if let Some(cb) = callback {
69  cb(Event::ResumingPartialDownload)?;
70 
71  let mut buf = vec![0; 32768];
72  let mut downloaded_so_far = 0;
73  loop {
74  let n = partial.read(&mut buf)?;
75  downloaded_so_far += n as u64;
76  if n == 0 {
77  break;
78  }
79  cb(Event::DownloadDataReceived(&buf[..n]))?;
80  }
81 
82  downloaded_so_far
83  } else {
84  let file_info = partial.metadata()?;
85  file_info.len()
86  }
87  } else {
88  0
89  };
90 
91  let mut possible_partial = OpenOptions::new()
92  .write(true)
93  .create(true)
94  .open(path)
95  .context("error opening file for download")?;
96 
97  possible_partial.seek(SeekFrom::End(0))?;
98 
99  (possible_partial, downloaded_so_far)
100  } else {
101  (
102  OpenOptions::new()
103  .write(true)
104  .create(true)
105  .open(path)
106  .context("error creating file for download")?,
107  0,
108  )
109  };
110 
111  let file = RefCell::new(file);
112 
113  download_with_backend(backend, url, resume_from, &|event| {
114  if let Event::DownloadDataReceived(data) = event {
115  file
116  .borrow_mut()
117  .write_all(data)
118  .context("unable to write download to disk")?;
119  }
120  match callback {
121  Some(cb) => cb(event),
122  None => Ok(()),
123  }
124  })?;
125 
126  file
127  .borrow_mut()
128  .sync_data()
129  .context("unable to sync download to disk")?;
130 
131  Ok(())
132  }()
133  .map_err(|e| {
134  // TODO: We currently clear up the cached download on any error, should we
135  // restrict it to a subset?
136  if let Err(file_err) =
137  remove_file(path).context("cleaning up cached downloads")
138  {
139  file_err.context(e)
140  } else {
141  e
142  }
143  })
144 }
145 
146 #[cfg(all(not(feature = "reqwest-backend"), not(feature = "curl-backend")))]
147 compile_error!("Must enable at least one backend");
148 
149 /// Download via libcurl; encrypt with the native (or OpenSSl) TLS
150 /// stack via libcurl
151 #[cfg(feature = "curl-backend")]
152 pub mod curl {
153  use std::{cell::RefCell, str, time::Duration};
154 
155  use anyhow::{Context, Result};
156  use curl::easy::Easy;
157  use url::Url;
158 
159  use super::Event;
160  use crate::errors::*;
161 
162  pub fn download(
163  url: &Url,
164  resume_from: u64,
165  callback: &dyn Fn(Event<'_>) -> Result<()>,
166  ) -> Result<()> {
167  // Fetch either a cached libcurl handle (which will preserve open
168  // connections) or create a new one if it isn't listed.
169  //
170  // Once we've acquired it, reset the lifetime from 'static to our local
171  // scope.
172  thread_local!(static EASY: RefCell<Easy> = RefCell::new(Easy::new()));
173  EASY.with(|handle| {
174  let mut handle = handle.borrow_mut();
175 
176  handle.url(url.as_ref())?;
177  handle.follow_location(true)?;
178  handle.useragent(super::USER_AGENT)?;
179 
180  if resume_from > 0 {
181  handle.resume_from(resume_from)?;
182  } else {
183  // an error here indicates that the range header isn't supported by
184  // underlying curl, so there's nothing to "clear" - safe to
185  // ignore this error.
186  let _ = handle.resume_from(0);
187  }
188 
189  // Take at most 30s to connect
190  handle.connect_timeout(Duration::new(30, 0))?;
191 
192  {
193  let cberr = RefCell::new(None);
194  let mut transfer = handle.transfer();
195 
196  // Data callback for libcurl which is called with data that's
197  // downloaded. We just feed it into our hasher and also write it out
198  // to disk.
199  transfer.write_function(|data| {
200  match callback(Event::DownloadDataReceived(data)) {
201  Ok(()) => Ok(data.len()),
202  Err(e) => {
203  *cberr.borrow_mut() = Some(e);
204  Ok(0)
205  }
206  }
207  })?;
208 
209  // Listen for headers and parse out a `Content-Length`
210  // (case-insensitive) if it comes so we know how much we're
211  // downloading.
212  transfer.header_function(|header| {
213  if let Ok(data) = str::from_utf8(header) {
214  let prefix = "content-length: ";
215  if data.to_ascii_lowercase().starts_with(prefix) {
216  if let Ok(s) = data[prefix.len()..].trim().parse::<u64>() {
217  let msg = Event::DownloadContentLengthReceived(s + resume_from);
218  match callback(msg) {
219  Ok(()) => (),
220  Err(e) => {
221  *cberr.borrow_mut() = Some(e);
222  return false;
223  }
224  }
225  }
226  }
227  }
228  true
229  })?;
230 
231  // If an error happens check to see if we had a filesystem error up
232  // in `cberr`, but we always want to punt it up.
233  transfer.perform().or_else(|e| {
234  // If the original error was generated by one of our
235  // callbacks, return it.
236  match cberr.borrow_mut().take() {
237  Some(cberr) => Err(cberr),
238  None => {
239  // Otherwise, return the error from curl
240  if e.is_file_couldnt_read_file() {
241  Err(e).context(DownloadError::FileNotFound)
242  } else {
243  Err(e).context("error during download")?
244  }
245  }
246  }
247  })?;
248  }
249 
250  // If we didn't get a 20x or 0 ("OK" for files) then return an error
251  let code = handle.response_code()?;
252  match code {
253  0 | 200..=299 => {}
254  _ => {
255  return Err(DownloadError::HttpStatus(code).into());
256  }
257  };
258 
259  Ok(())
260  })
261  }
262 }
263 
264 #[cfg(feature = "reqwest-backend")]
265 pub mod reqwest_be {
266  #[cfg(all(
267  not(feature = "reqwest-rustls-tls"),
268  not(feature = "reqwest-default-tls")
269  ))]
270  compile_error!("Must select a reqwest TLS backend");
271 
272  use std::{io, time::Duration};
273 
274  use anyhow::{anyhow, Context, Result};
275  #[cfg(any(
276  feature = "reqwest-rustls-tls",
277  feature = "reqwest-default-tls"
278  ))]
279  use once_cell::sync::Lazy;
280  use reqwest::{
281  blocking::{Client, ClientBuilder, Response},
282  header, Proxy,
283  };
284  use url::Url;
285 
286  use super::{Event, TlsBackend};
287  use crate::errors::*;
288 
289  pub fn download(
290  url: &Url,
291  resume_from: u64,
292  callback: &dyn Fn(Event<'_>) -> Result<()>,
293  tls: TlsBackend,
294  ) -> Result<()> {
295  // Short-circuit reqwest for the "file:" URL scheme
296  if download_from_file_url(url, resume_from, callback)? {
297  return Ok(());
298  }
299 
300  let mut res = request(url, resume_from, tls)
301  .context("failed to make network request")?;
302 
303  if !res.status().is_success() {
304  let code: u16 = res.status().into();
305  return Err(anyhow!(DownloadError::HttpStatus(u32::from(code))));
306  }
307 
308  let buffer_size = 0x10000;
309  let mut buffer = vec![0u8; buffer_size];
310 
311  if let Some(len) = res.headers().get(header::CONTENT_LENGTH) {
312  // TODO possible issues during unwrap?
313  let len = len.to_str().unwrap().parse::<u64>().unwrap() + resume_from;
314  callback(Event::DownloadContentLengthReceived(len))?;
315  }
316 
317  loop {
318  let bytes_read = io::Read::read(&mut res, &mut buffer)?;
319 
320  if bytes_read != 0 {
321  callback(Event::DownloadDataReceived(&buffer[0..bytes_read]))?;
322  } else {
323  return Ok(());
324  }
325  }
326  }
327 
328  fn client_generic() -> ClientBuilder {
329  Client::builder()
330  .gzip(false)
331  .user_agent(super::USER_AGENT)
332  .proxy(Proxy::custom(env_proxy))
333  .timeout(Duration::from_secs(30))
334  }
335 
336  #[cfg(feature = "reqwest-rustls-tls")]
337  static CLIENT_RUSTLS_TLS: Lazy<Client> = Lazy::new(|| {
338  let catcher = || client_generic().use_rustls_tls().build();
339 
340  // woah, an unwrap?!
341  // It's OK. This is the same as what is happening in curl.
342  //
343  // The curl::Easy::new() internally assert!s that the initialized
344  // Easy is not null. Inside reqwest, the errors here would be from
345  // the TLS library returning a null pointer as well.
346  catcher().unwrap()
347  });
348 
349  #[cfg(feature = "reqwest-default-tls")]
350  static CLIENT_DEFAULT_TLS: Lazy<Client> = Lazy::new(|| {
351  let catcher = || client_generic().build();
352 
353  // woah, an unwrap?!
354  // It's OK. This is the same as what is happening in curl.
355  //
356  // The curl::Easy::new() internally assert!s that the initialized
357  // Easy is not null. Inside reqwest, the errors here would be from
358  // the TLS library returning a null pointer as well.
359  catcher().unwrap()
360  });
361 
362  fn env_proxy(url: &Url) -> Option<Url> {
363  env_proxy::for_url(url).to_url()
364  }
365 
366  fn request(
367  url: &Url,
368  resume_from: u64,
369  backend: TlsBackend,
370  ) -> Result<Response, DownloadError> {
371  let client: &Client = match backend {
372  #[cfg(feature = "reqwest-rustls-tls")]
373  TlsBackend::Rustls => &CLIENT_RUSTLS_TLS,
374  #[cfg(not(feature = "reqwest-rustls-tls"))]
375  TlsBackend::Rustls => {
376  return Err(DownloadError::BackendUnavailable("reqwest rustls"));
377  }
378  #[cfg(feature = "reqwest-default-tls")]
379  TlsBackend::Default => &CLIENT_DEFAULT_TLS,
380  #[cfg(not(feature = "reqwest-default-tls"))]
381  TlsBackend::Default => {
382  return Err(DownloadError::BackendUnavailable("reqwest default TLS"));
383  }
384  };
385  let mut req = client.get(url.as_str());
386 
387  if resume_from != 0 {
388  req = req.header(header::RANGE, format!("bytes={resume_from}-"));
389  }
390 
391  Ok(req.send()?)
392  }
393 
394  fn download_from_file_url(
395  url: &Url,
396  resume_from: u64,
397  callback: &dyn Fn(Event<'_>) -> Result<()>,
398  ) -> Result<bool> {
399  use std::fs;
400 
401  // The file scheme is mostly for use by tests to mock the dist server
402  if url.scheme() == "file" {
403  let src = url.to_file_path().map_err(|_| {
404  DownloadError::Message(format!("bogus file url: '{url}'"))
405  })?;
406  if !src.is_file() {
407  // Because some of rustup's logic depends on checking
408  // the error when a downloaded file doesn't exist, make
409  // the file case return the same error value as the
410  // network case.
411  return Err(anyhow!(DownloadError::FileNotFound));
412  }
413 
414  let mut f =
415  fs::File::open(src).context("unable to open downloaded file")?;
416  io::Seek::seek(&mut f, io::SeekFrom::Start(resume_from))?;
417 
418  let mut buffer = vec![0u8; 0x10000];
419  loop {
420  let bytes_read = io::Read::read(&mut f, &mut buffer)?;
421  if bytes_read == 0 {
422  break;
423  }
424  callback(Event::DownloadDataReceived(&buffer[0..bytes_read]))?;
425  }
426 
427  Ok(true)
428  } else {
429  Ok(false)
430  }
431  }
432 }
433 
434 #[cfg(not(feature = "curl-backend"))]
435 pub mod curl {
436 
437  use anyhow::{anyhow, Result};
438 
439  use super::Event;
440  use crate::errors::*;
441  use url::Url;
442 
443  pub fn download(
444  _url: &Url,
445  _resume_from: u64,
446  _callback: &dyn Fn(Event<'_>) -> Result<()>,
447  ) -> Result<()> {
448  Err(anyhow!(DownloadError::BackendUnavailable("curl")))
449  }
450 }
451 
452 #[cfg(not(feature = "reqwest-backend"))]
453 pub mod reqwest_be {
454 
455  use anyhow::{anyhow, Result};
456 
457  use super::{Event, TlsBackend};
458  use crate::errors::*;
459  use url::Url;
460 
461  pub fn download(
462  _url: &Url,
463  _resume_from: u64,
464  _callback: &dyn Fn(Event<'_>) -> Result<()>,
465  _tls: TlsBackend,
466  ) -> Result<()> {
467  Err(anyhow!(DownloadError::BackendUnavailable("reqwest")))
468  }
469 }