Skip to main content

bootc_internal_mount/
mount.rs

1//! Helpers for interacting with mountpoints
2
3use std::{
4    fs,
5    mem::MaybeUninit,
6    os::fd::{AsFd, OwnedFd},
7    process::Command,
8};
9
10use anyhow::{Context, Result, anyhow};
11use bootc_utils::CommandRunExt;
12use camino::Utf8Path;
13use cap_std_ext::{cap_std::fs::Dir, cmdext::CapStdExtCommandExt};
14use fn_error_context::context;
15use rustix::{
16    mount::{MoveMountFlags, OpenTreeFlags},
17    net::{
18        AddressFamily, RecvFlags, SendAncillaryBuffer, SendAncillaryMessage, SendFlags,
19        SocketFlags, SocketType,
20    },
21    process::WaitOptions,
22    thread::Pid,
23};
24use serde::Deserialize;
25
26/// Temporary mount management with automatic cleanup.
27pub mod tempmount;
28
29/// Well known identifier for pid 1
30pub const PID1: Pid = const {
31    match Pid::from_raw(1) {
32        Some(v) => v,
33        None => panic!("Expected to parse pid1"),
34    }
35};
36
37/// Deserialized information about a mounted filesystem from `findmnt`.
38#[derive(Deserialize, Debug)]
39#[serde(rename_all = "kebab-case")]
40#[allow(dead_code)]
41pub struct Filesystem {
42    // Note if you add an entry to this list, you need to change the --output invocation below too
43    /// The source device or path.
44    pub source: String,
45    /// The mount target path.
46    pub target: String,
47    /// Major:minor device numbers.
48    #[serde(rename = "maj:min")]
49    pub maj_min: String,
50    /// The filesystem type (e.g. ext4, xfs).
51    pub fstype: String,
52    /// Mount options.
53    pub options: String,
54    /// The filesystem UUID, if available.
55    pub uuid: Option<String>,
56    /// Child filesystems, if any.
57    pub children: Option<Vec<Filesystem>>,
58}
59
60/// Deserialized output of `findmnt --json`.
61#[derive(Deserialize, Debug, Default)]
62pub struct Findmnt {
63    /// The list of mounted filesystems.
64    pub filesystems: Vec<Filesystem>,
65}
66
67/// Run `findmnt` with JSON output and parse the result.
68pub fn run_findmnt(args: &[&str], cwd: Option<&Dir>, path: Option<&str>) -> Result<Findmnt> {
69    let mut cmd = Command::new("findmnt");
70    if let Some(cwd) = cwd {
71        cmd.cwd_dir(cwd.try_clone()?);
72    }
73    cmd.args([
74        "-J",
75        "-v",
76        // If you change this you probably also want to change the Filesystem struct above
77        "--output=SOURCE,TARGET,MAJ:MIN,FSTYPE,OPTIONS,UUID",
78    ])
79    .args(args)
80    .args(path);
81    let o: Findmnt = cmd.log_debug().run_and_parse_json()?;
82    Ok(o)
83}
84
85// Retrieve a mounted filesystem from a device given a matching path
86fn findmnt_filesystem(args: &[&str], cwd: Option<&Dir>, path: &str) -> Result<Filesystem> {
87    let o = run_findmnt(args, cwd, Some(path))?;
88    o.filesystems
89        .into_iter()
90        .next()
91        .ok_or_else(|| anyhow!("findmnt returned no data for {path}"))
92}
93
94#[context("Inspecting filesystem {path}")]
95/// Inspect a target which must be a mountpoint root - it is an error
96/// if the target is not the mount root.
97pub fn inspect_filesystem(path: &Utf8Path) -> Result<Filesystem> {
98    findmnt_filesystem(&["--mountpoint"], None, path.as_str())
99}
100
101#[context("Inspecting filesystem")]
102/// Inspect a target which must be a mountpoint root - it is an error
103/// if the target is not the mount root.
104pub fn inspect_filesystem_of_dir(d: &Dir) -> Result<Filesystem> {
105    findmnt_filesystem(&["--mountpoint"], Some(d), ".")
106}
107
108#[context("Inspecting filesystem by UUID {uuid}")]
109/// Inspect a filesystem by partition UUID
110pub fn inspect_filesystem_by_uuid(uuid: &str) -> Result<Filesystem> {
111    findmnt_filesystem(&["--source"], None, &(format!("UUID={uuid}")))
112}
113
114/// Check if a specified device contains an already mounted filesystem
115/// in the root mount namespace.
116pub fn is_mounted_in_pid1_mountns(path: &str) -> Result<bool> {
117    let o = run_findmnt(&["-N"], None, Some("1"))?;
118
119    let mounted = o.filesystems.iter().any(|fs| is_source_mounted(path, fs));
120
121    Ok(mounted)
122}
123
124/// Recursively check a given filesystem to see if it contains an already mounted source.
125pub fn is_source_mounted(path: &str, mounted_fs: &Filesystem) -> bool {
126    if mounted_fs.source.contains(path) {
127        return true;
128    }
129
130    if let Some(ref children) = mounted_fs.children {
131        for child in children {
132            if is_source_mounted(path, child) {
133                return true;
134            }
135        }
136    }
137
138    false
139}
140
141/// Mount a device to the target path.
142pub fn mount(dev: &str, target: &Utf8Path) -> Result<()> {
143    Command::new("mount")
144        .args([dev, target.as_str()])
145        .run_inherited_with_cmd_context()
146}
147
148/// Mount a device with an explicit filesystem type.
149///
150/// This avoids relying on the `mount` utility's blkid auto-detection,
151/// which can fail in certain container environments (e.g. when the
152/// required filesystem kernel module is not yet loaded and the blkid
153/// probe doesn't work, causing mount to fall back to iterating
154/// `/etc/filesystems` and `/proc/filesystems`).
155pub fn mount_typed(dev: &str, fstype: &str, target: &Utf8Path) -> Result<()> {
156    Command::new("mount")
157        .args(["-t", fstype, dev, target.as_str()])
158        .run_inherited_with_cmd_context()
159}
160
161/// If the fsid of the passed path matches the fsid of the same path rooted
162/// at /proc/1/root, it is assumed that these are indeed the same mounted
163/// filesystem between container and host.
164/// Path should be absolute.
165#[context("Comparing filesystems at {path} and /proc/1/root/{path}")]
166pub fn is_same_as_host(path: &Utf8Path) -> Result<bool> {
167    // Add a leading '/' in case a relative path is passed
168    let path = Utf8Path::new("/").join(path);
169
170    // Using statvfs instead of fs, since rustix will translate the fsid field
171    // for us.
172    let devstat = rustix::fs::statvfs(path.as_std_path())?;
173    let hostpath = Utf8Path::new("/proc/1/root").join(path.strip_prefix("/")?);
174    let hostdevstat = rustix::fs::statvfs(hostpath.as_std_path())?;
175    tracing::trace!(
176        "base mount id {:?}, host mount id {:?}",
177        devstat.f_fsid,
178        hostdevstat.f_fsid
179    );
180    Ok(devstat.f_fsid == hostdevstat.f_fsid)
181}
182
183/// Given a pid, enter its mount namespace and acquire a file descriptor
184/// for a mount from that namespace.
185#[allow(unsafe_code)]
186#[context("Opening mount tree from pid")]
187pub fn open_tree_from_pidns(
188    pid: rustix::process::Pid,
189    path: &Utf8Path,
190    recursive: bool,
191) -> Result<OwnedFd> {
192    // Allocate a socket pair to use for sending file descriptors.
193    let (sock_parent, sock_child) = rustix::net::socketpair(
194        AddressFamily::UNIX,
195        SocketType::STREAM,
196        SocketFlags::CLOEXEC,
197        None,
198    )
199    .context("socketpair")?;
200    const DUMMY_DATA: &[u8] = b"!";
201    match unsafe { libc::fork() } {
202        0 => {
203            // We're in the child. At this point we know we don't have multiple threads, so we
204            // can safely `setns`.
205
206            drop(sock_parent);
207
208            // Open up the namespace of the target process as a file descriptor, and enter it.
209            let pidlink = fs::File::open(format!("/proc/{}/ns/mnt", pid.as_raw_nonzero()))?;
210            rustix::thread::move_into_link_name_space(
211                pidlink.as_fd(),
212                Some(rustix::thread::LinkNameSpaceType::Mount),
213            )
214            .context("setns")?;
215
216            // Open the target mount path as a file descriptor.
217            let recursive = if recursive {
218                OpenTreeFlags::AT_RECURSIVE
219            } else {
220                OpenTreeFlags::empty()
221            };
222            let fd = rustix::mount::open_tree(
223                rustix::fs::CWD,
224                path.as_std_path(),
225                OpenTreeFlags::OPEN_TREE_CLOEXEC | OpenTreeFlags::OPEN_TREE_CLONE | recursive,
226            )
227            .context("open_tree")?;
228
229            // And send that file descriptor via fd passing over the socketpair.
230            let fd = fd.as_fd();
231            let fds = [fd];
232            let mut buffer = [MaybeUninit::uninit(); rustix::cmsg_space!(ScmRights(1))];
233            let mut control = SendAncillaryBuffer::new(&mut buffer);
234            let pushed = control.push(SendAncillaryMessage::ScmRights(&fds));
235            assert!(pushed);
236            let ios = std::io::IoSlice::new(DUMMY_DATA);
237            rustix::net::sendmsg(sock_child, &[ios], &mut control, SendFlags::empty())?;
238            // Then we're done.
239            std::process::exit(0)
240        }
241        -1 => {
242            // fork failed
243            let e = std::io::Error::last_os_error();
244            anyhow::bail!("failed to fork: {e}");
245        }
246        n => {
247            // We're in the parent; create a pid (checking that n > 0).
248            let pid = rustix::process::Pid::from_raw(n).unwrap();
249            drop(sock_child);
250            // Receive the mount file descriptor from the child
251            let mut cmsg_space = vec![MaybeUninit::uninit(); rustix::cmsg_space!(ScmRights(1))];
252            let mut cmsg_buffer = rustix::net::RecvAncillaryBuffer::new(&mut cmsg_space);
253            let mut buf = [0u8; DUMMY_DATA.len()];
254            let iov = std::io::IoSliceMut::new(buf.as_mut());
255            let mut iov = [iov];
256            let nread = rustix::net::recvmsg(
257                sock_parent,
258                &mut iov,
259                &mut cmsg_buffer,
260                RecvFlags::CMSG_CLOEXEC,
261            )
262            .context("recvmsg")?
263            .bytes;
264            anyhow::ensure!(nread == DUMMY_DATA.len());
265            assert_eq!(buf, DUMMY_DATA);
266            // And extract the file descriptor
267            let r = cmsg_buffer
268                .drain()
269                .filter_map(|m| match m {
270                    rustix::net::RecvAncillaryMessage::ScmRights(f) => Some(f),
271                    _ => None,
272                })
273                .flatten()
274                .next()
275                .ok_or_else(|| anyhow::anyhow!("Did not receive a file descriptor"))?;
276            // SAFETY: Since we're not setting WNOHANG, this will always return Some().
277            let st = rustix::process::waitpid(Some(pid), WaitOptions::empty())?
278                .expect("Wait status")
279                .1;
280            if let Some(0) = st.exit_status() {
281                Ok(r)
282            } else {
283                anyhow::bail!("forked helper failed: {st:?}");
284            }
285        }
286    }
287}
288
289/// Create a bind mount from the mount namespace of the target pid
290/// into our mount namespace.
291pub fn bind_mount_from_pidns(
292    pid: Pid,
293    src: &Utf8Path,
294    target: &Utf8Path,
295    recursive: bool,
296) -> Result<()> {
297    let src = open_tree_from_pidns(pid, src, recursive)?;
298    rustix::mount::move_mount(
299        src.as_fd(),
300        "",
301        rustix::fs::CWD,
302        target.as_std_path(),
303        MoveMountFlags::MOVE_MOUNT_F_EMPTY_PATH,
304    )
305    .context("Moving mount")?;
306    Ok(())
307}
308
309/// If the target path is not already mirrored from the host (e.g. via `-v /dev:/dev`)
310/// then recursively mount it.
311pub fn ensure_mirrored_host_mount(path: impl AsRef<Utf8Path>) -> Result<()> {
312    let path = path.as_ref();
313    // If we didn't have this in our filesystem already (e.g. for /var/lib/containers)
314    // then create it now.
315    std::fs::create_dir_all(path)?;
316    if is_same_as_host(path)? {
317        tracing::debug!("Already mounted from host: {path}");
318        return Ok(());
319    }
320    tracing::debug!("Propagating host mount: {path}");
321    bind_mount_from_pidns(PID1, path, path, true)
322}