Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-10-08 23:10:07

0001 #define _GNU_SOURCE 1
0002 #define _FILE_OFFSET_BITS 64
#include "Utilities/StorageFactory/interface/LocalFileSystem.h"
#include "FWCore/MessageLogger/interface/MessageLogger.h"
#include <cassert>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <sys/param.h>
#if BSD
#include <sys/statvfs.h>
#include <sys/ucred.h>
#include <sys/mount.h>
#else
#include <mntent.h>
#include <sys/vfs.h>
#endif
#include <sys/stat.h>
#include <unistd.h>
#include <atomic>
#include <iostream>
#include <memory>
#include <new>
#include <sstream>
0024 
0025 using namespace edm::storage;
0026 
0027 /// Information about file systems on this node.
0028 struct LocalFileSystem::FSInfo {
0029   char *fsname;               //< file system name
0030   char *type;                 //< file system type
0031   char *dir;                  //< mount point directory
0032   char *origin = nullptr;     //< mount origin
0033   dev_t dev;                  //< device id
0034   long fstype;                //< file system id
0035   double freespc;             //< free space in megabytes
0036   unsigned local : 1;         //< flag for local device
0037   unsigned bind : 1;          //< flag for bind mounts
0038   std::atomic<bool> checked;  //< flag for valid dev, fstype
0039 };
0040 
0041 /** Read /proc/filesystems to determine which filesystems are local,
0042     meaning access latency is tolerably small, and operating system
0043     buffer cache will likely do a good job at caching file contents
0044     and accelerate many small file operations reasonably well.
0045 
0046     The /proc list enumerates all filesystems known by the kernel,
0047     except a few special ones like /dev and /selinux. The ones marked
0048     as "nodev" have unstable device definition, meaning they are some
0049     way or another "virtual" file systems.  This labeling is used by
0050     kernel nfsd to determine which file systems are safe for exporting
0051     without help (fixing fsid), and turns out to be close enough to
0052     list of file systems that we can consider to be high-speed local,
0053     minus a few exceptions.  Everything else we consider "remote" or
0054     "slow" file systems where application should prefer massive bulk
0055     streaming I/O for better performance.
0056 
0057     The exceptions to /proc/filesystems list: lustre and fuse file
0058     systems are forced to remote status. Everything else like NFS,
0059     AFS, GPFS and various cluster-based systems are already remote. */
0060 int LocalFileSystem::readFSTypes() {
0061   int ret = 0;
0062 
0063 #if __linux__
0064   constexpr char procfs[] = "/proc/filesystems";
0065   auto close_ = [](FILE *iFile) { fclose(iFile); };
0066   std::unique_ptr<FILE, decltype(close_)> fs(fopen(procfs, "r"), close_);
0067   if (!fs) {
0068     int nerr = errno;
0069     edm::LogWarning("LocalFileSystem::readFSTypes()")
0070         << "Cannot read '" << procfs << "': " << strerror(nerr) << " (error " << nerr << ")";
0071     return -1;
0072   }
0073 
0074   ssize_t nread;
0075   int line = 0;
0076   auto free_ = [](char **iPtr) { free(*iPtr); };
0077   while (!feof(fs.get())) {
0078     char *type = nullptr;
0079     std::unique_ptr<char *, decltype(free_)> freeType(&type, free_);
0080 
0081     size_t len = 0;
0082     ++line;
0083 
0084     if ((nread = getdelim(&type, &len, '\t', fs.get())) == -1 && !feof(fs.get())) {
0085       edm::LogError("LocalFileSystem::readFSTypes()")
0086           .format("{}:{}: {} ({}; 1)\n", procfs, line, strerror(errno), nread);
0087       ret = -1;
0088       break;
0089     }
0090 
0091     char *fstype = nullptr;
0092     std::unique_ptr<char *, decltype(free_)> freeFSType(&fstype, free_);
0093     if ((nread = getdelim(&fstype, &len, '\n', fs.get())) == -1 && !feof(fs.get())) {
0094       edm::LogError("LocalFileSystem::readFSTypes()")
0095           .format("{}:{}: {} ({}; 2)\n", procfs, line, strerror(errno), nread);
0096       ret = -1;
0097       break;
0098     }
0099 
0100     if (feof(fs.get())) {
0101       break;
0102     }
0103 
0104     if (!strcmp(type, "nodev\t") || !strcmp(fstype, "lustre\n") || !strncmp(fstype, "fuse", 4)) {
0105       continue;
0106     }
0107 
0108     assert(nread >= 1);
0109     fstype[nread - 1] = 0;
0110     fstypes_.push_back(fstype);
0111   }
0112 #endif  // __linux__
0113 
0114   return ret;
0115 }
0116 
0117 /** Initialise file system description from /etc/mtab info.
0118 
0119     This function saves the information from getmntent(), matching the
0120     file system type to the known local ones.  It only remembers the
0121     information from /etc/mtab, so the dev and fstype attributes are
0122     not yet valid; call statFSInfo() to fill those in.  This avoids
0123     touching irrelevant filesystems unnecessarily; the file system may
0124     not be fully functional, or partially offline, or just very slow. */
0125 LocalFileSystem::FSInfo *LocalFileSystem::initFSInfo(void *arg) {
0126 #if BSD
0127   struct statfs *m = static_cast<struct statfs *>(arg);
0128   size_t infolen = sizeof(struct FSInfo);
0129   size_t fslen = strlen(m->f_mntfromname) + 1;
0130   size_t dirlen = strlen(m->f_mntonname) + 1;
0131   size_t typelen = strlen(m->f_fstypename) + 1;
0132   size_t totlen = infolen + fslen + dirlen + typelen;
0133   FSInfo *i = (FSInfo *)malloc(totlen);
0134   char *p = (char *)i;
0135   i->fsname = strncpy(p += infolen, m->f_mntfromname, fslen);
0136   i->type = strncpy(p += fslen, m->f_fstypename, typelen);
0137   i->dir = strncpy(p += typelen, m->f_mntonname, dirlen);
0138   i->dev = m->f_fsid.val[0];
0139   i->fstype = m->f_type;
0140   i->freespc = 0;
0141   i->bind = 0;
0142   i->origin = nullptr;
0143   if (m->f_bsize > 0) {
0144     i->freespc = m->f_bavail;
0145     i->freespc *= m->f_bsize;
0146     i->freespc /= 1024. * 1024. * 1024.;
0147   }
0148   /* FIXME: This incorrectly says that mounted disk images are local,
0149      even if it was mounted from a network server. The alternative is
0150      to walk up the device tree using either a) process IORegistry to
0151      get the device tree, which lists devices for disk images, and from
0152      there translate volume uuid to a mount point; b) parse output from
0153      'hdiutil info -plist' to determine image-path / dev-entry map. */
0154   i->local = ((m->f_flags & MNT_LOCAL) ? 1 : 0);
0155   i->checked = 1;
0156   return i;
0157 
0158 #else   // ! BSD
0159   mntent *m = static_cast<mntent *>(arg);
0160   size_t infolen = sizeof(struct FSInfo);
0161   size_t fslen = strlen(m->mnt_fsname) + 1;
0162   size_t dirlen = strlen(m->mnt_dir) + 1;
0163   size_t typelen = strlen(m->mnt_type) + 1;
0164   size_t originlen = strlen(m->mnt_fsname) + 1;
0165   size_t totlen = infolen + fslen + dirlen + typelen + originlen;
0166   FSInfo *i = (FSInfo *)malloc(totlen);
0167   char *p = (char *)i;
0168   i->fsname = static_cast<char *>(memcpy(p += infolen, m->mnt_fsname, fslen));
0169   i->type = static_cast<char *>(memcpy(p += fslen, m->mnt_type, typelen));
0170   i->dir = static_cast<char *>(memcpy(p += typelen, m->mnt_dir, dirlen));
0171   i->origin = static_cast<char *>(memcpy(p + dirlen, m->mnt_fsname, originlen));
0172   i->dev = -1;
0173   i->fstype = -1;
0174   i->freespc = 0;
0175   i->local = 0;
0176   i->checked = false;
0177   i->bind = strstr(m->mnt_opts, "bind") != nullptr;
0178 
0179   for (size_t j = 0; j < fstypes_.size() && !i->local; ++j)
0180     if (fstypes_[j] == i->type)
0181       i->local = 1;
0182 #endif  // BSD
0183 
0184   return i;
0185 }
0186 
0187 /** Initialise the list of currently mounted file systems.
0188 
0189     Reads /etc/mtab (or equivalent) to determine all currently mounted
0190     file systems, and initialises FSInfo structure for them.  It does
0191     not yet call statFSInfo() on them, so the device and file type ids
0192     are not yet complete. */
0193 int LocalFileSystem::initFSList() {
0194 #if BSD
0195   int rc;
0196   struct statfs *mtab = 0;
0197   if ((rc = getmntinfo(&mtab, MNT_NOWAIT)) < 0) {
0198     int nerr = errno;
0199     edm::LogWarning("LocalFileSystem::initFSList()")
0200         << "getmntinfo() failed: " << strerror(nerr) << " (error " << nerr << ")";
0201     return -1;
0202   }
0203 
0204   fs_.reserve(rc);
0205   for (int ix = 0; ix < rc; ++ix)
0206     fs_.push_back(initFSInfo(&mtab[ix]));
0207 
0208   free(mtab);
0209 #else
0210   const char *const _PATH_MOUNTED_LINUX = "/proc/self/mounts";
0211   struct mntent *m;
0212   FILE *mtab = setmntent(_PATH_MOUNTED_LINUX, "r");
0213   if (!mtab) {
0214     int nerr = errno;
0215     edm::LogWarning("LocalFileSystem::initFSList()")
0216         << "Cannot read '" << _PATH_MOUNTED_LINUX << "': " << strerror(nerr) << " (error " << nerr << ")";
0217     return -1;
0218   }
0219 
0220   fs_.reserve(20);
0221   while ((m = getmntent(mtab)))
0222     fs_.push_back(initFSInfo(m));
0223 
0224   endmntent(mtab);
0225 #endif
0226 
0227   return 0;
0228 }
0229 
0230 /** Figure out file system device and type ids.
0231 
0232     Calls stat() and statfs() on the file system to determine device
0233     and file system type ids.  These are required to determine if two
0234     paths are actually on the same file system.
0235 
0236     This function can be called any number of times.  It only does the
0237     file system check the first time the function is called. */
0238 int LocalFileSystem::statFSInfo(FSInfo *i) const {
0239   int ret = 0;
0240   struct stat s;
0241   struct statfs sfs;
0242 
0243   if (!i->checked) {
0244     if (lstat(i->dir, &s) < 0) {
0245       i->checked = true;
0246 
0247       int nerr = errno;
0248       if (nerr != ENOENT && nerr != EACCES)
0249         edm::LogWarning("LocalFileSystem::statFSInfo()")
0250             << "Cannot lstat('" << i->dir << "'): " << strerror(nerr) << " (error " << nerr << ")";
0251       return -1;
0252     }
0253 
0254     if (statfs(i->dir, &sfs) < 0) {
0255       i->checked = true;
0256       int nerr = errno;
0257       edm::LogWarning("LocalFileSystem::statFSInfo()")
0258           << "Cannot statfs('" << i->dir << "'): " << strerror(nerr) << " (error " << nerr << ")";
0259       return -1;
0260     }
0261 
0262     i->dev = s.st_dev;
0263     i->fstype = sfs.f_type;
0264     if (sfs.f_bsize > 0) {
0265       i->freespc = sfs.f_bavail;
0266       i->freespc *= sfs.f_bsize;
0267       i->freespc /= 1024. * 1024. * 1024.;
0268     }
0269     i->checked = true;
0270   } else if (i->fstype == -1) {
0271     errno = ENOENT;
0272     ret = -1;
0273   }
0274 
0275   return ret;
0276 }
0277 
0278 /** Find the file system @a path was mounted from.  The statfs() and
0279     stat() information for @a path should be in @a sfs and @a s,
0280     respectively.
0281 
0282     Finds currently mounted file system that @a path is owned by, and
0283     returns the FSInfo object for it, or null if no matching live file
0284     system can be found.  If the return value is non-null, then it is
0285     guaranteed @a path was on that file system.
0286 
0287     A null return value is possible for certain paths which are not on
0288     any mounted file system (e.g. /dev or /selinux), or if the file
0289     system is unavailable or some other way dysfunctional, such as
0290     dead nfs mount or filesystem does not implement statfs().  */
0291 LocalFileSystem::FSInfo *LocalFileSystem::findMount(const char *path,
0292                                                     struct statfs *sfs,
0293                                                     struct stat *s,
0294                                                     std::vector<std::string> &prev_paths) const {
0295   for (const auto &old_path : prev_paths) {
0296     if (!strcmp(old_path.c_str(), path)) {
0297       edm::LogWarning("LocalFileSystem::findMount()") << "Found a loop in bind mounts; stopping evaluation.";
0298       return nullptr;
0299     }
0300   }
0301 
0302   FSInfo *best = nullptr;
0303   size_t bestlen = 0;
0304   size_t len = strlen(path);
0305   for (size_t i = 0; i < fs_.size(); ++i) {
0306     // First match simply against the file system path.  We don't
0307     // touch the file system until the path prefix matches.
0308     // When we have a path prefix match, check the file system if
0309     //   we don't have a best match candidate yet, OR
0310     //   this match is longer (more specific) than the previous best OR
0311     //   this match is the same length and the previous best isn't local
0312     // The final condition handles cases such as '/' that can appear twice
0313     // in the file system list, once as 'rootfs' and once as local fs.
0314     size_t fslen = strlen(fs_[i]->dir);
0315     if (!strncmp(fs_[i]->dir, path, fslen) &&
0316         ((fslen == 1 && fs_[i]->dir[0] == '/') || len == fslen || path[fslen] == '/') &&
0317         (!best || fslen > bestlen || (fslen == bestlen && !best->local))) {
0318       // Get the file system device and file system ids.
0319       if (statFSInfo(fs_[i]) < 0)
0320         return nullptr;
0321 
0322       // Check the path is on the same device / file system.  If this
0323       // fails, we found a better prefix match on path, but it's the
0324       // wrong device, so reset our idea of the best match: it can't
0325       // be the outer mount any more.  Not sure this is the right
0326       // thing to do with e.g. loop-back or union mounts.
0327       if (fs_[i]->dev != s->st_dev || fs_[i]->fstype != sfs->f_type) {
0328         best = nullptr;
0329         continue;
0330       }
0331 
0332       // OK this is better than anything else we found so far.
0333       best = fs_[i];
0334       bestlen = fslen;
0335     }
0336   }
0337   // In the case of a bind mount, try looking again at the source directory.
0338   if (best && best->bind && best->origin) {
0339     struct stat s2;
0340     struct statfs sfs2;
0341     std::unique_ptr<char, decltype(std::free) *> fullpath{realpath(best->origin, nullptr), std::free};
0342 
0343     if (!fullpath)
0344       fullpath.reset(strdup(best->origin));
0345 
0346     if (lstat(fullpath.get(), &s2) < 0) {
0347       int nerr = errno;
0348       edm::LogWarning("LocalFileSystem::findMount()") << "Cannot lstat('" << fullpath.get() << "' alias '" << path
0349                                                       << "'): " << strerror(nerr) << " (error " << nerr << ")";
0350       return best;
0351     }
0352 
0353     if (statfs(fullpath.get(), &sfs2) < 0) {
0354       int nerr = errno;
0355       edm::LogWarning("LocalFileSystem::findMount()") << "Cannot statfs('" << fullpath.get() << "' alias '" << path
0356                                                       << "'): " << strerror(nerr) << " (error " << nerr << ")";
0357       return best;
0358     }
0359 
0360     prev_paths.push_back(path);
0361     LocalFileSystem::FSInfo *new_best = findMount(fullpath.get(), &sfs2, &s2, prev_paths);
0362     return new_best ? new_best : best;
0363   }
0364 
0365   return best;
0366 }
0367 
0368 /** Determine if @a path is on a file system known to be local.
0369 
0370     Returns @c true if the path is definitely known to be local.
0371     Returns @c false otherwise, including when it's not possible to
0372     determine anything about the path at all.
0373 
0374     Does not throw exceptions.  If any errors occur, the errors are
0375     reported as message logger warnings but the actual error is
0376     swallowed and the function simply returns @c false. */
0377 bool LocalFileSystem::isLocalPath(const std::string &path) const {
0378   struct stat s;
0379   struct statfs sfs;
0380   std::unique_ptr<char, decltype(std::free) *> fullpath{realpath(path.c_str(), nullptr), std::free};
0381 
0382   if (!fullpath)
0383     fullpath.reset(strdup(path.c_str()));
0384 
0385   if (lstat(fullpath.get(), &s) < 0) {
0386     int nerr = errno;
0387     edm::LogWarning("LocalFileSystem::isLocalPath()") << "Cannot lstat('" << fullpath.get() << "' alias '" << path
0388                                                       << "'): " << strerror(nerr) << " (error " << nerr << ")";
0389     return false;
0390   }
0391 
0392   if (statfs(fullpath.get(), &sfs) < 0) {
0393     int nerr = errno;
0394     edm::LogWarning("LocalFileSystem::isLocalPath()") << "Cannot statfs('" << fullpath.get() << "' alias '" << path
0395                                                       << "'): " << strerror(nerr) << " (error " << nerr << ")";
0396     return false;
0397   }
0398 
0399   std::vector<std::string> prev_paths;
0400   FSInfo *m = findMount(fullpath.get(), &sfs, &s, prev_paths);
0401   return m ? m->local : false;
0402 }
0403 
0404 /** Find a writeable directory among @a paths which is known to be
0405     local and has at least @a minFreeSpace amount of free space in
0406     gigabytes.
0407 
0408     The @a paths should contain list of relative or absolute candidate
0409     directories.  If an entry starts with letter "$" then the value of
0410     that environment variable is used instead; if the value is $TMPDIR
0411     and the environment variable is empty, "/tmp" is used instead.
0412 
0413     Returns the first path in @a paths which satisfies the criteria,
0414     expanded to environment variable value if appropriate, resolved
0415     to full absolute path.  If no suitable path can be found, returns
0416     an empty string.
0417 
0418     Does not throw exceptions.  If any serious errors occur, the errors
0419     are reported as message logger warnings but the actual error is
0420     swallowed and the directory concerned is skipped.  Non-existent
0421     and inaccessible directories are silently ignored without warning. */
0422 std::pair<std::string, std::string> LocalFileSystem::findCachePath(const std::vector<std::string> &paths,
0423                                                                    double minFreeSpace) const {
0424   struct stat s;
0425   struct statfs sfs;
0426   std::ostringstream warningst;
0427   warningst << "Cannot use lazy-download because:\n";
0428 
0429   for (size_t i = 0, e = paths.size(); i < e; ++i) {
0430     const char *inpath = paths[i].c_str();
0431     const char *path = inpath;
0432 
0433     if (*path == '$') {
0434       char *p = std::getenv(path + 1);
0435       if (p && *p)
0436         path = p;
0437       else if (!strcmp(path, "$TMPDIR"))
0438         path = "/tmp";
0439     }
0440 
0441     std::unique_ptr<char, decltype(std::free) *> fullpath{realpath(path, nullptr), std::free};
0442     if (!fullpath)
0443       fullpath.reset(strdup(path));
0444 
0445 #if 0
0446     std::cerr /* edm::LogInfo("LocalFileSystem") */
0447       << "Checking if '" << fullpath.get() << "', from '"
0448       << inpath << "' is valid cache path with "
0449       << minFreeSpace << " free space" << std::endl;
0450 #endif
0451 
0452     if (lstat(fullpath.get(), &s) < 0) {
0453       int nerr = errno;
0454       if (nerr != ENOENT && nerr != EACCES)
0455         edm::LogWarning("LocalFileSystem::findCachePath()")
0456             << "Cannot lstat('" << fullpath.get() << "', from '" << inpath << "'): " << strerror(nerr) << " (error "
0457             << nerr << ")";
0458       continue;
0459     }
0460 
0461     if (statfs(fullpath.get(), &sfs) < 0) {
0462       int nerr = errno;
0463       edm::LogWarning("LocalFileSystem::findCachePath()")
0464           << "Cannot statfs('" << fullpath.get() << "', from '" << inpath << "'): " << strerror(nerr) << " (error "
0465           << nerr << ")";
0466       continue;
0467     }
0468 
0469     std::vector<std::string> prev_paths;
0470     FSInfo *m = findMount(fullpath.get(), &sfs, &s, prev_paths);
0471 #if 0
0472     std::cerr /* edm::LogInfo("LocalFileSystem") */
0473       << "Candidate '" << fullpath.get() << "': "
0474       << "found=" << (m ? 1 : 0)
0475       << " local=" << (m && m->local)
0476       << " free=" << (m ? m->freespc : 0)
0477       << " access=" << access(fullpath.get(), W_OK)
0478       << std::endl;
0479 #endif
0480 
0481     if (m && m->local && m->freespc >= minFreeSpace && access(fullpath.get(), W_OK) == 0) {
0482       return std::make_pair(std::string(fullpath.get()), std::string());
0483     } else if (m) {
0484       if (!m->local) {
0485         warningst << "- The mount " << fullpath.get() << " is not local.\n";
0486       } else if (m->freespc < minFreeSpace) {
0487         warningst << " - The mount at " << fullpath.get() << " has only " << m->freespc << " GB free; a minumum of "
0488                   << minFreeSpace << " GB is required.\n";
0489       } else if (access(fullpath.get(), W_OK)) {
0490         warningst << " - The process has no permission to write into " << fullpath.get() << "\n";
0491       }
0492     }
0493   }
0494 
0495   std::string warning_str = warningst.str();
0496   if (!warning_str.empty()) {
0497     warning_str = warning_str.substr(0, warning_str.size() - 2);
0498   }
0499 
0500   return std::make_pair(std::string(), std::move(warning_str));
0501 }
0502 
0503 /** Initialise local file system status.  */
0504 LocalFileSystem::LocalFileSystem() {
0505   if (readFSTypes() < 0)
0506     return;
0507 
0508   if (initFSList() < 0)
0509     return;
0510 }
0511 
0512 /** Free local file system status resources. */
0513 LocalFileSystem::~LocalFileSystem() {
0514   for (size_t i = 0, e = fs_.size(); i < e; ++i)
0515     free(fs_[i]);
0516 }