Back to home page

Project CMSSW displayed by LXR

 
 

    


File indexing completed on 2024-04-06 12:31:52

0001 #define _GNU_SOURCE 1
0002 #define _FILE_OFFSET_BITS 64
#include "Utilities/StorageFactory/interface/LocalFileSystem.h"
#include "FWCore/MessageLogger/interface/MessageLogger.h"
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cassert>
#include <sys/param.h>
#if BSD
#include <sys/statvfs.h>
#include <sys/ucred.h>
#include <sys/mount.h>
#else
#include <mntent.h>
#include <sys/vfs.h>
#endif
#include <sys/stat.h>
#include <unistd.h>
#include <iostream>
#include <atomic>
#include <memory>
#include <new>
0024 
0025 using namespace edm::storage;
0026 
0027 /// Information about file systems on this node.
0028 struct LocalFileSystem::FSInfo {
0029   char *fsname;               //< file system name
0030   char *type;                 //< file system type
0031   char *dir;                  //< mount point directory
0032   char *origin = nullptr;     //< mount origin
0033   dev_t dev;                  //< device id
0034   long fstype;                //< file system id
0035   double freespc;             //< free space in megabytes
0036   unsigned local : 1;         //< flag for local device
0037   unsigned bind : 1;          //< flag for bind mounts
0038   std::atomic<bool> checked;  //< flag for valid dev, fstype
0039 };
0040 
0041 /** Read /proc/filesystems to determine which filesystems are local,
0042     meaning access latency is tolerably small, and operating system
0043     buffer cache will likely do a good job at caching file contents
0044     and accelerate many small file operations reasonably well.
0045 
0046     The /proc list enumerates all filesystems known by the kernel,
0047     except a few special ones like /dev and /selinux. The ones marked
0048     as "nodev" have unstable device definition, meaning they are some
0049     way or another "virtual" file systems.  This labeling is used by
0050     kernel nfsd to determine which file systems are safe for exporting
0051     without help (fixing fsid), and turns out to be close enough to
0052     list of file systems that we can consider to be high-speed local,
0053     minus a few exceptions.  Everything else we consider "remote" or
0054     "slow" file systems where application should prefer massive bulk
0055     streaming I/O for better performance.
0056 
0057     The exceptions to /proc/filesystems list: lustre and fuse file
0058     systems are forced to remote status. Everything else like NFS,
0059     AFS, GPFS and various cluster-based systems are already remote. */
0060 int LocalFileSystem::readFSTypes() {
0061   int ret = 0;
0062 
0063 #if __linux__
0064   constexpr char procfs[] = "/proc/filesystems";
0065   auto close_ = [](FILE *iFile) { fclose(iFile); };
0066   std::unique_ptr<FILE, decltype(close_)> fs(fopen(procfs, "r"), close_);
0067   if (!fs) {
0068     int nerr = errno;
0069     edm::LogWarning("LocalFileSystem::readFSTypes()")
0070         << "Cannot read '" << procfs << "': " << strerror(nerr) << " (error " << nerr << ")";
0071     return -1;
0072   }
0073 
0074   ssize_t nread;
0075   int line = 0;
0076   auto free_ = [](char **iPtr) { free(*iPtr); };
0077   while (!feof(fs.get())) {
0078     char *type = nullptr;
0079     std::unique_ptr<char *, decltype(free_)> freeType(&type, free_);
0080 
0081     size_t len = 0;
0082     ++line;
0083 
0084     if ((nread = getdelim(&type, &len, '\t', fs.get())) == -1 && !feof(fs.get())) {
0085       edm::LogError("LocalFileSystem::readFSTypes()")
0086           .format("{}:{}: {} ({}; 1)\n", procfs, line, strerror(errno), nread);
0087       ret = -1;
0088       break;
0089     }
0090 
0091     char *fstype = nullptr;
0092     std::unique_ptr<char *, decltype(free_)> freeFSType(&fstype, free_);
0093     if ((nread = getdelim(&fstype, &len, '\n', fs.get())) == -1 && !feof(fs.get())) {
0094       edm::LogError("LocalFileSystem::readFSTypes()")
0095           .format("{}:{}: {} ({}; 2)\n", procfs, line, strerror(errno), nread);
0096       ret = -1;
0097       break;
0098     }
0099 
0100     if (feof(fs.get())) {
0101       break;
0102     }
0103 
0104     if (!strcmp(type, "nodev\t") || !strcmp(fstype, "lustre\n") || !strncmp(fstype, "fuse", 4)) {
0105       continue;
0106     }
0107 
0108     assert(nread >= 1);
0109     fstype[nread - 1] = 0;
0110     fstypes_.push_back(fstype);
0111   }
0112 #endif  // __linux__
0113 
0114   return ret;
0115 }
0116 
0117 /** Initialise file system description from /etc/mtab info.
0118 
0119     This function saves the information from getmntent(), matching the
0120     file system type to the known local ones.  It only remembers the
0121     information from /etc/mtab, so the dev and fstype attributes are
0122     not yet valid; call statFSInfo() to fill those in.  This avoids
0123     touching irrelevant filesystems unnecessarily; the file system may
0124     not be fully functional, or partially offline, or just very slow. */
0125 LocalFileSystem::FSInfo *LocalFileSystem::initFSInfo(void *arg) {
0126 #if BSD
0127   struct statfs *m = static_cast<struct statfs *>(arg);
0128   size_t infolen = sizeof(struct FSInfo);
0129   size_t fslen = strlen(m->f_mntfromname) + 1;
0130   size_t dirlen = strlen(m->f_mntonname) + 1;
0131   size_t typelen = strlen(m->f_fstypename) + 1;
0132   size_t totlen = infolen + fslen + dirlen + typelen;
0133   FSInfo *i = (FSInfo *)malloc(totlen);
0134   char *p = (char *)i;
0135   i->fsname = strncpy(p += infolen, m->f_mntfromname, fslen);
0136   i->type = strncpy(p += fslen, m->f_fstypename, typelen);
0137   i->dir = strncpy(p += typelen, m->f_mntonname, dirlen);
0138   i->dev = m->f_fsid.val[0];
0139   i->fstype = m->f_type;
0140   i->freespc = 0;
0141   i->bind = 0;
0142   i->origin = nullptr;
0143   if (m->f_bsize > 0) {
0144     i->freespc = m->f_bavail;
0145     i->freespc *= m->f_bsize;
0146     i->freespc /= 1024. * 1024. * 1024.;
0147   }
0148   /* FIXME: This incorrectly says that mounted disk images are local,
0149      even if it was mounted from a network server. The alternative is
0150      to walk up the device tree using either a) process IORegistry to
0151      get the device tree, which lists devices for disk images, and from
0152      there translate volume uuid to a mount point; b) parse output from
0153      'hdiutil info -plist' to determine image-path / dev-entry map. */
0154   i->local = ((m->f_flags & MNT_LOCAL) ? 1 : 0);
0155   i->checked = 1;
0156   return i;
0157 
0158 #else   // ! BSD
0159   mntent *m = static_cast<mntent *>(arg);
0160   size_t infolen = sizeof(struct FSInfo);
0161   size_t fslen = strlen(m->mnt_fsname) + 1;
0162   size_t dirlen = strlen(m->mnt_dir) + 1;
0163   size_t typelen = strlen(m->mnt_type) + 1;
0164   size_t originlen = strlen(m->mnt_fsname) + 1;
0165   size_t totlen = infolen + fslen + dirlen + typelen + originlen;
0166   FSInfo *i = (FSInfo *)malloc(totlen);
0167   char *p = (char *)i;
0168   i->fsname = static_cast<char *>(memcpy(p += infolen, m->mnt_fsname, fslen));
0169   i->type = static_cast<char *>(memcpy(p += fslen, m->mnt_type, typelen));
0170   i->dir = static_cast<char *>(memcpy(p += typelen, m->mnt_dir, dirlen));
0171   i->origin = static_cast<char *>(memcpy(p += dirlen, m->mnt_fsname, originlen));
0172   i->dev = -1;
0173   i->fstype = -1;
0174   i->freespc = 0;
0175   i->local = 0;
0176   i->checked = false;
0177   i->bind = strstr(m->mnt_opts, "bind") != nullptr;
0178 
0179   for (size_t j = 0; j < fstypes_.size() && !i->local; ++j)
0180     if (fstypes_[j] == i->type)
0181       i->local = 1;
0182 #endif  // BSD
0183 
0184   return i;
0185 }
0186 
0187 /** Initialise the list of currently mounted file systems.
0188 
0189     Reads /etc/mtab (or equivalent) to determine all currently mounted
0190     file systems, and initialises FSInfo structure for them.  It does
0191     not yet call statFSInfo() on them, so the device and file type ids
0192     are not yet complete. */
0193 int LocalFileSystem::initFSList() {
0194 #if BSD
0195   int rc;
0196   struct statfs *mtab = 0;
0197   if ((rc = getmntinfo(&mtab, MNT_NOWAIT)) < 0) {
0198     int nerr = errno;
0199     edm::LogWarning("LocalFileSystem::initFSList()")
0200         << "getmntinfo() failed: " << strerror(nerr) << " (error " << nerr << ")";
0201     return -1;
0202   }
0203 
0204   fs_.reserve(rc);
0205   for (int ix = 0; ix < rc; ++ix)
0206     fs_.push_back(initFSInfo(&mtab[ix]));
0207 
0208   free(mtab);
0209 #else
0210   const char *const _PATH_MOUNTED_LINUX = "/proc/self/mounts";
0211   struct mntent *m;
0212   FILE *mtab = setmntent(_PATH_MOUNTED_LINUX, "r");
0213   if (!mtab) {
0214     int nerr = errno;
0215     edm::LogWarning("LocalFileSystem::initFSList()")
0216         << "Cannot read '" << _PATH_MOUNTED_LINUX << "': " << strerror(nerr) << " (error " << nerr << ")";
0217     return -1;
0218   }
0219 
0220   fs_.reserve(20);
0221   while ((m = getmntent(mtab)))
0222     fs_.push_back(initFSInfo(m));
0223 
0224   endmntent(mtab);
0225 #endif
0226 
0227   return 0;
0228 }
0229 
0230 /** Figure out file system device and type ids.
0231 
0232     Calls stat() and statfs() on the file system to determine device
0233     and file system type ids.  These are required to determine if two
0234     paths are actually on the same file system.
0235 
0236     This function can be called any number of times.  It only does the
0237     file system check the first time the function is called. */
0238 int LocalFileSystem::statFSInfo(FSInfo *i) const {
0239   int ret = 0;
0240   struct stat s;
0241   struct statfs sfs;
0242 
0243   if (!i->checked) {
0244     if (lstat(i->dir, &s) < 0) {
0245       i->checked = true;
0246 
0247       int nerr = errno;
0248       if (nerr != ENOENT && nerr != EACCES)
0249         edm::LogWarning("LocalFileSystem::statFSInfo()")
0250             << "Cannot lstat('" << i->dir << "'): " << strerror(nerr) << " (error " << nerr << ")";
0251       return -1;
0252     }
0253 
0254     if (statfs(i->dir, &sfs) < 0) {
0255       i->checked = true;
0256       int nerr = errno;
0257       edm::LogWarning("LocalFileSystem::statFSInfo()")
0258           << "Cannot statfs('" << i->dir << "'): " << strerror(nerr) << " (error " << nerr << ")";
0259       return -1;
0260     }
0261 
0262     i->dev = s.st_dev;
0263     i->fstype = sfs.f_type;
0264     if (sfs.f_bsize > 0) {
0265       i->freespc = sfs.f_bavail;
0266       i->freespc *= sfs.f_bsize;
0267       i->freespc /= 1024. * 1024. * 1024.;
0268     }
0269     i->checked = true;
0270   } else if (i->fstype == -1) {
0271     errno = ENOENT;
0272     ret = -1;
0273   }
0274 
0275   return ret;
0276 }
0277 
0278 /** Find the file system @a path was mounted from.  The statfs() and
0279     stat() information for @a path should be in @a sfs and @a s,
0280     respectively.
0281 
0282     Finds currently mounted file system that @a path is owned by, and
0283     returns the FSInfo object for it, or null if no matching live file
0284     system can be found.  If the return value is non-null, then it is
0285     guaranteed @a path was on that file system.
0286 
0287     A null return value is possible for certain paths which are not on
0288     any mounted file system (e.g. /dev or /selinux), or if the file
0289     system is unavailable or some other way dysfunctional, such as
0290     dead nfs mount or filesystem does not implement statfs().  */
0291 LocalFileSystem::FSInfo *LocalFileSystem::findMount(const char *path,
0292                                                     struct statfs *sfs,
0293                                                     struct stat *s,
0294                                                     std::vector<std::string> &prev_paths) const {
0295   for (const auto &old_path : prev_paths) {
0296     if (!strcmp(old_path.c_str(), path)) {
0297       edm::LogWarning("LocalFileSystem::findMount()") << "Found a loop in bind mounts; stopping evaluation.";
0298       return nullptr;
0299     }
0300   }
0301 
0302   FSInfo *best = nullptr;
0303   size_t bestlen = 0;
0304   size_t len = strlen(path);
0305   for (size_t i = 0; i < fs_.size(); ++i) {
0306     // First match simply against the file system path.  We don't
0307     // touch the file system until the path prefix matches.
0308     // When we have a path prefix match, check the file system if
0309     //   we don't have a best match candidate yet, OR
0310     //   this match is longer (more specific) than the previous best OR
0311     //   this match is the same length and the previous best isn't local
0312     // The final condition handles cases such as '/' that can appear twice
0313     // in the file system list, once as 'rootfs' and once as local fs.
0314     size_t fslen = strlen(fs_[i]->dir);
0315     if (!strncmp(fs_[i]->dir, path, fslen) &&
0316         ((fslen == 1 && fs_[i]->dir[0] == '/') || len == fslen || path[fslen] == '/') &&
0317         (!best || fslen > bestlen || (fslen == bestlen && !best->local))) {
0318       // Get the file system device and file system ids.
0319       if (statFSInfo(fs_[i]) < 0)
0320         return nullptr;
0321 
0322       // Check the path is on the same device / file system.  If this
0323       // fails, we found a better prefix match on path, but it's the
0324       // wrong device, so reset our idea of the best match: it can't
0325       // be the outer mount any more.  Not sure this is the right
0326       // thing to do with e.g. loop-back or union mounts.
0327       if (fs_[i]->dev != s->st_dev || fs_[i]->fstype != sfs->f_type) {
0328         best = nullptr;
0329         continue;
0330       }
0331 
0332       // OK this is better than anything else we found so far.
0333       best = fs_[i];
0334       bestlen = fslen;
0335     }
0336   }
0337   // In the case of a bind mount, try looking again at the source directory.
0338   if (best && best->bind && best->origin) {
0339     struct stat s2;
0340     struct statfs sfs2;
0341     char *fullpath = realpath(best->origin, nullptr);
0342 
0343     if (!fullpath)
0344       fullpath = strdup(best->origin);
0345 
0346     if (lstat(fullpath, &s2) < 0) {
0347       int nerr = errno;
0348       edm::LogWarning("LocalFileSystem::findMount()") << "Cannot lstat('" << fullpath << "' alias '" << path
0349                                                       << "'): " << strerror(nerr) << " (error " << nerr << ")";
0350       free(fullpath);
0351       return best;
0352     }
0353 
0354     if (statfs(fullpath, &sfs2) < 0) {
0355       int nerr = errno;
0356       edm::LogWarning("LocalFileSystem::findMount()") << "Cannot statfs('" << fullpath << "' alias '" << path
0357                                                       << "'): " << strerror(nerr) << " (error " << nerr << ")";
0358       free(fullpath);
0359       return best;
0360     }
0361 
0362     prev_paths.push_back(path);
0363     LocalFileSystem::FSInfo *new_best = findMount(fullpath, &sfs2, &s2, prev_paths);
0364     return new_best ? new_best : best;
0365   }
0366 
0367   return best;
0368 }
0369 
0370 /** Determine if @a path is on a file system known to be local.
0371 
0372     Returns @c true if the path is definitely known to be local.
0373     Returns @c false otherwise, including when it's not possible to
0374     determine anything about the path at all.
0375 
0376     Does not throw exceptions.  If any errors occur, the errors are
0377     reported as message logger warnings but the actual error is
0378     swallowed and the function simply returns @c false. */
0379 bool LocalFileSystem::isLocalPath(const std::string &path) const {
0380   struct stat s;
0381   struct statfs sfs;
0382   char *fullpath = realpath(path.c_str(), nullptr);
0383 
0384   if (!fullpath)
0385     fullpath = strdup(path.c_str());
0386 
0387   if (lstat(fullpath, &s) < 0) {
0388     int nerr = errno;
0389     edm::LogWarning("LocalFileSystem::isLocalPath()")
0390         << "Cannot lstat('" << fullpath << "' alias '" << path << "'): " << strerror(nerr) << " (error " << nerr << ")";
0391     free(fullpath);
0392     return false;
0393   }
0394 
0395   if (statfs(fullpath, &sfs) < 0) {
0396     int nerr = errno;
0397     edm::LogWarning("LocalFileSystem::isLocalPath()") << "Cannot statfs('" << fullpath << "' alias '" << path
0398                                                       << "'): " << strerror(nerr) << " (error " << nerr << ")";
0399     free(fullpath);
0400     return false;
0401   }
0402 
0403   std::vector<std::string> prev_paths;
0404   FSInfo *m = findMount(fullpath, &sfs, &s, prev_paths);
0405   free(fullpath);
0406 
0407   return m ? m->local : false;
0408 }
0409 
0410 /** Find a writeable directory among @a paths which is known to be
0411     local and has at least @a minFreeSpace amount of free space in
0412     gigabytes.
0413 
0414     The @a paths should contain list of relative or absolute candidate
0415     directories.  If an entry starts with letter "$" then the value of
0416     that environment variable is used instead; if the value is $TMPDIR
0417     and the environment variable is empty, "/tmp" is used instead.
0418 
0419     Returns the first path in @a paths which satisfies the criteria,
0420     expanded to environment variable value if appropriate, resolved
0421     to full absolute path.  If no suitable path can be found, returns
0422     an empty string.
0423 
0424     Does not throw exceptions.  If any serious errors occur, the errors
0425     are reported as message logger warnings but the actual error is
0426     swallowed and the directory concerned is skipped.  Non-existent
0427     and inaccessible directories are silently ignored without warning. */
0428 std::pair<std::string, std::string> LocalFileSystem::findCachePath(const std::vector<std::string> &paths,
0429                                                                    double minFreeSpace) const {
0430   struct stat s;
0431   struct statfs sfs;
0432   std::ostringstream warningst;
0433   warningst << "Cannot use lazy-download because:\n";
0434 
0435   for (size_t i = 0, e = paths.size(); i < e; ++i) {
0436     char *fullpath;
0437     const char *inpath = paths[i].c_str();
0438     const char *path = inpath;
0439 
0440     if (*path == '$') {
0441       char *p = std::getenv(path + 1);
0442       if (p && *p)
0443         path = p;
0444       else if (!strcmp(path, "$TMPDIR"))
0445         path = "/tmp";
0446     }
0447 
0448     if (!(fullpath = realpath(path, nullptr)))
0449       fullpath = strdup(path);
0450 
0451 #if 0
0452     std::cerr /* edm::LogInfo("LocalFileSystem") */
0453       << "Checking if '" << fullpath << "', from '"
0454       << inpath << "' is valid cache path with "
0455       << minFreeSpace << " free space" << std::endl;
0456 #endif
0457 
0458     if (lstat(fullpath, &s) < 0) {
0459       int nerr = errno;
0460       if (nerr != ENOENT && nerr != EACCES)
0461         edm::LogWarning("LocalFileSystem::findCachePath()") << "Cannot lstat('" << fullpath << "', from '" << inpath
0462                                                             << "'): " << strerror(nerr) << " (error " << nerr << ")";
0463       free(fullpath);
0464       continue;
0465     }
0466 
0467     if (statfs(fullpath, &sfs) < 0) {
0468       int nerr = errno;
0469       edm::LogWarning("LocalFileSystem::findCachePath()") << "Cannot statfs('" << fullpath << "', from '" << inpath
0470                                                           << "'): " << strerror(nerr) << " (error " << nerr << ")";
0471       free(fullpath);
0472       continue;
0473     }
0474 
0475     std::vector<std::string> prev_paths;
0476     FSInfo *m = findMount(fullpath, &sfs, &s, prev_paths);
0477 #if 0
0478     std::cerr /* edm::LogInfo("LocalFileSystem") */
0479       << "Candidate '" << fullpath << "': "
0480       << "found=" << (m ? 1 : 0)
0481       << " local=" << (m && m->local)
0482       << " free=" << (m ? m->freespc : 0)
0483       << " access=" << access(fullpath, W_OK)
0484       << std::endl;
0485 #endif
0486 
0487     if (m && m->local && m->freespc >= minFreeSpace && access(fullpath, W_OK) == 0) {
0488       std::string result(fullpath);
0489       free(fullpath);
0490       return std::make_pair(result, std::string());
0491     } else if (m) {
0492       if (!m->local) {
0493         warningst << "- The mount " << fullpath << " is not local.\n";
0494       } else if (m->freespc < minFreeSpace) {
0495         warningst << " - The mount at " << fullpath << " has only " << m->freespc << " GB free; a minumum of "
0496                   << minFreeSpace << " GB is required.\n";
0497       } else if (access(fullpath, W_OK)) {
0498         warningst << " - The process has no permission to write into " << fullpath << "\n";
0499       }
0500     }
0501 
0502     free(fullpath);
0503   }
0504 
0505   std::string warning_str = warningst.str();
0506   if (!warning_str.empty()) {
0507     warning_str = warning_str.substr(0, warning_str.size() - 2);
0508   }
0509 
0510   return std::make_pair(std::string(), std::move(warning_str));
0511 }
0512 
0513 /** Initialise local file system status.  */
0514 LocalFileSystem::LocalFileSystem() {
0515   if (readFSTypes() < 0)
0516     return;
0517 
0518   if (initFSList() < 0)
0519     return;
0520 }
0521 
0522 /** Free local file system status resources. */
0523 LocalFileSystem::~LocalFileSystem() {
0524   for (size_t i = 0, e = fs_.size(); i < e; ++i)
0525     free(fs_[i]);
0526 }