`

从文件名找到文件信息(namei)

阅读更多

本文档的Copyleft归yfydz所有,使用GPL发布,可以自由拷贝,转载,转载时请保持文档的完整性,严禁用于任何商业用途。
msn: yfydz_no1@hotmail.com
来源:http://yfydz.cublog.cn

1. 前言

inode是类Unix系统的文件系统的基本索引方法,每个文件都对应一个inode,再通过inode找到文件中的实际数据,因此根据文件路径名找到具体的inode节点就是一个很重要的处理步骤。系统会缓存用过的每个文件或目录对应的dentry结构, 从该结构可以指向相应的inode, 每次打开文件, 都会最终对应到文件的inode,中间查找过程称为namei。

本文介绍Linux下的路径到文件指针的转换过程,内核版本为2.6.19.2。

虚拟文件系统的转换源代码在fs/namei.c中,具体和文件系统相关的部分在fs/*/namei.c文件中。

2. 引子

由于这种转换是一个中间过程,在具体分析namei处理前,先看看系统的调用顺序是如何进入转换的:
当用户空间程序用open系统调用打开一个文件时,内核对应的处理是sys_open:
/* fs/open.c */
/*
 * Entry point for the open system call: ORs in O_LARGEFILE when
 * force_o_largefile() is true, then calls do_sys_open() with AT_FDCWD
 * as the directory fd and returns its result.
 */
asmlinkage long sys_open(const char __user *filename, int flags, int mode)
{
 long ret;
 if (force_o_largefile())
  flags |= O_LARGEFILE;
 ret = do_sys_open(AT_FDCWD, filename, flags, mode);
 /* avoid REGPARM breakage on x86: */
 prevent_tail_call(ret);
 return ret;
}
真正的打开函数是do_sys_open:
/* fs/open.c */
/*
 * Does the real work of open: copies the path name in from user space,
 * reserves a file descriptor, opens the file and installs it on that
 * descriptor. When called from sys_open(), dfd is AT_FDCWD.
 * Returns the file descriptor, or the error value obtained from
 * getname(), get_unused_fd() or do_filp_open().
 */
long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
{
// Bring the user-space file name into the kernel.
// tmp is a buffer taken from a kmem cache that holds the path name
// (see getname()).
 char *tmp = getname(filename);
 int fd = PTR_ERR(tmp);
 if (!IS_ERR(tmp)) {
// Reserve an unused file descriptor; no inode is involved at this point
  fd = get_unused_fd();
  if (fd >= 0) {
// Open the file: turn the file name into a struct file
   struct file *f = do_filp_open(dfd, tmp, flags, mode);
   if (IS_ERR(f)) {
    put_unused_fd(fd);
    fd = PTR_ERR(f);
   } else {
    fsnotify_open(f->f_dentry);
    fd_install(fd, f);
   }
  }
  putname(tmp);
 }
 return fd;
}

// Open a file by path name: returns its struct file, or an ERR_PTR on failure
static struct file *do_filp_open(int dfd, const char *filename, int flags,
     int mode)
{
// nd is a structure on the stack, not a pointer; open_namei() fills it in
 struct nameidata nd;
 int lookup_mode = flags;
 int rc;

// Adjust the access-mode bits by one where needed; this appears to convert
// them to the encoding described in the open_namei() header comment
// (01 read, 10 write, 11 read/write) -- confirm against fs/namei.c
 if ((lookup_mode + 1) & O_ACCMODE)
  lookup_mode += 1;

// Resolve the file name; nd receives the lookup result
 rc = open_namei(dfd, filename, lookup_mode, mode, &nd);
 if (rc)
  return ERR_PTR(rc);

// Lookup succeeded: convert the nameidata into a struct file pointer
 return nameidata_to_filp(&nd, flags);
}

因此重点函数是open_namei函数, 实现了从文件名到inode的转换, 也是namei的处理入口.

在分析open_namei前, 先看一下getname, 它使用kmem_cache来分配保存路径名的内核缓冲区:
// Copy a file name from user space into a kernel buffer.
// Returns the buffer, or an ERR_PTR() value on failure.
/* fs/namei.c */
char * getname(const char __user * filename)
{
 char *result;
/* include/linux/fs.h */
// __getname and __putname simply allocate from / free to a kmem cache:
// #define __getname() kmem_cache_alloc(names_cachep, SLAB_KERNEL)
// #define __putname(name) kmem_cache_free(names_cachep, (void *)(name))
// The names cache itself is created as
//  names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
//   SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 char *buf = __getname();

 if (!buf) {
// No buffer available
  result = ERR_PTR(-ENOMEM);
 } else {
// Buffer obtained: do the actual copy from user space
  int rc = do_getname(filename, buf);
  if (rc < 0) {
// Copy failed: give the buffer back and report the error
   __putname(buf);
   result = ERR_PTR(rc);
  } else {
// Success: the result is the filled buffer
   result = buf;
  }
 }
// Hand the result to the syscall audit hook; per the original annotation
// audit_getname() is empty when CONFIG_AUDITSYSCALL is not configured
 audit_getname(result);
 return result;
}

/*
 * Copy the user-supplied file name into page.
 * Returns 0 on success, -EFAULT when the user address is at or above
 * TASK_SIZE, -ENOENT for an empty name, -ENAMETOOLONG when the copy
 * filled the whole limit, or the negative value returned by
 * strncpy_from_user().
 */
static int do_getname(const char __user *filename, char *page)
{
 unsigned long limit = PATH_MAX;
 int copied;

 if (!segment_eq(get_fs(), KERNEL_DS)) {
  if ((unsigned long) filename >= TASK_SIZE)
   return -EFAULT;
// Never let the copy run past TASK_SIZE
  if (TASK_SIZE - (unsigned long) filename < PATH_MAX)
   limit = TASK_SIZE - (unsigned long) filename;
 }

// Copy the file name supplied by user space into the cache buffer
 copied = strncpy_from_user(page, filename, limit);
 if (copied < 0)
  return copied;
 if (copied == 0)
  return -ENOENT;
 if (copied < limit)
  return 0;
 return -ENAMETOOLONG;
}
 
3. namei相关数据结构
/* include/linux/namei.h */
// Working state of a path lookup; filled in step by step by the namei code
struct nameidata {
// dentry of the current position in the path
 struct dentry *dentry;
// vfsmount (mount) that dentry belongs to
 struct vfsmount *mnt;
// Last file or directory name in the path name
 struct qstr last;
 unsigned int flags;
 int  last_type;
// NOTE(review): the original annotation calls this the directory depth;
// it is more likely the symlink nesting depth that goes with
// saved_names[] -- confirm against fs/namei.c
 unsigned depth;
 char *saved_names[MAX_NESTED_LINKS + 1]; // 9 entries, assuming MAX_NESTED_LINKS is 8 -- TODO confirm
 /* Intent data */
// Data describing what the lookup is for
 union {
// Open intent; carries the pointer to the file being opened
  struct open_intent open;
 } intent;
};

// Parameters of an open request carried inside struct nameidata;
// filled in by __path_lookup_intent_open()
struct open_intent {
// Open flags
 int flags;
// Creation mode
 int create_mode;
// File pointer
 struct file *file;
};

// Path structure: an intermediate helper that simply bundles a mount
// (vfsmount) together with a dentry
struct path {
 struct vfsmount *mnt;
 struct dentry *dentry;
};

/* include/linux/dcache.h */
// Directory entry for a file or directory; these live in the system's
// dentry cache
struct dentry {
 atomic_t d_count;
 unsigned int d_flags;  /* protected by d_lock */
 spinlock_t d_lock;  /* per dentry lock */
 struct inode *d_inode;  /* Where the name belongs to - NULL is
      * negative */
 /*
  * The next three fields are touched by __d_lookup.  Place them here
  * so they all fit in a cache line.
  */
 struct hlist_node d_hash; /* lookup hash list */
 struct dentry *d_parent; /* parent directory */
 struct qstr d_name;
 struct list_head d_lru;  /* LRU list */
 /*
  * d_child and d_rcu can share memory
  */
 union {
  struct list_head d_child; /* child of parent list */
   struct rcu_head d_rcu;
 } d_u;
 struct list_head d_subdirs; /* our children */
 struct list_head d_alias; /* inode alias list */
 unsigned long d_time;  /* used by d_revalidate */
 struct dentry_operations *d_op;
 struct super_block *d_sb; /* The root of the dentry tree */
 void *d_fsdata;   /* fs-specific data */
#ifdef CONFIG_PROFILING
 struct dcookie_struct *d_cookie; /* cookie, if any */
#endif
 int d_mounted;
 unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */
};

/* include/linux/fs.h */
// Open-file structure; this is what the open path finally produces
struct file {
 /*
  * fu_list becomes invalid after file_free is called and queued via
  * fu_rcuhead for RCU freeing
  */
 union {
  struct list_head fu_list;
  struct rcu_head  fu_rcuhead;
 } f_u;
// dentry of the file
 struct dentry  *f_dentry;
// vfsmount (mount) the file lives on
 struct vfsmount         *f_vfsmnt;
// File operations
 const struct file_operations *f_op;
 atomic_t  f_count;
 unsigned int   f_flags;
 mode_t   f_mode;
 loff_t   f_pos;
 struct fown_struct f_owner;
 unsigned int  f_uid, f_gid;
 struct file_ra_state f_ra;
 unsigned long  f_version;
#ifdef CONFIG_SECURITY
 void   *f_security;
#endif
 /* needed for tty driver, and maybe others */
 void   *private_data;
#ifdef CONFIG_EPOLL
 /* Used by fs/eventpoll.c to link all the hooks to this file */
 struct list_head f_ep_links;
 spinlock_t  f_ep_lock;
#endif /* #ifdef CONFIG_EPOLL */
 struct address_space *f_mapping;
};
 
4. namei操作

4.1 open_namei

/* fs/namei.c */
/*
 * open_namei()
 *
 * namei for open - this is in fact almost the whole open-routine.
 *
 * Note that the low bits of "flag" aren't the same as in the open
 * system call - they are 00 - no permissions needed
 *     01 - read permission needed
 *     10 - write permission needed
 *     11 - read/write permissions needed
 * which is a lot more logical, and also allows the "no perm" needed
 * for symlinks (where the permissions are checked later).
 * SMP-safe
 *
 * Returns 0 with the lookup result left in *nd, or an error value.
 * Without O_CREAT this is a plain lookup followed by may_open(); with
 * O_CREAT the parent directory is looked up first, then the last
 * component is created or opened, following symbolic links by hand.
 */
int open_namei(int dfd, const char *pathname, int flag,
  int mode, struct nameidata *nd)
{
 int acc_mode, error;
 struct path path;
 struct dentry *dir;
 int count = 0;
// #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
// acc_mode is the access-permission mask later handed to may_open()
// (it is an access mode, not an audit mode)
 acc_mode = ACC_MODE(flag);
 /* O_TRUNC implies we need access checks for write permissions */
// Truncation: write permission is required
 if (flag & O_TRUNC)
  acc_mode |= MAY_WRITE;
 /* Allow the LSM permission hook to distinguish append
    access from general write access. */
// Append: add MAY_APPEND to the mask
 if (flag & O_APPEND)
  acc_mode |= MAY_APPEND;
 /*
  * The simplest case - just a plain lookup.
  */
// No file creation requested
 if (!(flag & O_CREAT)) {
// Look up the dentry and mount of pathname directly; the result goes into nd
  error = path_lookup_open(dfd, pathname, lookup_flags(flag),
      nd, flag);
  if (error)
   return error;
  goto ok;
 }
 /*
  * Create - we need to know the parent.
  */
// Look up with LOOKUP_PARENT and create intent; the result goes into nd
 error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode);
 if (error)
  return error;
 /*
  * We have the parent and last component. First of all, check
  * that we are not asked to creat(2) an obvious directory - that
  * will not do.
  */
 error = -EISDIR;
// The last component must be a normal name (LAST_NORM) and must be
// followed by the end of the string, not by further characters
 if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
  goto exit;
// dir is the parent directory's dentry
 dir = nd->dentry;
// Clear the "look up the parent" flag
 nd->flags &= ~LOOKUP_PARENT;
 mutex_lock(&dir->d_inode->i_mutex);
// Fill in path with the dentry that lookup_hash() returns for nd.
// dir and path.dentry are not the same thing: dir is the parent directory,
// while path.dentry is the entry for the last component, which
// open_namei_create() creates inside dir->d_inode.
 path.dentry = lookup_hash(nd);
 path.mnt = nd->mnt;
do_last:
// Bail out if path.dentry is an error pointer
 error = PTR_ERR(path.dentry);
 if (IS_ERR(path.dentry)) {
  mutex_unlock(&dir->d_inode->i_mutex);
  goto exit;
 }
// Bail out if nd->intent.open.file has become an error pointer; this is
// the struct file set up by __path_lookup_intent_open()
 if (IS_ERR(nd->intent.open.file)) {
  mutex_unlock(&dir->d_inode->i_mutex);
  error = PTR_ERR(nd->intent.open.file);
  goto exit_dput;
 }
 /* Negative dentry, just create the file */
 if (!path.dentry->d_inode) {
// Create the new file, then return
  error = open_namei_create(nd, &path, flag, mode);
  if (error)
   goto exit;
  return 0;
 }
// From here on we are opening a file that already exists
 /*
  * It already exists.
  */
 mutex_unlock(&dir->d_inode->i_mutex);
 audit_inode_update(path.dentry->d_inode);
 error = -EEXIST;
// O_EXCL demands that the file did not exist before; since it does, fail
 if (flag & O_EXCL)
  goto exit_dput;
 if (__follow_mount(&path)) {
  error = -ELOOP;
  if (flag & O_NOFOLLOW)
   goto exit_dput;
 }
 error = -ENOENT;
 if (!path.dentry->d_inode)
  goto exit_dput;
// If the inode's operations provide follow_link, handle it as a link
 if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
  goto do_link;
// Copy the dentry and mnt from path into the nameidata
 path_to_nameidata(&path, nd);
 error = -EISDIR;
// A directory cannot be opened on this path: return the error
 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
  goto exit;
ok:
// Pre-open checks on the dentry in nd and its inode
 error = may_open(nd, acc_mode, flag);
 if (error)
  goto exit;
 return 0;
// Error handling below: release what has been acquired and return the error
exit_dput:
 dput_path(&path, nd);
exit:
 if (!IS_ERR(nd->intent.open.file))
  release_open_intent(nd);
 path_release(nd);
 return error;
// Symbolic-link handling: resolve the link, then go round again via
// do_last; the count variable bounds the number of rounds
do_link:
 error = -ELOOP;
 if (flag & O_NOFOLLOW)
  goto exit_dput;
 /*
  * This is subtle. Instead of calling do_follow_link() we do the
  * thing by hands. The reason is that this way we have zero link_count
  * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
  * After that we have the parent and last component, i.e.
  * we are in the same situation as after the first path_walk().
  * Well, almost - if the last component is normal we get its copy
  * stored in nd->last.name and we will have to putname() it when we
  * are done. Procfs-like symlinks just set LAST_BIND.
  */
// Set the LOOKUP_PARENT flag again
 nd->flags |= LOOKUP_PARENT;
 error = security_inode_follow_link(path.dentry, nd);
 if (error)
  goto exit_dput;
// Follow the symbolic link; on failure only the open intent is released
// here before returning
 error = __do_follow_link(&path, nd);
 if (error) {
  /* Does someone understand code flow here? Or it is only
   * me so stupid? Anathema to whoever designed this non-sense
   * with "intent.open".
   */
  release_open_intent(nd);
  return error;
 }
 nd->flags &= ~LOOKUP_PARENT;
// Examine the type of the last file or directory name component
 if (nd->last_type == LAST_BIND)
  goto ok;
 error = -EISDIR;
 if (nd->last_type != LAST_NORM)
  goto exit;
 if (nd->last.name[nd->last.len]) {
  __putname(nd->last.name);
  goto exit;
 }
 error = -ELOOP;
// Loop protection: fail with -ELOOP once count has already reached 32
 if (count++==32) {
  __putname(nd->last.name);
  goto exit;
 }
 dir = nd->dentry;
 mutex_lock(&dir->d_inode->i_mutex);
// Refresh path's dentry and mount for the next round
 path.dentry = lookup_hash(nd);
 path.mnt = nd->mnt;
 __putname(nd->last.name);
 goto do_last;
}

4.2  path_lookup_open和path_lookup_create

这两个函数找到路径名对应的挂接点和dentry结构, 赋值到nameidata结构中; path_lookup_create只是在查找时附加LOOKUP_CREATE标志和create_mode, 文件不存在时真正的新建操作是在open_namei_create中完成的(见4.3):
/**
 * path_lookup_open - lookup a file path with open intent
 * @dfd: the directory to use as base, or AT_FDCWD
 * @name: pointer to file name
 * @lookup_flags: lookup intent flags
 * @nd: pointer to nameidata
 * @open_flags: open intent flags
 *
 * Thin wrapper: calls __path_lookup_intent_open() with a create mode
 * of 0 and returns its result.
 */
int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags,
  struct nameidata *nd, int open_flags)
{
 return __path_lookup_intent_open(dfd, name, lookup_flags, nd,
   open_flags, 0);
}

/**
 * path_lookup_create - lookup a file path with open + create intent
 * @dfd: the directory to use as base, or AT_FDCWD
 * @name: pointer to file name
 * @lookup_flags: lookup intent flags
 * @nd: pointer to nameidata
 * @open_flags: open intent flags
 * @create_mode: create intent flags
 *
 * Thin wrapper: ORs LOOKUP_CREATE into @lookup_flags, passes
 * @create_mode through to __path_lookup_intent_open() and returns
 * its result.
 */
static int path_lookup_create(int dfd, const char *name,
         unsigned int lookup_flags, struct nameidata *nd,
         int open_flags, int create_mode)
{
 return __path_lookup_intent_open(dfd, name, lookup_flags|LOOKUP_CREATE,
   nd, open_flags, create_mode);
}

这两个函数都是调用__path_lookup_intent_open, 只是参数不同,create中加入了LOOKUP_CREATE标志和create_mode:

/*
 * Common helper behind path_lookup_open() and path_lookup_create():
 * attaches a fresh struct file and the open parameters to nd's open
 * intent, then runs the path lookup with LOOKUP_OPEN added.
 * Returns 0 on success, -ENFILE when no struct file is available, or
 * the error from the lookup / the intent's file pointer.
 */
static int __path_lookup_intent_open(int dfd, const char *name,
  unsigned int lookup_flags, struct nameidata *nd,
  int open_flags, int create_mode)
{
// Obtain an empty struct file
 struct file *candidate = get_empty_filp();
 int rc;

// None available: the file table is full
 if (!candidate)
  return -ENFILE;

// Record the open parameters in the nameidata
 nd->intent.open.file = candidate;
 nd->intent.open.flags = open_flags;
 nd->intent.open.create_mode = create_mode;

// Do the actual path lookup; name is the path name
 rc = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd);

// The state of nd->intent.open.file is examined before rc
 if (!IS_ERR(nd->intent.open.file)) {
// File pointer still valid: drop the open intent if the lookup failed
  if (rc)
   release_open_intent(nd);
  return rc;
 }

// File pointer is an error pointer and the lookup failed as well:
// report the lookup error
 if (rc)
  return rc;

// The lookup itself succeeded: report the file pointer's error instead
// and release the path that was found
 rc = PTR_ERR(nd->intent.open.file);
 path_release(nd);
 return rc;
}

// Path lookup: pick the starting directory (root, current working
// directory, or the directory behind dfd) and walk name from there
/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
static int fastcall do_path_lookup(int dfd, const char *name,
    unsigned int flags, struct nameidata *nd)
{
 int retval = 0;
 int fput_needed;
 struct file *file;
// The fs_struct is taken from the current process
 struct fs_struct *fs = current->fs;
// last_type defaults to LAST_ROOT, which is what remains if the path
// consists of slashes only
 nd->last_type = LAST_ROOT; /* if there are only slashes... */
 nd->flags = flags;
 nd->depth = 0;
// The branches below choose the starting mount and dentry and take a
// reference on each (mntget/dget)
 if (*name=='/') {
// Absolute path
  read_lock(&fs->lock);
  if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
// An alternate root is set and LOOKUP_NOALT is not: try it first.
// NOTE(review): the original annotation equates altroot with chroot;
// the chroot root would be fs->root, and altroot looks like the root
// used by __emul_lookup_dentry() -- confirm
   nd->mnt = mntget(fs->altrootmnt);
   nd->dentry = dget(fs->altroot);
   read_unlock(&fs->lock);
   if (__emul_lookup_dentry(name,nd))
    goto out; /* found in altroot */
   read_lock(&fs->lock);
  }
// Take a reference on rootmnt and store it in nd
  nd->mnt = mntget(fs->rootmnt);
// Take a reference on the root dentry and store it in nd
  nd->dentry = dget(fs->root);
  read_unlock(&fs->lock);
 } else if (dfd == AT_FDCWD) {
// Calls coming from sys_open land here for relative paths: the walk
// starts at the current working directory
  read_lock(&fs->lock);
// Take a reference on pwdmnt and store it in nd
  nd->mnt = mntget(fs->pwdmnt);
// Take a reference on pwd and store it in nd
  nd->dentry = dget(fs->pwd);
  read_unlock(&fs->lock);
 } else {
  struct dentry *dentry;
// Lightweight fd-to-file lookup; fput_needed tells fput_light() later
// whether a reference has to be dropped
  file = fget_light(dfd, &fput_needed);
  retval = -EBADF;
  if (!file)
   goto out_fail;
  dentry = file->f_dentry;
  retval = -ENOTDIR;
  if (!S_ISDIR(dentry->d_inode->i_mode))
   goto fput_fail;
// Check execute permission on the file
  retval = file_permission(file, MAY_EXEC);
  if (retval)
   goto fput_fail;
// Take a reference on f_vfsmnt
  nd->mnt = mntget(file->f_vfsmnt);
  nd->dentry = dget(dentry);
// Lightweight release of the file
  fput_light(file, fput_needed);
 }
// Reset the total link count
 current->total_link_count = 0;
// Walk the path; this is the core function
 retval = link_path_walk(name, nd);
out:
 if (likely(retval == 0)) {
// The common case: the path was resolved successfully
  if (unlikely(!audit_dummy_context() && nd && nd->dentry &&
    nd->dentry->d_inode))
  audit_inode(name, nd->dentry->d_inode);
 }
out_fail:
 return retval;
fput_fail:
 fput_light(file, fput_needed);
 goto out_fail;
}

do_path_lookup调用的核心函数是link_path_walk:

/*
 * Wrapper to retry pathname resolution whenever the underlying
 * file system returns an ESTALE.
 *
 * Retry the whole path once, forcing real lookup requests
 * instead of relying on the dcache.
 */
int fastcall link_path_walk(const char *name, struct nameidata *nd)
{
// Keep a copy of the starting state first
 struct nameidata save = *nd;
 int result;
 /* make sure the stuff we saved doesn't go away */
 dget(save.dentry);
 mntget(save.mnt);
 result = __link_path_walk(name, nd);
 if (result == -ESTALE) {
// ESTALE is the "stale file handle" error.
// Restore nd from the saved copy, set LOOKUP_REVAL and walk once more
  *nd = save;
  dget(nd->dentry);
  mntget(nd->mnt);
  nd->flags |= LOOKUP_REVAL;
  result = __link_path_walk(name, nd);
 }
 dput(save.dentry);
 mntput(save.mnt);
 return result;
}

真正的名称解析函数__link_path_walk:
/*
 * Name resolution.
 * This is the basic name resolution function, turning a pathname into
 * the final dentry. We expect 'base' to be positive and a directory.
 *
 * Returns 0 and nd will have valid dentry and mnt on success.
 * Returns error and drops reference to input namei data on failure.
 */
static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
{
 struct path next;
 struct inode *inode;
 int err;
 unsigned int lookup_flags = nd->flags;
// Skip redundant leading '/' characters; any number of them is accepted
 while (*name=='/')
  name++;
// Empty path
 if (!*name)
  goto return_reval;
// inode of the directory the walk currently stands on
 inode = nd->dentry->d_inode;
 if (nd->depth)
  lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
 /* At this point we know we have a real path component. */
 for(;;) {
// Each iteration extracts one '/'-separated component of the path
  unsigned long hash;
  struct qstr this;
  unsigned int c;
  nd->flags |= LOOKUP_CONTINUE;
// Quick permission check on the current inode; 0 means allowed
  err = exec_permission_lite(inode, nd);
  if (err == -EAGAIN)
// On -EAGAIN fall back to the full MAY_EXEC check.
// NOTE(review): the original annotation says -EAGAIN means the inode is
// being operated on; more likely the quick check could not decide -- confirm
   err = vfs_permission(nd, MAY_EXEC);
// Any error ends the loop
   if (err)
   break;
// Fill in the qstr ("quick string") for this component
  this.name = name;
// Value of the first character of name
  c = *(const unsigned char *)name;
// Hash the component name, up to but not including '/'
  hash = init_name_hash();
  do {
   name++;
   hash = partial_name_hash(c, hash);
   c = *(const unsigned char *)name;
  } while (c && (c != '/'));
// Length of this component
  this.len = name - (const char *) this.name;
// Final hash value
  this.hash = end_name_hash(hash);
  /* remove trailing slashes? */
// c == 0 means this was the last component of the path
  if (!c)
   goto last_component;
// Skip the '/' separators that follow
  while (*++name == '/');
// End of string reached: the path ended with '/'
  if (!*name)
   goto last_with_slashes;
  /*
   * "." and ".." are special - ".." especially so because it has
   * to be able to know about the current root directory and
   * parent relationships.
   */
// The component starts with '.'
  if (this.name[0] == '.') switch (this.len) {
   default:
// Some other file or directory name that merely starts with '.'
    break;
   case 2: 
// Second character is not '.': an ordinary two-character name
    if (this.name[1] != '.')
     break;
// The component is exactly "..": move nd to the parent directory,
// refresh inode and go round the loop again
    follow_dotdot(nd);
    inode = nd->dentry->d_inode;
    /* fallthrough */
   case 1:
// The component is "." (current directory): ignore it and loop again
    continue;
  }
  /*
   * See if the low-level filesystem might want
   * to use its own hash..
   */
// The underlying filesystem supplies its own hash function
  if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
   err = nd->dentry->d_op->d_hash(nd->dentry, &this);
   if (err < 0)
    break;
  }
  /* This does the actual lookups.. */
// Look the component up by name
  err = do_lookup(nd, &this, &next);
  if (err)
   break;
  err = -ENOENT;
// inode now refers to the component just found
  inode = next.dentry->d_inode;
// No inode behind the dentry: go to error handling
  if (!inode)
   goto out_dput;
  err = -ENOTDIR;
  if (!inode->i_op)
   goto out_dput;
  if (inode->i_op->follow_link) {
// Follow the symbolic link; per the original annotation do_follow_link()
// also deals with links that refer to each other recursively
   err = do_follow_link(&next, nd);
   if (err)
    goto return_err;
   err = -ENOENT;
// inode becomes the inode the link resolved to
   inode = nd->dentry->d_inode;
   if (!inode)
    break;
   err = -ENOTDIR;
   if (!inode->i_op)
    break;
  } else
// nd advances to the next level of the path
   path_to_nameidata(&next, nd);
  err = -ENOTDIR;
  if (!inode->i_op->lookup)
   break;
// Go on with the next component
  continue;
  /* here ends the main loop */
// Handling of the final component; similar to the code above
last_with_slashes:
// The path ended with '/', so the last component must be a directory
  lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
last_component:
  /* Clear LOOKUP_CONTINUE iff it was previously unset */
  nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
  if (lookup_flags & LOOKUP_PARENT)
   goto lookup_parent;
  if (this.name[0] == '.') switch (this.len) {
   default:
    break;
   case 2: 
// Not "..": carry on with the normal handling
    if (this.name[1] != '.')
     break;
// The name is "..": move to the parent directory
    follow_dotdot(nd);
    inode = nd->dentry->d_inode;
    /* fallthrough */
   case 1:
// The name is exactly ".": jump to the return handling
    goto return_reval;
  }
// Ordinary name.
// The underlying filesystem supplies its own hash function
  if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
   err = nd->dentry->d_op->d_hash(nd->dentry, &this);
   if (err < 0)
    break;
  }
// Look up the final component
  err = do_lookup(nd, &this, &next);
  if (err)
   break;
  inode = next.dentry->d_inode;
  if ((lookup_flags & LOOKUP_FOLLOW)
      && inode && inode->i_op && inode->i_op->follow_link) {
   err = do_follow_link(&next, nd);
   if (err)
    goto return_err;
   inode = nd->dentry->d_inode;
  } else
// Update mnt and dentry in the nameidata
   path_to_nameidata(&next, nd);
  err = -ENOENT;
  if (!inode)
   break;
  if (lookup_flags & LOOKUP_DIRECTORY) {
   err = -ENOTDIR;
   if (!inode->i_op || !inode->i_op->lookup)
    break;
  }
  goto return_base;
lookup_parent:
// Copy the current qstr (this) into nd->last;
// the type starts out as LAST_NORM
  nd->last = this;
  nd->last_type = LAST_NORM;
  if (this.name[0] != '.')
   goto return_base;
  if (this.len == 1)
   nd->last_type = LAST_DOT;
  else if (this.len == 2 && this.name[1] == '.')
   nd->last_type = LAST_DOTDOT;
  else
   goto return_base;
return_reval:
// Return path for the cases that skipped do_lookup()
  /*
   * We bypassed the ordinary revalidation routines.
   * We may need to check the cached dentry for staleness.
   */
  if (nd->dentry && nd->dentry->d_sb &&
      (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
   err = -ESTALE;
   /* Note: we do not d_invalidate() */
   if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd))
    break;
  }
return_base:
  return 0;
out_dput:
  dput_path(&next, nd);
  break;
 }
// Getting here means an error occurred
 path_release(nd);
return_err:
 return err;
}
 
/*
 *  It's more convoluted than I'd like it to be, but... it's still fairly
 *  small and for now I'd prefer to have fast path as straight as possible.
 *  It _is_ time-critical.
 */
static int do_lookup(struct nameidata *nd, struct qstr *name,
       struct path *path)
{
 struct vfsmount *mnt = nd->mnt;
// Search the cached dentries for one whose parent is nd->dentry and
// whose name is name
 struct dentry *dentry = __d_lookup(nd->dentry, name);
// Not cached: go and do the real lookup
 if (!dentry)
  goto need_lookup;
// The dentry has a d_revalidate operation: revalidate it before use
 if (dentry->d_op && dentry->d_op->d_revalidate)
  goto need_revalidate;
done:
// Found: fill in path with the mount (mnt) and the dentry
 path->mnt = mnt;
 path->dentry = dentry;
 __follow_mount(path);
 return 0;
need_lookup:
// Do the real lookup. real_lookup() searches the dcache once more
// (d_lookup) after taking the directory's i_mutex, in case the entry was
// created while it waited, and only calls the filesystem's ->lookup when
// that also misses. The filesystem-specific behaviour is reached through
// real_lookup().
 dentry = real_lookup(nd->dentry, name, nd);
 if (IS_ERR(dentry))
  goto fail;
 goto done;
need_revalidate:
// Revalidate the cached dentry
 dentry = do_revalidate(dentry, nd);
 if (!dentry)
  goto need_lookup;
 if (IS_ERR(dentry))
  goto fail;
 goto done;
fail:
 return PTR_ERR(dentry);
}

/*
 * This is called when everything else fails, and we actually have
 * to go to the low-level filesystem to find out what we should do..
 *
 * We get the directory semaphore, and after getting that we also
 * make sure that nobody added the entry to the dcache in the meantime..
 * SMP-safe
 */
static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
{
 struct dentry * result;
 struct inode *dir = parent->d_inode;
 mutex_lock(&dir->i_mutex);
 /*
  * First re-do the cached lookup just in case it was created
  * while we waited for the directory semaphore..
  *
  * FIXME! This could use version numbering or similar to
  * avoid unnecessary cache lookups.
  *
  * The "dcache_lock" is purely to protect the RCU list walker
  * from concurrent renames at this point (we mustn't get false
  * negatives from the RCU list walk here, unlike the optimistic
  * fast walk).
  *
  * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
  */
// Search the cached dentries again
 result = d_lookup(parent, name);
 if (!result) {
// Not found: allocate a new dentry
  struct dentry * dentry = d_alloc(parent, name);
  result = ERR_PTR(-ENOMEM);
  if (dentry) {
// Call the inode's lookup operation; this part is filesystem specific
   result = dir->i_op->lookup(dir, dentry, nd);
   if (result)
// ->lookup returned non-NULL: that value is returned and the dentry
// allocated above is dropped.
// NOTE(review): the original annotation calls this case a failure; the
// value may be an error pointer or possibly another dentry -- confirm
    dput(dentry);
   else
// ->lookup returned NULL: the dentry allocated above is the result
    result = dentry;
  }
  mutex_unlock(&dir->i_mutex);
  return result;
 }
 /*
  * Uhhuh! Nasty case: the cache was re-populated while
  * we waited on the semaphore. Need to revalidate.
  */
// The dentry was found in the cache: revalidate it if it has d_revalidate
 mutex_unlock(&dir->i_mutex);
 if (result->d_op && result->d_op->d_revalidate) {
  result = do_revalidate(result, nd);
  if (!result)
   result = ERR_PTR(-ENOENT);
 }
 return result;
}

小结一下函数调用顺序:
path_lookup_open    path_lookup_create
     |                     |
     V                     V
   __path_lookup_intent_open
               |
               V
        do_path_lookup
               |
               V
        link_path_walk
               |
               V
      __link_path_walk
               |
               V
           do_lookup
               |
               V
          real_lookup

这些函数操作都属于虚拟文件系统操作, 对所有类型的文件系统都适用, 而从各个FS的具体实现才能看出差异和相关标志的作用.

4.3 open_namei_create

/*
 * Create the file for open_namei() when the last component does not
 * exist yet: calls vfs_create() in the parent directory, unlocks the
 * parent's i_mutex (locked by open_namei() before it gets here), makes
 * nd->dentry point at the new dentry and finishes with may_open().
 * Returns 0 on success, or the error from vfs_create() / may_open().
 */
static int open_namei_create(struct nameidata *nd, struct path *path,
    int flag, int mode)
{
 int error;
// The dentry nd currently holds, i.e. the parent directory
 struct dentry *dir = nd->dentry;
// Apply the process umask unless the directory has POSIX ACLs enabled
 if (!IS_POSIXACL(dir->d_inode))
  mode &= ~current->fs->umask;
 error = vfs_create(dir->d_inode, path->dentry, mode, nd);
 mutex_unlock(&dir->d_inode->i_mutex);
// Drop the parent's dentry and let nd refer to the new entry instead;
// this happens whether or not vfs_create() succeeded
 dput(nd->dentry);
 nd->dentry = path->dentry;
 if (error)
  return error;
 /* Don't check for write permission, don't truncate */
 return may_open(nd, 0, flag & ~O_TRUNC);
}

4.4 path_to_nameidata

// Move the mount and dentry of path into the nameidata, dropping what
// the nameidata held before
static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
{
// Release the previous dentry
 dput(nd->dentry);
// Release the previous mount too, but only if the mount changes
 if (nd->mnt != path->mnt)
  mntput(nd->mnt);
// Store the new path values in the nameidata
 nd->mnt = path->mnt;
 nd->dentry = path->dentry;
}

5. 结论

打开文件时, 目的是要生成一个struct file结构的指针, 该结构中有dentry指针(文件名可通过dentry得到), 挂接点(vfsmount)等信息, 而struct nameidata作为一个中间过程结构保存相关的处理结果, 最终返回需要的文件信息。
分享到:
评论

相关推荐

    namei-开源

    Namei将遵循路径名,直到找到终点为止(例如,文件,目录,字符设备等)。 如果namei找到一个符号链接,它将显示该链接并开始跟随它,使输出缩进以显示上下文。

    namei.rar_logic

    Complete rewrite of the pathname lookup logic.

    namei.rar_V2

    QNX6 file system, Linux implementation for Linux v2.13.6.

    namei.rar_The Other

    Like most other Unix systems, set the ctime for inodes on a rename.

    namei_vfat.rar_Always

    This is not negative dentry. Always valid.

    namei_vfat.rar_We See

    This may be nfsd (or something), anyway, we can t see the intent of this. So, since this can be for creation, drop it.

    namei.rar_Linux/Unix编程_Unix_Linux_

    Squashfs - a compressed read only filesystem for Linux

    lsi:遵循文件路径的元素

    它从克隆了与namei实用程序相同的功能,该手册的手册页也准确地描述了lsi : lsi将其参数解释为任何类型的Unix文件(符号链接,文件,目录等)的路径名。 然后, lsi跟随每个路径名,直到找到端点(文件,目录,...

    莱昂氏UNIX源代码分析(全面剖析unix)PDF

    19.4 namei(7518) 324 19.5 一些注释 325 19.6 link(5909) 326 19.7 wdir(7477) 327 19.8 maknode(7455) 327 19.9 unlink(3510) 327 19.10 mknod(5952) 327 19.11 access(6746) 328 第20章 文件系统 329 20.1 超级块...

    莱昂氏UNIX源代码分析

    19.4 namei(7518) 324 19.5 一些注释 325 19.6 link(5909) 326 19.7 wdir(7477) 327 19.8 maknode(7455) 327 19.9 unlink(3510) 327 19.10 mknod(5952) 327 19.11 access(6746) 328 第20章 文件系统 329 20.1 超级块...

    求学生成绩程序c++版

    // char namei[20]; float score1,score2,score3; student aSA[size]; //执行缺省构造函数 for(int i=0;i;i++) { char *namei=new char[20]; //动态申请空间 cout; cout;cin&gt;&gt;numberi; cout;cin&gt;&gt;...

    C_C++软件工程师就业求职手册

    C_C++软件工程师就业求职手册 电子版书,有需要的可以下来看看。

    linux内核 0.11版本源码 带中文注释

    │ namei.c │ open.c │ pipe.c │ read_write.c │ stat.c │ super.c │ truncate.c │ ├─include │ │ a.out.h │ │ const.h │ │ ctype.h │ │ errno.h │ │ fcntl.h │ │ signal.h │ │ stdarg.h │ ...

    带中文注释的 linux 0.11 源代码

    .....\..\namei.c .....\..\open.c .....\..\pipe.c .....\..\read_write.c .....\..\stat.c .....\..\super.c .....\..\truncate.c .....\include .....\.......\a.out.h .....\.......\asm .....\........

    util-linux-ng-2.17源码(含fdisk)

    util-linux-ng-2.17.2/misc-utils/namei.c [code] util-linux-ng-2.17.2/misc-utils/procs.c [code] util-linux-ng-2.17.2/misc-utils/rename.c [code] util-linux-ng-2.17.2/misc-utils/script.c [code] util-...

Global site tag (gtag.js) - Google Analytics