@Overridepublicintrun(String argv[])throws Exception {// 初始化FsShll,包括注册命令类init();int exitCode =-1;if(argv.length <1){printUsage(System.err);//打印使用方法}else{
String cmd = argv[0];//取到第一个参数,即 ls
Command instance = null;try{
instance = commandFactory.getInstance(cmd);//实例化Command,即new Ls()if(instance == null){thrownewUnknownCommandException();}//调用LS的父类Command.run
exitCode = instance.run(Arrays.copyOfRange(argv,1, argv.length));}catch(IllegalArgumentException e){displayError(cmd, e.getLocalizedMessage());if(instance != null){printInstanceUsage(System.err, instance);}}catch(Exception e){// instance.run catches IOE, so something is REALLY wrong if here
LOG.debug("Error", e);displayError(cmd,"Fatal internal error");
e.printStackTrace(System.err);}}return exitCode;}
(1)重点分析一:init()如下:
protectedvoidinit()throws IOException {getConf().setQuietMode(true);//开启安静模式if(commandFactory == null){//实例话CommandFactory
commandFactory =newCommandFactory(getConf());//为commandFactory注册命令-对象:help,usage (实例化了commandClass,放入commandFactory的objectMap中)
commandFactory.addObject(newHelp(),"-help");
commandFactory.addObject(newUsage(),"-usage");// 注册命令-class,调用registerCommands方法(未实例化commandClass,延迟实例化,在真正使用的时候在通过反射进行实例化,放入commandFactory的classMap中)registerCommands(commandFactory);}}protectedvoidregisterCommands(CommandFactory factory){// TODO: DFSAdmin subclasses FsShell so need to protect the command// registration. This class should morph into a base class for// commands, and then this method can be abstractif(this.getClass().equals(FsShell.class)){// 调用CommandFactory类的registerCommands方法// 注意,这里传的参数是类FsCommand
factory.registerCommands(FsCommand.class);}}
protectedvoidprocessRawArguments(LinkedList<String> args)throws IOException {processArguments(expandArguments(args));}*************分两部分:首先分析expandArguments(args)********************//将参数由list<string>转换为list<pathData>protected LinkedList<PathData>expandArguments(LinkedList<String> args)throws IOException {
LinkedList<PathData> expandedArgs =newLinkedList<PathData>();for(String arg : args){try{
expandedArgs.addAll(expandArgument(arg));}catch(IOException e){// other exceptions are probably nastydisplayError(e);}}return expandedArgs;}//将单个arg转化为PathData列表,为何单个会转化为列表?这是因为arg支持通配符匹配(这也是Glob单词的含义-通配)protected List<PathData>expandArgument(String arg)throws IOException {
PathData[] items = PathData.expandAsGlob(arg,getConf());if(items.length ==0){// it's a glob that failed to matchthrownewPathNotFoundException(arg);}return Arrays.asList(items);}/**
 * 重点来看 expandAsGlob 方法,它的流程如下:
 *   1) 将 String 转化为 Path;
 *   2) 通过 Path 实例化 FileSystem;
 *   3) 通过 FileSystem 获取到 Path 对应的 FileStatus 列表;
 *   4) 将 FileStatus 列表包装为 PathData[]。
 * FileStatus 类封装了文件系统中文件和目录的元数据,包括文件长度、块大小、复本数、修改时间、访问时间、所有者、权限信息。
*/
public static PathData[] expandAsGlob(String pattern, Configuration conf)
    throws IOException {
  // Summary: turns a (possibly glob) path string into a sorted array of
  // PathData. The pattern is resolved against its FileSystem, each matching
  // FileStatus is wrapped in a PathData, and the path string of every match
  // keeps the same form (with scheme / absolute / relative) as the pattern.
  Path globPath = new Path(pattern);
  // Create the FileSystem instance from the path URI's scheme together with
  // the conf settings; see Appendix 1 for the detailed analysis.
  FileSystem fs = globPath.getFileSystem(conf);
  // Fetch the FileStatus entries for the glob path; see Appendix 2 for the
  // detailed analysis.
  FileStatus[] stats = fs.globStatus(globPath);
  PathData[] items = null;

  // If nothing in the file system matched this path, build a single PathData
  // with a null stat instead. Note: no exception is thrown for non-existent
  // paths here because commands such as touch and mkdir need to create the
  // path.
  if (stats == null) {
    // remove any quoting in the glob pattern
    pattern = pattern.replaceAll("\\\\(.)", "$1");
    // not a glob & file not found, so add the path with a null stat
    items = new PathData[] {new PathData(fs, pattern, null)};
  } else {
    // figure out what type of glob path was given, will convert globbed
    // paths to match the type to preserve relativity
    PathType globType;
    URI globUri = globPath.toUri();
    if (globUri.getScheme() != null) {
      globType = PathType.HAS_SCHEME;
    } else if (!globUri.getPath().isEmpty()
        && new Path(globUri.getPath()).isAbsolute()) {
      globType = PathType.SCHEMELESS_ABSOLUTE;
    } else {
      globType = PathType.RELATIVE;
    }

    // convert stats to PathData: wrap each FileStatus in a PathData
    // (the array is sorted after the loop)
    items = new PathData[stats.length];
    int i = 0;
    for (FileStatus stat : stats) {
      URI matchUri = stat.getPath().toUri();
      String globMatch = null;
      switch (globType) {
        case HAS_SCHEME: // use as-is, but remove authority if necessary
          if (globUri.getAuthority() == null) {
            matchUri = removeAuthority(matchUri);
          }
          globMatch = uriToString(matchUri, false);
          break;
        case SCHEMELESS_ABSOLUTE: // take just the uri's path
          globMatch = matchUri.getPath();
          break;
        case RELATIVE: // make it relative to the current working dir
          URI cwdUri = fs.getWorkingDirectory().toUri();
          globMatch = relativize(cwdUri, matchUri, stat.isDirectory());
          break;
      }
      items[i++] = new PathData(fs, globMatch, stat);
    }
  }
  // Sort ascending by path; see PathData's compareTo method.
  Arrays.sort(items);
  return items;
}
接着重点看看 fs.globStatus(globPath),这是 fs 真正获取 FileStatus 的过程,如下:
父类FileSystem.globStatus
/**
 * Returns the file statuses matching the given path pattern by delegating
 * to a {@code Globber} built from this file system, the pattern and
 * {@code DEFAULT_FILTER}.
 *
 * @param pathPattern the path pattern to match
 * @return whatever {@code Globber.glob()} returns for the pattern
 * @throws IOException if the glob operation fails
 */
public FileStatus[] globStatus(Path pathPattern) throws IOException {
  return new Globber(this, pathPattern, DEFAULT_FILTER).glob();
}
真正的获取FileStatus在glob()中:
public FileStatus[]glob()throws IOException {// First we get the scheme and authority of the pattern that was passed// in.
String scheme =schemeFromPath(pathPattern);
String authority =authorityFromPath(pathPattern);// Next we strip off everything except the pathname itself, and expand all// globs. Expansion is a process which turns "grouping" clauses,// expressed as brackets, into separate path patterns.
String pathPatternString = pathPattern.toUri().getPath();
List<String> flattenedPatterns = GlobExpander.expand(pathPatternString);// Now loop over all flattened patterns. In every case, we'll be trying to// match them to entries in the filesystem.
ArrayList<FileStatus> results =newArrayList<FileStatus>(flattenedPatterns.size());boolean sawWildcard =false;for(String flatPattern : flattenedPatterns){// Get the absolute path for this flattened pattern. We couldn't do // this prior to flattening because of patterns like {/,a}, where which// path you go down influences how the path must be made absolute.
Path absPattern =fixRelativePart(newPath(
flatPattern.isEmpty() Path.CUR_DIR : flatPattern));// Now we break the flattened, absolute pattern into path components.// For example, /a/*/c would be broken into the list [a, *, c]
List<String> components =getPathComponents(absPattern.toUri().getPath());// Starting out at the root of the filesystem, we try to match// filesystem entries against pattern components.
ArrayList<FileStatus> candidates =newArrayList<FileStatus>(1);// To get the "real" FileStatus of root, we'd have to do an expensive// RPC to the NameNode. So we create a placeholder FileStatus which has// the correct path, but defaults for the rest of the information.// Later, if it turns out we actually want the FileStatus of root, we'll// replace the placeholder with a real FileStatus obtained from the// NameNode.
FileStatus rootPlaceholder;if(Path.WINDOWS &&!components.isEmpty()&& Path.isWindowsAbsolutePath(absPattern.toUri().getPath(),true)){// On Windows the path could begin with a drive letter, e.g. /E:/foo.// We will skip matching the drive letter and start from listing the// root of the filesystem on that drive.
String driveLetter = components.remove(0);
rootPlaceholder =newFileStatus(0,true,0,0,0,newPath(scheme,
authority, Path.SEPARATOR + driveLetter + Path.SEPARATOR));}else{
rootPlaceholder =newFileStatus(0,true,0,0,0,newPath(scheme, authority, Path.SEPARATOR));}
candidates.add(rootPlaceholder);for(int componentIdx =0; componentIdx < components.size();
componentIdx++){
ArrayList<FileStatus> newCandidates =newArrayList<FileStatus>(candidates.size());
GlobFilter globFilter =newGlobFilter(components.get(componentIdx));
String component =unescapePathComponent(components.get(componentIdx));if(globFilter.hasPattern()){
sawWildcard =true;}if(candidates.isEmpty()&& sawWildcard){// Optimization: if there are no more candidates left, stop examining // the path components. We can only do this if we've already seen// a wildcard component-- otherwise, we still need to visit all path // components in case one of them is a wildcard.break;}if((componentIdx < components.size()-1)&&(!globFilter.hasPattern())){// Optimization: if this is not the terminal path component, and we // are not matching against a glob, assume that it exists. If it // doesn't exist, we'll find out later when resolving a later glob// or the terminal path component.for(FileStatus candidate : candidates){
candidate.setPath(newPath(candidate.getPath(), component));}continue;}for(FileStatus candidate : candidates){if(globFilter.hasPattern()){//在这里了
FileStatus[] children =listStatus(candidate.getPath());if(children.length ==1){// If we get back only one result, this could be either a listing// of a directory with one entry, or it could reflect the fact// that what we listed resolved to a file.//// Unfortunately, we can't just compare the returned paths to// figure this out. Consider the case where you have /a/b, where// b is a symlink to "..". In that case, listing /a/b will give// back "/a/b" again. If we just went by returned pathname, we'd// incorrectly conclude that /a/b was a file and should not match// /a/*/*. So we use getFileStatus of the path we just listed to// disambiguate.if(!getFileStatus(candidate.getPath()).isDirectory()){continue;}}for(FileStatus child : children){if(componentIdx < components.size()-1){// Don't try to recurse into non-directories. See HADOOP-10957.if(!child.isDirectory())continue;}// Set the child path based on the parent path.
child.setPath(newPath(candidate.getPath(),
child.getPath().getName()));if(globFilter.accept(child.getPath())){
newCandidates.add(child);}}}else{// When dealing with non-glob components, use getFileStatus // instead of listStatus. This is an optimization, but it also// is necessary for correctness in HDFS, since there are some// special HDFS directories like .reserved and .snapshot that are// not visible to listStatus, but which do exist. (See HADOOP-9877)//在这里了
FileStatus childStatus =getFileStatus(newPath(candidate.getPath(), component));if(childStatus != null){
newCandidates.add(childStatus);}}}
candidates = newCandidates;}for(FileStatus status : candidates){// Use object equality to see if this status is the root placeholder.// See the explanation for rootPlaceholder above for more information.if(status == rootPlaceholder){
status =getFileStatus(rootPlaceholder.getPath());if(status == null)continue;}// HADOOP-3497 semantics: the user-defined filter is applied at the// end, once the full path is built up.if(filter.accept(status.getPath())){
results.add(status);}}}/*
* When the input pattern "looks" like just a simple filename, and we
* can't find it, we return null rather than an empty array.
* This is a special case which the shell relies on.
*
* To be more precise: if there were no results, AND there were no
* groupings (aka brackets), and no wildcards in the input (aka stars),
* we return null.
*/if((!sawWildcard)&& results.isEmpty()&&(flattenedPatterns.size()<=1)){return null;}return results.toArray(newFileStatus[0]);}
调用到了Globber的getFileStatus和listStatus方法:private FileStatus getFileStatus(Path path)throws IOException {try{if(fs != null){return fs.getFileStatus(path);}else{return fc.getFileStatus(path);}}catch(FileNotFoundException e){return null;}}private FileStatus[]listStatus(Path path)throws IOException {try{if(fs != null){return fs.listStatus(path);}else{return fc.util().listStatus(path);}}catch(FileNotFoundException e){returnnewFileStatus[0];}}
至此,终于看到 fs.listStatus 了。也就是说,接下来调用的就是具体 FileSystem 实现的 getFileStatus 和 listStatus 方法,这里不再展开分析。
*************分两部分:其次分析processArguments(expandArguments(args));********************protectedvoidprocessArguments(LinkedList<PathData> args)throws IOException {for(PathData arg : args){try{processArgument(arg);}catch(IOException e){displayError(e);}}}protectedvoidprocessArgument(PathData item)throws IOException {if(item.exists){processPathArgument(item);}else{processNonexistentPath(item);}}//存在则会进行格式化处理,见下面protectedvoidprocessPathArgument(PathData item)throws IOException {// null indicates that the call is not via recursion, ie. there is// no parent directory that was expanded
depth =0;processPaths(null, item);}//不存在的处理是直接抛错给用户提示信息protectedvoidprocessNonexistentPath(PathData item)throws IOException {thrownewPathNotFoundException(item.toString());}