Hadoop源码分析之DNS

Hadoop的典型运行场景在集群(cluster)上,其中每台机器都称为节点(node),节点之间连成一个局域网。
集群内的节点之间可以通过IP地址通信,也可以通过节点的域名(主机名)通信,这就需要有DNS的帮助。这意味着,在网络可以通达的某处存在着DNS服务,因而可以根据对方的域名查找到其IP地址。这也意味着集群内的这些节点都应有个域名,并且登记在DNS中。
一般涉及到DNS的操作都在操作系统或底层的库程序中,对于应用层是透明的。比方说我们通过HTTP访问网站就只需要提供其域名,而HTTP驱动层自然会与DNS服务器交互以获得目标网站的IP地址。但是Hadoop并不甘于DNS对其保持透明,因为它的有些操作需要知道具体节点的IP地址,因此Hadoop定义了一个名为DNS的类。
源码如下:

package org.apache.hadoop.net; //1.如果标注的是Public,说明被注解的类型对多有工程和应用可用。 //2.如果标注的是LimitedPrivate,说明被注解的类型只能用于某些特定的工程或应用,如Common,HDFS,MapReduce,ZooKeeper,HBase等。 //3.如果标注的是Private,说明被注解的类型只能用于Hadoop。 @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})//1.如果标注的是Stable,说明主版本是稳定的,不同主版本之间可能不兼容。 //2.如果标注的是Evolving,说明是不停在变化的,不同小版本之间也可能不兼容。 //3.如果标注的是Unstable,说明稳定性没有任何保证。 @InterfaceStability.Unstable public class DNS {private static final Log LOG = LogFactory.getLog(DNS.class); /** * The cached hostname -initially null. */private static final String cachedHostname = resolveLocalHostname(); private static final String cachedHostAddress = resolveLocalHostIPAddress(); private static final String LOCALHOST = "localhost"; /** * Returns the hostname associated with the specified IP address by the * provided nameserver. * * Loopback addresses * @param hostIp The address to reverse lookup * @param ns The host name of a reachable DNS server * @return The host name associated with the provided IP * @throws NamingException If a NamingException is encountered */ //这是逆向查询,从IP地址查找其域名,参数ns是DNS服务器的地址 public static String reverseDns(InetAddress hostIp, String ns) throws NamingException { // // Builds the reverse IP lookup form // This is formed by reversing the IP numbers and appending in-addr.arpa // //倒排IP地址的4个字段,例如“192.188.0.1”就变成“1.0.188.192.in-addr.arpa” String[] parts = hostIp.getHostAddress().split("\\."); String reverseIP = parts[3] + "." + parts[2] + "." + parts[1] + "." + parts[0] + ".in-addr.arpa"; //形成一个DNS查询语句 DirContext ictx = new InitialDirContext(); Attributes attribute; try { attribute = ictx.getAttributes("dns://"// Use "dns:///" if the default + ((ns == null) ? 
"" : ns) + // nameserver is to be used "/" + reverseIP, new String[] { "PTR" }); } finally { ictx.close(); }String hostname = attribute.get("PTR").get().toString(); int hostnameLength = hostname.length(); if (hostname.charAt(hostnameLength - 1) == '.') { hostname = hostname.substring(0, hostnameLength - 1); } return hostname; }/** * @return NetworkInterface for the given subinterface name (eg eth0:0) *or null if no interface with the given name can be found */ private static NetworkInterface getSubinterface(String strInterface) throws SocketException { Enumeration nifs = NetworkInterface.getNetworkInterfaces(); while (nifs.hasMoreElements()) { Enumeration subNifs = nifs.nextElement().getSubInterfaces(); while (subNifs.hasMoreElements()) { NetworkInterface nif = subNifs.nextElement(); if (nif.getName().equals(strInterface)) { return nif; } } } return null; }/** * @param nif network interface to get addresses for * @return set containing addresses for each subinterface of nif, *see below for the rationale for using an ordered set */ private static LinkedHashSet getSubinterfaceInetAddrs( NetworkInterface nif) { LinkedHashSet addrs = new LinkedHashSet(); Enumeration subNifs = nif.getSubInterfaces(); while (subNifs.hasMoreElements()) { NetworkInterface subNif = subNifs.nextElement(); addrs.addAll(Collections.list(subNif.getInetAddresses())); } return addrs; }/** * Like {@link DNS#getIPs(String, boolean), but returns all * IPs associated with the given interface and its subinterfaces. */ public static String[] getIPs(String strInterface) throws UnknownHostException { return getIPs(strInterface, true); }/** * Returns all the IPs associated with the provided interface, if any, in * textual form. 
* * @param strInterface *The name of the network interface or sub-interface to query *(eg eth0 or eth0:0) or the string "default" * @param returnSubinterfaces *Whether to return IPs associated with subinterfaces of *the given interface * @return A string vector of all the IPs associated with the provided *interface. The local host IP is returned if the interface *name "default" is specified or there is an I/O error looking *for the given interface. * @throws UnknownHostException *If the given interface is invalid * */ //获取绑定在某个网口(例如eth0)上的所有IP地址 public static String[] getIPs(String strInterface, boolean returnSubinterfaces) throws UnknownHostException { if ("default".equals(strInterface)) { return new String[] { cachedHostAddress }; } NetworkInterface netIf; try { netIf = NetworkInterface.getByName(strInterface); if (netIf == null) { netIf = getSubinterface(strInterface); } } catch (SocketException e) { LOG.warn("I/O error finding interface " + strInterface + ": " + e.getMessage()); return new String[] { cachedHostAddress }; } if (netIf == null) { throw new UnknownHostException("No such interface " + strInterface); }// NB: Using a LinkedHashSet to preserve the order for callers // that depend on a particular element being 1st in the array. // For example, getDefaultIP always returns the first element. LinkedHashSet allAddrs = new LinkedHashSet(); allAddrs.addAll(Collections.list(netIf.getInetAddresses())); if (!returnSubinterfaces) { allAddrs.removeAll(getSubinterfaceInetAddrs(netIf)); }String ips[] = new String[allAddrs.size()]; int i = 0; for (InetAddress addr : allAddrs) { ips[i++] = addr.getHostAddress(); } return ips; }/** * Returns the first available IP address associated with the provided * network interface or the local host IP if "default" is given. * * @param strInterface *The name of the network interface or subinterface to query *(e.g. 
eth0 or eth0:0) or the string "default" * @return The IP address in text form, the local host IP is returned *if the interface name "default" is specified * @throws UnknownHostException *If the given interface is invalid */ //获取绑定在某个网口上的默认IP地址 public static String getDefaultIP(String strInterface) throws UnknownHostException { String[] ips = getIPs(strInterface); return ips[0]; }/** * Returns all the host names associated by the provided nameserver with the * address bound to the specified network interface * * @param strInterface *The name of the network interface or subinterface to query *(e.g. eth0 or eth0:0) * @param nameserver *The DNS host name * @return A string vector of all host names associated with the IPs tied to *the specified interface * @throws UnknownHostException if the given interface is invalid */ //获取绑定于网口strlnterface的所有域名,参数nameserver是DNS的服务器地址或IP地址,可以为null public static String[] getHosts(String strInterface, String nameserver) throws UnknownHostException { String[] ips = getIPs(strInterface); //获取该网口的所有IP地址 Vector hosts = new Vector(); for (int ctr = 0; ctr < ips.length; ctr++) {//逐一查询绑定于这些地址的域名 try { hosts.add(reverseDns(InetAddress.getByName(ips[ctr]), nameserver)); } catch (UnknownHostException ignored) { } catch (NamingException ignored) { } } if (hosts.isEmpty()) { LOG.warn("Unable to determine hostname for interface " + strInterface); return new String[] { cachedHostname }; } else { return hosts.toArray(new String[hosts.size()]); //返回这些域名 } }/** * Determine the local hostname; retrieving it from cache if it is known * If we cannot determine our host name, return "localhost" * @return the local hostname or "localhost" */ private static String resolveLocalHostname() {//获取本机的主机名 String localhost; try { localhost = InetAddress.getLocalHost().getCanonicalHostName(); } catch (UnknownHostException e) { LOG.warn("Unable to determine local hostname " + "-falling back to \"" + LOCALHOST + "\"", e); localhost = LOCALHOST; } return localhost; }/** * 
Get the IPAddress of the local host as a string. * This will be a loop back value if the local host address cannot be * determined. * If the loopback address of "localhost" does not resolve, then the system's * network is in such a state that nothing is going to work. A message is * logged at the error level and a null pointer returned, a pointer * which will trigger failures later on the application * @return the IPAddress of the local host or null for a serious problem. */ private static String resolveLocalHostIPAddress() {//获取本机的IP地址 String address; try { address = InetAddress.getLocalHost().getHostAddress(); } catch (UnknownHostException e) { LOG.warn("Unable to determine address of the host" + "-falling back to \"" + LOCALHOST + "\" address", e); try { address = InetAddress.getByName(LOCALHOST).getHostAddress(); } catch (UnknownHostException noLocalHostAddressException) { //at this point, deep trouble LOG.error("Unable to determine local loopback address " + "of \"" + LOCALHOST + "\" " + "-this system's network configuration is unsupported", e); address = null; } } return address; }/** * Returns all the host names associated by the default nameserver with the * address bound to the specified network interface * * @param strInterface *The name of the network interface to query (e.g. eth0) * @return The list of host names associated with IPs bound to the network *interface * @throws UnknownHostException *If one is encountered while querying the default interface * */ public static String[] getHosts(String strInterface) throws UnknownHostException { return getHosts(strInterface, null); }/** * Returns the default (first) host name associated by the provided * nameserver with the address bound to the specified network interface * * @param strInterface *The name of the network interface to query (e.g. 
eth0) * @param nameserver *The DNS host name * @return The default host names associated with IPs bound to the network *interface * @throws UnknownHostException *If one is encountered while querying the default interface */ //参数nameserver为DNS服务器地址 public static String getDefaultHost(String strInterface, String nameserver) throws UnknownHostException { if ("default".equals(strInterface)) { return cachedHostname; }if ("default".equals(nameserver)) { return getDefaultHost(strInterface); }String[] hosts = getHosts(strInterface, nameserver); return hosts[0]; }/** * Returns the default (first) host name associated by the default * nameserver with the address bound to the specified network interface * * @param strInterface *The name of the network interface to query (e.g. eth0). *Must not be null. * @return The default host name associated with IPs bound to the network *interface * @throws UnknownHostException *If one is encountered while querying the default interface */ public static String getDefaultHost(String strInterface) throws UnknownHostException { return getDefaultHost(strInterface, null); }}

模块外的程序在使用时,直接以DNS.getDefaultHost(...)这样的静态方法调用方式使用即可。举个使用的例子,DataNode中有一个getHostName()方法,用来获取本节点的主机名,代码如下:
package org.apache.hadoop.hdfs.server.datanode;

// Excerpt: a private helper of the DataNode class.

/**
 * Determines the host name of this node.
 *
 * The configured host name wins; only when none is configured is the DNS
 * helper asked for the default host name of the configured interface.
 *
 * @param config the configuration to read the host name, network interface
 *               and nameserver settings from
 * @return the configured host name, or the one reported by
 *         DNS.getDefaultHost
 * @throws UnknownHostException if DNS.getDefaultHost fails
 */
private static String getHostName(Configuration config)
    throws UnknownHostException {
  // Host name explicitly set in the configuration, if any.
  String configuredName = config.get(DFS_DATANODE_HOST_NAME_KEY);
  if (configuredName != null) {
    return configuredName;
  }

  // Nothing configured: fall back to the DNS helper.
  // NOTE(review): config.get(key, default) presumably yields the default
  // when the key is unset - confirm against Configuration.
  String interfaceName = config.get(DFS_DATANODE_DNS_INTERFACE_KEY,
      DFS_DATANODE_DNS_INTERFACE_DEFAULT);   // network interface name
  String nameserver = config.get(DFS_DATANODE_DNS_NAMESERVER_KEY,
      DFS_DATANODE_DNS_NAMESERVER_DEFAULT);  // DNS server
  return DNS.getDefaultHost(interfaceName, nameserver);
}

    推荐阅读