HashMap源码分析

通过研究高水平的代码, 不断思考我们才能写出更好的代码, HashMap的高性能表现使得不少人去研究它是如何实现, 以及HashSet也只是保证了一个HashMap, 该文可能会由主体到细节, 不断更新.

如果你对HashMap中的hash算法不太了解, 那么请先去了解一下, 在阅读本文. hashmap的hash算法

本文的解析大部分以注释的方式嵌入了代码中.

class Node (构成hashmap的节点)

    static class Node<K,V> implements Map.Entry<K,V> {
        final int hash;
        final K key;
        V value;
        Node<K,V> next;

        Node(int hash, K key, V value, Node<K,V> next) {
            this.hash = hash;
            this.key = key;
            this.value = value;
            this.next = next;
        }

        public final K getKey()        { return key; }
        public final V getValue()      { return value; }
        public final String toString() { return key + "=" + value; }

        public final int hashCode() {
            return Objects.hashCode(key) ^ Objects.hashCode(value);
        }

        public final V setValue(V newValue) {
            V oldValue = value;
            value = newValue;
            return oldValue;
        }

        public final boolean equals(Object o) {
            if (o == this)
                return true;
            if (o instanceof Map.Entry) {
                Map.Entry<?,?> e = (Map.Entry<?,?>)o;
                if (Objects.equals(key, e.getKey()) &&
                    Objects.equals(value, e.getValue()))
                    return true;
            }
            return false;
        }
    }

Node是HashMap的节点, 一个与其他不相同的实例key代表着一个Node实例的存在.

类字段:

hash 每一个key和value经过put()方法(或其他方法)进入之后, key即不可变, 所以这里的hash也不可辨, 被final修饰
key 同上
value map节点的值, 这个是可变的.
next table[x]链表中, next代表了链表节点的下一个值.

Node是一个简单的节点对象, 并不是我们关注的重点.

HashMap中的字段

    private static final long serialVersionUID = 362498820763181265L;
	//哈希桶的默认初始大小16  2^4
    static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // aka 16

	//最大容量  1073741824
    static final int MAXIMUM_CAPACITY = 1 << 30;


	//装载因子, 默认为0.75
	/**
	 *javadoc上说明,默认加载因子为0.75, 在时间和空间成本之间提供了良好的权衡。  
	 *较高的值会降低占用的内存，但会增加查找成本（反映在HashMap类的大多数操  
	 *作中，包括get和put)
	 **/
    static final float DEFAULT_LOAD_FACTOR = 0.75f;

	//哈希桶中链表数量大于8, 就会转换成红黑树来存储节点
    static final int TREEIFY_THRESHOLD = 8;

    /**
     * The bin count threshold for untreeifying a (split) bin during a
     * resize operation. Should be less than TREEIFY_THRESHOLD, and at
     * most 6 to mesh with shrinkage detection under removal.
     */
	 
	// 红黑树的节点数小于6时, 转化为链表?
    static final int UNTREEIFY_THRESHOLD = 6;

    /**
     * The smallest table capacity for which bins may be treeified.
     * (Otherwise the table is resized if too many nodes in a bin.)
     * Should be at least 4 * TREEIFY_THRESHOLD to avoid conflicts
     * between resizing and treeification thresholds.
     */
	 //最小树容量64???? 有点迷
    static final int MIN_TREEIFY_CAPACITY = 64;
 /* ---------------- Fields -------------- */

	//哈希桶
    transient Node<K,V>[] table;

	//HashMap的Set copy
    transient Set<Map.Entry<K,V>> entrySet;

	//实际节点的多少, map的大小
    transient int size;

    /**
     * The number of times this HashMap has been structurally modified
     * Structural modifications are those that change the number of mappings in
     * the HashMap or otherwise modify its internal structure (e.g.,
     * rehash).  This field is used to make iterators on Collection-views of
     * the HashMap fail-fast.  (See ConcurrentModificationException).
     */
	//修改次数, 用来实现hashmap的fast-fail,  并发访问期间跳出异常( ConcurrentModificationException)
    transient int modCount;


	//下一次进行resize()时候的预期值.
    int threshold;

	//装载因子, 默认为0.75
	/**
	 *javadoc上说明,默认加载因子为0.75, 在时间和空间成本之间提供了良好的权衡。  
	 *较高的值会降低占用的内存，但会增加查找成本（反映在HashMap类的大多数操  
	 *作中，包括get和put)
	 **/
    final float loadFactor;

put(K key, V value)

    /**
     * Associates the specified value with the specified key in this map.
     * If the map previously contained a mapping for the key, the old
     * value is replaced.
     *
     * @param key key with which the specified value is to be associated
     * @param value value to be associated with the specified key
     * @return the previous value associated with <tt>key</tt>, or
     *         <tt>null</tt> if there was no mapping for <tt>key</tt>.
     *         (A <tt>null</tt> return can also indicate that the map
     *         previously associated <tt>null</tt> with <tt>key</tt>.)
     */
    public V put(K key, V value) {
        return putVal(hash(key), key, value, false, true);
    }

    /**
     * Implements Map.put and related methods.
     *
     * @param hash hash for key
     * @param key the key
     * @param value the value to put
     * @param onlyIfAbsent if true, don't change existing value
     * @param evict if false, the table is in creation mode.
     * @return previous value, or null if none
     */
    final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
                   boolean evict) {
        Node<K,V>[] tab; Node<K,V> p; int n, i;
		//如果当前的哈希同为空或者长度为0的时候, 重排map
        if ((tab = table) == null || (n = tab.length) == 0)
            n = (tab = resize()).length;
		//(n-1)&hash将会得到key所在哈希桶的具体下标, 同时将p设置为哈希桶下标的节点
		//如果哈希桶中的具体下标为null, 那么可以直接设置第一个节点为我们加入的节点
        if ((p = tab[i = (n - 1) & hash]) == null)
            tab[i] = newNode(hash, key, value, null);
        else {
            Node<K,V> e; K k;
			//判断第一个节点key是否就是我们要找到的key, 如果是, 将e=p
            if (p.hash == hash &&
                ((k = p.key) == key || (key != null && key.equals(k))))
                e = p;
			//判断该链表是否以及转化为红黑树, 如果是, 则在红黑树中进行查找.
            else if (p instanceof TreeNode)
                e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
            else {
				//这里记录链表中的节点的数量, 以便下面判断到达转化红黑树的阈值
                for (int binCount = 0; ; ++binCount) {
                    if ((e = p.next) == null) {
						//到达尾结点, 将尾结点的next指向新节点.
                        p.next = newNode(hash, key, value, null); 
						//如果>=7, 则"树化"这个链表
                        if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
                            treeifyBin(tab, hash);
                        break;
                    }
					//这里如果key原本是存在的, 那么break.
                    if (e.hash == hash &&
                        ((k = e.key) == key || (key != null && key.equals(k))))
                        break;
					//链表的遍历 p=e
                    p = e;
                }
            }
			//上面操作中, 如果key原本就存在于map中, 那么这个e就保存着key所在节点的引用
			//如果e不为空, 那么就根据onlyIfAbsent来设置新值
			//put()的onlyIfAbsent为false, 也就是无论以前的value是多少, 都设置成新的 value
			//putIfAbsent()的onlyIfAbsent为true, 只有当以前的value为null时候才会设置新的value
            if (e != null) { // existing mapping for key
                V oldValue = e.value;
                if (!onlyIfAbsent || oldValue == null)
                    e.value = value;
                afterNodeAccess(e);
                return oldValue;
            }
        }
		//修改次数++
        ++modCount;
		//实际容量++, 判断是否需要重排hashmap.
        if (++size > threshold)
            resize();
		//为hashMap的子类LinkedHashMap服务
        afterNodeInsertion(evict);
        return null;
    }
	
	@Override
    public V putIfAbsent(K key, V value) {
        return putVal(hash(key), key, value, true, true);
    }

put()方法是一个adapter, 下面的putVal()才是真正执行具体步骤的方法. 这里我也放上了putIfAbsent(), putIfAbsent()方法只有当key所对应的值为null才会设置参数中提供的值.

get(Object key)

	//适配函数get()
    public V get(Object key) {
        Node<K,V> e;
		//查找不到返回null, 查找到返回具体值
        return (e = getNode(hash(key), key)) == null ? null : e.value;
    }


    final Node<K,V> getNode(int hash, Object key) {
        Node<K,V>[] tab; Node<K,V> first, e; int n; K k;
        if ((tab = table) != null && (n = tab.length) > 0 &&
		//(n-1)&hash匹配数组下标, 获取哈希桶首个节点
            (first = tab[(n - 1) & hash]) != null) {
			//检查第一个首位元素是否是要匹配的节点
            if (first.hash == hash && // always check first node
                ((k = first.key) == key || (key != null && key.equals(k))))
                return first;
				//判断首个节点"之后"的节点, 如果null, 那么说明匹配不到.
            if ((e = first.next) != null) 
			//如果哈希桶中该下标首个节点为红黑树
                if (first instanceof TreeNode)	
				//去该红黑树中查找匹配的值
                    return ((TreeNode<K,V>)first).getTreeNode(hash, key);
                do {
				//遍历该链表进行查找
                    if (e.hash == hash &&
                        ((k = e.key) == key || (key != null && key.equals(k))))
                        return e;
					//向下遍历结束条件
                } while ((e = e.next) != null);
            }
        }
        return null;
    }

containsKey(Object key)

    public boolean containsKey(Object key) {
        return getNode(hash(key), key) != null;
    }

和getKey()一样通过getNode()方法来进行查找.

containsValue(Object value)

如果实例中存在节点的值等于参数value, 则返回true

    public boolean containsValue(Object value) {
        Node<K,V>[] tab; V v;
		//判断容量
        if ((tab = table) != null && size > 0) {
		//依次遍历整个哈希桶
            for (int i = 0; i < tab.length; ++i) {
			//链表中的每个值进行遍历
			//这里为什么只是考虑链表的情况, 难道不考虑树化的节点么? 待我思考再来答辩!
                for (Node<K,V> e = tab[i]; e != null; e = e.next) {
                    if ((v = e.value) == value ||
                        (value != null && value.equals(v)))
                        return true;
                }
            }
        }
        return false;
    }

public boolean remove(Object key, Object value)

    @Override
    public boolean remove(Object key, Object value) {
        return removeNode(hash(key), key, value, true, true) != null;
    }
    public V remove(Object key) {
        Node<K,V> e;
        return (e = removeNode(hash(key), key, null, false, true)) == null ?
            null : e.value;
    }

	//@param matchValue 是否需要匹配value
	//@param value 需要匹配的value
	//removeNode()有很多参数, 显然不单单为了remove()而存在, 这里我们只分析大致思路
    final Node<K,V> removeNode(int hash, Object key, Object value,
                               boolean matchValue, boolean movable) {
        Node<K,V>[] tab; Node<K,V> p; int n, index;
		//判断容量
        if ((tab = table) != null && (n = tab.length) > 0 &&
		//(n-1)&hash 匹配下标.
            (p = tab[index = (n - 1) & hash]) != null) {
            Node<K,V> node = null, e; K k; V v;
            if (p.hash == hash &&
                ((k = p.key) == key || (key != null && key.equals(k))))
				//首个节点即匹配中, 使用node记录该节点
                node = p;
            else if ((e = p.next) != null) {
			//判断是否为红黑树
                if (p instanceof TreeNode)
                    node = ((TreeNode<K,V>)p).getTreeNode(hash, key);
                else {
                    do {
					//链表向下依次遍历匹配
                        if (e.hash == hash &&
                            ((k = e.key) == key ||
                             (key != null && key.equals(k)))) {
							 //node记录匹配的节点
                            node = e;
                            break;
                        }
						//p记录匹配节点的上一个节点
                        p = e;
                    } while ((e = e.next) != null);
                }
            }
			//这里&&后面判断是否需要同时匹配值
            if (node != null && (!matchValue || (v = node.value) == value ||
                                 (value != null && value.equals(v)))) {
                if (node instanceof TreeNode)
                    ((TreeNode<K,V>)node).removeTreeNode(this, tab, movable);
                else if (node == p)
				//首个节点匹配的情况, 直接指向node的下个节点
                    tab[index] = node.next;
                else
				//匹配的节点的上一个节点指向匹配节点的下一个节点, 所以匹配的节点等待被垃圾收集(除非这个引用数不为0)
                    p.next = node.next;
				//修改次数++
                ++modCount;
				//容量--
                --size;
				//LinkedHashMap的预留函数
                afterNodeRemoval(node);
                return node;
            }
        }
        return null;
    }

PREVIOUSJava 各种hash算法分析

NEXTJava 基础博文Collections