通过研究高水平的代码, 不断思考 我们才能写出更好的代码, HashMap的高性能表现使得不少人去研究它是如何实现, 以及HashSet也只是保证了一个HashMap, 该文可能会由主体到细节, 不断更新.
如果你对HashMap中的hash算法不太了解, 那么请先去了解一下, 在阅读本文. hashmap的hash算法
本文的解析大部分以注释的方式嵌入了代码中.
class Node (构成hashmap的节点)
static class Node<K,V> implements Map.Entry<K,V> {
final int hash;
final K key;
V value;
Node<K,V> next;
Node(int hash, K key, V value, Node<K,V> next) {
this.hash = hash;
this.key = key;
this.value = value;
this.next = next;
}
public final K getKey() { return key; }
public final V getValue() { return value; }
public final String toString() { return key + "=" + value; }
public final int hashCode() {
return Objects.hashCode(key) ^ Objects.hashCode(value);
}
public final V setValue(V newValue) {
V oldValue = value;
value = newValue;
return oldValue;
}
public final boolean equals(Object o) {
if (o == this)
return true;
if (o instanceof Map.Entry) {
Map.Entry<?,?> e = (Map.Entry<?,?>)o;
if (Objects.equals(key, e.getKey()) &&
Objects.equals(value, e.getValue()))
return true;
}
return false;
}
}
Node是HashMap的节点, 一个与其他不相同的实例key代表着一个Node实例的存在.
类字段:
- hash 每一个key和value经过put()方法(或其他方法)进入之后, key即不可变, 所以这里的hash也不可辨, 被final修饰
- key 同上
- value map节点的值, 这个是可变的.
- next table[x]链表中, next代表了链表节点的下一个值.
Node是一个简单的节点对象, 并不是我们关注的重点.
HashMap中的字段
private static final long serialVersionUID = 362498820763181265L;
//哈希桶的默认初始大小16 2^4
static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // aka 16
//最大容量 1073741824
static final int MAXIMUM_CAPACITY = 1 << 30;
//装载因子, 默认为0.75
/**
*javadoc上说明,默认加载因子为0.75, 在时间和空间成本之间提供了良好的权衡。
*较高的值会降低占用的内存,但会增加查找成本(反映在HashMap类的大多数操
*作中,包括get和put)
**/
static final float DEFAULT_LOAD_FACTOR = 0.75f;
//哈希桶中链表数量大于8, 就会转换成红黑树来存储节点
static final int TREEIFY_THRESHOLD = 8;
/**
* The bin count threshold for untreeifying a (split) bin during a
* resize operation. Should be less than TREEIFY_THRESHOLD, and at
* most 6 to mesh with shrinkage detection under removal.
*/
// 红黑树的节点数小于6时, 转化为链表?
static final int UNTREEIFY_THRESHOLD = 6;
/**
* The smallest table capacity for which bins may be treeified.
* (Otherwise the table is resized if too many nodes in a bin.)
* Should be at least 4 * TREEIFY_THRESHOLD to avoid conflicts
* between resizing and treeification thresholds.
*/
//最小树容量64???? 有点迷
static final int MIN_TREEIFY_CAPACITY = 64;
/* ---------------- Fields -------------- */
//哈希桶
transient Node<K,V>[] table;
//HashMap的Set copy
transient Set<Map.Entry<K,V>> entrySet;
//实际节点的多少, map的大小
transient int size;
/**
* The number of times this HashMap has been structurally modified
* Structural modifications are those that change the number of mappings in
* the HashMap or otherwise modify its internal structure (e.g.,
* rehash). This field is used to make iterators on Collection-views of
* the HashMap fail-fast. (See ConcurrentModificationException).
*/
//修改次数, 用来实现hashmap的fast-fail, 并发访问期间跳出异常( ConcurrentModificationException)
transient int modCount;
//下一次进行resize()时候的预期值.
int threshold;
//装载因子, 默认为0.75
/**
*javadoc上说明,默认加载因子为0.75, 在时间和空间成本之间提供了良好的权衡。
*较高的值会降低占用的内存,但会增加查找成本(反映在HashMap类的大多数操
*作中,包括get和put)
**/
final float loadFactor;
put(K key, V value)
/**
* Associates the specified value with the specified key in this map.
* If the map previously contained a mapping for the key, the old
* value is replaced.
*
* @param key key with which the specified value is to be associated
* @param value value to be associated with the specified key
* @return the previous value associated with <tt>key</tt>, or
* <tt>null</tt> if there was no mapping for <tt>key</tt>.
* (A <tt>null</tt> return can also indicate that the map
* previously associated <tt>null</tt> with <tt>key</tt>.)
*/
public V put(K key, V value) {
return putVal(hash(key), key, value, false, true);
}
/**
* Implements Map.put and related methods.
*
* @param hash hash for key
* @param key the key
* @param value the value to put
* @param onlyIfAbsent if true, don't change existing value
* @param evict if false, the table is in creation mode.
* @return previous value, or null if none
*/
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
boolean evict) {
Node<K,V>[] tab; Node<K,V> p; int n, i;
//如果当前的哈希同为空或者长度为0的时候, 重排map
if ((tab = table) == null || (n = tab.length) == 0)
n = (tab = resize()).length;
//(n-1)&hash将会得到key所在哈希桶的具体下标, 同时将p设置为哈希桶下标的节点
//如果哈希桶中的具体下标为null, 那么可以直接设置第一个节点为我们加入的节点
if ((p = tab[i = (n - 1) & hash]) == null)
tab[i] = newNode(hash, key, value, null);
else {
Node<K,V> e; K k;
//判断第一个节点key是否就是我们要找到的key, 如果是, 将e=p
if (p.hash == hash &&
((k = p.key) == key || (key != null && key.equals(k))))
e = p;
//判断该链表是否以及转化为红黑树, 如果是, 则在红黑树中进行查找.
else if (p instanceof TreeNode)
e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
else {
//这里记录链表中的节点的数量, 以便下面判断到达转化红黑树的阈值
for (int binCount = 0; ; ++binCount) {
if ((e = p.next) == null) {
//到达尾结点, 将尾结点的next指向新节点.
p.next = newNode(hash, key, value, null);
//如果>=7, 则"树化"这个链表
if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
treeifyBin(tab, hash);
break;
}
//这里如果key原本是存在的, 那么break.
if (e.hash == hash &&
((k = e.key) == key || (key != null && key.equals(k))))
break;
//链表的遍历 p=e
p = e;
}
}
//上面操作中, 如果key原本就存在于map中, 那么这个e就保存着key所在节点的引用
//如果e不为空, 那么就根据onlyIfAbsent来设置新值
//put()的onlyIfAbsent为false, 也就是无论以前的value是多少, 都设置成新的 value
//putIfAbsent()的onlyIfAbsent为true, 只有当以前的value为null时候才会设置新的value
if (e != null) { // existing mapping for key
V oldValue = e.value;
if (!onlyIfAbsent || oldValue == null)
e.value = value;
afterNodeAccess(e);
return oldValue;
}
}
//修改次数++
++modCount;
//实际容量++, 判断是否需要重排hashmap.
if (++size > threshold)
resize();
//为hashMap的子类LinkedHashMap服务
afterNodeInsertion(evict);
return null;
}
@Override
public V putIfAbsent(K key, V value) {
return putVal(hash(key), key, value, true, true);
}
put()方法是一个adapter, 下面的putVal()才是真正执行具体步骤的方法. 这里我也放上了putIfAbsent(), putIfAbsent()方法只有当key所对应的值为null才会设置参数中提供的值.
get(Object key)
//适配函数get()
public V get(Object key) {
Node<K,V> e;
//查找不到返回null, 查找到返回具体值
return (e = getNode(hash(key), key)) == null ? null : e.value;
}
final Node<K,V> getNode(int hash, Object key) {
Node<K,V>[] tab; Node<K,V> first, e; int n; K k;
if ((tab = table) != null && (n = tab.length) > 0 &&
//(n-1)&hash匹配数组下标, 获取哈希桶首个节点
(first = tab[(n - 1) & hash]) != null) {
//检查第一个首位元素是否是要匹配的节点
if (first.hash == hash && // always check first node
((k = first.key) == key || (key != null && key.equals(k))))
return first;
//判断首个节点"之后"的节点, 如果null, 那么说明匹配不到.
if ((e = first.next) != null)
//如果哈希桶中该下标首个节点为红黑树
if (first instanceof TreeNode)
//去该红黑树中查找匹配的值
return ((TreeNode<K,V>)first).getTreeNode(hash, key);
do {
//遍历该链表进行查找
if (e.hash == hash &&
((k = e.key) == key || (key != null && key.equals(k))))
return e;
//向下遍历结束条件
} while ((e = e.next) != null);
}
}
return null;
}
containsKey(Object key)
public boolean containsKey(Object key) {
return getNode(hash(key), key) != null;
}
和getKey()一样通过getNode()方法来进行查找.
containsValue(Object value)
如果实例中存在节点的值等于参数value, 则返回true
public boolean containsValue(Object value) {
Node<K,V>[] tab; V v;
//判断容量
if ((tab = table) != null && size > 0) {
//依次遍历整个哈希桶
for (int i = 0; i < tab.length; ++i) {
//链表中的每个值进行遍历
//这里为什么只是考虑链表的情况, 难道不考虑树化的节点么? 待我思考再来答辩!
for (Node<K,V> e = tab[i]; e != null; e = e.next) {
if ((v = e.value) == value ||
(value != null && value.equals(v)))
return true;
}
}
}
return false;
}
public boolean remove(Object key, Object value)
@Override
public boolean remove(Object key, Object value) {
return removeNode(hash(key), key, value, true, true) != null;
}
public V remove(Object key) {
Node<K,V> e;
return (e = removeNode(hash(key), key, null, false, true)) == null ?
null : e.value;
}
//@param matchValue 是否需要匹配value
//@param value 需要匹配的value
//removeNode()有很多参数, 显然不单单为了remove()而存在, 这里我们只分析大致思路
final Node<K,V> removeNode(int hash, Object key, Object value,
boolean matchValue, boolean movable) {
Node<K,V>[] tab; Node<K,V> p; int n, index;
//判断容量
if ((tab = table) != null && (n = tab.length) > 0 &&
//(n-1)&hash 匹配下标.
(p = tab[index = (n - 1) & hash]) != null) {
Node<K,V> node = null, e; K k; V v;
if (p.hash == hash &&
((k = p.key) == key || (key != null && key.equals(k))))
//首个节点即匹配中, 使用node记录该节点
node = p;
else if ((e = p.next) != null) {
//判断是否为红黑树
if (p instanceof TreeNode)
node = ((TreeNode<K,V>)p).getTreeNode(hash, key);
else {
do {
//链表向下依次遍历匹配
if (e.hash == hash &&
((k = e.key) == key ||
(key != null && key.equals(k)))) {
//node记录匹配的节点
node = e;
break;
}
//p记录匹配节点的上一个节点
p = e;
} while ((e = e.next) != null);
}
}
//这里&&后面判断是否需要同时匹配值
if (node != null && (!matchValue || (v = node.value) == value ||
(value != null && value.equals(v)))) {
if (node instanceof TreeNode)
((TreeNode<K,V>)node).removeTreeNode(this, tab, movable);
else if (node == p)
//首个节点匹配的情况, 直接指向node的下个节点
tab[index] = node.next;
else
//匹配的节点的上一个节点指向匹配节点的下一个节点, 所以匹配的节点等待被垃圾收集(除非这个引用数不为0)
p.next = node.next;
//修改次数++
++modCount;
//容量--
--size;
//LinkedHashMap的预留函数
afterNodeRemoval(node);
return node;
}
}
return null;
}
PREVIOUSJava 各种hash算法分析