@yudesong
2018-02-12T03:03:17.000000Z
字数 7846
阅读 783
HashMap
首先看看HashMap的结构图,采用了数组+链表的方式。

实现原理

// Excerpt of the JDK 7 HashMap source, quoted for analysis.
// Storage model: an array of Entry buckets, each bucket heading a singly linked
// list of colliding entries (array + linked list).
public class HashMap<K,V> extends AbstractMap<K,V> implements Map<K,V>,Cloneable, Serializable {
    // Number of key-value mappings actually stored.
    transient int size;
    // Resize threshold. While table == EMPTY_TABLE this holds the initial
    // capacity (default 16); once the table has been allocated it is normally
    // capacity * loadFactor. Resizing consults this value.
    int threshold;
    // Load factor: how full the table may get before resizing. Default 0.75.
    final float loadFactor;
    // Structural-modification counter used for fail-fast iteration: HashMap is
    // not thread-safe, so if another thread structurally changes the map
    // (put, remove, ...) during iteration, a ConcurrentModificationException
    // is thrown.
    transient int modCount;
    // The backing bucket array. Starts as the shared empty array {}; its
    // length is always a power of two (the reason is discussed below).
    transient Entry<K,V>[] table = (Entry<K,V>[]) EMPTY_TABLE;

    public HashMap(int initialCapacity, float loadFactor) {
        // Validate the requested capacity; it is capped at
        // MAXIMUM_CAPACITY = 1 << 30 (2^30).
        if (initialCapacity < 0)
            throw new IllegalArgumentException("Illegal initial capacity: " +initialCapacity);
        if (initialCapacity > MAXIMUM_CAPACITY)
            initialCapacity = MAXIMUM_CAPACITY;
        if (loadFactor <= 0 || Float.isNaN(loadFactor))
            throw new IllegalArgumentException("Illegal load factor: " +loadFactor);
        this.loadFactor = loadFactor;
        threshold = initialCapacity;
        // init() is a no-op hook in HashMap itself; subclasses such as
        // LinkedHashMap override it.
        init();
    }

    public V put(K key, V value) {
        // Lazily allocate the bucket array on first insert; the argument is
        // threshold, which at this point equals initialCapacity
        // (default 1 << 4 = 16).
        if (table == EMPTY_TABLE) {
            inflateTable(threshold);
        }
        // A null key always lives in bucket table[0] (or its collision chain).
        if (key == null)
            return putForNullKey(value);
        // Re-hash the key's hashCode to spread bits more evenly.
        int hash = hash(key);
        // Map the hash to an actual bucket index.
        int i = indexFor(hash, table.length);
        // Walk the collision chain: if an equal key already exists, overwrite
        // its value and return the previous value.
        for (Entry<K,V> e = table[i]; e != null; e = e.next) {
            Object k;
            if (e.hash == hash && ((k = e.key) == key || key.equals(k))) {
                V oldValue = e.value;
                e.value = value;
                e.recordAccess(this);
                return oldValue;
            }
        }
        // Structural change: bump modCount so concurrent iterators fail fast.
        modCount++;
        // Insert a brand-new entry at bucket i.
        addEntry(hash, key, value, i);
        return null;
    }

    public V get(Object key) {
        // A null key can only be in bucket table[0]; look it up directly.
        if (key == null)
            return getForNullKey();
        Entry<K,V> entry = getEntry(key);
        return null == entry ? null : entry.getValue();
    }

    final Entry<K,V> getEntry(Object key) {
        if (size == 0) {
            return null;
        }
        // Compute the hash from the key's hashCode.
        int hash = (key == null) ? 0 : hash(key);
        // indexFor is effectively (hash & (length - 1)): find the bucket, then
        // walk its chain comparing hash and equals until a match is found.
        for (Entry<K,V> e = table[indexFor(hash, table.length)];e != null;e = e.next) {
            Object k;
            if (e.hash == hash &&((k = e.key) == key || (key != null && key.equals(k))))
                return e;
        }
        return null;
    }

    // One node of a bucket's collision chain.
    static class Entry<K,V> implements Map.Entry<K,V> {
        final K key;
        V value;
        // Link to the next entry in the chain (singly linked list).
        Entry<K,V> next;
        // The key's hashed value, cached in the entry to avoid recomputation.
        int hash;

        /**
         * Creates new entry.
         */
        Entry(int h, K k, V v, Entry<K,V> n) {
            value = v;
            next = n;
            key = k;
            hash = h;
        }
    }
    // NOTE(review): the closing brace of the HashMap class is missing here —
    // the blog excerpt is truncated at this point.

// Excerpt of the JDK 7 ConcurrentHashMap source, quoted for analysis.
// Design: the map is split into `ssize` lock-striped Segments; each Segment is
// itself a small hash table of HashEntry chains.
public ConcurrentHashMap(int initialCapacity,float loadFactor, int concurrencyLevel) {
    if (!(loadFactor > 0) || initialCapacity < 0 || concurrencyLevel <= 0)
        throw new IllegalArgumentException();
    // MAX_SEGMENTS is 1 << 16 = 65536, i.e. the maximum concurrency level.
    if (concurrencyLevel > MAX_SEGMENTS)
        concurrencyLevel = MAX_SEGMENTS;
    // Invariant: ssize == 2^sshift. E.g. ssize=16 -> sshift=4; ssize=32 -> sshift=5.
    int sshift = 0;
    // ssize is the length of the segments array, derived from concurrencyLevel
    // by rounding up to a power of two.
    int ssize = 1;
    while (ssize < concurrencyLevel) {
        ++sshift;
        ssize <<= 1;
    }
    // segmentShift and segmentMask are used later to locate a segment from a
    // hash value (high bits select the segment).
    this.segmentShift = 32 - sshift;
    this.segmentMask = ssize - 1;
    if (initialCapacity > MAXIMUM_CAPACITY)
        initialCapacity = MAXIMUM_CAPACITY;
    // Compute cap, the HashEntry array length inside each Segment; cap is also
    // always a power of two.
    int c = initialCapacity / ssize;
    if (c * ssize < initialCapacity)
        ++c;
    int cap = MIN_SEGMENT_TABLE_CAPACITY;
    while (cap < c)
        cap <<= 1;
    // Create the segments array and eagerly initialize only segment 0; the
    // remaining segments are created lazily on demand.
    Segment<K,V> s0 =new Segment<K,V>(loadFactor, (int)(cap * loadFactor),(HashEntry<K,V>[])new HashEntry[cap]);
    Segment<K,V>[] ss = (Segment<K,V>[])new Segment[ssize];
    UNSAFE.putOrderedObject(ss, SBASE, s0);
    this.segments = ss;
}

public V put(K key, V value) {
    Segment<K,V> s;
    // ConcurrentHashMap forbids null values (and null keys — hash(key) below
    // throws NPE for a null key).
    if (value == null)
        throw new NullPointerException();
    // Re-hash the key's hashCode to compensate for poor hash functions and
    // keep entries evenly spread.
    int hash = hash(key);
    // Unsigned-shift the hash right by segmentShift and mask with segmentMask
    // to pick the segment index.
    int j = (hash >>> segmentShift) & segmentMask;
    if ((s = (Segment<K,V>)UNSAFE.getObject // nonvolatile; recheck
         (segments, (j << SSHIFT) + SBASE)) == null) // in ensureSegment
        s = ensureSegment(j);
    return s.put(key, hash, value, false);
}

public V get(Object key) {
    Segment<K,V> s;
    HashEntry<K,V>[] tab;
    int h = hash(key);
    long u = (((h >>> segmentShift) & segmentMask) << SSHIFT) + SBASE;
    // Two-step lookup: first locate the Segment (volatile read), then locate
    // the HashEntry chain inside it — all lock-free.
    if ((s = (Segment<K,V>)UNSAFE.getObjectVolatile(segments, u)) != null &&(tab = s.table) != null) {
        for (HashEntry<K,V> e = (HashEntry<K,V>) UNSAFE.getObjectVolatile(tab, ((long)(((tab.length - 1) & h)) << TSHIFT) + TBASE);e != null; e = e.next) {
            K k;
            if ((k = e.key) == key || (e.hash == h && key.equals(k)))
                return e.value;
        }
    }
    return null;
}

// Segment.put — runs with the segment's lock held.
final V put(K key, int hash, V value, boolean onlyIfAbsent) {
    // If tryLock fails, scanAndLockForPut traverses the target chain (mainly
    // to warm the CPU cache) and pre-creates the HashEntry if the key is
    // absent; after MAX_SCAN_RETRIES failed tries it falls back to lock().
    // If another thread changes the chain head meanwhile, it re-traverses.
    HashEntry<K,V> node = tryLock() ? null :scanAndLockForPut(key, hash, value);
    V oldValue;
    try {
        HashEntry<K,V>[] tab = table;
        // Locate the HashEntry slot. Note the same hash is used both to pick
        // the Segment (high bits) and the slot within it.
        int index = (tab.length - 1) & hash;
        HashEntry<K,V> first = entryAt(tab, index);
        for (HashEntry<K,V> e = first;;) {
            if (e != null) {
                K k;
                // Existing key: overwrite unless onlyIfAbsent was requested.
                if ((k = e.key) == key ||(e.hash == hash && key.equals(k))) {
                    oldValue = e.value;
                    if (!onlyIfAbsent) {
                        e.value = value;
                        ++modCount;
                    }
                    break;
                }
                e = e.next;
            }
            else {
                // Key not found: link a new node at the head of the chain.
                if (node != null)
                    node.setNext(first);
                else
                    node = new HashEntry<K,V>(hash, key, value, first);
                int c = count + 1;
                // If c exceeds threshold, the segment is resized to twice its
                // current capacity and rehashed — a comparatively expensive
                // operation; doubling minimizes how many entries must move.
                if (c > threshold && tab.length < MAXIMUM_CAPACITY)
                    rehash(node);
                else
                    setEntryAt(tab, index, node);
                ++modCount;
                count = c;
                oldValue = null;
                break;
            }
        }
    } finally {
        unlock();
    }
    return oldValue;
}
CopyOnWriteArrayList是Java并发包中提供的一个并发容器,它是个线程安全且读操作无锁的ArrayList,写操作则通过创建底层数组的新副本来实现,是一种读写分离的并发策略,我们也可以称这种容器为"写时复制器",Java并发包中类似的容器还有CopyOnWriteArraySet。
实现原理
我们都知道,集合框架中的ArrayList是非线程安全的,Vector虽是线程安全的,但由于简单粗暴的锁同步机制,性能较差。而CopyOnWriteArrayList则提供了另一种不同的并发处理策略。很多时候,我们的系统应对的都是读多写少的并发场景。CopyOnWriteArrayList容器允许并发读,读操作是无锁的,性能较高。至于写操作,比如向容器中添加一个元素,则首先将当前容器复制一份,然后在新副本上执行写操作,结束之后再将原容器的引用指向新容器。
优缺点分析
了解了CopyOnWriteArrayList的实现原理,分析它的优缺点及使用场景就很容易了。
优点:读操作性能很高,因为无需任何同步措施,比较适用于读多写少的并发场景。Java的普通list(如ArrayList)在遍历时,若中途有别的线程对list容器进行修改,则会抛出ConcurrentModificationException异常。而CopyOnWriteArrayList由于其"读写分离"的思想,遍历和修改操作分别作用在不同的list容器上,所以在使用迭代器进行遍历时,也就不会抛出ConcurrentModificationException异常了。
缺点:缺点也很明显,一是内存占用问题,毕竟每次执行写操作都要将原容器拷贝一份,数据量大时,对内存压力较大,可能会引起频繁GC;二是无法保证实时性,Vector对于读写操作均加锁同步,可以保证读和写的强一致性。而CopyOnWriteArrayList由于其实现策略的原因,写和读分别作用在新老不同容器上,在写操作执行过程中,读不会阻塞但读取到的却是老容器的数据。
// Excerpt of the JDK CopyOnWriteArrayList source, quoted for analysis.
// Writes copy the whole backing array under a ReentrantLock and then swap the
// reference; reads are lock-free against the current snapshot.
public boolean add(E e) {
    // Take the ReentrantLock so writers are serialized.
    final ReentrantLock lock = this.lock;
    lock.lock();
    try {
        Object[] elements = getArray();
        int len = elements.length;
        // Copy the current array into a new one with room for one more slot.
        Object[] newElements = Arrays.copyOf(elements, len + 1);
        // Perform the append on the private copy.
        newElements[len] = e;
        // Publish: point the backing-array reference at the new copy.
        setArray(newElements);
        return true;
    } finally {
        // Release the write lock.
        lock.unlock();
    }
}

public E remove(int index) {
    // Serialize with other writers.
    final ReentrantLock lock = this.lock;
    lock.lock();
    try {
        Object[] elements = getArray();
        int len = elements.length;
        E oldValue = get(elements, index);
        int numMoved = len - index - 1;
        if (numMoved == 0)
            // Removing the last element: copy the first len-1 elements into a
            // new array and swap the reference.
            setArray(Arrays.copyOf(elements, len - 1));
        else {
            // Otherwise copy everything except the removed element into a new
            // array (two arraycopy calls around the gap) and swap.
            Object[] newElements = new Object[len - 1];
            System.arraycopy(elements, 0, newElements, 0, index);
            System.arraycopy(elements, index + 1, newElements, index,numMoved);
            setArray(newElements);
        }
        return oldValue;
    } finally {
        // Release the write lock.
        lock.unlock();
    }
}

// Lock-free read: index into whatever snapshot is currently published.
public E get(int index) {
    return get(getArray(), index);
}

private E get(Object[] a, int index) {
    return (E) a[index];
}
参考博客
1.HashMap实现原理及源码分析
2.CopyOnWriteArrayList实现原理及源码分析
3.造成HashMap非线程安全的原因