C#与数据结构--哈希表(Hashtable)
2009-06-24 07:08:25 来源:WEB开发网
Hashtable在删除一个不存在冲突的元素时(hash_coll为正数),会把键和值都设为null,hash_coll的值设为0。这种没有冲突的空位称为“无冲突空位”,如图8.9所示,k2被删除后索引2处就属于无冲突空位。当一个Hashtable被初始化后,buckets数组中的所有位置都是无冲突空位。
哈希表通过关键字查找元素时,首先计算出键的哈希地址,然后通过这个哈希地址直接访问数组的相应位置并对比两个键值,如果相同,则查找成功并返回;如果不同,则根据hash_coll的值来决定下一步操作。当hash_coll为0或正数时,表明没有冲突,此时查找失败;如果hash_coll为负数时,表明存在冲突,此时需通过二度哈希继续计算哈希地址进行查找,如此反复直到找到相应的键值表明查找成功,如果在查找过程中遇到hash_coll为正数或计算二度哈希的次数等于哈希表长度则查找失败。由此可知,将hash_coll的高位设为冲突位主要是为了提高查找速度,避免无意义地多次计算二度哈希的情况。
8.4.2 Hashtable的代码实现
哈希表的实现较为复杂,为了简化代码,本例忽略了部分出错判断,在测试时请不要设key值为空。
1 using System;
/// <summary>
/// A simplified hash table that stores key/value pairs in a single bucket array
/// and resolves collisions by open addressing with double hashing.
/// </summary>
/// <remarks>
/// Each bucket is in one of three states:
/// <list type="bullet">
/// <item><description>occupied: <c>key</c> is a user key;</description></item>
/// <item><description>collision-free empty slot: <c>key</c> is <c>null</c>;</description></item>
/// <item><description>deleted slot that other entries probed past: <c>key</c> references the
/// <c>buckets</c> array itself and the collision bit of <c>hash_coll</c> is set.</description></item>
/// </list>
/// Because <c>null</c> marks an empty slot, null keys are rejected.
/// </remarks>
public class Hashtable
{
    // Mask selecting the 31-bit hash code stored in bucket.hash_coll.
    private const int HashMask = 0x7FFFFFFF;

    // Sign bit of bucket.hash_coll: set when another key probed past this slot.
    private const int CollisionBit = unchecked((int)0x80000000);

    /// <summary>One slot of the bucket array.</summary>
    private struct bucket
    {
        public Object key;      // Key (null = empty slot, the buckets array = deleted slot with collision)
        public Object val;      // Value
        public int hash_coll;   // Low 31 bits: hash code; sign bit: collision flag
    }

    private bucket[] buckets;   // Array holding the table data
    private int count;          // Number of stored entries
    private int loadsize;       // Number of entries allowed before the table is expanded
    private float loadFactor;   // Effective load factor

    /// <summary>Creates a table with the minimum size of 11 buckets.</summary>
    public Hashtable() : this(0, 1.0f) { }

    /// <summary>Creates a table sized for the given capacity and load factor.</summary>
    /// <param name="capacity">Approximate number of entries the table should hold.</param>
    /// <param name="loadFactor">
    /// Value between 0.1 and 1.0 inclusive; values above 0.72 are capped at 0.72.
    /// </param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="loadFactor"/> is outside 0.1 to 1.0 (this also rejects NaN).
    /// </exception>
    /// <exception cref="ArgumentException">
    /// The bucket count derived from <paramref name="capacity"/> does not fit in an int.
    /// </exception>
    public Hashtable(int capacity, float loadFactor)
    {
        if (!(loadFactor >= 0.1f && loadFactor <= 1.0f))
        {
            // The two-argument overload is required: the single-string overload
            // would treat the message as the parameter name.
            throw new ArgumentOutOfRangeException(
                "loadFactor", "填充因子必须在0.1~1之间");
        }
        this.loadFactor = loadFactor > 0.72f ? 0.72f : loadFactor;

        // Derive the table length from the requested capacity.
        double rawsize = capacity / this.loadFactor;
        if (rawsize > Int32.MaxValue)
        {
            throw new ArgumentException(
                "capacity is too large for the requested load factor.", "capacity");
        }

        // The table length is a prime, and never smaller than 11.
        int hashsize = (rawsize > 11)
            ? HashHelpers.GetPrime((int)rawsize)
            : 11;
        buckets = new bucket[hashsize];
        loadsize = (int)(this.loadFactor * hashsize);
    }

    /// <summary>Adds a new entry.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="key"/> is null.</exception>
    /// <exception cref="ArgumentException">An entry with the same key already exists.</exception>
    public virtual void Add(Object key, Object value)
    {
        Insert(key, value, true);
    }

    /// <summary>
    /// Computes the 31-bit hash code of <paramref name="key"/> together with the
    /// probe start (<paramref name="seed"/>, h1) and probe step (<paramref name="incr"/>, h2).
    /// </summary>
    /// <returns>The hash code with its sign bit cleared.</returns>
    private uint InitHash(Object key, int hashsize,
        out uint seed, out uint incr)
    {
        // Clear the sign bit: that bit is reserved for the collision flag.
        uint hashcode = (uint)GetHash(key) & 0x7FFFFFFF;
        seed = hashcode;
        // The step lies in [1, hashsize - 1], so it is never zero.
        incr = (uint)(1 + (((seed >> 5) + 1) % ((uint)hashsize - 1)));
        return hashcode;
    }

    /// <summary>Gets or sets the value associated with <paramref name="key"/>.</summary>
    /// <value>
    /// The stored value, or <c>null</c> when the key is not present. Setting
    /// replaces the value of an existing key or adds a new entry.
    /// </value>
    /// <exception cref="ArgumentNullException"><paramref name="key"/> is null.</exception>
    public virtual Object this[Object key]
    {
        get
        {
            if (key == null)
            {
                throw new ArgumentNullException("key");
            }

            uint seed;  // h1
            uint incr;  // h2
            uint hashcode = InitHash(key, buckets.Length,
                out seed, out incr);
            int ntry = 0;   // the i in h(key, i)
            bucket b;
            int bn = (int)(seed % (uint)buckets.Length);    // h(key, 0)
            do
            {
                b = buckets[bn];
                if (b.key == null)
                {
                    // Collision-free empty slot: the key cannot be further along.
                    return null;
                }
                if (((b.hash_coll & HashMask) == hashcode) &&
                    KeyEquals(b.key, key))
                {
                    return b.val;
                }
                bn = (int)(((long)bn + incr) % (uint)buckets.Length);
                // Keep probing only while the slot just examined is flagged as collided.
            } while (b.hash_coll < 0 && ++ntry < buckets.Length);
            return null;
        }
        set
        {
            Insert(key, value, false);
        }
    }

    /// <summary>Grows the table to a prime length of roughly twice the current one.</summary>
    private void expand()
    {
        int rawsize = HashHelpers.GetPrime(buckets.Length * 2);
        rehash(rawsize);
    }

    /// <summary>Moves every live entry into a fresh bucket array of the given length.</summary>
    private void rehash(int newsize)
    {
        bucket[] newBuckets = new bucket[newsize];
        for (int nb = 0; nb < buckets.Length; nb++)
        {
            bucket oldb = buckets[nb];
            // Skip empty slots (null) and deleted slots (key == buckets).
            if ((oldb.key != null) && (oldb.key != buckets))
            {
                putEntry(newBuckets, oldb.key, oldb.val,
                    oldb.hash_coll & HashMask);
            }
        }
        buckets = newBuckets;
        loadsize = (int)(loadFactor * newsize);
    }

    /// <summary>
    /// Places one entry of the old array into <paramref name="newBuckets"/>,
    /// flagging every occupied slot it has to probe past.
    /// </summary>
    private void putEntry(bucket[] newBuckets, Object key,
        Object nvalue, int hashcode)
    {
        uint seed = (uint)hashcode;     // h1
        uint incr = (uint)(1 + (((seed >> 5) + 1) %
            ((uint)newBuckets.Length - 1)));    // h2
        int bn = (int)(seed % (uint)newBuckets.Length);
        do
        {
            // Both kinds of empty slot can take the entry.
            if ((newBuckets[bn].key == null) ||
                (newBuckets[bn].key == buckets))
            {
                newBuckets[bn].val = nvalue;
                newBuckets[bn].key = key;
                // OR keeps a collision bit that may already be set on this slot.
                newBuckets[bn].hash_coll |= hashcode;
                return;
            }
            // The slot is taken by another entry: flag the collision.
            if (newBuckets[bn].hash_coll >= 0)
            {
                newBuckets[bn].hash_coll |= CollisionBit;
            }
            bn = (int)(((long)bn + incr) % (uint)newBuckets.Length);
        } while (true);
    }

    /// <summary>Returns the hash code used for <paramref name="key"/>.</summary>
    protected virtual int GetHash(Object key)
    {
        return key.GetHashCode();
    }

    /// <summary>Determines whether a stored key equals the key being looked up.</summary>
    /// <param name="item">Key stored in a bucket; may be null for an empty slot.</param>
    /// <param name="key">Key supplied by the caller.</param>
    protected virtual bool KeyEquals(Object item, Object key)
    {
        return item == null ? false : item.Equals(key);
    }

    /// <summary>Writes a new entry into the given slot and counts it.</summary>
    private void storeEntry(int index, Object key, Object nvalue, uint hashcode)
    {
        buckets[index].val = nvalue;
        buckets[index].key = key;
        // OR keeps the collision bit of a reused deleted slot.
        buckets[index].hash_coll |= (int)hashcode;
        count++;
    }

    /// <summary>
    /// Adds an entry (<paramref name="add"/> is true) or sets the value of a key,
    /// adding the entry when the key is absent (<paramref name="add"/> is false).
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="key"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="add"/> is true and the key already exists.
    /// </exception>
    /// <exception cref="InvalidOperationException">No slot could be found.</exception>
    private void Insert(Object key, Object nvalue, bool add)
    {
        if (key == null)
        {
            throw new ArgumentNullException("key");
        }
        // Expand once the number of entries reaches the allowed limit.
        if (count >= loadsize)
        {
            expand();
        }

        uint seed;  // h1
        uint incr;  // h2
        uint hashcode = InitHash(key, buckets.Length, out seed, out incr);
        int ntry = 0;               // the i in h(key, i)
        int emptySlotNumber = -1;   // first reusable deleted slot seen, if any
        int bn = (int)(seed % (uint)buckets.Length);
        do
        {
            // A deleted slot with the collision bit can be reused, but the same key
            // may still sit further along the probe sequence, so remember it and go on.
            if (emptySlotNumber == -1 && (buckets[bn].key == buckets) &&
                (buckets[bn].hash_coll < 0))
            {
                emptySlotNumber = bn;
            }

            // A never-used slot ends the probe sequence: the key is not present.
            if (buckets[bn].key == null)
            {
                if (emptySlotNumber != -1)
                {
                    bn = emptySlotNumber;
                }
                storeEntry(bn, key, nvalue, hashcode);
                return;
            }

            // The key already exists.
            if (((buckets[bn].hash_coll & HashMask) == hashcode) &&
                KeyEquals(buckets[bn].key, key))
            {
                if (add)
                {
                    throw new ArgumentException("添加了重复的键值!");
                }
                buckets[bn].val = nvalue;
                return;
            }

            // Flag the collision only on slots passed before any reusable slot:
            // the entry will be stored no further than that reusable slot.
            if (emptySlotNumber == -1)
            {
                if (buckets[bn].hash_coll >= 0)
                {
                    buckets[bn].hash_coll |= CollisionBit;
                }
            }
            bn = (int)(((long)bn + incr) % (uint)buckets.Length);
        } while (++ntry < buckets.Length);

        // All probes were spent without meeting a never-used slot. If a deleted
        // slot was seen along the way the key is known to be absent, so reuse it.
        if (emptySlotNumber != -1)
        {
            storeEntry(emptySlotNumber, key, nvalue, hashcode);
            return;
        }
        throw new InvalidOperationException("添加失败!");
    }

    /// <summary>Removes the entry with the given key; does nothing when it is absent.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="key"/> is null.</exception>
    public virtual void Remove(Object key)
    {
        if (key == null)
        {
            throw new ArgumentNullException("key");
        }

        uint seed;  // h1
        uint incr;  // h2
        uint hashcode = InitHash(key, buckets.Length, out seed, out incr);
        int ntry = 0;   // the i in h(key, i)
        bucket b;
        int bn = (int)(seed % (uint)buckets.Length);
        do
        {
            b = buckets[bn];
            if (((b.hash_coll & HashMask) == hashcode) &&
                KeyEquals(b.key, key))
            {
                // Keep only the collision bit.
                buckets[bn].hash_coll &= CollisionBit;
                if (buckets[bn].hash_coll != 0)
                {
                    // Other entries probed past this slot: mark it as deleted
                    // so lookups continue beyond it.
                    buckets[bn].key = buckets;
                }
                else
                {
                    buckets[bn].key = null;
                }
                buckets[bn].val = null;     // release the value
                count--;
                return;
            }
            bn = (int)(((long)bn + incr) % (uint)buckets.Length);
        } while (b.hash_coll < 0 && ++ntry < buckets.Length);
    }

    /// <summary>
    /// Returns one line per bucket: index, key, value and hash_coll for occupied
    /// slots; index and hash_coll for empty or deleted slots.
    /// </summary>
    public override string ToString()
    {
        System.Text.StringBuilder text = new System.Text.StringBuilder();
        for (int i = 0; i < buckets.Length; i++)
        {
            if (buckets[i].key != null && buckets[i].key != buckets)
            {
                // A stored value may legitimately be null.
                string valueText = buckets[i].val == null
                    ? string.Empty
                    : buckets[i].val.ToString();
                text.AppendFormat("{0,-5}{1,-8}{2,-8}{3,-8}\r\n",
                    i.ToString(), buckets[i].key.ToString(),
                    valueText,
                    buckets[i].hash_coll.ToString());
            }
            else
            {
                text.AppendFormat("{0,-21}{1,-8}\r\n", i.ToString(),
                    buckets[i].hash_coll.ToString());
            }
        }
        return text.ToString();
    }

    /// <summary>Gets the number of entries stored in the table.</summary>
    public virtual int Count
    {
        get { return count; }
    }
}
Hashtable和ArrayList的实现有相似的地方,比如两者都是以数组为基础做进一步的抽象而来,两者都可以成倍地自动扩展容量。
编辑推荐阅读:
C#与数据结构--树论--红黑树(Red Black Tree)(下)
C#与数据结构--树论--红黑树(Red Black Tree)(上)
C#与数据结构--图的遍历
C#与数据结构--二叉树的遍历
更多精彩
赞助商链接