Quantcast
Channel: VBForums - CodeBank - Visual Basic 6 and earlier
Viewing all articles
Browse latest Browse all 1529

Simple and fast, lightweight HashList-Class (no APIs)

$
0
0
Not much to add to the Threads Title...

the implementation of cHashList comes in only about 100 lines of code, and it can
be used as a VB.Collection-compatible replacement with much better performance.

The following Methods/Properties are supported:
- CompareMode (to switch between case-insensitive and case-sensitive compares, default-behaviour is like the VB.Collection)
- UniqueKeys (to allow multiple entries with the same Key, when switched from the True-defaultsetting to False)
- Count
- Add(Item As Variant, Optional Key As String)
- Exists(Key As String)
- IndexByKey(Key As String)
- KeyByIndex(ByVal IndexOneBased As Long)
- Item(KeyOrOneBasedIndex As Variant) ... (Get, Let and Set)
- ReInit(Optional ByVal ExpectedItemCount As Long = 5000)

Indexed access (for both, Keys and Items) is by orders of magnitude faster than the VB-Collection.
What's possible now as well (compared to the VB-Collection) is the ability to overwrite Item-Values
(at a given Index- or Key-position).

Note, that in the above List a Remove-Method is missing -
I've left this out for two reasons:
1) to demonstrate that a simplified HashList-Implementation doesn't necessarily need to be a linked List
2) because Remove is not used very often, when a Collection is used primarily as a fast "Key-Lookup-Container"
... (for Queue- or Stack-scenarios one can always use the normal VBA.Collection)

Performance is about 6 times as fast, when Key-Value-pairs are added -
and about twice as fast when Items are retrieved per Key-Lookup...

Here's a ScreenShot:


Here's the Class-Code of cHashList:
Code:

Option Explicit
 
Private Type DataTableEntry
  Key As String
  Value As Variant
End Type
Private Type HashTableEntry
  DataIndexes() As Long
End Type
 
Private DataTable() As DataTableEntry, HashTable() As HashTableEntry
Private mCount As Long, mDTUBound As Long, mHashTableSize As Long
 
Public CompareMode As VbCompareMethod, UniqueKeys As Boolean

Private Sub Class_Initialize()
  UniqueKeys = True
  CompareMode = vbTextCompare
  ReInit
End Sub

Public Sub ReInit(Optional ByVal ExpectedItemCount As Long = 5000)
  mHashTableSize = 8
  Do Until mHashTableSize * 5 > ExpectedItemCount: mHashTableSize = mHashTableSize * 2: Loop
  ReDim HashTable(0 To mHashTableSize - 1)
 
  Dim i As Long
  For i = 0 To UBound(HashTable): ReDim HashTable(i).DataIndexes(0 To 0): Next
  mDTUBound = 16: ReDim DataTable(0 To mDTUBound)
  mCount = 0
End Sub

Public Property Get Count() As Long
  Count = mCount
End Property

Public Function Exists(Key As String) As Boolean
  Exists = FindIndex(Key, CalcHash(Key)) > 0
End Function
Public Function IndexByKey(Key As String) As Long
  IndexByKey = FindIndex(Key, CalcHash(Key))
End Function

Public Sub Add(Item, Optional Key As String)
Dim HashValue As Long, UB As Long
  HashValue = CalcHash(Key)
  If UniqueKeys Then If FindIndex(Key, HashValue) Then Err.Raise 457
 
  'prolong and add to the new entries to the DataTable-Array
  mCount = mCount + 1
  If mDTUBound < mCount Then mDTUBound = mDTUBound * 1.5: ReDim Preserve DataTable(0 To mDTUBound)
  DataTable(mCount).Key = Key
  DataTable(mCount).Value = Item
 
  'add the new DataIndex to the proper Hash-Buckets .DataIndexes-Array
  With HashTable(HashValue)
    UB = UBound(.DataIndexes): UB = UB + 1
    ReDim Preserve .DataIndexes(0 To UB)
    .DataIndexes(UB) = mCount
  End With
End Sub

Public Property Get KeyByIndex(ByVal IndexOneBased As Long)
  If IndexOneBased < 1 Or IndexOneBased > mCount Then Err.Raise 9
  KeyByIndex = DataTable(IndexOneBased).Key
End Property

Public Property Get Item(KeyOrOneBasedIndex)
Dim Index As Long
  If VarType(KeyOrOneBasedIndex) = vbString Then
    Index = FindIndex(KeyOrOneBasedIndex, CalcHash(KeyOrOneBasedIndex))
    If Index = 0 Then Err.Raise 457
  Else
    Index = KeyOrOneBasedIndex
    If Index < 1 Or Index > mCount Then Err.Raise 9
  End If
  If IsObject(DataTable(Index).Value) Then
    Set Item = DataTable(Index).Value
  Else
    Item = DataTable(Index).Value
  End If
End Property

Public Property Let Item(KeyOrOneBasedIndex, NewItem)
Dim Index As Long
  If VarType(KeyOrOneBasedIndex) = vbString Then
    Index = FindIndex(KeyOrOneBasedIndex, CalcHash(KeyOrOneBasedIndex))
    If Index = 0 Then Err.Raise 457
  Else
    Index = KeyOrOneBasedIndex
    If Index < 1 Or Index > mCount Then Err.Raise 9
  End If
  If IsObject(NewItem) Then
    Set DataTable(Index).Value = NewItem
  Else
    DataTable(Index).Value = NewItem
  End If
End Property
Public Property Set Item(KeyOrOneBasedIndex, NewItem)
  Item(KeyOrOneBasedIndex) = NewItem
End Property

Private Function FindIndex(Key, ByVal HashValue As Long) As Long
Dim i As Long, CM As VbCompareMethod
  With HashTable(HashValue)
    CM = CompareMode
    For i = 1 To UBound(.DataIndexes)
      If StrComp(Key, DataTable(.DataIndexes(i)).Key, CM) = 0 Then
        FindIndex = .DataIndexes(i): Exit Function
      End If
    Next
  End With 'returns Zero, when no Key can be found
End Function

Private Function CalcHash(Key) As Long
Dim i As Long, L As Long, B() As Byte
  If CompareMode Then B = LCase$(Key) Else B = Key
  L = 7919
    For i = UBound(B) To 0 Step -1: L = (i + B(i) + L) * 37 And &H7FFFFF: Next
  CalcHash = L * B(0) Mod mHashTableSize
End Function

Friend Sub CheckHashDistribution()
Dim i As Long, UB As Long, cc As Long, Min As Long, Max As Long
  Min = &H7FFFFFFF
  For i = 0 To UBound(HashTable)
    UB = UBound(HashTable(i).DataIndexes)
    If UB Then
      If Min > UB Then Min = UB
      If Max < UB Then Max = UB
      cc = cc + 1
    End If
  Next
  Debug.Print "Distribution over a HashTable with"; UBound(HashTable) + 1; "slots:"
  Debug.Print "Used-HashSlots:"; cc
  Debug.Print "Min-Entries:"; Min
  Debug.Print "Max-Entries:"; Max
End Sub

And here the Code of the Test-Form:
Code:

Option Explicit

Private Const TestEntryCount As Long = 100000

Private C As Collection, H As cHashList

Private Sub Form_Click()
Dim i As Long, T!, Item
  AutoRedraw = True
  Cls
  Print "Count of Test-Entries:"; TestEntryCount; vbLf
 
  Set C = New Collection
  Set H = New cHashList
      H.ReInit TestEntryCount
     
  T = Timer
    For i = 1 To TestEntryCount
      C.Add i, "K" & i
    Next
  Print "Collection-Add:", Timer - T & "sec"
 
  T = Timer
    For i = 1 To TestEntryCount
      H.Add i, "K" & i
    Next
  Print "cHashList-Add:", Timer - T & "sec"; vbLf
 
  T = Timer
    For i = 1 To TestEntryCount
      Item = C.Item("K" & i)
    Next
  Print "Collection-ItemByKey:", Timer - T & "sec"
 
  T = Timer
    For i = 1 To TestEntryCount
      Item = H.Item("K" & i)
    Next
  Print "cHashList-ItemByKey:", Timer - T & "sec"
 
  Print vbLf; "Indexed access is not compared (it would be much faster per HashList)"
  H.CheckHashDistribution
End Sub

Have fun with it...

Olaf

Viewing all articles
Browse latest Browse all 1529

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>