HASHTABLE
No notes
Syntax:
C
//Implementation of methods //TODO: - Table_Delete() call //----------------------------------------------------------------------------// //TODO:Refactoring. //Table core structures. Not a subject to change //Table Node typedef struct TNode { TKey key; TVal val; struct TNode* next; //Pointer to next node for chaining } TNode; //TODO: get appropiate names for member objects typedef struct Table { LangUint size; //Size of THash table: count of nodes with data LangUint hash_mask; //Significiant bits of THash used to get data TNode** body; //Array of pointers to node objects } Table; //Exported interface functions. Export_Func Table* Table_Create(); Export_Func TVal Table_Get(Table* self, TKey key); Export_Func void Table_Set(Table* self, TKey key, TVal val); Export_Func void Table_DumpStat(Table* self); //----------------------------------------------------------------------------// //Constructors (Level-0 methods) //TODO:Optimise use of memory with semi-custom block allocation //Table Constructor Table* Table_Create() { Table* inst = malloc(sizeof(Table)); inst->size = 0; inst->hash_mask = 0; //Caution: needs be zero-filled inst->body = NULL; return inst; } //Node constructor TNode* TNode_Create() { TNode* inst = malloc(sizeof(TNode)); //Important: inst->next = NULL is actually a one element chain inst->next = NULL; return inst; } //----------------------------------------------------------------------------// //Level-A (Core abstraction) methods, usually - trivial getters and setters Level_A LangUint Table_GetLength(Table* self) { return self->size; } Level_A void Table_SetLength(Table* self, LangUint length) { self->size = length; } Level_A TNode* Table_GetNode(Table* self, LangUint index) { return self->body[index]; } Level_A void Table_SetNode(Table* self, LangUint index, TNode* node) { self->body[index] = node; } Level_A LangUint Table_GetHmask(Table* self) { return self->hash_mask; } Level_A void Table_SetHmask(Table *self, LangUint hmask) { self->hash_mask = hmask; } //----------------------------------------------------------------------------// //Level-B methods (Intermediate) Macro_Func void Table_GrowHmask(Table* self) { LangUint old_mask = Table_GetHmask(self); LangUint new_mask; //Catch empty tables as special case if(old_mask == 0) { new_mask = 1; } else { //Size is at least 1, that means mask 0x1 //This makes mask wider by 1 bit and replaces rightmost 0 with 1 new_mask = ((old_mask << 1) | 1); } Table_SetHmask(self, new_mask); } //Get a main position of key in table self (using hash_mask) Macro_Func LangUint Table_GetMpos(Table* self, TKey key) { return TKey_GetHash(key) & Table_GetHmask(self); } /*Insert left Node before right and return ptr to left node **Before: ** /---\ /---\ ** | L | | R |--> ? ** \---/ \---/ **After: ** /---\ /---\ ** | L |-->| R |--> ? ** \---/ \---/ */ Macro_Func TNode* TNode_InsertToChain(TNode* left, TNode* right) { left->next = right; return left; } //Remove Node from chain. Returns chain. Firstelement is required in case //it is removed, so second element is returned or NULL. In other cases //firstelement is returned unchanged. //Note: to remove node from chain we need to know previous , current and next //nodes. So next is included in current, but we need also previous and first. Macro_Func TNode* TNode_RemoveFromChain(TNode* firstelem, TNode* previous, TNode* to_remove) { if(previous == NULL && (firstelem == to_remove)) { return to_remove->next; } else { previous->next = to_remove->next; return firstelem; } } //Possibly move some nodes from current_position to given //invariant_position. None nodes could be moved; chains may be empty. //Node position is either current_pos or invariant_pos //It is expected that this function is called after growing table: //it makes possibility of splitting nonzero as hash_info becomes //one-bit more accurate. Otherwise calling this F() is meaningless Macro_Func void Table_SplitChains(Table* self, LangUint current_pos, LangUint invariant_pos) { TNode* current_chain = Table_GetNode(self, current_pos); TNode* invariant_chain = NULL; TNode* previous = NULL; //Previous node for case of removing TNode* current = current_chain; //Current node, for operating and checking TNode* temp_current; //Temp location for current node //Iterate over all nodes in (possibly empty) chain while(current != NULL) { //If node should be moved to invariant_position - do it if(Table_GetMpos(self, current->key) == invariant_pos) { //Remove current node from it's original chain current_chain = TNode_RemoveFromChain( current_chain, previous, current); //Save current to temp position as TNode_InsertToChain modifies it temp_current = current->next; //Insert removed node to it's new chain invariant_chain = TNode_InsertToChain(current, invariant_chain); current = temp_current; } else { //Traverse to next node: update previous and current previous = current; current = current->next; } } //Write modificated (or not) chains to table Table_SetNode(self, current_pos, current_chain); Table_SetNode(self, invariant_pos, invariant_chain); } //Resize Table's array block Macro_Func void Table_GrowBody(Table* self, LangUint oldsize, LangUint newsize) { assert(newsize >= oldsize, "Attempt to shrink a table too much"); assert(newsize <= MAX_TBODY_SIZE, "Table body size limit reached"); //Actual newsize = newsize * bytes_per_TNode_ptr self->body = realloc(self->body, newsize * sizeof(TNode*)); assert(self->body != NULL, "Fatal virtual memory error"); //Rudimental clearing of new half of block //Quite same work is done as a side effect of Table_Rehash() LangUint i; for(i = oldsize; i < newsize; i += 1) { self->body[i] = NULL; } } //Get size of body block of Table self Macro_Func LangUint Table_GetBodySize(Table* self) { //Note that size of body block is strictly connected with //width of table's hash_mask: actually size of body block // = hash_mask + 1 if hash_mask is non-zero. //In case of zero size is always zero (table is empty) LangUint hash_mask = Table_GetHmask(self); if(hash_mask == 0) { return 0; } else { return hash_mask + 1; } } //Grow table by next power of 2 ;grow hash_mask ; //and replace keys using more accurate hash_info //(because of new hash_mask) Macro_Func void Table_Grow(Table* self) { LangUint old_body_size = Table_GetBodySize(self); Table_GrowHmask(self); LangUint new_body_size = Table_GetBodySize(self); Table_GrowBody(self, old_body_size, new_body_size); //Rehash every chain size_t i; const size_t rehash_distance = old_body_size; for(i = 0; i < old_body_size; i += 1) { Table_SplitChains(self, i, i + rehash_distance); } } //Search chain of Nodes, in table self at index with key key //Return TNode* if found, and NULL if not Macro_Func TNode* Table_SearchChain(Table* self, LangUint index, TKey key) { TNode* current; //Cuurent node to be checked current = Table_GetNode(self, index); //Traverse colliding nodes to the end while(current != NULL) { //Check equality of keys with colliding hashes if( TKey_Equal(current->key ,key) ) { return current; } current = current->next; } //If key not found finally return NULL return NULL; } //Check if Table becomes overcrowded Macro_Func bool Table_NeedsGrowing(Table* self) { assert(self->size <= MAX_TABLE_SIZE, "Table size limit reached"); /*If we have as many nodes as diffirent bit-patterns in hash_mask, **tables is known as "overcrowded" - next element makes population **of table > 100%, and we have to grow it. **Note that not all main ("body") node positions can be occupied. **Note also that for empty table first Table_Set call means rehash. */ if(Table_GetLength(self) >= Table_GetBodySize(self)) { return true; } else { return false; } } //----------------------------------------------------------------------------// //Level-C methods(Interface) //Set association for TKey->TVal in table self Level_C void Table_Set(Table* self, TKey key, TVal val) { //Check if table can accept new key if(Table_NeedsGrowing(self)) { Table_Grow(self); } //Get main index of key LangUint key_mpos = Table_GetMpos(self, key); //Check if key should replace previous association for that key TNode* same_key_node = Table_SearchChain(self, key_mpos, key); if(same_key_node == NULL) { //Create new node TNode* node = TNode_Create(); node->key = key; node->val = val; //Insert node to index of table self, probably to collision chain TNode* right = Table_GetNode(self, key_mpos); node = TNode_InsertToChain(node, right); Table_SetNode(self, key_mpos, node); //Finally, increment length of table self->size += 1; } else { //No need to create a new node //Just set a new value to already existing with same key same_key_node->val = val; //Note that there is no need to increment length } } //Get TVal from Table self with TKey Level_C TVal Table_Get(Table* self, TKey key) { LangUint key_mpos = Table_GetMpos(self, key); //If exists get value from chain TNode* value_node = Table_SearchChain(self, key_mpos, key); //Check for error condition if(value_node == NULL) { return NIL; } else { return value_node->val; } } //Dump characterstics of table and graph representation of it's body Level_C void Table_Dump(Table* self) { assert(Table_GetLength(self) < MAX_PRINT_TLENGTH, "Table too big to dump"); printf("{Dump of table at %p}\n", self); printf("{Length = %i}\n", Table_GetLength(self)); printf("{Hash_mask = %X}\n", Table_GetHmask(self)); LangUint i; for(i=0; i<Table_GetBodySize(self); i += 1) { TNode* current = Table_GetNode(self, i); if(current == NULL) { printf("[NIL]"); } while(current != NULL) { //Print abstract TKey and corresponfing TVal, using tenplate functions. printf("["); TKey_Print(current->key); printf(":"); TVal_Print(current->val); printf("]"); //Move to next node in chain. current = current->next; } printf("\n"); } } //Dump some statistics of table's internals Level_C void Table_DumpStat(Table* self) { //Iterate over all cells in table's body LangUint i; //Statistic vars LangUint num_empty_nodes = 0; LangUint num_busy_nodes = 0; float avg_nodes_per_chain = 0; LangUint temp_num_nodes = 0; TNode* current = NULL; for(i=0; i<Table_GetBodySize(self); i+=1) { //Count empty nodes if(Table_GetNode(self, i) == NULL) { //Increment counter of empty nodes num_empty_nodes += 1; } else { //Count number of elements in chain for(current = Table_GetNode(self, i); current != NULL; current = current->next) { temp_num_nodes += 1; } } } //Print data num_busy_nodes = Table_GetBodySize(self) - num_empty_nodes; printf("{%i busy nodes}\n", num_busy_nodes); printf("{%i empty nodes}\n", num_empty_nodes); avg_nodes_per_chain = (float)Table_GetLength(self)/(float)num_busy_nodes; printf("{%f nodes per chain}\n", avg_nodes_per_chain); }