HASHTABLE

by nya on December 24th, 2009
No notes
Syntax: C
Show lines - Hide lines - Show in textbox - Download
//Implementation of methods
//TODO: - Table_Delete() call
//----------------------------------------------------------------------------//
//TODO:Refactoring.
 
//Table core structures. Not a subject to change
//Table Node
typedef struct TNode {
	TKey key;
	TVal val;
	struct TNode* next;	//Pointer to next node for chaining
} TNode;
 
//TODO: get appropiate names for member objects
typedef struct Table {
	LangUint size;	//Size of THash table: count of nodes with data
	LangUint hash_mask;	//Significiant bits of THash used to get data
	TNode** body;	//Array of pointers to node objects
} Table;
 
//Exported interface functions.
Export_Func Table* Table_Create();
Export_Func TVal Table_Get(Table* self, TKey key);
Export_Func void Table_Set(Table* self, TKey key, TVal val);
Export_Func void Table_DumpStat(Table* self);
 
//----------------------------------------------------------------------------//
//Constructors (Level-0 methods)
//TODO:Optimise use of memory with semi-custom block allocation
//Table Constructor
Table* Table_Create() {
	Table* inst = malloc(sizeof(Table));
	inst->size = 0;
	inst->hash_mask = 0;
	//Caution: needs be zero-filled
	inst->body = NULL;
	return inst;
}
 
//Node constructor
TNode* TNode_Create() {
	TNode* inst = malloc(sizeof(TNode));
	//Important: inst->next = NULL is actually a one element chain
	inst->next = NULL;
	return inst;
}
 
//----------------------------------------------------------------------------//
//Level-A (Core abstraction) methods, usually - trivial getters and setters
Level_A LangUint Table_GetLength(Table* self) {
	return self->size;
}
Level_A void Table_SetLength(Table* self, LangUint length) {
	self->size = length;
}
 
Level_A TNode* Table_GetNode(Table* self, LangUint index) {
	return self->body[index];
}
Level_A void Table_SetNode(Table* self, LangUint index, TNode* node) {
	self->body[index] = node;
}
 
Level_A LangUint Table_GetHmask(Table* self) {
	return self->hash_mask;
}
Level_A void Table_SetHmask(Table *self, LangUint hmask) {
	self->hash_mask = hmask;
}
 
//----------------------------------------------------------------------------//
//Level-B methods (Intermediate)
Macro_Func void Table_GrowHmask(Table* self) {
	LangUint old_mask = Table_GetHmask(self);
	LangUint new_mask;
 
	//Catch empty tables as special case
	if(old_mask == 0) {
		new_mask = 1;
	} else {
		//Size is at least 1, that means mask 0x1
		//This makes mask wider by 1 bit and replaces rightmost 0 with 1  
		new_mask = ((old_mask << 1) | 1);
	}
 
	Table_SetHmask(self, new_mask);
}
 
//Get a main position of key in table self (using hash_mask)
Macro_Func LangUint Table_GetMpos(Table* self, TKey key) {
	return TKey_GetHash(key) & Table_GetHmask(self);	
}
 
/*Insert left Node before right and return ptr to left node
**Before:
** /---\   /---\
** | L |   | R |--> ?
** \---/   \---/
**After:
** /---\   /---\
** | L |-->| R |--> ?
** \---/   \---/
*/
Macro_Func TNode* TNode_InsertToChain(TNode* left, TNode* right) {
	left->next = right;
	return left;
}
 
//Remove Node from chain. Returns chain. Firstelement is required in case
//it is removed, so second element is returned or NULL. In other cases
//firstelement is returned unchanged.
//Note: to remove node from chain we need to know previous , current and next
//nodes. So next is included in current, but we need also previous and first.
Macro_Func TNode* TNode_RemoveFromChain(TNode* firstelem, TNode* previous,
	TNode* to_remove) {
	if(previous == NULL && (firstelem == to_remove)) {
		return to_remove->next;
	} else {
		previous->next = to_remove->next;
		return firstelem;
	}
}
 
//Possibly move some nodes from current_position to given
//invariant_position. None nodes could be moved; chains may be empty.
//Node position is either current_pos or invariant_pos
//It is expected that this function is called after growing table:
//it makes possibility of splitting nonzero as hash_info becomes
//one-bit more accurate. Otherwise calling this F() is meaningless
Macro_Func void Table_SplitChains(Table* self, LangUint current_pos,
	LangUint invariant_pos) {
 
	TNode* current_chain = Table_GetNode(self, current_pos);
	TNode* invariant_chain = NULL;
 
	TNode* previous = NULL; //Previous node for case of removing
	TNode* current = current_chain; //Current node, for operating and checking
 
	TNode* temp_current; //Temp location for current node
 
	//Iterate over all nodes in (possibly empty) chain
	while(current != NULL) {
		//If node should be moved to invariant_position - do it
		if(Table_GetMpos(self, current->key) == invariant_pos) {
 
			//Remove current node from it's original chain
			current_chain = TNode_RemoveFromChain(
											current_chain,
											previous,
											current);
 
			//Save current to temp position as TNode_InsertToChain modifies it
			temp_current = current->next;
			//Insert removed node to it's new chain
			invariant_chain = TNode_InsertToChain(current, invariant_chain);
			current = temp_current;
		} else {
			//Traverse to next node: update previous and current
			previous = current;
			current = current->next;
		}
	}
 
	//Write modificated (or not) chains to table
	Table_SetNode(self, current_pos, current_chain);
	Table_SetNode(self, invariant_pos, invariant_chain);
}
 
//Resize Table's array block
Macro_Func void Table_GrowBody(Table* self, LangUint oldsize,
	LangUint newsize) {
 
	assert(newsize >= oldsize, "Attempt to shrink a table too much");
	assert(newsize <= MAX_TBODY_SIZE, "Table body size limit reached");
 
	//Actual newsize = newsize * bytes_per_TNode_ptr
	self->body = realloc(self->body, newsize * sizeof(TNode*));
 
	assert(self->body != NULL, "Fatal virtual memory error");
 
	//Rudimental clearing of new half of block
	//Quite same work is done as a side effect of Table_Rehash()
	LangUint i;
	for(i = oldsize; i < newsize; i += 1) {
		self->body[i] = NULL;
	}
}
 
//Get size of body block of Table self
Macro_Func LangUint Table_GetBodySize(Table* self) {
	//Note that size of body block is strictly connected with
	//width of table's hash_mask: actually size of body block
	// = hash_mask + 1 if hash_mask is non-zero.
	//In case of zero size is always zero (table is empty)
	LangUint hash_mask = Table_GetHmask(self);
	if(hash_mask == 0) {
		return 0;
	} else {
		return hash_mask + 1;
	}
}
 
//Grow table by next power of 2 ;grow hash_mask ;
//and replace keys using more accurate hash_info
//(because of new hash_mask)
Macro_Func void Table_Grow(Table* self) {
	LangUint old_body_size = Table_GetBodySize(self);
 
	Table_GrowHmask(self);
 
	LangUint new_body_size = Table_GetBodySize(self);
 
	Table_GrowBody(self, old_body_size, new_body_size);
 
	//Rehash every chain
	size_t i;
	const size_t rehash_distance = old_body_size;
	for(i = 0; i < old_body_size; i += 1) {
		Table_SplitChains(self, i, i + rehash_distance);
	}
}
 
//Search chain of Nodes, in table self at index with key key
//Return TNode* if found, and NULL if not
Macro_Func TNode* Table_SearchChain(Table* self, LangUint index, TKey key) {
	TNode* current; //Cuurent node to be checked
 
	current = Table_GetNode(self, index);
 
	//Traverse colliding nodes to the end
	while(current != NULL) {
		//Check equality of keys with colliding hashes
		if( TKey_Equal(current->key ,key) ) {
			return current;
		}
		current = current->next;
	}
	//If key not found finally return NULL
	return NULL;
}
 
//Check if Table becomes overcrowded
Macro_Func bool Table_NeedsGrowing(Table* self) {
 
	assert(self->size <= MAX_TABLE_SIZE, "Table size limit reached");
 
	/*If we have as many nodes as diffirent bit-patterns in hash_mask,
	**tables is known as "overcrowded" - next element makes population
	**of table > 100%, and we have to grow it.
	**Note that not all main ("body") node positions can be occupied.
	**Note also that for empty table first Table_Set call means rehash.
	*/
	if(Table_GetLength(self) >= Table_GetBodySize(self)) {
		return true;
	} else {
		return false;
	}
}
//----------------------------------------------------------------------------//
//Level-C methods(Interface)
 
//Set association for TKey->TVal in table self
Level_C void Table_Set(Table* self, TKey key, TVal val) {
	//Check if table can accept new key
	if(Table_NeedsGrowing(self)) {
		Table_Grow(self);
	}
 
	//Get main index of key
	LangUint key_mpos = Table_GetMpos(self, key);
 
	//Check if key should replace previous association for that key
	TNode* same_key_node = Table_SearchChain(self, key_mpos, key);
 
	if(same_key_node == NULL) {
		//Create new node
		TNode* node = TNode_Create();
		node->key = key;
		node->val = val;
 
		//Insert node to index of table self, probably to collision chain
		TNode* right = Table_GetNode(self, key_mpos);
		node = TNode_InsertToChain(node, right);
		Table_SetNode(self, key_mpos, node);
		//Finally, increment length of table
		self->size += 1;
	} else {
		//No need to create a new node
		//Just set a new value to already existing with same key
		same_key_node->val = val;
		//Note that there is no need to increment length
	}
}
 
//Get TVal from Table self with TKey
Level_C TVal Table_Get(Table* self, TKey key) {
	LangUint key_mpos = Table_GetMpos(self, key);
	//If exists get value from chain
	TNode* value_node = Table_SearchChain(self, key_mpos, key);
	//Check for error condition
	if(value_node == NULL) {
		return NIL;
	} else {
		return value_node->val;
	}
}
 
//Dump characterstics of table and graph representation of it's body
Level_C void Table_Dump(Table* self) {
	assert(Table_GetLength(self) < MAX_PRINT_TLENGTH, "Table too big to dump");
 
	printf("{Dump of table at %p}\n", self);
	printf("{Length = %i}\n", Table_GetLength(self));
	printf("{Hash_mask = %X}\n", Table_GetHmask(self));
 
	LangUint i;
	for(i=0; i<Table_GetBodySize(self); i += 1) {
		TNode* current = Table_GetNode(self, i);
		if(current == NULL) {
			printf("[NIL]");
		}
		while(current != NULL) {
			//Print abstract TKey and corresponfing TVal, using tenplate functions.
			printf("[");
			TKey_Print(current->key);
			printf(":");
			TVal_Print(current->val);
			printf("]");
			//Move to next node in chain.
			current = current->next;
		}
		printf("\n");
	}
}
 
//Dump some statistics of table's internals
Level_C void Table_DumpStat(Table* self) {
	//Iterate over all cells in table's body
	LangUint i;
	//Statistic vars
	LangUint num_empty_nodes = 0;
	LangUint num_busy_nodes = 0;
 
	float avg_nodes_per_chain = 0;
	LangUint temp_num_nodes = 0;
 
	TNode* current = NULL;
	for(i=0; i<Table_GetBodySize(self); i+=1) {
		//Count empty nodes
		if(Table_GetNode(self, i) == NULL) {
			//Increment counter of empty nodes
			num_empty_nodes += 1;
		} else {
			//Count number of elements in chain
			for(current = Table_GetNode(self, i);
				current != NULL;
				current = current->next) {
				temp_num_nodes += 1;
			}
 
		}
	}
 
	//Print data
	num_busy_nodes = Table_GetBodySize(self) - num_empty_nodes;
	printf("{%i busy nodes}\n", num_busy_nodes);
 
	printf("{%i empty nodes}\n", num_empty_nodes);
 
	avg_nodes_per_chain = (float)Table_GetLength(self)/(float)num_busy_nodes;
	printf("{%f nodes per chain}\n", avg_nodes_per_chain);
}
 
 
 
 

Leave a Reply

Note: XHTML is allowed. Your email address will never be published.

Subscribe to this comment feed via RSS