This is the mail archive of the guile@cygnus.com mailing list for the guile project.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
/***************************************************************************** * Copyright (C) 1998, Jay Glascoe, NASA Goddard Institute for Space Studies * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. *****************************************************************************/ /***************************************************************************** * File "hashtab-type.c", by Jay Glascoe. 10/27/98 * Inspired by Jim Blandy's image-type example, * the Python dictionary object, and a whole host * of messages on the Guile mailing list. * Special thanks to Maciej Stachowiak of MIT * and Harvey J. Stein of BFM Financial Research. * * This is a Guile extension providing procedures to * create and operate on auto-resizing hash tables. * * A hash table is a pair, it looks like this * * (header . vector) * * The car of the pair, the header, is a vector. It's used to * keep track of various book keeping details. It looks * like this * * #(number-entries number-nonempty-buckets auto-shrink-flag) * * The cdr of the hash table pair is also a vector. It's * elements are referred to as "buckets" or "entry lists". * It looks like this * * #(bucket1 bucket2 ... bucketN) * * Each bucket is a list of entries * * (entry1 entry2 ... entryN) * * Each entry is a pair, or "association". The car of the * association is the "hash value" of the entry's key. The * cdr of the association is a key-value pair. Entries look * like this * * (hash (key . value)) * * The procedures defined in this extension are listed below. * * primitive: make-hashtab * Return a new auto-resizing hash table (auto-shrink is * disabled by default). * * primitive: hashtab-enable hashtab symbol * primitive: hashtab-disable hashtab symbol * "symbol" should be one of 'auto-shrink, ... [no others yet] * Turn the specified option on/off. * * primitive: hashtab-set! hashtab key value * Insert "value" in hash table "hashtab" under key "key". * If "key" is already present in "hashtab", overwrite the * previous associated value with "value". * * primitive: hashtab-ref hashtab key [default] * Look up "key" in the hash table "hashtab", and return the value * (if any) associated with it. If key is not found, return * default (or #f if no default argument is supplied). * * primitive: hashtab-del! hashtab key [default] * Delete the entry in hash table "hashtab" having key "key", * and return the value associated with "key" (if any). * If key is not found, return default (or #f if no default * argument is supplied). * *****************************************************************************/ #include <math.h> #include <libguile.h> #include <guile/gh.h> /* #include "my-hasher.h" */ /* this is okay for hashes with up to 1,000,000 entries */ #define my_hasher(hash_ptr, obj, message, proc_name) \ *(hash_ptr) = scm_hasher((obj), 2097157L, 10) #define DEFAULT_NUMBER_BUCKETS 4 #define MAX_MEAN_NONEMPTY_BUCKETS_SIZE 3 /* 3.14159265 ;) */ static SCM my_scm_make_vector(register long i, SCM fill) { /* modified to allow for a long i (rather than a SCM i) */ SCM v; register long j; SCM *velts; SCM_NEWCELL(v); SCM_DEFER_INTS; SCM_SETCHARS(v, scm_must_malloc(i?(long)(i*sizeof(SCM)):1L, "my_scm_make_vector")); SCM_SETLENGTH(v, i, scm_tc7_vector); velts = SCM_VELTS(v); j = 0; while (--i >= j) (velts)[i] = fill; SCM_ALLOW_INTS; return v; } #define make_dflt_bucket() SCM_EOL #define make_entry(hash, key, value) \ scm_cons(scm_long2num(hash), scm_cons(key, value)) static SCM auto_shrink_symbol; static SCM default_bucket; static void init_mysymbols(void) /* called by init_hashtab_type */ { default_bucket = SCM_CDR(scm_sysintern("*default-hashtab-bucket*", make_dflt_bucket())); auto_shrink_symbol = SCM_CAR(scm_sysintern0("auto-shrink")); return; } #define NUMBER_ENTRIES_INDEX 0 #define NUMBER_NONEMPTY_BUCKETS_INDEX 1 #define AUTO_SHRINK_FLAG_INDEX 2 #define NUMBER_HEADER_ELEMENTS 3 static SCM make_header(SCM number_entries, SCM number_nonempty_buckets, SCM auto_shrink_flag) { /* #(number-entries number-nonempty-buckets auto-shrink-flag) */ SCM vector = my_scm_make_vector(NUMBER_HEADER_ELEMENTS, SCM_UNDEFINED); SCM *velts = SCM_VELTS(vector); velts[NUMBER_ENTRIES_INDEX] = number_entries; velts[NUMBER_NONEMPTY_BUCKETS_INDEX] = number_nonempty_buckets; velts[AUTO_SHRINK_FLAG_INDEX] = auto_shrink_flag; return vector; } static SCM my_make_hashtab(SCM, SCM); SCM_PROC(s_make_hashtab, "make-hashtab", 0, 0, 0, make_hashtab); static SCM make_hashtab(void) { /* an argument-less constructor. fool proof! ;) */ long inumber_buckets = DEFAULT_NUMBER_BUCKETS; SCM auto_shrink_flag = SCM_BOOL_F; return my_make_hashtab(inumber_buckets, auto_shrink_flag); } static SCM my_make_hashtab(long inumber_buckets, SCM auto_shrink_flag) { SCM header = make_header(SCM_INUM0, SCM_INUM0, auto_shrink_flag); SCM vector = my_scm_make_vector(inumber_buckets, SCM_EOL); SCM hashtab = scm_cons(header, vector); return hashtab; } SCM_PROC(s_hashtab_enable, "hashtab-enable", 2, 0, 0, hashtab_enable); SCM_PROC(s_hashtab_disable, "hashtab-disable", 2, 0, 0, hashtab_disable); static SCM hashtab_enable(SCM hashtab, SCM symbol) { /* turn auto-shrink on */ SCM header = SCM_CAR(hashtab); SCM *header_elts = SCM_VELTS(header); SCM_ASSERT(symbol == auto_shrink_symbol, symbol, SCM_ARG2, "hashtab-enable"); header_elts[AUTO_SHRINK_FLAG_INDEX] = SCM_BOOL_T; return SCM_BOOL_T; } static SCM hashtab_disable(SCM hashtab, SCM symbol) { /* turn auto-shrink off */ SCM header = SCM_CAR(hashtab); SCM *header_elts = SCM_VELTS(header); SCM_ASSERT(symbol == auto_shrink_symbol, symbol, SCM_ARG2, "hashtab-disable"); header_elts[AUTO_SHRINK_FLAG_INDEX] = SCM_BOOL_T; return SCM_BOOL_F; } SCM_PROC(s_hashtab_ref, "hashtab-ref", 2, 1, 0, hashtab_ref); static SCM hashtab_ref(SCM hashtab, SCM key, SCM not_here) { /* Given a hash table, a key, and (optionally) a "not-here" * object, return the value associated with the key. * Return "not-here" (#f by default) if there is no such * key in the hashtable. */ SCM vector = SCM_CDR(hashtab); SCM *velts = SCM_VELTS(vector); long vec_len = SCM_LENGTH(vector); long hash = 0; long i; SCM bucket, tail; my_hasher(&hash, key, SCM_ARG2, "hashtab-ref"); i = hash & (vec_len - 1); bucket = velts[i]; if (SCM_UNBNDP(not_here)) not_here = SCM_BOOL_F; for (tail = bucket; tail != SCM_EOL; tail = SCM_CDR(tail)) { SCM entry = SCM_CAR(tail); SCM my_scm_hash = SCM_CAR(entry); long myhash = scm_num2long(my_scm_hash, "woops!", "hashtab-ref"); if (myhash == hash) { SCM pair = SCM_CDR(entry); SCM mykey = SCM_CAR(pair); if (scm_equal_p(mykey, key) == SCM_BOOL_T) return SCM_CDR(pair); } } return not_here; } static SCM my_hashtab_set(SCM hashtab,long hash, SCM key, SCM value,int *resize_flag_ptr) { /* called by hashtab_setx * Insert the key-value pair into the hashtable. * Signal the need for resize if the table is too small. */ SCM myvec = SCM_CDR(hashtab); SCM header = SCM_CAR(hashtab); SCM *header_elts = SCM_VELTS(header); long vec_len = SCM_LENGTH(myvec); long i = hash & (vec_len - 1); SCM number_entries = header_elts[NUMBER_ENTRIES_INDEX]; long inumber_entries = SCM_INUM(number_entries); SCM *velts = SCM_VELTS(myvec); SCM bucket = velts[i]; int bucket_size = 1; SCM old_entry_list = bucket; SCM entry_list; if (bucket == SCM_EOL) bucket_size = 0; for (entry_list = old_entry_list; entry_list != SCM_EOL; entry_list = SCM_CDR(entry_list)) { SCM entry = SCM_CAR(entry_list); SCM my_scm_hash = SCM_CAR(entry); SCM pair; SCM mykey; long myhash; myhash = scm_num2long(my_scm_hash, "bif!", "hashtab-set!"); if (myhash != hash) continue; pair = SCM_CDR(entry); mykey = SCM_CAR(pair); if (scm_equal_p(mykey, key) == SCM_BOOL_T) { SCM_SETCDR(pair, value); return value; } } /* cons new entry onto bucket, and do some book-keeping */ { SCM entry = make_entry(hash, key, value); SCM number_entries = header_elts[NUMBER_ENTRIES_INDEX]; long inumber_entries = SCM_INUM(number_entries); SCM number_nonempty_buckets = header_elts[NUMBER_NONEMPTY_BUCKETS_INDEX]; long inumber_nonempty_buckets = SCM_INUM(number_nonempty_buckets); if (bucket_size == 0) { ++inumber_nonempty_buckets; header_elts[NUMBER_NONEMPTY_BUCKETS_INDEX] = SCM_MAKINUM(inumber_nonempty_buckets); velts[i] = scm_cons(entry, SCM_EOL); } else SCM_SETCDR(bucket, scm_cons(entry, SCM_CDR(bucket))); ++inumber_entries; header_elts[NUMBER_ENTRIES_INDEX] = SCM_MAKINUM(inumber_entries); if (inumber_entries > MAX_MEAN_NONEMPTY_BUCKETS_SIZE * inumber_nonempty_buckets) *resize_flag_ptr = 1; return value; } } SCM_PROC(s_hashtab_setx, "hashtab-set!", 3, 0, 0, hashtab_setx); static SCM hashtab_setx(SCM hashtab, SCM key, SCM value) { /* Calls my_hashtab_set after hashing the key argument. * If my_hashtab_set signals that resize is necessary, * then call resize_hashtab. */ long hash = 0; int resize_flag = 0; SCM retval; my_hasher(&hash, key, SCM_ARG2, "hashtab-set!"); retval = my_hashtab_set(hashtab, hash, key, value, &resize_flag); if (resize_flag == 1) { SCM new_hashtab = resize_hashtab(hashtab, 1); SCM_SETCAR(hashtab, SCM_CAR(new_hashtab)); SCM_SETCDR(hashtab, SCM_CDR(new_hashtab)); } return retval; } static SCM resize_hashtab(SCM hashtab, int flag) { /* assuming (correctly) that each key from the old * hash table is unique is a big win here, so we don't * call hashtab_setx. instead, we duplicate its code * and make the optimizing modifications */ SCM header = SCM_CAR(hashtab); SCM *header_elts = SCM_VELTS(header); SCM old_vec = SCM_CDR(hashtab); SCM *old_velts = SCM_VELTS(old_vec); register long old_len = SCM_LENGTH(old_vec); SCM auto_shrink_flag = header_elts[AUTO_SHRINK_FLAG_INDEX]; register long inumber_nonempty_buckets = 0; register long inumber_entries = 0; long new_len; SCM new_hashtab; SCM *new_velts; SCM *new_header_elts; SCM my_inum1 = SCM_MAKINUM(1); long new_len_minus_one; register long i; if (flag == -1) /* shrink the hash table */ new_len = old_len / 2; else /* grow the hash table */ new_len = old_len * 2; new_len_minus_one = new_len - 1; new_hashtab = my_make_hashtab(new_len, auto_shrink_flag); new_velts = SCM_VELTS(SCM_CDR(new_hashtab)); new_header_elts = SCM_VELTS(SCM_CAR(new_hashtab)); for (i = 0; i < old_len; ++i) { SCM entry_list; SCM bucket = old_velts[i]; for (entry_list = bucket; entry_list != SCM_EOL; entry_list = SCM_CDR(entry_list), SCM_SETCDR(bucket, entry_list)) /* delete the old bucket * en passant. */ { SCM entry = SCM_CAR(entry_list); long myhash; SCM my_scm_hash = SCM_CAR(entry); long index = 0; SCM new_bucket; myhash = scm_num2long(my_scm_hash, "jeez!", "resize_hashtab"); index = myhash & new_len_minus_one; new_bucket = new_velts[index]; if (new_bucket == SCM_EOL) { new_velts[index] = scm_cons(entry, SCM_EOL); ++inumber_nonempty_buckets; } else SCM_SETCDR(new_bucket, scm_cons(entry, SCM_CDR(new_bucket))); ++inumber_entries; } /* let go of the old bucket (what's left of it anyway) * so Guile may gc it. Apologies to anyone holding on * to the old bucket, but any code depending on the previous * state of the hash should fail gracefully (any such code * shouldn't have been written in the first place). */ old_velts[i] = SCM_EOL; } /* book keeping */ { SCM new_number_nonempty_buckets =SCM_MAKINUM(inumber_nonempty_buckets); SCM new_number_entries = SCM_MAKINUM(inumber_entries); SCM new_header = SCM_CAR(new_hashtab); SCM *new_header_elts = SCM_VELTS(new_header); new_header_elts[NUMBER_NONEMPTY_BUCKETS_INDEX] = new_number_nonempty_buckets; new_header_elts[NUMBER_ENTRIES_INDEX] = new_number_entries; if (flag == -1 && inumber_entries > MAX_MEAN_NONEMPTY_BUCKETS_SIZE * inumber_nonempty_buckets) { fprintf(stderr, "PANIC: downsized hash table prematurely. " "disabling auto-shrink.\n"); fflush(stderr); new_header_elts[AUTO_SHRINK_FLAG_INDEX] = SCM_BOOL_F; } return new_hashtab; } } static void my_scm_delete_x(register long hash, SCM *bucket_ptr, SCM key, SCM *value_ptr, register short *found_flag_ptr) { /* heavily modified version of scm_delete_x. * now PH balanced for women. */ SCM *prev; SCM walk; for (prev = bucket_ptr, walk = *bucket_ptr; walk != SCM_EOL; walk = SCM_CDR (walk)) { SCM entry = SCM_CAR(walk); if (hash == scm_num2long(SCM_CAR(entry), "tweet!", "my_scm_delete_x")) { SCM pair = SCM_CDR(entry); if (SCM_BOOL_T == scm_equal_p(SCM_CAR(pair), key)) { *value_ptr = SCM_CDR(pair); *prev = SCM_CDR(walk); *found_flag_ptr = 1; break; } else prev = SCM_CDRLOC (walk); } else prev = SCM_CDRLOC (walk); } return; } SCM_PROC(s_hashtab_delx, "hashtab-del!", 2, 1, 0, hashtab_delx); static SCM hashtab_delx(SCM hashtab, SCM key, SCM not_here) { /* calls my_scm_delete_x for the actual deletion * then performs book keeping and auto-shrinks if necessary * * NOTE: I've put as many optimizations as I can think of in * here, but it's still slow (relative to Guile's fixed size * hash tables). Why? Because (hash (key . value)) is * significantly larger than just (key . value) and guile * has an easier time gc'ing the smaller of the two. */ SCM vector = SCM_CDR(hashtab); SCM *velts = SCM_VELTS(vector); long vec_len = SCM_LENGTH(vector); long hash = 0; long i; SCM value; short found_flag = 0; SCM *bucket_ptr; if (SCM_UNBNDP(not_here)) not_here = SCM_BOOL_F; my_hasher(&hash, key, SCM_ARG2, "hashtab-del!"); i = hash & (vec_len - 1); bucket_ptr = velts + i; my_scm_delete_x(hash, bucket_ptr, key, &value, &found_flag); if (! found_flag) return not_here; { SCM header = SCM_CAR(hashtab); SCM *header_elts = SCM_VELTS(header); SCM number_nonempty_buckets = header_elts[NUMBER_NONEMPTY_BUCKETS_INDEX]; long inumber_nonempty_buckets = SCM_INUM(number_nonempty_buckets); SCM number_entries = header_elts[NUMBER_ENTRIES_INDEX]; long inumber_entries = SCM_INUM(number_entries); SCM auto_shrink_flag = header_elts[AUTO_SHRINK_FLAG_INDEX]; if (*bucket_ptr == SCM_EOL) { --inumber_nonempty_buckets; number_nonempty_buckets = SCM_MAKINUM(inumber_nonempty_buckets); header_elts[NUMBER_NONEMPTY_BUCKETS_INDEX] = number_nonempty_buckets; } --inumber_entries; number_entries = SCM_MAKINUM(inumber_entries); header_elts[NUMBER_ENTRIES_INDEX] = number_entries; if (auto_shrink_flag == SCM_BOOL_F) return value; if (vec_len > DEFAULT_NUMBER_BUCKETS && 2 * inumber_entries < inumber_nonempty_buckets * MAX_MEAN_NONEMPTY_BUCKETS_SIZE) { SCM new_hashtab = resize_hashtab(hashtab, -1); SCM_SETCAR(hashtab, SCM_CAR(new_hashtab)); SCM_SETCDR(hashtab, SCM_CDR(new_hashtab)); } return value; } } void init_hashtab_type(void) { init_mysymbols(); scm_make_gsubr (s_make_hashtab, 0, 0, 0, make_hashtab); scm_make_gsubr (s_hashtab_ref, 2, 1, 0, hashtab_ref); scm_make_gsubr (s_hashtab_setx, 3, 0, 0, hashtab_setx); scm_make_gsubr (s_hashtab_delx, 2, 1, 0, hashtab_delx); scm_make_gsubr (s_hashtab_enable, 2, 0, 0, hashtab_enable); scm_make_gsubr (s_hashtab_disable, 2, 0, 0, hashtab_disable); }