This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: calloc() implementation question


Ondřej,

On Thu, 2013-12-12 at 19:41 +0100, Ondřej Bílka wrote:
> To see how big is speedup/loss write a benchmark that compares variant
> with memset and one with inline expansion. 
> 
> void *
> calloc2 (size_t n)
> {
>   return memset (malloc (n), 0, n);
> }
> 
> void *
> calloc3 (size_t n)
> {
>   void *x = malloc (n);
>   if (n < 9 * 16)
>     ...
> }

That was an excellent suggestion, as it gave me some new insight into
glibc malloc/calloc performance.

First, the manually unrolled memset did turn out to be faster than the
unconditional memset() call.

The other unexpected result was that malloc + memset() (calloc2()) is
always faster than the real calloc() for allocation size <= ~ 64KB.

That was unexpected, as I thought that because calloc may skip the memset
step, it would be faster, or at worst on par with malloc + memset().

I ran my tests on Linux kernel 3.12.4 on an
Intel(R) Atom(TM) CPU N455   @ 1.66GHz

#include <stdlib.h>
#include <string.h>

/*
 * Baseline variant: hands the request to the C library's calloc()
 * unchanged and returns whatever it returns.
 */
void *calloc1(size_t nmemb, size_t size)
{
	void *block = calloc(nmemb, size);

	return block;
}

/*
 * calloc() stand-in built from malloc() followed by an unconditional
 * memset() of the whole request.
 *
 * Returns a zero-filled block of nmemb * size bytes, or NULL when the
 * product overflows size_t or malloc() fails.
 */
void *calloc2(size_t nmemb, size_t size)
{
	size_t total;
	void *p;

	/* (size_t)-1 is the largest size_t value; reject a wrapping product. */
	if (size != 0 && nmemb > (size_t)-1 / size)
		return NULL;

	total = nmemb * size;
	p = malloc(total);
	if (p == NULL)
		return NULL;	/* never hand NULL to memset() */

	return memset(p, 0, total);
}

/*
 * calloc() stand-in that clears small requests with inline word stores
 * instead of calling memset().
 *
 * Requests of at most 9 whole size_t words are zeroed by an unrolled
 * sequence of stores; anything larger, or any request whose byte count
 * is not a multiple of sizeof(size_t), falls back to memset() so that
 * every requested byte is cleared and nothing past the end is written.
 *
 * Returns a zero-filled block of nmemb * size bytes, or NULL when the
 * product overflows size_t or malloc() fails.
 */
void *calloc3(size_t nmemb, size_t size)
{
	size_t clearsize;
	size_t nclears;
	size_t *d;

	/* (size_t)-1 is the largest size_t value; reject a wrapping product. */
	if (size != 0 && nmemb > (size_t)-1 / size)
		return NULL;

	clearsize = nmemb * size;
	d = malloc(clearsize);
	if (d == NULL)
		return NULL;

	nclears = clearsize / sizeof(size_t);
	if (nclears > 9 || clearsize % sizeof(size_t) != 0) {
		memset(d, 0, clearsize);
		return d;
	}

	/*
	 * Enter at the exact word count and fall through, so precisely
	 * nclears words are stored: no overrun for tiny requests and no
	 * uncleared tail for even counts.
	 */
	switch (nclears) {
	case 9: d[8] = 0; /* fallthrough */
	case 8: d[7] = 0; /* fallthrough */
	case 7: d[6] = 0; /* fallthrough */
	case 6: d[5] = 0; /* fallthrough */
	case 5: d[4] = 0; /* fallthrough */
	case 4: d[3] = 0; /* fallthrough */
	case 3: d[2] = 0; /* fallthrough */
	case 2: d[1] = 0; /* fallthrough */
	case 1: d[0] = 0; /* fallthrough */
	default:
		break;
	}
	return d;
}

#include <stdlib.h>
#include <time.h>
#include <stdio.h>

#define CALLOC_FUNC calloc1
#define PAT_FUNC    pat1_getsize
#define NUMITER     2000000
#define PAT1_SZ     11

/*
 * Prototypes for the calloc variants selectable through CALLOC_FUNC.
 * calloc1 is declared too: it is the default CALLOC_FUNC, and calling it
 * without a prototype would make it an implicitly declared function
 * returning int.
 */
void *calloc1(size_t nmemb, size_t size);
void *calloc2(size_t nmemb, size_t size);
void *calloc3(size_t nmemb, size_t size);

/* Constant pattern: every iteration requests PAT1_SZ elements. */
static size_t pat1_getsize(size_t iter)
{
	(void)iter;	/* the size does not depend on the iteration */
	return PAT1_SZ;
}
/* Constant pattern: every iteration requests 9 elements. */
static size_t pat2_getsize(size_t iter)
{
	(void)iter;	/* the size does not depend on the iteration */
	return 9;
}
/*
 * Varying pattern: cycles through a fixed table of eight element counts
 * (3, 5, 7, 9, 11, 9, 7, 5), indexed by the iteration number modulo the
 * table length.
 */
static size_t pat3_getsize(size_t iter)
{
	static size_t sizes[] = { 3, 5, 7, 9, 11, 9, 7, 5 };
	const size_t count = sizeof sizes / sizeof sizes[0];
	size_t slot = iter % count;

	return sizes[slot];
}

/*
 * Return a - b in nanoseconds.
 *
 * Neither timestamp is modified. The result is computed directly from
 * the second and nanosecond differences, so no borrow step is needed
 * when a->tv_nsec < b->tv_nsec. The result must fit in a long.
 */
static long diff_timespec_ns( const struct timespec *a, const struct timespec *b )
{
        long sec_delta  = (long)(a->tv_sec - b->tv_sec);
        long nsec_delta = a->tv_nsec - b->tv_nsec;

        return sec_delta * 1000000000L + nsec_delta;
}

int main(int argc, char *argv[])
{
	struct timespec start,end;
	size_t i;
	clock_gettime(CLOCK_REALTIME,&start);

	for( i = 0; i < NUMITER; ++i )
		CALLOC_FUNC(PAT_FUNC(i),sizeof(size_t));

	clock_gettime(CLOCK_REALTIME,&end);
	printf("executed in %ld ns\n",diff_timespec_ns(&end,&start));
	return 0;
}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]