[gclist] why malloc/free instead of GC?
Basile STARYNKEVITCH
basile@starynkevitch.net
Tue, 18 Feb 2003 23:13:49 +0100
For completeness, I changed my tiny test a bit to allocate smaller
objects, to take into account Hans Boehm's remark on typical object
size
################################
// file essm.c
// compile for malloc: gcc -O essm.c -o essm
// compile for Boehm's GC: gcc -O -DUSEGC essm.c -o essm_gc -lgc
// compile for Qish GC:
/// gcc -O -I../Qish/include -DUSEQISH essm.c -o essm_qish -L../Qish/lib -lqish -ldl
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/times.h>
#include <assert.h>
#ifdef USEGC
#include <gc.h>
#define malloc(S) GC_malloc(S)
#ifdef USEGCFREE
#define free(P) GC_free(P)
#else
#define free(P) {}
#endif
#endif
void *tabptr[16];
#ifdef USEQISH
#include "qish.h"
#define tabptr qish_roots
#define HEADEROF(Ad) (*(unsigned*)(Ad))
struct obj_st
{
unsigned header;
void *tab[0];
};
void *
essm_qish_gc_copy (void **padr, void *dst, const void *src)
{
int i = 0;
static int cnt;
struct obj_st *odst = dst;
const struct obj_st *osrc = src;;
unsigned header = osrc->header;
cnt++;
odst->header = osrc->header;
for (i = 0; i < header; i++)
odst->tab[i] = osrc->tab[i];
*padr = odst;
return (void *) (odst->tab + header);
}
void *
essm_qish_minor_scan (void *ptr)
{
int i = 0;
static int cnt;
struct obj_st *ob = ptr;
unsigned header = ob->header;
cnt++;
for (i = 0; i < header; i++)
QISHGC_MINOR_UPDATE (ob->tab[i]);
return (void *) (ob->tab + header);
}
void *
essm_qish_full_scan (void *ptr)
{
int i = 0;
static int cnt;
struct obj_st *ob = ptr;
unsigned header = ob->header;
cnt++;
for (i = 1; i <= header; i++)
QISHGC_FULL_UPDATE (ob->tab[i]);
return (void *) (ob->tab + header);
}
void
essm_qish_fixed_scan (void *ptr, int sz)
{
qish_panic ("fixed_scan should not be called ptr=%p sz=%d", ptr, sz);
}
#endif
int
main (int argc, char **argv)
{
long long maxcnt = 1000000;
#define MAXALLOC 20
long long taballoc[MAXALLOC + 1];
long long cumalloc = 0;
long long i = 0;
int r = 0, s = 0, n = 0;
double usert, syst, tick;
struct tms t;
struct st *p = 0;
if (argc > 1)
maxcnt = atol (argv[1]) * 1000;
#ifdef USEQISH
qishgc_init ();
qish_gc_copy_p = essm_qish_gc_copy;
qish_minor_scan_p = essm_qish_minor_scan;
qish_fixed_scan_p = essm_qish_fixed_scan;
qish_full_scan_p = essm_qish_full_scan;
#endif //USEQISH
memset (&t, 0, sizeof (t));
memset (taballoc, 0, sizeof (taballoc));
if (maxcnt < 100000)
maxcnt = 100000;
printf ("begin maxcnt=%lld=%e\n", maxcnt, (double) maxcnt);
for (i = 0; i < maxcnt; i++)
{
if ((i & 0x1fffff) == 0)
printf ("i=%lld [=%.3g %%]\n", i, 100.0*(double)i/maxcnt);
r = lrand48 () & 0xf;
#ifndef USEQISH
if (tabptr[r])
free (tabptr[r]);
#endif
n = lrand48 () % 131072 + 4;
// approximate s = integer square root(n)
s = n / 256 + 2;
s = (s + n / s) / 2;
s = (s + n / s) / 2;
s = (s + n / s) / 2;
s = (s + n / s) / 2;
s = (s + n / s) / 2;
s = (s + n / s) / 2;
s = (s + n / s) / 2;
s = (s + n / s) / 2;
s = (s + 8) & (~3);
cumalloc += s;
if (s / 16 < MAXALLOC)
taballoc[s / 16]++;
#ifndef USEQISH
tabptr[r] = malloc (s);
if (s > 4 * sizeof (void *) && lrand48 () % 8 < 3)
((void **) (tabptr[r]))[1] = tabptr[lrand48 () % 8];
#else
n = s / 4 + 1;
{
volatile struct {
struct obj_st *ptr;
} _locals_ = {0};
#define l_ptr _locals_.ptr
BEGIN_LOCAL_FRAME_WITHOUT_ARGS ();
assert(n>0 && n<1000);
QISH_ALLOCATE (l_ptr, sizeof (struct obj_st) + n * sizeof (void *));
l_ptr->header = n;
if (n > 3 && lrand48 () % 8 < 3) {
l_ptr->tab[0] = (void *) (tabptr[lrand48 () % 8]);
QISH_WRITE_NOTIFY (l_ptr);
}
tabptr[r] = l_ptr;
dbgprintf("r=%d n=%d l_ptr=%p", r, n, l_ptr);
EXIT_FRAME ();
}
#endif
if (!tabptr[r])
fprintf (stderr, "malloc(%d) failed i=%lld\n", s, i);
};
times (&t);
tick = (double) sysconf (_SC_CLK_TCK);
usert = ((double) t.tms_utime) / tick;
syst = ((double) t.tms_stime) / tick;
printf
("end maxcnt=%lld=%e cumulated alloc=%lld=%.3g bytes, mean %.3g bytes\n",
maxcnt, (double) maxcnt, cumalloc, (double) cumalloc,
(double) cumalloc / (double) maxcnt);
#if 0
// don't work as I want...
for (i = 0; i < MAXALLOC; i++)
{
long long ta = taballoc[i];
printf ("alloc<%d: %lld=%.3g i.e. %.3g %%\n",
16 + i * 16, ta, (double) ta,
100.0 * ((double) ta / ((double) maxcnt)));
};
#endif
#ifdef USEQISH
printf("done %d minor & %d full garbage collections\n",
qish_nb_minor_collections, qish_nb_full_collections);
#endif
printf
("%s cputime user=%g system=%g tick=%g total == per iteration user=%g system=%g\n",
argv[0], usert, syst, tick, usert / (double) maxcnt,
syst / (double) maxcnt);
return 0;
}
// eof essm.c
################################
with gcc -DUSEGC -O essm.c -o essm_gc -lgc
the program ./essm_gc gives (using Boehm's GC)
begin maxcnt=1000000=1.000000e+06
i=0 [=0 %]
end maxcnt=1000000=1.000000e+06 cumulated alloc=248908533=2.49e+08 bytes, mean 249 bytes
./essm_gc cputime user=2.13 system=0.02 tick=100 total == per iteration user=2.13e-06 system=2e-08
with gcc -O -g essm.c -o essm
the program ./essm gives (using malloc/free)
begin maxcnt=1000000=1.000000e+06
i=0 [=0 %]
end maxcnt=1000000=1.000000e+06 cumulated alloc=247413992=2.47e+08 bytes, mean 247 bytes
./essm cputime user=0.72 system=0 tick=100 total == per iteration user=7.2e-07 system=0
For completeness and shameless plug I also hacked the same (useless)
program to use my Qish generational copying GC - see
http://freshmeat.net/projects/qish for details on Qish. I even added a
pair of BEGIN_LOCAL_FRAME_WITHOUT_ARGS + EXIT_FRAME macros, even if on
this particular example they are useless (since the result of
allocation goes into a global root).
./essm_qish
begin maxcnt=1000000=1.000000e+06
i=0 [=0 %]
end maxcnt=1000000=1.000000e+06 cumulated alloc=247407768=2.47e+08 bytes, mean 247 bytes
done 29 minor & 0 full garbage collections
./essm_qish cputime user=0.72 system=0.28 tick=100
total == per iteration user=7.2e-07 system=2.8e-07
Now I try to run it bigger, with more allocations (so to trigger
several full garbage collections)
To have the full GC executed sevveral times, I run it more:
$PWD/essm_qish 54321
begin maxcnt=54321000=5.432100e+07
i=0 [=0 %]
i=2097152 [=3.86 %]
i=4194304 [=7.72 %]
i=6291456 [=11.6 %]
i=8388608 [=15.4 %]
i=10485760 [=19.3 %]
i=12582912 [=23.2 %]
i=14680064 [=27 %]
i=16777216 [=30.9 %]
i=18874368 [=34.7 %]
i=20971520 [=38.6 %]
i=23068672 [=42.5 %]
i=25165824 [=46.3 %]
i=27262976 [=50.2 %]
i=29360128 [=54 %]
i=31457280 [=57.9 %]
i=33554432 [=61.8 %]
i=35651584 [=65.6 %]
i=37748736 [=69.5 %]
i=39845888 [=73.4 %]
i=41943040 [=77.2 %]
i=44040192 [=81.1 %]
i=46137344 [=84.9 %]
i=48234496 [=88.8 %]
i=50331648 [=92.7 %]
i=52428800 [=96.5 %]
end maxcnt=54321000=5.432100e+07 cumulated alloc=13437122740=1.34e+10 bytes, mean 247 bytes
done 1645 minor & 6 full garbage collections
/home/basile/Misc/essm_qish cputime user=41.45 system=13.37 tick=100 total == per iteration user=7.63057e-07 system=2.46129e-07
With the same allocation count the Boehm's test end with
i=52428800 [=96.5 %]
end maxcnt=54321000=5.432100e+07 cumulated alloc=13437153848=1.34e+10 bytes, mean 247 bytes
/home/basile/Misc/essm_gc cputime user=116.91 system=0.19 tick=100
total == per iteration user=2.15221e-06 system=3.49773e-09
So a malloc/free is about 0.6 microseconds while a GC_malloc is about
2 (or 2.2) microseconds, and a Qish allocation is about 1.1
microseconds (on average) *including garbage collection time* (of
course Qish has more overhead in practice, because of the mandatory
local roots registration and of the write barrier; and Qish is much
less confortable to code with, since it requires a particular coding
style).
Sorry to Hans Boehm for having provided an unrealistic benchmark.
(If any reader of this list happens to have tried Qish I would be
delighted to get feedback; Qish is opensource, under LGPL)
Regards.
--
Basile STARYNKEVITCH http://starynkevitch.net/Basile/
email: basile<at>starynkevitch<dot>net
alias: basile<at>tunes<dot>org
8, rue de la Faïencerie, 92340 Bourg La Reine, France