[gclist] why malloc/free instead of GC?

Basile STARYNKEVITCH basile@starynkevitch.net
Tue, 18 Feb 2003 23:13:49 +0100


For completeness, I changed my tiny test a bit to allocate smaller
objects, to take into account Hans Boehm's remark on typical object
size


################################
// file essm.c

// compile for malloc: gcc -O essm.c -o essm
// compile for Boehm's GC: gcc -O -DUSEGC essm.c -o essm_gc -lgc
// compile for Qish GC: 
/// gcc -O -I../Qish/include -DUSEQISH essm.c -o essm_qish -L../Qish/lib -lqish -ldl

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/times.h>
#include <assert.h>

#ifdef USEGC
#include <gc.h>
#define malloc(S) GC_malloc(S)
#ifdef USEGCFREE
#define free(P) GC_free(P)
#else
#define free(P) {}
#endif
#endif

void *tabptr[16];

#ifdef USEQISH
#include "qish.h"
#define tabptr qish_roots
#define HEADEROF(Ad) (*(unsigned*)(Ad))

struct obj_st
{
  unsigned header;
  void *tab[0];
};

void *
essm_qish_gc_copy (void **padr, void *dst, const void *src)
{
  int i = 0;
  static int cnt;
  struct obj_st *odst = dst;
  const struct obj_st *osrc = src;;
  unsigned header = osrc->header;
  cnt++;
  odst->header = osrc->header;
  for (i = 0; i < header; i++)
    odst->tab[i] = osrc->tab[i];
  *padr = odst;
  return (void *) (odst->tab + header);
}

void *
essm_qish_minor_scan (void *ptr)
{
  int i = 0;
  static int cnt;
  struct obj_st *ob = ptr;
  unsigned header = ob->header;
  cnt++;
  for (i = 0; i < header; i++)
    QISHGC_MINOR_UPDATE (ob->tab[i]);
  return (void *) (ob->tab + header);
}

void *
essm_qish_full_scan (void *ptr)
{
  int i = 0;
  static int cnt;
  struct obj_st *ob = ptr;
  unsigned header = ob->header;
  cnt++;
  for (i = 1; i <= header; i++)
    QISHGC_FULL_UPDATE (ob->tab[i]);
  return (void *) (ob->tab + header);
}

void
essm_qish_fixed_scan (void *ptr, int sz)
{
  qish_panic ("fixed_scan should not be called ptr=%p sz=%d", ptr, sz);
}

#endif


int
main (int argc, char **argv)
{
  long long maxcnt = 1000000;
#define MAXALLOC 20
  long long taballoc[MAXALLOC + 1];
  long long cumalloc = 0;
  long long i = 0;
  int r = 0, s = 0, n = 0;
  double usert, syst, tick;
  struct tms t;
  struct st *p = 0;
  if (argc > 1)
    maxcnt = atol (argv[1]) * 1000;
#ifdef USEQISH
  qishgc_init ();
  qish_gc_copy_p = essm_qish_gc_copy;
  qish_minor_scan_p = essm_qish_minor_scan;
  qish_fixed_scan_p = essm_qish_fixed_scan;
  qish_full_scan_p = essm_qish_full_scan;
#endif //USEQISH
  memset (&t, 0, sizeof (t));
  memset (taballoc, 0, sizeof (taballoc));
  if (maxcnt < 100000)
    maxcnt = 100000;
  printf ("begin maxcnt=%lld=%e\n", maxcnt, (double) maxcnt);
  for (i = 0; i < maxcnt; i++)
    {
      if ((i & 0x1fffff) == 0)
	printf ("i=%lld [=%.3g %%]\n", i, 100.0*(double)i/maxcnt);
      r = lrand48 () & 0xf;
#ifndef USEQISH
      if (tabptr[r])
	free (tabptr[r]);
#endif
      n = lrand48 () % 131072 + 4;
      // approximate s = integer square root(n)
      s = n / 256 + 2;
      s = (s + n / s) / 2;
      s = (s + n / s) / 2;
      s = (s + n / s) / 2;
      s = (s + n / s) / 2;
      s = (s + n / s) / 2;
      s = (s + n / s) / 2;
      s = (s + n / s) / 2;
      s = (s + n / s) / 2;
      s = (s + 8) & (~3);
      cumalloc += s;
      if (s / 16 < MAXALLOC)
	taballoc[s / 16]++;
#ifndef USEQISH
      tabptr[r] = malloc (s);
      if (s > 4 * sizeof (void *) && lrand48 () % 8 < 3)
	((void **) (tabptr[r]))[1] = tabptr[lrand48 () % 8];
#else
      n = s / 4 + 1;
      {
	volatile struct	{
	  struct obj_st *ptr;
	}	_locals_ =	{0};
#define l_ptr _locals_.ptr
	BEGIN_LOCAL_FRAME_WITHOUT_ARGS ();
	assert(n>0 && n<1000);
	QISH_ALLOCATE (l_ptr, sizeof (struct obj_st) + n * sizeof (void *));
	l_ptr->header = n;
	if (n > 3 && lrand48 () % 8 < 3)  {
	    l_ptr->tab[0] = (void *) (tabptr[lrand48 () % 8]);
	    QISH_WRITE_NOTIFY (l_ptr);
	}
	tabptr[r] = l_ptr;
	dbgprintf("r=%d n=%d l_ptr=%p", r, n, l_ptr);
	EXIT_FRAME ();
      }
#endif
      if (!tabptr[r])
	fprintf (stderr, "malloc(%d) failed i=%lld\n", s, i);
    };
  times (&t);
  tick = (double) sysconf (_SC_CLK_TCK);
  usert = ((double) t.tms_utime) / tick;
  syst = ((double) t.tms_stime) / tick;
  printf
    ("end maxcnt=%lld=%e cumulated alloc=%lld=%.3g bytes, mean %.3g bytes\n",
     maxcnt, (double) maxcnt, cumalloc, (double) cumalloc,
     (double) cumalloc / (double) maxcnt);
#if 0
  // don't work as I want...
  for (i = 0; i < MAXALLOC; i++)
    {
      long long ta = taballoc[i];
      printf ("alloc<%d: %lld=%.3g i.e. %.3g %%\n",
	      16 + i * 16, ta, (double) ta,
	      100.0 * ((double) ta / ((double) maxcnt)));
    };
#endif
#ifdef USEQISH
  printf("done %d minor & %d full garbage collections\n", 
	 qish_nb_minor_collections, qish_nb_full_collections);
#endif
  printf
    ("%s cputime user=%g system=%g tick=%g total == per iteration  user=%g system=%g\n",
     argv[0], usert, syst, tick, usert / (double) maxcnt,
     syst / (double) maxcnt);
  return 0;
}
// eof essm.c 


################################

with gcc -DUSEGC -O essm.c -o essm_gc -lgc
the program ./essm_gc gives (using Boehm's GC)

begin maxcnt=1000000=1.000000e+06
i=0 [=0 %]
end maxcnt=1000000=1.000000e+06 cumulated alloc=248908533=2.49e+08 bytes, mean 249 bytes
 ./essm_gc cputime user=2.13 system=0.02 tick=100 total == per iteration  user=2.13e-06 system=2e-08

with gcc -O -g essm.c -o essm 
the program ./essm gives (using malloc/free)
begin maxcnt=1000000=1.000000e+06
i=0 [=0 %]
end maxcnt=1000000=1.000000e+06 cumulated alloc=247413992=2.47e+08 bytes, mean 247 bytes
 ./essm cputime user=0.72 system=0 tick=100 total == per iteration  user=7.2e-07 system=0

For completeness and shameless plug I also hacked the same (useless)
program to use my Qish generational copying GC - see
http://freshmeat.net/projects/qish for details on Qish. I even added a
pair of BEGIN_LOCAL_FRAME_WITHOUT_ARGS + EXIT_FRAME macros, even if on
this particular example they are useless (since the result of
allocation goes into a global root).

 ./essm_qish
begin maxcnt=1000000=1.000000e+06
i=0 [=0 %]
end maxcnt=1000000=1.000000e+06 cumulated alloc=247407768=2.47e+08 bytes, mean 247 bytes
done 29 minor & 0 full garbage collections
 ./essm_qish cputime user=0.72 system=0.28 tick=100
total == per iteration  user=7.2e-07 system=2.8e-07

Now I try to run it bigger, with more allocations (so to trigger
several full garbage collections)

To have the full GC executed sevveral times, I run it more:
$PWD/essm_qish 54321
begin maxcnt=54321000=5.432100e+07
i=0 [=0 %]
i=2097152 [=3.86 %]
i=4194304 [=7.72 %]
i=6291456 [=11.6 %]
i=8388608 [=15.4 %]
i=10485760 [=19.3 %]
i=12582912 [=23.2 %]
i=14680064 [=27 %]
i=16777216 [=30.9 %]
i=18874368 [=34.7 %]
i=20971520 [=38.6 %]
i=23068672 [=42.5 %]
i=25165824 [=46.3 %]
i=27262976 [=50.2 %]
i=29360128 [=54 %]
i=31457280 [=57.9 %]
i=33554432 [=61.8 %]
i=35651584 [=65.6 %]
i=37748736 [=69.5 %]
i=39845888 [=73.4 %]
i=41943040 [=77.2 %]
i=44040192 [=81.1 %]
i=46137344 [=84.9 %]
i=48234496 [=88.8 %]
i=50331648 [=92.7 %]
i=52428800 [=96.5 %]
end maxcnt=54321000=5.432100e+07 cumulated alloc=13437122740=1.34e+10 bytes, mean 247 bytes
done 1645 minor & 6 full garbage collections
/home/basile/Misc/essm_qish cputime user=41.45 system=13.37 tick=100 total == per iteration  user=7.63057e-07 system=2.46129e-07

With the same allocation count the Boehm's test end with

i=52428800 [=96.5 %]
end maxcnt=54321000=5.432100e+07 cumulated alloc=13437153848=1.34e+10 bytes, mean 247 bytes
/home/basile/Misc/essm_gc cputime user=116.91 system=0.19 tick=100
total == per iteration  user=2.15221e-06 system=3.49773e-09

So a malloc/free is about 0.6 microseconds while a GC_malloc is about
2 (or 2.2) microseconds, and a Qish allocation is about 1.1
microseconds (on average) *including garbage collection time* (of
course Qish has more overhead in practice, because of the mandatory
local roots registration and of the write barrier; and Qish is much
less confortable to code with, since it requires a particular coding
style).



Sorry to Hans Boehm for having provided an unrealistic benchmark.

(If any reader of this list happens to have tried Qish I would be
delighted to get feedback; Qish is opensource, under LGPL)

Regards.
-- 

Basile STARYNKEVITCH         http://starynkevitch.net/Basile/ 
email: basile<at>starynkevitch<dot>net 
alias: basile<at>tunes<dot>org 
8, rue de la Faïencerie, 92340 Bourg La Reine, France