C garbage collector.

John Q. Linux jql@accessone.com
Tue, 4 Jun 1996 16:36:37 -0700 (PDT)


Hello all,

  I've been hearing about Tunes from Fare in IRC for over a year and 
decided to see if I could help. :)

  I asked Fare what I could do to help Tunes and he asked if I could 
write a real-time copying GC based on 
ftp://ftp.netcom.com/pub/hb/hbaker/RealTimeGC.html in C using a large 
mmap()'d file. I told him that it was a bit over my head, but I re-read 
hbaker's paper a few more times and did it anyways. Admittedly, this is a 
pretty lame GC which, from my minimal testing, doesn't work very well. :)
  It's pretty-much a verbatim copy of the stackless Algol psuedo-code 
example in RealTimeGC.html. This version is not expected to work for 
anyone else but me, but it's reasonably well commented and it has *alot* 
of debugging output. This is *not* meant to be working code. And the todo 
list is extremely large. Here's a small list of things to do:

    * Implement a stack
    * Maybe I could add some atoms? :)
    * Try to get forwarding pointers to work how I want them to
    * Rewrite the whole thing because I now understand what it should do :)
    * Write a scheme interpreter so I can test it properly

  That actually looks pretty long. I've included the source-code as a 
patch at the bottom of this message. You have to make some edits to gc.h 
and you need to create a file to mmap(). I made a 40-meg file, but I 
really should just implement the gc in the first 30-bytes so I can have 
it work naturally. :)

  Well, be warned, this probably doesn't work, and probably won't compile 
for you, but I thought I'd share. Enjoy...


Binary files ../empty/.backup/Mon Jun  3 23:34:21 PDT 1996.tar.gz and ./.backup/Mon Jun  3 23:34:21 PDT 1996.tar.gz differ
diff -urP ../empty/gc.c ./gc.c
--- ../empty/gc.c	Wed Dec 31 16:00:00 1969
+++ ./gc.c	Tue Jun  4 16:06:43 1996
@@ -0,0 +1,102 @@
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include "gc.h"
+
+void getmem(void);      /* mmap() some memory and assign to the GC ptrs      */
+
+/*
+ * Here come the global pointers, I hope this is all of em.
+ * Why do I have to bother with the char * and void *'s anyways? I'm 
+ * starting to get really annoyed with them. :)
+ */
+
+mem fromspace;          /* The place where objects are copied from by the GC */
+mem tospace;            /* tospace is where objects are copied to/created    */
+cell *free;             /* The bottom of the available free space in tospace */
+cell *scanptr;          /* The address of the next cell to be scanned        */
+cell *top;              /* Pointer to the top of the free space in tospace   */
+mem _top;               /* These are used to make sure we know the absolute  */
+mem _bottom;            /* start/end of the memory block (just in case)      */
+
+int k = 10;              /* Ratio of GC's per CONS. Higher = more, slower    */
+
+/* We'll try stacks later */
+/*int stack_top = 0;*/  /* The top element of the stack                      */
+/*int stack_scan = 0;*/ /* The latest stack element to be scanned            */
+
+char *progname;
+
+int main(int argc, char *argv[]) {
+  cell *foo, *bar, *baz, *blah;
+
+  progname = argv[0];     /* I hate this. */
+  getmem();
+
+  dump_ptrs(); /* We want to see the allocations, so print the initial ptrs */
+  foo = cons(NULL, NULL);dump_ptrs();    /* Lame tests. I definitly need    */
+  bar = cons(NULL, NULL);dump_ptrs();    /* some type of scheme interpreter */
+  baz = cons(NULL, NULL);dump_ptrs();    /* or something to test with.      */
+  blah = cons(NULL, NULL);dump_ptrs();
+
+  replacd(foo, bar);
+  replacd(bar, baz);
+  replacd(baz, foo);
+  dump_ptrs();
+  replaca(foo, (mem)foo);
+  replaca(bar, (mem)foo);
+  replaca(baz, (mem)foo);
+  dump_ptrs();
+
+  flip();  /* switch the (from/to)spaces. My mmap file is much too big */
+  blah = cdr(foo);
+  blah = cons(foo, bar);
+  blah = cons(foo, bar);
+  blah = cons(foo, bar);
+
+  dump_ptrs();
+
+  return 0;
+}
+
+void getmem(void) {
+  int fd;
+
+  if((fd = open(MMAP_FILE, O_RDWR)) == -1) {
+    perror(progname);
+    exit(-1);
+  }
+  if((_bottom = mmap((caddr_t) 0, MMAP_LEN, PROT_READ|PROT_WRITE,
+		      MAP_SHARED|MAP_FILE, fd, 0)) == (caddr_t) -1) {
+    perror(progname);
+    exit(-1);
+  }
+
+  _top        = _bottom+MMAP_LEN;
+  tospace     = _bottom;
+  free        = tospace;
+  fromspace   = _bottom + SPACE_SIZE;
+  top         = (cell *) (tospace + (SPACE_SIZE - sizeof(cell)));  /* UGH! */
+  scanptr     = tospace;
+}
+
+void dump_ptrs(void) {
+  printf(
+"_bottom:\t%p
+_top:\t\t%p
+free:\t\t%p
+fromspace:\t%p
+tospace:\t%p
+scanptr:\t%p
+top:\t\t%p
+", _bottom, _top, free, fromspace, tospace, scanptr, top);
+}
+
+void error(char *error) {
+  printf(error);
+  dump_ptrs();
+  exit(-1);
+}
diff -urP ../empty/gc.h ./gc.h
--- ../empty/gc.h	Wed Dec 31 16:00:00 1969
+++ ./gc.h	Tue Jun  4 15:33:17 1996
@@ -0,0 +1,57 @@
+#include <sys/types.h>
+
+/*
+ * MMAP_LEN is the size of MMAP_FILE. MMAP_FILE is the path to the filename
+ * to mmap(). It's odd that I named mine persistent-tunes even when it's
+ * not persistent yet. Well, I'm working on it. :)
+ */
+ 
+#define PAGE 4096
+#define MMAP_FILE "/usr/local/etc/persistent-tunes"
+#define MMAP_LEN (PAGE*(10<<10))
+#define SPACE_SIZE (MMAP_LEN/2)
+
+/*
+#define CAR(x) (((cell *)x)->car=(dword_ptr)move((dword_ptr)((cell *)x)->car))
+#define CDR(x) (((cell *)x)->cdr=(cell*)move((dword_ptr)((cell *)x)->cdr))
+#define ATOM(x) (!in_tospace(x))
+#define EQ(x,y) ((x == y) ? 1 : 0)
+#define REPLACA(x,y) (((cell *)x)->car = y)
+#define REPLACD(x,y) (((cell *)x)->cdr = y)
+*/
+
+typedef void * mem;         /* pointer to actual blocks of un-typed memory */
+typedef long * dword_ptr;   /* Where were you dword ptr when I needed you? */
+typedef void * gen_ptr;     /* A generic pointer for those unknown types   */
+typedef void * car_ptr;     /* What the hell is a car???                   */
+
+void dump_ptrs(void);   /* Dump the pointers (for debugging)                 */
+void error(char *);
+
+typedef struct cell {
+  car_ptr car;
+  struct cell *cdr;
+} cell;
+
+extern mem fromspace;
+extern mem tospace;
+extern cell *free;
+extern cell *scanptr;
+extern cell *top;
+extern mem _top;
+extern mem _bottom;
+
+extern int k;
+
+
+cell *cons(mem car, cell *cdr);
+void flip(void);
+mem move(cell *p);
+mem copy(mem p);
+
+car_ptr car(cell *p);
+mem cdr(cell *p);
+int atom(mem p);
+int eq(mem x, mem y);
+void replaca(cell *x, mem *y);
+void replacd(cell *x, cell *y);
diff -urP ../empty/lists.c ./lists.c
--- ../empty/lists.c	Wed Dec 31 16:00:00 1969
+++ ./lists.c	Tue Jun  4 16:06:55 1996
@@ -0,0 +1,103 @@
+#include "gc.h"
+
+/* There's alot of debugging code in here, but there's lots to debug. */
+
+cell *cons(mem new_car, cell *new_cdr) {
+  int i;
+
+#if DEBUG
+  printf("cons(%p, %p);\n", new_car, new_cdr);
+#endif
+  if(free == top) {
+    if(scanptr < free) error("scanptr < free???\n");
+    flip();
+    new_car = move(new_car);
+    new_cdr = move(new_cdr);
+  }
+#if DEBUG
+  printf("%p < %p?\n", scanptr, free);
+#endif
+  for(i = 0; i < k && (mem)scanptr < (mem)free; i++) {
+    car(scanptr);
+    cdr(scanptr);
+    scanptr += 1;
+  }
+  if(free == top) error("free == top??\n");
+  top -= 1;
+  top->car = new_car;
+  top->cdr = new_cdr;
+  return top;
+}
+
+void flip(void) {  /* 'flip' the spaces. No error-checking, quick and dirty */
+#if DEBUG
+  printf("flip()\n");
+#endif
+  free        = tospace;
+  tospace     = fromspace;
+  fromspace   = free;
+  free        = tospace;
+  top         = (tospace + (SPACE_SIZE - sizeof(cell)));  /* UGH! */
+  scanptr     = tospace;
+#if DEBUG
+  dump_ptrs();
+#endif
+}
+
+mem move(cell *p) {
+#if DEBUG
+  printf("move(%p)\n",p);
+#endif
+  if(!in_fromspace(p)) return p;
+  else {
+#if DEBUG
+    printf("We're in fromspace, move me\n");
+    printf("if(!in_tospace(%p)) p->car = copy(%p);\n",((cell *)p)->car,p);
+#endif
+    if(!in_tospace(((cell *)p)->car)) ((cell *)p)->car = copy(p);
+#if DEBUG
+    dump_ptrs();
+#endif
+    return ((cell *)p)->car;
+  }
+}
+
+mem copy(mem p) {
+#if DEBUG
+  printf("copy(%p)\n",p);
+#endif
+  if(free >= top) error("free >= top???\n");
+  free->car = (mem)((long *)p)[0]; /* This is the typecast from hell */
+  free->cdr = (cell *)((long *)p)[1]; /* Save me!!!  */
+  free += 1;
+  return free - sizeof(cell);  /* I like B & (B:=B+2) better, don't you? */
+}
+
+int in_fromspace(mem p) {
+#if DEBUG
+  printf("in_fromspace(%p)\n%p >= %p\n", p, p, fromspace);
+#endif
+  if(p >= fromspace) {
+#if DEBUG
+    printf("if(%p > %p || %p < %p) return 1;\n",fromspace,tospace,p,tospace);
+#endif
+    if(fromspace > tospace || p < tospace) return 1;
+    else return 0;
+  }
+  else return 0;
+}
+
+int in_tospace(mem p) {
+  if(p >= tospace) {
+    if(tospace > fromspace || p < fromspace) return 1;
+    else return 0;
+  }
+  else return 0;
+}
+
+car_ptr car(cell *p) { return p->car = move(p->car); }
+mem cdr(cell *p)     { return p->cdr = move(p->cdr); }
+int atom(mem p)      { return !in_tospace(p); }
+int eq(mem x, mem y) { return (x == y) ? 1 : 0; }
+void replaca(cell *x, mem *y) { x->car = y; }
+void replacd(cell *x, cell *y) { x->cdr = y; }