[virtmach] Bio and Bib
Joel Jones
jjones@uiuc.edu
Sun, 14 Nov 1999 18:18:37 -0600
I thought I'd introduce myself to the list by giving a brief
introduction to the work I'm doing, as well as a bibliography I've
developed on virtual machines.
I am investigating verifiable, machine-independent annotations to the
Java .class file to bring the quality of the code generated by a
``just-in-time'' compiler closer to that of an optimizing compiler
without a significant increase in code generation time. This
division of labor has expensive machine-independent analysis
performed off-line and inexpensive machine-dependent code-generation
performed on the client. I call this phenomenon ``super-linear
analysis and linear exploitation.'' These annotations were designed
mindful of the concurrency features of the Java language. The
current set of annotations describe virtual register assignment,
register spilling, and redundant load-store removal.
I am using the kaffe Java virtual machine. One of the interesting
(well, maybe :) ) aspects of the way I am using kaffe is to
"parameterize" its use of the Java Virtual machine to be more data
driven. By this, I mean that I use a table of little language
descriptions of various parts of the semantics of the JVM to drive
certain parts of my implementation. I use a spreadsheet to manage
the little bits, and awk scripts to generate either Java or C
source code for my front-end (written in Java) or for kaffe. This
has reduced the number of coding errors and would also allow me to
retarget kaffe for the various "micro" versions of the JVM.
Joel Jones
jjones@uiuc.edu
@InCollection{Abramsky85,
  author    = "S. Abramsky and R. Sykes",
  editor    = "J.-P. Jouannaud",
  title     = "{SECD-M}: {A} Virtual Machine for Applicative
               Programming",
  booktitle = "Functional Programming Languages and Computer
               Architecture",
  series    = "Lecture Notes in Computer Science",
  volume    = "201",
  pages     = "81--98",
  publisher = "Springer-Verlag",
  address   = "Berlin, DE",
  year      = "1985",
  keywords  = "functional nancy symposium parallel non-determinism",
  ISBN      = "3-540-15975-4",
  abstract  = "We present a virtual machine to support applicative
               multiprogramming - the description of concurrent,
               asynchronous systems such as operating systems in a
               functional style. The machine extends Landin's SECD
               machine to support multiple concurrent expression
               evaluation, non-determinism in the form of the fair
               merge, and a full range of input and output devices.
               This allows system programs to be written in a
               functional style. The SECD-M machine has been
               implemented and a number of functional concurrent
               programs demonstrated.",
  note      = "Proceedings of the conference held at Nancy.",
}
@InProceedings{FeeleyMarc90a,
  author    = "Marc Feeley and James S. Miller",
  title     = "{A} Parallel Virtual Machine for Efficient {Scheme}
               Compilation",
  booktitle = "Conference on Lisp and Functional Programming",
  address   = "Nice, France",
  month     = jun,
  year      = "1990",
  URL       = "ftp://ftp.cs.indiana.edu/pub/scheme-repository/txt/pvm.ps.Z",
  scope     = "implemen",
  keywords  = "Gambit, Lisp, Futures, Virtual Machine",
  abstract  = "Programs compiled by Gambit, our Scheme compiler,
               achieve performance as much as twice that of the
               fastest available Scheme compilers. Gambit is easily
               ported, while retaining its high performance, through
               the use of a simple virtual machine (PVM). PVM allows a
               wide variety of machine-independent optimizations and
               it supports parallel computation based on the {\tt
               future} construct. PVM conveys high-level information
               bidirectionally between the machine-independent front
               end of the compiler and the machine-dependent back end,
               making it easy to implement a number of common back end
               optimizations that are difficult to achieve for other
               virtual machines. PVM is similar to many real computer
               architectures and has an option to efficiently gather
               dynamic measurements of virtual machine usage. These
               measurements can be used in performance prediction for
               ports to other architectures as well as design
               decisions related to proposed optimizations and object
               representations.",
}
@PhdThesis{Piumarta:phd:1992,
  author   = "Ian K. Piumarta",
  email    = "ikp@cs.man.ac.uk",
  title    = "Delayed Code Generation in a {Smalltalk-80} Compiler",
  school   = "University of Manchester",
  month    = sep,
  year     = "1992",
  source   = "Dept. Library",
  pages    = "174",
  sjb      = "Surprisingly there is hardly any reference to the work
              of the Self group",
  abstract = "More than any other programming system, Smalltalk-80
              stretches the object-oriented paradigm to its limits.
              Representing all programmer-accessible data (input and
              output facilities, contexts, processes, functions, and
              so on) as objects is the cause of many implementation
              difficulties. Polymorphism, the dynamic binding of
              function names to function bodies at runtime, and the
              transparent management of dynamic memory allocation
              only aggravate the situation further. Traditional
              implementations (in other words, all the commercially
              available implementations), try to narrow the semantic
              gap between the language and the platform upon which it
              runs by compiling Smalltalk for an idealised virtual
              machine that uses simple language-oriented
              instructions. This approach has advantages for both the
              compiler writer (the target language is optimised for
              running programs written in the source language) and
              for the runtime system (compiled code is small and easy
              to map back to the source for debugging). Reducing the
              complexity of the compiler also speeds up compilation,
              which is highly desirable in exploratory programming
              environments such as Smalltalk-80. The down side is that
              reducing the complexity of the compiler and target
              language causes a corresponding increase in the
              complexity in the runtime system which has to work much
              harder if it is to ensure efficient execution of code.
              This thesis argues and demonstrates that it is possible
              to compile Smalltalk-80 directly into machine code for
              stock hardware, and to do this efficiently in terms of
              both compiler performance (the compiler must be small
              and fast) and generated code performance. The
              techniques developed for ``delayed code generation'' in
              single-pass recursive descent compilation (or code
              generation by walking a parse tree) are applicable to
              almost any language, and some discussion of the
              application of delayed code generation to the
              compilation of C is also presented. Some investigation
              into the applicability and effectiveness of various
              compile- and run-time optimisations is presented,
              quantified by benchmark results covering a wide range
              of activities from critical operations (such as
              addition) to entire subsystems (such as the text
              display interface).",
}
@TechReport{Ramsdell:1993,
  author      = "John D. Ramsdell",
  email       = "ramsdell@mitre.org",
  title       = "The Revised {VLISP} {PreScheme} Front End",
  institution = "MITRE",
  year        = "1993",
  URL         = "ftp://cs.indiana.edu/pub/scheme-repository/txt/vlisp/preschemerevised.dvi.Z",
  pages       = "91",
  checked     = "19940101",
  source      = "URL",
  abstract    = "The Verified Programming Language Implementation Project
                 developed a formally verified implementation of the
                 Scheme programming language. It used a systems
                 programming dialect of Scheme, called VLISP PreScheme,
                 to program the VLISP Virtual Machine, a byte-code
                 interpreter. The original compiler only accepted
                 programs that specify iterative processes. This
                 document describes a revision of the language and its
                 compiler. The most important change is the compiler
                 provides a stack to save control information for
                 procedure calls so programs that specify recursive
                 processes are accepted. The revision expands the
                 systems programming tasks for which VLISP PreScheme can
                 be used and simplifies the task of matching an algorithm
                 with its code.",
}
@TechReport{Taivalsaari98,
  author      = "Antero Taivalsaari",
  title       = "Implementing a {Java}\texttrademark{} Virtual Machine in the {Java}
                 Programming Language",
  institution = "Sun Microsystems",
  year        = "1998",
  month       = mar,
  number      = "SMLI TR-98-64",
  abstract    = "JavaInJava is a Java Virtual Machine written in the
                 Java\texttrademark{} programming language. The system was built at
                 Sun Microsystems in order to examine the feasibility of
                 constructing high-quality virtual machines using the
                 Java programming language and to experiment with new
                 virtual machine implementation techniques. In this
                 paper we describe the overall architecture of
                 JavaInJava and summarize a number of interesting
                 technical issues that were encountered during its
                 implementation.",
}
@InProceedings{Azevedo1999,
  author    = "Ana Azevedo and Alex Nicolau and Joe Hummel",
  title     = "{Java} Annotation-aware Just-in-Time ({AJIT})
               Compilation System",
  booktitle = "ACM 1999 Java Grande Conference",
  year      = "1999",
  abstract  = "The Java Bytecode Language lacks expressiveness for
               traditional compiler optimizations, making this
               portable, secure software distribution format
               inefficient as a program representation for high
               performance. This inefficiency results from the
               underlying stack model, as well as the fact that many
               bytecode operations intrinsically include
               sub-operations (e.g., \texttt{iaload} includes the
               address computation, array bounds checks and the actual
               load of the array element). The stack model, with no
               operand registers and limiting access to the top of the
               stack, prevents the reuse of values and bytecode
               reordering. Likewise, the bytecodes have no mechanism
               to indicate which sub-operations in the bytecode stream
               are redundant or subsumed by previous ones. As a
               consequence, the Java Bytecode language inhibits the
               expression of important compiler optimizations,
               including common sub-expression elimination, register
               allocation and instruction scheduling. As a result, the
               bytecode stream generated by the Java front-end is
               significantly under-optimized. The most common solution
               is the use of a Just-in-Time (JIT) compiler to not only
               generate native code, but perform optimization as well.
               However, the latter is a time consuming operation in an
               already time-constrained translation process. In this
               paper we present an alternative to an optimizing JIT
               compiler that makes use of code annotations generated by
               the Java front-end. These annotations carry information
               concerning compiler optimization. During the
               translation process, an annotation-aware JIT (AJIT)
               system then uses this information to produce
               high-performance native code without performing much of
               the necessary analyses or transformations. We describe
               the implementation of the first prototype of our
               annotation-aware JIT system and show performance results
               comparing our system with other Java Virtual Machines
               (JVMs) running on SPARC architecture.",
}
@Article{Hummel1997,
  author   = "Joe Hummel and Ana Azevedo and David Kolson and Alex
              Nicolau",
  title    = "Annotating the {Java} Bytecodes in Support of
              Optimization",
  journal  = "Concurrency: Practice and Experience",
  year     = "1997",
  month    = nov,
  volume   = "9",
  number   = "11",
  pages    = "1003--1016",
  note     = "Also presented at the PPoPP Workshop on Java for
              Computational Science and Engineering, June 21, 1997",
  abstract = "The efficient execution of Java programs presents a
              challenge to hardware and software designers alike. The
              difficulty however lies with the Java bytecodes. Their
              model of a simplistic, platform-independent stack
              machine is well-suited for portability, though at the
              expense of execution speed. Various approaches are
              being proposed to increase the speed of Java bytecode
              programs, including: (1) on-the-fly compilation to
              native code (also known as JIT or ``just-in-time''
              compilation); (2) traditional (``ahead-of-time'')
              compilation of bytecodes to some higher-level
              intermediate form and then to native code; and (3)
              translation of bytecodes to a higher-level language and
              then use of an existing compiler to produce native
              code. Speedups on the order of 50 over standard
              bytecode interpretation have been claimed. All of these
              approaches rely upon bytecode analysis (of varying
              sophistication) to extract information about the
              program, which is then used to optimize the native code
              during the translation process. However, extracting
              information from a lower-level representation such as
              the Java bytecodes can be very expensive. And given the
              fact that most approaches for executing Java bytecodes
              cannot spend a great deal of time recovering high-level
              information, the solutions adopted during the
              translation process must use faster and less accurate
              analysis techniques, thus penalizing the quality of the
              native code. In this paper we propose an optimization
              approach based on bytecode annotations. The bytecodes
              are annotated during the original source code to
              bytecode translation, allowing both traditional
              interpretation by a JVM and aggressive optimization by
              an annotation-aware bytecode compiler. Annotations do
              not hinder portability nor compatibility, while
              preserving optimization information that is expensive
              to recompute. Preliminary results yield bytecode with
              C-like performance using JIT technology.",
}
@Misc{kaffeURL,
  author = "{Transvirtual Technologies}",
  title  = "{Kaffe OpenVM}\texttrademark{}",
  URL    = "http://www.transvirtual.com/",
}
@Misc{HotSpotURL,
  author = "{Sun Microsystems Incorporated}",
  title  = "The {Java HotSpot}\texttrademark{} Performance Engine
            Architecture: {A} White Paper About {Sun}'s Second
            Generation Performance Technology",
  year   = "1999",
  month  = apr,
  URL    = "http://java.sun.com/products/hotspot/whitepaper.html",
}
@TechReport{Freund:1998,
  author      = "Stephen N. Freund and John C. Mitchell",
  title       = "A Type System for Object Initialization In the
                 {Java}\texttrademark{} Bytecode Language",
  institution = "Department of Computer Science, Stanford University",
  year        = "1998",
  month       = apr,
  number      = "CS-TN-98-62",
  URL         = "http://elib.stanford.edu/Dienst/Repository/2.0/Body/stanford.cs\%2fCS-TN-98-62/pdf",
  abstract    = "In the standard Java implementation, a Java language
                 program is compiled to Java bytecode. This bytecode may
                 be sent across the network to another site, where it is
                 then interpreted by the Java Virtual Machine. Since
                 bytecode may be written by hand, or corrupted during
                 network transmission, the Java Virtual Machine contains
                 a bytecode verifier that performs a number of
                 consistency checks before code is interpreted. As
                 illustrated by previous attacks on the Java Virtual
                 Machine, these tests, which include type correctness,
                 are critical for system security. In order to analyze
                 existing bytecode verifiers and to understand the
                 properties that should be verified, we develop a
                 precise specification of statically-correct Java
                 bytecode, in the form of a type system. Our focus in
                 this paper is a subset of the bytecode language dealing
                 with object creation and initialization. For this
                 subset, we prove that for every Java bytecode program
                 that satisfies our typing constraints, every object is
                 initialized before it is used. The type system is
                 easily combined with a previous system developed by
                 Stata and Abadi for bytecode subroutines. Our analysis
                 of subroutines and object initialization reveals a
                 previously unpublished bug in the Sun JDK bytecode
                 verifier.",
}
@Book{AitKaci:1999,
  author    = "A{\"\i}t-Kaci, Hassan",
  title     = "{Warren's} Abstract Machine: {A} Tutorial
               Reconstruction",
  publisher = "published by the author",
  year      = "1999",
  URL       = "http://www.isg.sfu.ca/~hak/documents/wambook.ps.gz",
}
@InProceedings{Coglio98,
  author    = "Alessandro Coglio and Allen Goldberg and Zhenyu Qian",
  title     = "Towards a Provably-Correct Implementation of the {JVM}
               Bytecode Verifier",
  booktitle = "Proceedings of the {OOPSLA} '98 Workshop on the Formal
               Underpinnings of {Java}",
  address   = "Vancouver",
  month     = oct,
  year      = "1998",
  note      = "Also available as Kestrel Institute Technical Report
               KES.U.98.5, August 1998",
}
@InProceedings{CCS98-49,
  author    = "Allen Goldberg",
  title     = "A Specification of {Java} Loading and Bytecode
               Verification",
  booktitle = "Proceedings of the 5th {ACM} Conference on Computer
               and Communications Security ({CCS}-98)",
  pages     = "49--58",
  ISBN      = "1-58113-007-4",
  month     = nov # "~3--5",
  publisher = "ACM Press",
  address   = "New York",
  year      = "1998",
  URL       = "ftp://ftp.kestrel.edu/pub/papers/goldberg/Bytecode.pdf",
  abstract  = "This paper gives a mathematical specification of the Java
               Virtual Machine (JVM) bytecode verifier. The
               specification is an axiomatic description of the
               verifier that makes precise subtle aspects of the JVM
               semantics and the verifier. We focus on the use of data
               flow analysis to verify type-correctness and the use of
               typing contexts to insure global type consistency in
               the context of an arbitrary strategy for dynamic class
               loading. The specification types interfaces with
               sufficient accuracy to eliminate run-time type checks.
               Our approach is to specify a generic dataflow
               architecture and formalize the JVM verifier as an
               instance of this architecture. The emphasis in this
               paper is on readability of the specification and
               mathematical clarity. The specification given is
               consistent with the descriptions in Lindholm's and
               Yellin's The Java\texttrademark{} Virtual Machine Specification.
               It is less committed to certain implementation choices
               than Sun's version 1.1 implementation. In particular,
               the specification does not commit an implementation to
               any loading strategy, and detects all type errors as
               early as possible.",
}
@Unpublished{Jones:annotationSemantics:1997,
  author = "Joel Jones",
  title  = "Annotating {Java} Class Files for Performance",
  month  = nov,
  year   = "1997",
  URL    = "http://mtdoom.cs.uiuc.edu/AnnotationSemantics.ps",
  note   = "Available at http://mtdoom.cs.uiuc.edu/AnnotationSemantics.ps",
}