blob: 17a2dd68ea817a7d8ea199a5967dcb14aed7c82c [file] [log] [blame]
# -*- python -*-
## Copyright (C) 2005, 2006, 2008 Free Software Foundation
## Written by Gary Benson <gbenson@redhat.com>
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
import classfile
import copy
# The md5 module is deprecated in Python 2.5
try:
from hashlib import md5
except ImportError:
from md5 import md5
import operator
import os
import sys
import cStringIO as StringIO
import zipfile
PATHS = {"make": "@MAKE@",
"gcj": "@prefix@/bin/gcj@gcc_suffix@",
"dbtool": "@prefix@/bin/gcj-dbtool@gcc_suffix@"}
MAKEFLAGS = []
GCJFLAGS = ["-fPIC", "-findirect-dispatch", "-fjni"]
LDFLAGS = ["-Wl,-Bsymbolic"]
MAX_CLASSES_PER_JAR = 1024
MAX_BYTES_PER_JAR = 1048576
MAKEFILE = "Makefile"
MAKEFILE_HEADER = '''\
GCJ = %(gcj)s
DBTOOL = %(dbtool)s
GCJFLAGS = %(gcjflags)s
LDFLAGS = %(ldflags)s
%%.o: %%.jar
$(GCJ) -c $(GCJFLAGS) $< -o $@
TARGETS = \\
%(targets)s
all: $(TARGETS)'''
MAKEFILE_JOB = '''
%(base)s_SOURCES = \\
%(jars)s
%(base)s_OBJECTS = \\
$(%(base)s_SOURCES:.jar=.o)
%(dso)s: $(%(base)s_OBJECTS)
$(GCJ) -shared $(GCJFLAGS) $(LDFLAGS) $^ -o $@
%(db)s: $(%(base)s_SOURCES)
$(DBTOOL) -n $@ 64
for jar in $^; do \\
$(DBTOOL) -f $@ $$jar \\
%(libdir)s/%(dso)s; \\
done'''
ZIPMAGIC, CLASSMAGIC = "PK\x03\x04", "\xca\xfe\xba\xbe"
class Error(Exception):
pass
class Compiler:
def __init__(self, srcdir, libdir, prefix = None):
self.srcdir = os.path.abspath(srcdir)
self.libdir = os.path.abspath(libdir)
if prefix is None:
self.dstdir = self.libdir
else:
self.dstdir = os.path.join(prefix, self.libdir.lstrip(os.sep))
# Calling code may modify these parameters
self.gcjflags = copy.copy(GCJFLAGS)
self.ldflags = copy.copy(LDFLAGS)
self.makeflags = copy.copy(MAKEFLAGS)
self.exclusions = []
def compile(self):
"""Search srcdir for classes and jarfiles, then generate
solibs and mappings databases for them all in libdir."""
if not os.path.isdir(self.dstdir):
os.makedirs(self.dstdir)
oldcwd = os.getcwd()
os.chdir(self.dstdir)
try:
jobs = self.getJobList()
if not jobs:
raise Error, "nothing to do"
self.writeMakefile(MAKEFILE, jobs)
for job in jobs:
job.writeJars()
system([PATHS["make"]] + self.makeflags)
for job in jobs:
job.clean()
os.unlink(MAKEFILE)
finally:
os.chdir(oldcwd)
def getJobList(self):
"""Return all jarfiles and class collections in srcdir."""
jobs = weed_jobs(find_jobs(self.srcdir, self.exclusions))
set_basenames(jobs)
return jobs
def writeMakefile(self, path, jobs):
"""Generate a makefile to build the solibs and mappings
databases for the specified list of jobs."""
fp = open(path, "w")
print >>fp, MAKEFILE_HEADER % {
"gcj": PATHS["gcj"],
"dbtool": PATHS["dbtool"],
"gcjflags": " ".join(self.gcjflags),
"ldflags": " ".join(self.ldflags),
"targets": " \\\n".join(reduce(operator.add, [
(job.dsoName(), job.dbName()) for job in jobs]))}
for job in jobs:
values = job.ruleArguments()
values["libdir"] = self.libdir
print >>fp, MAKEFILE_JOB % values
fp.close()
def find_jobs(dir, exclusions = ()):
"""Scan a directory and find things to compile: jarfiles (zips,
wars, ears, rars, etc: we go by magic rather than file extension)
and directories of classes."""
def visit((classes, zips), dir, items):
for item in items:
path = os.path.join(dir, item)
if os.path.islink(path) or not os.path.isfile(path):
continue
magic = open(path, "r").read(4)
if magic == ZIPMAGIC:
zips.append(path)
elif magic == CLASSMAGIC:
classes.append(path)
classes, paths = [], []
os.path.walk(dir, visit, (classes, paths))
# Convert the list of classes into a list of directories
while classes:
# XXX this requires the class to be correctly located in its heirachy.
path = classes[0][:-len(os.sep + classname(classes[0]) + ".class")]
paths.append(path)
classes = [cls for cls in classes if not cls.startswith(path)]
# Handle exclusions. We're really strict about them because the
# option is temporary in aot-compile-rpm and dead options left in
# specfiles will hinder its removal.
for path in exclusions:
if path in paths:
paths.remove(path)
else:
raise Error, "%s: path does not exist or is not a job" % path
# Build the list of jobs
jobs = []
paths.sort()
for path in paths:
if os.path.isfile(path):
job = JarJob(path)
else:
job = DirJob(path)
if len(job.classes):
jobs.append(job)
return jobs
class Job:
"""A collection of classes that will be compiled as a unit."""
def __init__(self, path):
self.path, self.classes, self.blocks = path, {}, None
self.classnames = {}
def addClass(self, bytes, name):
"""Subclasses call this from their __init__ method for
every class they find."""
digest = md5(bytes).digest()
self.classes[digest] = bytes
self.classnames[digest] = name
def __makeBlocks(self):
"""Split self.classes into chunks that can be compiled to
native code by gcj. In the majority of cases this is not
necessary -- the job will have come from a jarfile which will
be equivalent to the one we generate -- but this only happens
_if_ the job was a jarfile and _if_ the jarfile isn't too big
and _if_ the jarfile has the correct extension and _if_ all
classes are correctly named and _if_ the jarfile has no
embedded jarfiles. Fitting a special case around all these
conditions is tricky to say the least.
Note that this could be called at the end of each subclass's
__init__ method. The reason this is not done is because we
need to parse every class file. This is slow, and unnecessary
if the job is subsetted."""
names = {}
for hash, bytes in self.classes.items():
try:
name = classname(bytes)
except:
warn("job %s: class %s malformed or not a valid class file" \
% (self.path, self.classnames[hash]))
raise
if not names.has_key(name):
names[name] = []
names[name].append(hash)
names = names.items()
# We have to sort somehow, or the jars we generate
# We sort by name in a simplistic attempt to keep related
# classes together so inter-class optimisation can happen.
names.sort()
self.blocks, bytes = [[]], 0
for name, hashes in names:
for hash in hashes:
if len(self.blocks[-1]) >= MAX_CLASSES_PER_JAR \
or bytes >= MAX_BYTES_PER_JAR:
self.blocks.append([])
bytes = 0
self.blocks[-1].append((name, hash))
bytes += len(self.classes[hash])
# From Archit Shah:
# The implementation and the documentation don't seem to match.
#
# [a, b].isSubsetOf([a]) => True
#
# Identical copies of all classes this collection do not exist
# in the other. I think the method should be named isSupersetOf
# and the documentation should swap uses of "this" and "other"
#
# XXX think about this when I've had more sleep...
def isSubsetOf(self, other):
"""Returns True if identical copies of all classes in this
collection exist in the other."""
for item in other.classes.keys():
if not self.classes.has_key(item):
return False
return True
def __targetName(self, ext):
return self.basename + ext
def tempJarName(self, num):
return self.__targetName(".%d.jar" % (num + 1))
def tempObjName(self, num):
return self.__targetName(".%d.o" % (num + 1))
def dsoName(self):
"""Return the filename of the shared library that will be
built from this job."""
return self.__targetName(".so")
def dbName(self):
"""Return the filename of the mapping database that will be
built from this job."""
return self.__targetName(".db")
def ruleArguments(self):
"""Return a dictionary of values that when substituted
into MAKEFILE_JOB will create the rules required to build
the shared library and mapping database for this job."""
if self.blocks is None:
self.__makeBlocks()
return {
"base": "".join(
[c.isalnum() and c or "_" for c in self.dsoName()]),
"jars": " \\\n".join(
[self.tempJarName(i) for i in xrange(len(self.blocks))]),
"dso": self.dsoName(),
"db": self.dbName()}
def writeJars(self):
"""Generate jarfiles that can be native compiled by gcj."""
if self.blocks is None:
self.__makeBlocks()
for block, i in zip(self.blocks, xrange(len(self.blocks))):
jar = zipfile.ZipFile(self.tempJarName(i), "w", zipfile.ZIP_STORED)
for name, hash in block:
jar.writestr(
zipfile.ZipInfo("%s.class" % name), self.classes[hash])
jar.close()
def clean(self):
"""Delete all temporary files created during this job's build."""
if self.blocks is None:
self.__makeBlocks()
for i in xrange(len(self.blocks)):
os.unlink(self.tempJarName(i))
os.unlink(self.tempObjName(i))
class JarJob(Job):
"""A Job whose origin was a jarfile."""
def __init__(self, path):
Job.__init__(self, path)
self._walk(zipfile.ZipFile(path, "r"))
def _walk(self, zf):
for name in zf.namelist():
bytes = zf.read(name)
if bytes.startswith(ZIPMAGIC):
self._walk(zipfile.ZipFile(StringIO.StringIO(bytes)))
elif bytes.startswith(CLASSMAGIC):
self.addClass(bytes, name)
class DirJob(Job):
"""A Job whose origin was a directory of classfiles."""
def __init__(self, path):
Job.__init__(self, path)
os.path.walk(path, DirJob._visit, self)
def _visit(self, dir, items):
for item in items:
path = os.path.join(dir, item)
if os.path.islink(path) or not os.path.isfile(path):
continue
fp = open(path, "r")
magic = fp.read(4)
if magic == CLASSMAGIC:
self.addClass(magic + fp.read(), name)
def weed_jobs(jobs):
"""Remove any jarfiles that are completely contained within
another. This is more common than you'd think, and we only
need one nativified copy of each class after all."""
jobs = copy.copy(jobs)
while True:
for job1 in jobs:
for job2 in jobs:
if job1 is job2:
continue
if job1.isSubsetOf(job2):
msg = "subsetted %s" % job2.path
if job2.isSubsetOf(job1):
if (isinstance(job1, DirJob) and
isinstance(job2, JarJob)):
# In the braindead case where a package
# contains an expanded copy of a jarfile
# the jarfile takes precedence.
continue
msg += " (identical)"
warn(msg)
jobs.remove(job2)
break
else:
continue
break
else:
break
continue
return jobs
def set_basenames(jobs):
"""Ensure that each jarfile has a different basename."""
names = {}
for job in jobs:
name = os.path.basename(job.path)
if not names.has_key(name):
names[name] = []
names[name].append(job)
for name, set in names.items():
if len(set) == 1:
set[0].basename = name
continue
# prefix the jar filenames to make them unique
# XXX will not work in most cases -- needs generalising
set = [(job.path.split(os.sep), job) for job in set]
minlen = min([len(bits) for bits, job in set])
set = [(bits[-minlen:], job) for bits, job in set]
bits = apply(zip, [bits for bits, job in set])
while True:
row = bits[-2]
for bit in row[1:]:
if bit != row[0]:
break
else:
del bits[-2]
continue
break
set = zip(
["_".join(name) for name in apply(zip, bits[-2:])],
[job for bits, job in set])
for name, job in set:
warn("building %s as %s" % (job.path, name))
job.basename = name
# XXX keep this check until we're properly general
names = {}
for job in jobs:
name = job.basename
if names.has_key(name):
raise Error, "%s: duplicate jobname" % name
names[name] = 1
def system(command):
"""Execute a command."""
status = os.spawnv(os.P_WAIT, command[0], command)
if status > 0:
raise Error, "%s exited with code %d" % (command[0], status)
elif status < 0:
raise Error, "%s killed by signal %d" % (command[0], -status)
def warn(msg):
"""Print a warning message."""
print >>sys.stderr, "%s: warning: %s" % (
os.path.basename(sys.argv[0]), msg)
def classname(bytes):
"""Extract the class name from the bytes of a class file."""
klass = classfile.Class(bytes)
return klass.constants[klass.constants[klass.name][1]][1]