blob: 2b2bb11d2e617d60eb84a20be08f6543c47db384 [file] [log] [blame]
#!/usr/bin/env python3
#
# Copyright (C) 2013-2020 Free Software Foundation, Inc.
#
# This script is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
# This script adjusts the copyright notices at the top of source files
# so that they have the form:
#
# Copyright XXXX-YYYY Free Software Foundation, Inc.
#
# It doesn't change code that is known to be maintained elsewhere or
# that carries a non-FSF copyright.
#
# The script also doesn't change testsuite files, except those in
# libstdc++-v3. This is because libstdc++-v3 has a conformance testsuite,
# while most tests in other directories are just things that failed at some
# point in the past.
#
# Pass --this-year to the script if you want it to add the current year
# to all applicable notices. Pass --quilt if you are using quilt and
# want files to be added to the quilt before being changed.
#
# By default the script will update all directories for which the
# output has been vetted. You can instead pass the names of individual
# directories, including those that haven't been approved. So:
#
# update-copyright.py --this-year
#
# is the command that would be used at the beginning of a year to update
# all copyright notices (and possibly at other times to check whether
# new files have been added with old years). On the other hand:
#
# update-copyright.py --this-year libitm
#
# would run the script on just libitm/.
#
# Note that things like --version output strings must be updated before
# this script is run. There's already a separate procedure for that.
import os
import re
import sys
import time
import subprocess
class Errors:
    """Collect error reports and remember how many have been issued."""

    def __init__(self):
        # Number of times report() has been called.
        self.num_errors = 0

    def report(self, filename, string):
        """Write one error message to stderr and count it.

        A non-empty FILENAME is prepended to STRING as 'FILENAME: '.
        """
        message = filename + ': ' + string if filename else string
        sys.stderr.write(message + '\n')
        self.num_errors += 1

    def ok(self):
        """Return True as long as nothing has been reported."""
        return not self.num_errors
class GenericFilter:
    """Default policy deciding which files and directories are processed.

    Subclasses extend the sets built here or override the query methods.
    """

    def __init__(self):
        # Directory names that are never descended into.
        self.skip_dirs = set()

        # File extensions that are never processed.
        self.skip_extensions = {'.png', '.pyc'}

        # Files whose final year is never bumped.
        self.fossilised_files = set()

        # Files reported by by_package_author().
        self.own_files = set()

        # File names that are never processed.
        self.skip_files = {
            # Skip licence files.
            'COPYING',
            'COPYING.LIB',
            'COPYING3',
            'COPYING3.LIB',
            'LICENSE',
            'LICENSE.txt',
            'fdl.texi',
            'gpl_v3.texi',
            'fdl-1.3.xml',
            'gpl-3.0.xml',

            # Skip auto- and libtool-related files.
            'aclocal.m4',
            'compile',
            'config.guess',
            'config.sub',
            'depcomp',
            'install-sh',
            'libtool.m4',
            'ltmain.sh',
            'ltoptions.m4',
            'ltsugar.m4',
            'ltversion.m4',
            'lt~obsolete.m4',
            'missing',
            'mkdep',
            'mkinstalldirs',
            'move-if-change',
            'shlibpath.m4',
            'symlink-tree',
            'ylwrap',

            # Skip FSF mission statement, etc.
            'gnu.texi',
            'funding.texi',
            'appendix_free.xml',

            # Skip imported texinfo files.
            'texinfo.tex',
        }

    def get_line_filter(self, dir, filename):
        """Return a regexp matching lines to leave alone, or None."""
        if not filename.startswith('ChangeLog'):
            return None
        # Ignore references to copyright in changelog entries, which are
        # the lines that start with a tab.
        return re.compile('\t')

    def skip_file(self, dir, filename):
        """Return True if DIR/FILENAME should not be processed at all."""
        if filename in self.skip_files:
            return True

        base, extension = os.path.splitext(os.path.join(dir, filename))
        if extension in self.skip_extensions:
            return True

        exists = os.path.exists
        if extension == '.in':
            # Skip .in files produced by automake.
            if exists(base + '.am'):
                return True
            # Skip files produced by autogen.
            if exists(base + '.def') and exists(base + '.tpl'):
                return True

        # Skip configure files produced by autoconf.
        if filename == 'configure':
            return exists(base + '.ac') or exists(base + '.in')

        return False

    def skip_dir(self, dir, subdir):
        """Return True if SUBDIR of DIR should not be descended into."""
        return subdir in self.skip_dirs

    def is_fossilised_file(self, dir, filename):
        """Return True if the file's final year must not be bumped."""
        if filename in self.fossilised_files:
            return True
        # Only touch the current ChangeLog; any other name containing
        # 'ChangeLog' is treated as fossilised.
        return filename != 'ChangeLog' and 'ChangeLog' in filename

    def by_package_author(self, dir, filename):
        """Return True if FILENAME is listed in own_files."""
        return filename in self.own_files
class Copyright:
    """Find copyright notices in files and rewrite them in canonical form.

    A notice whose holder was registered with add_package_author is
    rewritten with the standard '(C)' marker, a single year or a
    'first-last' year range and the canonical holder name.  Notices whose
    holder was registered with add_external_author are left alone.  Any
    other problem is reported through the ERRORS object given to the
    constructor, which must provide report(filename, string) and ok().
    """

    class BadYear(Exception):
        """A year string that parse_year cannot interpret.

        This has to derive from Exception: Python 3 refuses both to raise
        and to catch classes that do not inherit from BaseException.
        """

        def __init__(self, year):
            super().__init__(year)
            self.year = year

        def __str__(self):
            return 'unrecognised year: ' + self.year

    def __init__(self, errors):
        self.errors = errors

        # All regexp fragments are raw strings so that '\s', '\w', '\(' etc.
        # reach the re module unchanged without relying on Python passing
        # unknown string escapes through (a SyntaxWarning since 3.12).

        # Characters in a range of years.  Include '.' for typos.
        ranges = r'[0-9](?:[-0-9.,\s]|\s+and\s+)*[0-9]'

        # Non-whitespace characters in a copyright holder's name.
        name = r'[\w.,-]'

        # Matches one year.
        self.year_re = re.compile('[0-9]+')

        # Matches part of a year or copyright holder.
        self.continuation_re = re.compile(ranges + '|' + name)

        # Matches a full copyright notice:
        self.copyright_re = re.compile(
            # 1: 'Copyright (C)', etc.
            # NOTE(review): the '©' alternative may have been the HTML
            # entity spelling before this file was extracted -- confirm.
            r'([Cc]opyright'
            r'|[Cc]opyright\s+\([Cc]\)'
            r'|[Cc]opyright\s+%s'
            r'|[Cc]opyright\s+©'
            r'|[Cc]opyright\s+@copyright{}'
            "|copyright = u'"
            r'|@set\s+copyright[\w-]+)'

            # 2: the years.  Include the whitespace in the year, so that
            #    we can remove any excess.
            r'(\s*(?:' + ranges + ',?'
            r'|@value\{[^{}]*\})\s*)'

            # 3: 'by ', if used
            r'(by\s+)?'

            # 4: the copyright holder.  Don't allow multiple consecutive
            #    spaces, so that right-margin gloss doesn't get caught
            #    (e.g. gnat_ugn.texi).
            '(' + name + r'(?:\s?' + name + ')*)?')

        # A regexp for notices that might have slipped by.  Just matching
        # 'copyright' is too noisy, and 'copyright.*[0-9]' falls foul of
        # HTML header markers, so check for 'copyright' and two digits.
        self.other_copyright_re = re.compile('copyright.*[0-9][0-9]',
                                             re.IGNORECASE)
        self.comment_re = re.compile('#+|[*]+|;+|%+|//+|@c |dnl ')

        # Maps each known holder spelling to its canonical form, or to
        # None for holders whose notices must be left alone.
        self.holders = {'@copying': '@copying'}

        # Proper prefixes (cut at spaces) of package-author spellings;
        # a holder equal to one of these may continue on the next line.
        self.holder_prefixes = set()

        # True to 'quilt add' files before changing them.
        self.use_quilt = False

        # If set, force all notices to include this year.
        self.max_year = None

        # Goes after the year(s).  Could be ', '.
        self.separator = ' '

    def add_package_author(self, holder, canon_form=None):
        """Register HOLDER as a spelling of the package author.

        Notices naming HOLDER are rewritten to use CANON_FORM, which
        defaults to HOLDER itself.
        """
        self.holders[holder] = canon_form if canon_form else holder
        # Record every prefix that ends just before a space.
        index = holder.find(' ')
        while index >= 0:
            self.holder_prefixes.add(holder[:index])
            index = holder.find(' ', index + 1)

    def add_external_author(self, holder):
        """Register HOLDER as a known author whose notices stay unchanged."""
        self.holders[holder] = None

    def parse_year(self, string):
        """Convert a string of digits to a four-digit year.

        Four-digit strings are returned as integers.  Two-digit strings
        greater than 70 are taken to be 19xx.  Anything else raises
        BadYear.
        """
        year = int(string)
        if len(string) == 2:
            if year > 70:
                return year + 1900
        elif len(string) == 4:
            return year
        raise self.BadYear(string)

    def year_range(self, years):
        """Return (earliest, latest) of all the years found in YEARS."""
        year_list = [self.parse_year(year)
                     for year in self.year_re.findall(years)]
        assert len(year_list) > 0
        return (min(year_list), max(year_list))

    def set_use_quilt(self, use_quilt):
        """Choose whether files are 'quilt add'ed before being replaced."""
        self.use_quilt = use_quilt

    def include_year(self, year):
        """Make every updated notice extend to at least YEAR (set once)."""
        assert not self.max_year
        self.max_year = year

    def canonicalise_years(self, dir, filename, filter, years):
        """Return YEARS as 'YYYY' or 'YYYY-YYYY'.

        Raises BadYear if YEARS contains a year parse_year rejects.
        """
        # Leave texinfo variables alone.
        if years.startswith('@value'):
            return years

        (min_year, max_year) = self.year_range(years)

        # Update the upper bound, if enabled.
        if self.max_year and not filter.is_fossilised_file(dir, filename):
            max_year = max(max_year, self.max_year)

        # Use a range.
        if min_year == max_year:
            return '%d' % min_year
        return '%d-%d' % (min_year, max_year)

    def strip_continuation(self, line):
        """Strip leading whitespace and one leading comment marker."""
        line = line.lstrip()
        match = self.comment_re.match(line)
        if match:
            line = line[match.end():].lstrip()
        return line

    def is_complete(self, match):
        """Return a true value if MATCH's holder needs no continuation.

        The holder is incomplete when it is missing, or when it is only a
        prefix of a package-author spelling and not a spelling itself.
        """
        holder = match.group(4)
        return (holder
                and (holder not in self.holder_prefixes
                     or holder in self.holders))

    def update_copyright(self, dir, filename, filter, file, line, match):
        """Canonicalise the notice that MATCH found in LINE.

        FILE is the open file LINE came from; further lines are read from
        it when the notice continues on the following line(s).

        Returns (changed, text, next_line).  TEXT is the replacement for
        everything consumed (the original text when nothing changed or an
        error was reported).  NEXT_LINE is a line that was read ahead but
        turned out not to belong to the notice, or None.
        """
        orig_line = line
        next_line = None
        pathname = os.path.join(dir, filename)

        intro = match.group(1)
        if intro.startswith('@set'):
            # Texinfo year variables should always be on one line
            after_years = line[match.end(2):].strip()
            if after_years != '':
                self.errors.report(pathname,
                                   'trailing characters in @set: '
                                   + after_years)
                return (False, orig_line, next_line)
        else:
            # If it looks like the copyright is incomplete, add the next line.
            while not self.is_complete(match):
                # readline() returns '' at end of file rather than raising;
                # the '' is handed back as NEXT_LINE, which callers treat
                # the same as None.
                next_line = file.readline()
                if not next_line:
                    break

                # If the next line doesn't look like a proper continuation,
                # assume that what we've got is complete.
                continuation = self.strip_continuation(next_line)
                if not self.continuation_re.match(continuation):
                    break

                # Merge the lines for matching purposes.
                orig_line += next_line
                line = line.rstrip() + ' ' + continuation
                next_line = None

                # Rematch with the longer line, at the original position.
                match = self.copyright_re.match(line, match.start())
                assert match

            holder = match.group(4)

            # Use the filter to test cases where markup is getting in the way.
            if filter.by_package_author(dir, filename):
                assert holder not in self.holders

            elif not holder:
                self.errors.report(pathname, 'missing copyright holder')
                return (False, orig_line, next_line)

            elif holder not in self.holders:
                self.errors.report(pathname,
                                   'unrecognised copyright holder: ' + holder)
                return (False, orig_line, next_line)

            else:
                # See whether the copyright is associated with the package
                # author.
                canon_holder = self.holders[holder]
                if not canon_holder:
                    return (False, orig_line, next_line)

                # The edits below go from right to left (holder, 'by',
                # years, intro) so earlier match offsets stay valid.

                # Make sure the author is given in a consistent way.
                line = (line[:match.start(4)]
                        + canon_holder
                        + line[match.end(4):])

                # Remove any 'by'
                if match.group(3):
                    line = line[:match.start(3)] + line[match.end(3):]

        # Update the copyright years.
        years = match.group(2).strip()
        try:
            canon_years = self.canonicalise_years(dir, filename, filter,
                                                  years)
        except self.BadYear as e:
            self.errors.report(pathname, str(e))
            return (False, orig_line, next_line)

        line = (line[:match.start(2)]
                + ('' if intro.startswith('copyright = ') else ' ')
                + canon_years + self.separator
                + line[match.end(2):])

        # Use the standard (C) form.
        if intro.endswith('right'):
            intro += ' (C)'
        elif intro.endswith('(c)'):
            intro = intro[:-3] + '(C)'
        line = line[:match.start(1)] + intro + line[match.end(1):]

        # Strip trailing whitespace
        line = line.rstrip() + '\n'

        return (line != orig_line, line, next_line)

    def guess_encoding(self, pathname):
        """Return the first of 'utf8', 'iso8859' that decodes PATHNAME.

        Returns None if neither does.
        """
        for encoding in ('utf8', 'iso8859'):
            try:
                # Use a context manager so the probe handle is closed
                # promptly instead of waiting for garbage collection.
                with open(pathname, 'r', encoding=encoding) as file:
                    file.read()
            except UnicodeDecodeError:
                continue
            return encoding
        return None

    def process_file(self, dir, filename, filter):
        """Update the notices in DIR/FILENAME, rewriting it if they change.

        Leftover '*.tmp' files are removed instead of being processed.
        Nothing is written once any error has been reported.
        """
        pathname = os.path.join(dir, filename)
        if filename.endswith('.tmp'):
            # Looks like something we tried to create before.
            try:
                os.remove(pathname)
            except OSError:
                pass
            return

        lines = []
        changed = False
        line_filter = filter.get_line_filter(dir, filename)
        mode = None
        encoding = self.guess_encoding(pathname)
        with open(pathname, 'r', encoding=encoding) as file:
            mode = os.fstat(file.fileno()).st_mode
            for line in file:
                # update_copyright may read ahead and hand back a line that
                # still needs processing, hence the inner loop.
                while line:
                    next_line = None
                    # Leave filtered-out lines alone.
                    if not (line_filter and line_filter.match(line)):
                        match = self.copyright_re.search(line)
                        if match:
                            res = self.update_copyright(dir, filename, filter,
                                                        file, line, match)
                            (this_changed, line, next_line) = res
                            changed = changed or this_changed

                        # Check for copyright lines that might have slipped by.
                        elif self.other_copyright_re.search(line):
                            self.errors.report(pathname,
                                               'unrecognised copyright: %s'
                                               % line.strip())
                    lines.append(line)
                    line = next_line

        # If something changed, write the new file out.
        if changed and self.errors.ok():
            tmp_pathname = pathname + '.tmp'
            with open(tmp_pathname, 'w', encoding=encoding) as file:
                file.writelines(lines)
                # Give the replacement the original file's permissions.
                os.fchmod(file.fileno(), mode)
            if self.use_quilt:
                subprocess.call(['quilt', 'add', pathname])
            os.rename(tmp_pathname, pathname)

    def process_tree(self, tree, filter):
        """Process every file under TREE that FILTER does not skip."""
        for (dir, subdirs, filenames) in os.walk(tree):
            # Don't recurse through directories that should be skipped.
            # The list must be modified in place for os.walk to notice.
            subdirs[:] = [subdir for subdir in subdirs
                          if not filter.skip_dir(dir, subdir)]

            # Handle the files in this directory.
            for filename in filenames:
                if filter.skip_file(dir, filename):
                    sys.stdout.write('Skipping %s\n'
                                     % os.path.join(dir, filename))
                else:
                    self.process_file(dir, filename, filter)
class CmdLine:
    """Command-line driver: parse sys.argv and process the chosen trees.

    COPYRIGHT is a callable (normally a Copyright class) that is invoked
    with the shared Errors object to create the notice updater.
    """

    def __init__(self, copyright=Copyright):
        self.errors = Errors()
        self.copyright = copyright(self.errors)

        # (directory, filter) pairs, in registration order.
        self.dirs = []
        # Directories processed when none are named on the command line.
        self.default_dirs = []
        # Directories named on the command line.
        self.chosen_dirs = []
        # Maps an option name to the handler called with that name.
        self.option_handlers = dict()
        # (name, help text) pairs, in registration order.
        self.option_help = []

        self.add_option('--help', 'Print this help', self.o_help)
        self.add_option('--quilt', '"quilt add" files before changing them',
                        self.o_quilt)
        self.add_option('--this-year', 'Add the current year to every notice',
                        self.o_this_year)

    def add_option(self, name, help, handler):
        """Register option NAME with its HELP text and HANDLER."""
        self.option_help.append((name, help))
        self.option_handlers[name] = handler

    def add_dir(self, dir, filter=None):
        """Register DIR as a known directory, processed using FILTER.

        When FILTER is omitted a fresh GenericFilter is created for the
        directory.  A None sentinel is used rather than a filter instance
        in the signature, which would be built once at definition time
        and shared by every directory.
        """
        if filter is None:
            filter = GenericFilter()
        self.dirs.append((dir, filter))

    def o_help(self, option=None):
        """Print usage, options and known directories, then exit with 0."""
        sys.stdout.write('Usage: %s [options] dir1 dir2...\n\n'
                         'Options:\n' % sys.argv[0])
        for (what, text) in self.option_help:
            sys.stdout.write('%-15s %s\n' % (what, text))
        sys.stdout.write('\nDirectories:\n')

        # Three directories per output line; the last entry always ends
        # its line.
        total = len(self.dirs)
        for position, (name, _) in enumerate(self.dirs, 1):
            if position % 3 == 0 or position == total:
                sys.stdout.write(name + '\n')
            else:
                sys.stdout.write('%-25s' % name)
        sys.exit(0)

    def o_quilt(self, option):
        """Handle --quilt."""
        self.copyright.set_use_quilt(True)

    def o_this_year(self, option):
        """Handle --this-year."""
        self.copyright.include_year(time.localtime().tm_year)

    def main(self):
        """Parse sys.argv, process the selected directories and exit.

        The exit status is 0 if no error was reported and 1 otherwise.
        """
        for arg in sys.argv[1:]:
            if arg[:1] != '-':
                self.chosen_dirs.append(arg)
            elif arg in self.option_handlers:
                self.option_handlers[arg](arg)
            else:
                self.errors.report(None, 'unrecognised option: ' + arg)
        if self.errors.ok():
            if not self.chosen_dirs:
                self.chosen_dirs = self.default_dirs
            if not self.chosen_dirs:
                self.o_help()
            else:
                for chosen_dir in self.chosen_dirs:
                    # A trailing separator makes 'gcc' select 'gcc' and
                    # 'gcc/testsuite' but not, say, 'gccfoo'.
                    canon_dir = os.path.join(chosen_dir, '')
                    count = 0
                    for (dir, filter) in self.dirs:
                        if (dir + os.sep).startswith(canon_dir):
                            count += 1
                            self.copyright.process_tree(dir, filter)
                    if count == 0:
                        self.errors.report(None, 'unrecognised directory: '
                                           + chosen_dir)
        sys.exit(0 if self.errors.ok() else 1)
#----------------------------------------------------------------------------
class TopLevelFilter(GenericFilter):
    """Filter that refuses to descend into any subdirectory."""

    def skip_dir(self, dir, subdir):
        # Every subdirectory is skipped, whatever its name.
        return True
class ConfigFilter(GenericFilter):
    """Generic filter that additionally skips .m4 files from gettext."""

    def __init__(self):
        super().__init__()

    def skip_file(self, dir, filename):
        """Return True if DIR/FILENAME should not be processed."""
        if filename.endswith('.m4'):
            with open(os.path.join(dir, filename)) as m4_file:
                first_line = m4_file.readline()
            # Skip files imported from gettext, recognised by 'gettext-'
            # appearing on their first line.
            if 'gettext-' in first_line:
                return True
        return super().skip_file(dir, filename)
class GCCFilter(GenericFilter):
    """Generic filter with extra exclusions for this directory."""

    def __init__(self):
        super().__init__()

        # Not part of GCC.
        self.skip_files.add('math-68881.h')

        self.skip_dirs.update({
            # Better not create a merge nightmare for the GNAT folks.
            'ada',
            # Handled separately.
            'testsuite',
        })

        self.skip_extensions.update({
            # Maintained by the translation project.
            '.po',
            # Automatically-generated.
            '.pot',
        })

        # Old news won't be updated.
        self.fossilised_files.add('ONEWS')
class TestsuiteFilter(GenericFilter):
    """Generic filter that leaves the tests themselves untouched."""

    def __init__(self):
        super().__init__()

        # Don't change the tests, which could be owned by anyone.
        self.skip_extensions.update({
            '.c',
            '.C',
            '.cc',
            '.d',
            '.h',
            '.hs',
            '.f',
            '.f90',
            '.go',
            '.inc',
            '.java',
        })

    def skip_file(self, dir, filename):
        """Return True if DIR/FILENAME should not be processed."""
        never_updated = {
            # g++.niklas/README contains historical copyright information
            # and isn't updated.
            ('g++.niklas', 'README'),
            # Similarly params/README.
            ('params', 'README'),
            ('gfortran.dg', 'pdt_5.f03'),
        }
        if (os.path.basename(dir), filename) in never_updated:
            return True
        return super().skip_file(dir, filename)
class LibCppFilter(GenericFilter):
    """Generic filter that also skips translation files."""

    def __init__(self):
        super().__init__()
        self.skip_extensions.update({
            # Maintained by the translation project.
            '.po',
            # Automatically-generated.
            '.pot',
        })
class LibGCCFilter(GenericFilter):
    """Generic filter that also skips the soft-fp directory."""

    def __init__(self):
        super().__init__()
        # Imported from GLIBC.
        self.skip_dirs.add('soft-fp')
class LibPhobosFilter(GenericFilter):
    """Generic filter that also skips sources imported from upstream."""

    def __init__(self):
        super().__init__()

        # Source module imported from upstream.
        self.skip_files.add('object.d')

        # Contains sources imported from upstream.
        self.skip_dirs.update({
            'core',
            'etc',
            'gc',
            'gcstub',
            'rt',
            'std',
        })
class LibStdCxxFilter(GenericFilter):
    """Generic filter with the exclusions and special cases listed below."""

    def __init__(self):
        GenericFilter.__init__(self)

        self.skip_files |= set([
            # Contains no copyright of its own, but quotes the GPL.
            'intro.xml',
        ])

        self.skip_dirs |= set([
            # Contains automatically-generated sources.
            'html',
            # The testsuite data files shouldn't be changed.
            'data',
            # Contains imported images
            'images',
        ])

        self.own_files |= set([
            # Contains markup around the copyright owner.
            'spine.xml',
        ])

    def get_line_filter(self, dir, filename):
        """Return a regexp matching lines to leave alone, or None."""
        if filename == 'boost_concept_check.h':
            # Raw string: '\(' is not a valid string escape and triggers a
            # SyntaxWarning on Python 3.12+ in an ordinary literal.  The
            # pattern itself is unchanged.
            return re.compile(r'// \(C\) Copyright Jeremy Siek')
        return GenericFilter.get_line_filter(self, dir, filename)
class GCCCopyright(Copyright):
    """Copyright updater preloaded with the holders known to this tree."""

    def __init__(self, errors):
        super().__init__(errors)

        # Every accepted spelling of the package author is rewritten to
        # this canonical form.
        canon_fsf = 'Free Software Foundation, Inc.'
        fsf_spellings = (
            'Free Software Foundation',
            'Free Software Foundation.',
            'Free Software Foundation Inc.',
            'Free Software Foundation, Inc',
            'Free Software Foundation, Inc.',
            'The Free Software Foundation',
            'The Free Software Foundation, Inc.',
            'Software Foundation, Inc.',
        )
        for spelling in fsf_spellings:
            self.add_package_author(spelling, canon_fsf)

        # Holders that are recognised but whose notices are left alone.
        external_holders = (
            'ARM',
            'AdaCore',
            'Advanced Micro Devices Inc.',
            'Ami Tavory and Vladimir Dreizin, IBM-HRL.',
            'Cavium Networks.',
            'Faraday Technology Corp.',
            'Florida State University',
            'Gerard Jungman',
            'Greg Colvin and Beman Dawes.',
            'Hewlett-Packard Company',
            'Intel Corporation',
            'Information Technology Industry Council.',
            'James Theiler, Brian Gough',
            'Makoto Matsumoto and Takuji Nishimura,',
            'Mentor Graphics Corporation',
            'National Research Council of Canada.',
            'NVIDIA Corporation',
            'Peter Dimov and Multi Media Ltd.',
            'Peter Dimov',
            'Pipeline Associates, Inc.',
            'Regents of the University of California.',
            'Silicon Graphics Computer Systems, Inc.',
            'Silicon Graphics',
            'Stephen L. Moshier',
            'Sun Microsystems, Inc. All rights reserved.',
            'The D Language Foundation, All Rights Reserved',
            # NOTE(review): this entry is listed twice; the two copies may
            # once have differed in spacing -- confirm before removing one.
            'The Go Authors. All rights reserved.',
            'The Go Authors. All rights reserved.',
            'The Go Authors.',
            'The Regents of the University of California.',
            'Ulf Adams',
            'Unicode, Inc.',
            'University of Toronto.',
            'Yoshinori Sato',
        )
        for holder in external_holders:
            self.add_external_author(holder)
class GCCCmdLine(CmdLine):
    """Command line preconfigured with this tree's directories."""

    def __init__(self):
        super().__init__(GCCCopyright)

        # Known directories in registration order.  A one-element entry
        # uses add_dir's default filter.
        #
        # Deliberately not registered:
        #   boehm-gc, intl, libffi, libgo, liboffloadmic, libsanitizer
        #   and zlib are imported from upstream;
        #   contrib isn't really part of GCC;
        #   maintainer-scripts is not registered either.
        known_dirs = (
            ('.', TopLevelFilter()),
            ('c++tools',),
            ('config', ConfigFilter()),
            ('fixincludes',),
            ('gcc', GCCFilter()),
            (os.path.join('gcc', 'testsuite'), TestsuiteFilter()),
            ('gnattools',),
            ('gotools',),
            ('include',),
            ('libada',),
            ('libatomic',),
            ('libbacktrace',),
            ('libcc1',),
            ('libcpp', LibCppFilter()),
            ('libdecnumber',),
            ('libgcc', LibGCCFilter()),
            ('libgfortran',),
            ('libgomp',),
            ('libiberty',),
            ('libitm',),
            ('libobjc',),
            ('libphobos', LibPhobosFilter()),
            ('libquadmath',),
            ('libssp',),
            ('libstdc++-v3', LibStdCxxFilter()),
            ('libvtv',),
            ('lto-plugin',),
        )
        for entry in known_dirs:
            self.add_dir(*entry)

        # Directories processed when none are named on the command line.
        self.default_dirs = [
            'c++tools',
            'gcc',
            'include',
            'libada',
            'libatomic',
            'libbacktrace',
            'libcc1',
            'libcpp',
            'libdecnumber',
            'libgcc',
            'libgfortran',
            'libgomp',
            'libiberty',
            'libitm',
            'libobjc',
            'libphobos',
            'libssp',
            'libstdc++-v3',
            'libvtv',
            'lto-plugin',
        ]
# Run the updater only when this file is executed as a script, so that
# importing it does not start processing files or call sys.exit.
if __name__ == '__main__':
    GCCCmdLine().main()