blob: 710afa14442be396662da94fb52789bf8bbb4890 [file] [log] [blame]
#!/usr/bin/env python3
# Copyright (C) 2025 Free Software Foundation, Inc.
#
# This file is part of GDB.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# This program requires the Python modules GitPython (git) and scancode-toolkit.
# It builds a list of all the newly added files to the repository and scans
# each file for a license, printing it to the terminal. If "--skip" is used,
# it will only output non-"common" licenses, e.g., omitting "GPL-3.0-or-later".
# This makes it a little bit easier to detect any possible new licenses.
#
# Example:
# bash$ cd /path/to/binutils-gdb/gdb
# bash$ ./contrib/license-check-new-files.sh -s gdb-15-branchpoint gdb-16-branchpoint
# Scanning directories gdb*/...
# gdb/contrib/common-misspellings.txt: no longer in repo?
# gdb/contrib/spellcheck.sh: no longer in repo?
# gdbsupport/unordered_dense.h: MIT
import os
import sys
import argparse
from pathlib import PurePath
from git import Repo
from scancode import api
# Licenses regarded as "common".  When "--skip" is given, a file whose
# detected license appears in this list is left out of the output.
COMMON_LICENSES = [
    "GPL-2.0-or-later",
    "GPL-3.0-or-later",
]

# Paths scanned when "--paths" is not given.  The default is limited to
# the gdb-specific git directories because much of the rest of
# binutils-gdb is actually owned by other projects/packages.
DEFAULT_SCAN_DIRS = "gdb*"
# Get the commit object associated with the string commit CSTR
# from the git repository REPO.
#
# Returns the object or prints an error and exits.
def get_commit(repo, cstr):
    """Return the commit object that CSTR names in REPO.

    REPO is any object providing a commit() method (a GitPython Repo in
    this script) and CSTR is the revision string handed to it.  If the
    lookup raises, an error message is printed and the program exits
    with status 2.
    """
    try:
        return repo.commit(cstr)
    except Exception:
        # Deliberately not a bare "except": KeyboardInterrupt and
        # SystemExit must propagate instead of being reported as an
        # unknown commit.
        print(f'unknown commit "{cstr}"')
        sys.exit(2)
# Uses scancode-toolkit package to scan FILE's licenses.
# Returns the full license dict from scancode on success or
# propagates any exceptions.
def get_licenses_for_file(file):
    """Scan FILE with scancode-toolkit and return its license data.

    The value produced by scancode's api.get_licenses is handed back
    unchanged; any exception it raises propagates to the caller.
    """
    licenses = api.get_licenses(file)
    return licenses
# Helper function to print FILE to the terminal if skipping
# common licenses.
def skip_print_file(skip, file):
    """Print "FILE: " with no trailing newline, but only if SKIP is true.

    When SKIP is false nothing is printed.
    """
    if not skip:
        return
    prefix = f"{file}: "
    print(prefix, end="")
def main(argv):
    """Scan files added between two commits and print their licenses.

    ARGV is the full argument vector with the program name first (as in
    sys.argv); everything after the program name is parsed as
    command-line arguments.

    The git repository is located by trying the current directory and
    then each of its parents.  Every file added between FROM_COMMIT and
    TO_COMMIT under the requested paths is scanned and its detected
    SPDX license expression is printed.  With "--skip", files whose
    license is listed in COMMON_LICENSES are omitted.

    Exits with status 2 if no git repository is found or if either
    commit cannot be resolved.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("from_commit")
    parser.add_argument("to_commit")
    parser.add_argument(
        "-s", "--skip", help="skip common licenses in output", action="store_true"
    )
    parser.add_argument(
        "-p",
        "--paths",
        help=f'paths to scan (default is "{DEFAULT_SCAN_DIRS}")',
        type=str,
        default=DEFAULT_SCAN_DIRS,
    )
    # Parse the vector we were handed instead of silently falling back
    # to sys.argv, so that main() honours its parameter.
    args = parser.parse_args(argv[1:])

    # Locate the git repository.  Try the current directory, looping up
    # to the root attempting to find a valid git repository.
    cwd = PurePath(os.getcwd())
    for repo_dir in (cwd, *cwd.parents):
        try:
            repo = Repo(repo_dir)
        except Exception:
            # This directory is not usable as a repository; try its
            # parent.  KeyboardInterrupt/SystemExit are not swallowed.
            continue
        break
    else:
        # Every candidate failed.  REPO_DIR is the last directory tried,
        # i.e. the topmost one; this also covers the case where the
        # current directory has no parents at all.
        print(f'not a git repository (or any parent up to mount point "{repo_dir}")')
        sys.exit(2)

    # Commit boundaries to search for new files.
    fc = get_commit(repo, args.from_commit)
    tc = get_commit(repo, args.to_commit)

    # Loop over new files (change type "A" == added).
    scan_paths = args.paths.split(",")
    print(f'Scanning directories {",".join(f"{s}/" for s in scan_paths)}...')
    for change in fc.diff(tc, paths=scan_paths).iter_change_type("A"):
        filename = change.a_path
        if not args.skip:
            print(f"checking licenses for {filename}... ", end="", flush=True)

        try:
            lic = get_licenses_for_file(repo_dir.joinpath(filename).as_posix())
            # NOTE(review): "multiple licenses" is keyed off scancode's
            # "license_clues" list -- confirm that is the intended signal.
            if len(lic["license_clues"]) > 1:
                verdict = "multiple licenses detected"
            else:
                verdict = lic["detected_license_expression_spdx"]
                if args.skip and verdict in COMMON_LICENSES:
                    continue
            # In skip mode no "checking licenses for ..." prefix was
            # printed, so name the file here -- for every verdict,
            # including the multiple-license one.
            skip_print_file(args.skip, filename)
            print(f"{verdict}")
        except OSError:
            # Likely hit a file that was added to the repo and subsequently removed.
            skip_print_file(args.skip, filename)
            print("no longer in repo?")
        except KeyboardInterrupt:
            print("interrupted")
            break
        except Exception as e:
            # If scanning fails, there is little we can do but print an error.
            skip_print_file(args.skip, filename)
            print(e)
# Run the checker only when this file is executed as a script; importing
# it as a module has no side effects.
if __name__ == "__main__":
    main(sys.argv)