| #!/usr/bin/env python3 |
| |
| # Copyright (C) 2025 Free Software Foundation, Inc. |
| # |
| # This file is part of GDB. |
| # |
| # This program is free software; you can redistribute it and/or modify |
| # it under the terms of the GNU General Public License as published by |
| # the Free Software Foundation; either version 3 of the License, or |
| # (at your option) any later version. |
| # |
| # This program is distributed in the hope that it will be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| # GNU General Public License for more details. |
| # |
| # You should have received a copy of the GNU General Public License |
| # along with this program. If not, see <http://www.gnu.org/licenses/>. |
| |
| # This program requires the python modules GitPython (git) and scancode-toolkit. |
| # It builds a list of all the newly added files to the repository and scans |
| # each file for a license, printing it to the terminal. If "--skip" is used, |
| # it will only output non-"common" licenses, e.g., omitting "GPL-3.0-or-later". |
| # This makes it a little bit easier to detect any possible new licenses. |
| # |
| # Example: |
| # bash$ cd /path/to/binutils-gdb/gdb |
| # bash$ ./contrib/license-check-new-files.sh -s gdb-15-branchpoint gdb-16-branchpoint |
| # Scanning directories gdb*/... |
| # gdb/contrib/common-misspellings.txt: no longer in repo? |
| # gdb/contrib/spellcheck.sh: no longer in repo? |
| # gdbsupport/unordered_dense.h: MIT |
| |
| import os |
| import sys |
| import argparse |
| from pathlib import PurePath |
| from git import Repo |
| from scancode import api |
| |
# A list of "common" licenses, compared against the SPDX license
# expression that the scan reports for each file.  If "--skip" is used,
# any file whose license is in this list will be omitted from the output.
COMMON_LICENSES = ["GPL-2.0-or-later", "GPL-3.0-or-later"]

# Default value of the "--paths" option: a comma-separated string of
# paths to scan.  Default scans are limited to gdb-specific git
# directories because much of the rest of binutils-gdb is actually owned
# by other projects/packages.
DEFAULT_SCAN_DIRS = "gdb*"
| |
| |
# Get the commit object associated with the string commit CSTR
# from the git repository REPO.
#
# REPO only needs to provide a commit() method accepting CSTR.
#
# Returns the commit object.  If the lookup raises an ordinary
# exception, prints an "unknown commit" error and exits with status 2.
# KeyboardInterrupt and SystemExit are allowed to propagate.
def get_commit(repo, cstr):
    try:
        return repo.commit(cstr)
    except Exception:
        # Deliberately not a bare "except": an interrupt during the
        # lookup must not be reported as an unknown commit.
        print(f'unknown commit "{cstr}"')
        sys.exit(2)
| |
| |
# Scan FILE for licenses with the scancode-toolkit package.
#
# Hands FILE to api.get_licenses and returns its result unchanged.
# Nothing is caught here, so any exception raised by the scan reaches
# the caller.
def get_licenses_for_file(file):
    scan_result = api.get_licenses(file)
    return scan_result
| |
| |
# Helper used when common licenses are being skipped: if SKIP is true,
# print FILE followed by ": " with no trailing newline, so the caller
# can finish the line.  When SKIP is false, print nothing.
def skip_print_file(skip, file):
    if not skip:
        return
    prefix = f"{file}: "
    print(prefix, end="")
| |
| |
# Search for a git repository, trying the directory START and then each
# of its parents in turn.
#
# Returns (REPO, DIR) for the first directory DIR that Repo accepts, or
# (None, LAST) when none does, where LAST is the last directory tried.
def _find_repo(start):
    candidates = [start, *start.parents]
    for candidate in candidates:
        try:
            return Repo(candidate), candidate
        except Exception:
            # Best effort: whatever went wrong here, try the parent.
            # Not a bare "except", so Ctrl-C still interrupts the search.
            continue
    return None, candidates[-1]


# Program entry point.  ARGV is the full argument vector, with the
# program name in ARGV[0]; the remaining elements are parsed as the
# command line.
#
# Lists the files added between the two commits given on the command
# line (limited to the "--paths" directories), scans each one for
# licenses, and prints the result.  Exits with status 2 if no git
# repository is found or a commit is unknown.
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument("from_commit")
    parser.add_argument("to_commit")
    parser.add_argument(
        "-s", "--skip", help="skip common licenses in output", action="store_true"
    )
    parser.add_argument(
        "-p",
        "--paths",
        help=f'paths to scan (default is "{DEFAULT_SCAN_DIRS}")',
        type=str,
        default=DEFAULT_SCAN_DIRS,
    )
    # Parse the vector we were given (minus the program name) rather
    # than silently falling back to sys.argv.
    args = parser.parse_args(argv[1:])

    # Get the list of new files from git.  Try the current directory,
    # looping up to the root attempting to find a valid git repository.
    repo, repo_root = _find_repo(PurePath(os.getcwd()))
    if repo is None:
        print(f'not a git repository (or any parent up to mount point "{repo_root}")')
        sys.exit(2)

    # Commit boundaries to search for new files
    fc = get_commit(repo, args.from_commit)
    tc = get_commit(repo, args.to_commit)

    # Loop over new files
    scan_paths = args.paths.split(",")
    print(f'Scanning directories {",".join(f"{s}/" for s in scan_paths)}...')
    for change in fc.diff(tc, paths=scan_paths).iter_change_type("A"):
        filename = change.a_path
        if not args.skip:
            print(f"checking licenses for {filename}... ", end="", flush=True)
        try:
            # Paths in the diff are joined onto the directory in which
            # the repository was found.
            lic = get_licenses_for_file((repo_root / filename).as_posix())
            if len(lic["license_clues"]) > 1:
                # In skip mode no "checking licenses for" prefix was
                # printed, so name the file here as the other branches do.
                skip_print_file(args.skip, filename)
                print("multiple licenses detected")
            else:
                spdx = lic["detected_license_expression_spdx"]
                if not args.skip or spdx not in COMMON_LICENSES:
                    skip_print_file(args.skip, filename)
                    print(f"{spdx}")
        except OSError:
            # Likely hit a file that was added to the repo and subsequently removed.
            skip_print_file(args.skip, filename)
            print("no longer in repo?")
        except KeyboardInterrupt:
            print("interrupted")
            break
        except Exception as e:
            # If scanning fails, there is little we can do but print an error.
            skip_print_file(args.skip, filename)
            print(e)
| |
| |
# When run as a script (not imported), call main with the full
# argument vector.
if __name__ == "__main__":
    main(sys.argv)