diff --git a/AUTHORS.rst b/AUTHORS.rst index 75b0533f921..089f769056e 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -55,6 +55,7 @@ The following organizations or individuals have contributed to ScanCode: - Mike Rombout @mrombout - Mrinal Paliwal @mnpw - nexB Inc. @nexB +- Niklas Lingenauber @linge3011 - Nirmal Sarswat @vivonk - Nisha Kumar @nishakm - Nishchith Shetty @inishchith diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d115cda4b80..b4c4813a859 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,6 +3,8 @@ Changelog Next release -------------- +- Addition of the CLI option ``--spdx-json`` to create SPDX output in JSON format. + https://github.com/aboutcode-org/scancode-toolkit/issues/3698 v3.5.0 - 2026-01-15 ------------------- diff --git a/docs/source/reference/scancode-cli/cli-help-text-options.rst b/docs/source/reference/scancode-cli/cli-help-text-options.rst index a3bfb44f776..7283a6be602 100644 --- a/docs/source/reference/scancode-cli/cli-help-text-options.rst +++ b/docs/source/reference/scancode-cli/cli-help-text-options.rst @@ -97,6 +97,7 @@ The following help text is displayed for ScanCode version 32.0.0: --custom-template FILE Use this Jinja template FILE as a custom template. --cyclonedx FILE Write scan output in CycloneDX JSON format to FILE. --cyclonedx-xml FILE Write scan output in CycloneDX XML format to FILE. + --spdx-json FILE Write scan output as SPDX JSON to FILE. --spdx-rdf FILE Write scan output as SPDX RDF to FILE. --spdx-tv FILE Write scan output as SPDX Tag/Value to FILE. --html-app FILE (DEPRECATED: use the ScanCode Workbench app instead) @@ -434,6 +435,17 @@ for ScanCode Version 32.0.0. help: Write scan output as JSON Lines to FILE. 
doc: None + -------------------------------------------- + Plugin: scancode_output:spdx-json class: formattedcode.output_spdx:SpdxJsonOutput + codebase_attributes: + resource_attributes: + sort_order: 100 + required_plugins: + options: + help_group: output formats, name: spdx_json: --spdx-json + help: Write scan output as SPDX JSON to FILE. + doc: None + -------------------------------------------- Plugin: scancode_output:spdx-rdf class: formattedcode.output_spdx:SpdxRdfOutput codebase_attributes: diff --git a/docs/source/reference/scancode-cli/cli-output-format-options.rst b/docs/source/reference/scancode-cli/cli-output-format-options.rst index 1f3a4bb852c..0fb5ce05edd 100644 --- a/docs/source/reference/scancode-cli/cli-output-format-options.rst +++ b/docs/source/reference/scancode-cli/cli-output-format-options.rst @@ -325,6 +325,25 @@ Comparing different ``json`` output formats ---- +.. _cli-json-spdx-option: + +``--spdx-json FILE`` +-------------------- + + SPDX JSON output writes a Software Bill of Materials in the SPDX JSON format + using the same scan data model as other SPDX outputs. + + **Example** + + The following code performs a scan on the samples directory, and publishes the results in + ``spdx-json`` format + + .. code-block:: shell + + scancode -clpieu --spdx-json output.spdx.json samples + +---- + .. _cli-rdf-option: ``--spdx-rdf FILE`` diff --git a/docs/source/rst-snippets/cli-output-format-options.rst b/docs/source/rst-snippets/cli-output-format-options.rst index b73f8cfbeed..56ccd64c44d 100644 --- a/docs/source/rst-snippets/cli-output-format-options.rst +++ b/docs/source/rst-snippets/cli-output-format-options.rst @@ -41,6 +41,8 @@ --spdx-rdf FILE Write scan output as SPDX RDF to FILE. +--spdx-json FILE Write scan output as SPDX JSON to FILE. + --spdx-tv FILE Write scan output as SPDX Tag/Value to FILE. 
--html-app FILE [DEPRECATED] Use ``scancode-workbench`` diff --git a/setup-mini.cfg b/setup-mini.cfg index 9bfcc14439a..5cab4a90fc3 100644 --- a/setup-mini.cfg +++ b/setup-mini.cfg @@ -228,6 +228,7 @@ scancode_output = json-pp = formattedcode.output_json:JsonPrettyOutput spdx-tv = formattedcode.output_spdx:SpdxTvOutput spdx-rdf = formattedcode.output_spdx:SpdxRdfOutput + spdx-json = formattedcode.output_spdx:SpdxJsonOutput csv = formattedcode.output_csv:CsvOutput jsonlines = formattedcode.output_jsonlines:JsonLinesOutput template = formattedcode.output_html:CustomTemplateOutput diff --git a/setup.cfg b/setup.cfg index 7c45f388fd5..438dd8b220d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -230,6 +230,7 @@ scancode_output = json-pp = formattedcode.output_json:JsonPrettyOutput spdx-tv = formattedcode.output_spdx:SpdxTvOutput spdx-rdf = formattedcode.output_spdx:SpdxRdfOutput + spdx-json = formattedcode.output_spdx:SpdxJsonOutput csv = formattedcode.output_csv:CsvOutput jsonlines = formattedcode.output_jsonlines:JsonLinesOutput template = formattedcode.output_html:CustomTemplateOutput diff --git a/src/formattedcode/output_spdx.py b/src/formattedcode/output_spdx.py index 88039c7f2e9..b2cf393263c 100644 --- a/src/formattedcode/output_spdx.py +++ b/src/formattedcode/output_spdx.py @@ -9,6 +9,7 @@ import os import sys import uuid +import json from datetime import datetime from io import BytesIO from io import StringIO @@ -119,6 +120,36 @@ def process_codebase(self, codebase, spdx_rdf, **kwargs): input_path=kwargs.get('input', ''), output_file=spdx_rdf, as_tagvalue=False, + as_json=False, + **kwargs + ) + + +@output_impl +class SpdxJsonOutput(OutputPlugin): + + options = [ + PluggableCommandLineOption(('--spdx-json',), + type=FileOptionType(mode='w', encoding='utf-8', lazy=True), + metavar='FILE', + default=None, + help='Write scan output as SPDX JSON to FILE.', + help_group=OUTPUT_GROUP, + sort_order=70, + ) + ] + + def is_enabled(self, spdx_json, **kwargs): + return spdx_json 
def update_json_package_files(spdx_json):
    """
    Ensure SPDX JSON packages list their file members explicitly.

    ``spdx_json`` is the mapping obtained by parsing a serialized SPDX JSON
    document. It is updated in place and also returned for convenience.

    The update does two things:

    - For each package, set ``hasFiles`` to the sorted list of file SPDXIDs
      that the package ``CONTAINS`` according to the ``relationships``
      section. Only identifiers that are present in the ``files`` section are
      kept, so that a package containing another package (or any other
      non-file element) never ends up listed under ``hasFiles``.
      When no usable ``CONTAINS`` relationship exists and the document has
      exactly one package, that package is assumed to own every file.
    - Fill ``documentDescribes`` with every package SPDXID when this key is
      missing or empty.

    A document without packages or without files is returned untouched.
    """
    packages = spdx_json.get('packages') or []
    files = spdx_json.get('files') or []
    if not packages or not files:
        return spdx_json

    # SPDXIDs of actual file entries: anything else is not a valid "hasFiles"
    # member even if a CONTAINS relationship points to it.
    known_file_ids = {f.get('SPDXID') for f in files if f.get('SPDXID')}

    package_file_map = {}
    for relationship in spdx_json.get('relationships') or []:
        if relationship.get('relationshipType') != 'CONTAINS':
            continue
        package_id = relationship.get('spdxElementId')
        file_id = relationship.get('relatedSpdxElement')
        if not package_id or file_id not in known_file_ids:
            continue
        package_file_map.setdefault(package_id, set()).add(file_id)

    # Fallback for a single-package document that carries no explicit
    # package-to-file relationship: the lone package owns all the files.
    if not package_file_map and len(packages) == 1:
        package_id = packages[0].get('SPDXID')
        if package_id and known_file_ids:
            package_file_map[package_id] = known_file_ids

    for package in packages:
        file_ids = package_file_map.get(package.get('SPDXID'))
        if file_ids:
            # Sort to keep the serialized output stable across runs.
            package['hasFiles'] = sorted(file_ids)

    if not spdx_json.get('documentDescribes'):
        described = [p.get('SPDXID') for p in packages if p.get('SPDXID')]
        if described:
            spdx_json['documentDescribes'] = described

    return spdx_json
spdx_version = (2, 2), with_notice_text=False, ): @@ -205,7 +282,7 @@ def write_spdx( licenses = cache.get_licenses_db() licensing = Licensing() - as_rdf = not as_tagvalue + as_rdf = not as_tagvalue and not as_json ns_prefix = '_'.join(package_name.lower().split()) comment = notice + f'\nSPDX License List: {scancode_config.spdx_license_list_version}' @@ -241,6 +318,15 @@ def write_spdx( packages=[package], ) + if as_json: + doc.relationships.append( + Relationship( + spdx_element_id=creation_info.spdx_id, + relationship_type=RelationshipType.DESCRIBES, + related_spdx_element_id=package.spdx_id, + ) + ) + # Use a set of unique copyrights for the package. package_copyright_texts = set() @@ -347,7 +433,7 @@ def write_spdx( relationship = Relationship(package.spdx_id, RelationshipType.CONTAINS, file_entry.spdx_id) doc.relationships.append(relationship) - if not doc.files: + if not doc.files and not as_json: if as_tagvalue: msg = "# No results for package '{}'.\n".format(package.name) else: @@ -388,7 +474,7 @@ def write_spdx( # one case we do need to deal with bytes and decode before writing (rdf) and # in the other case we deal with text all the way. 
- if doc.files: + if doc.files or as_json: if as_tagvalue: from spdx_tools.spdx.writer.tagvalue.tagvalue_writer import write_document_to_stream # NOQA spdx_output = StringIO() @@ -396,12 +482,23 @@ def write_spdx( from spdx_tools.spdx.writer.rdf.rdf_writer import write_document_to_stream # NOQA # rdf is utf-encoded bytes spdx_output = BytesIO() + elif as_json: + try: + from spdx_tools.spdx.writer.json.json_writer import write_document_to_stream # NOQA + except ImportError: + from spdx_tools.spdx.writer.json_writer import write_document_to_stream # NOQA + spdx_output = StringIO() - write_document_to_stream(doc, spdx_output, validate=False) + write_document_to_stream(doc, spdx_output, validate=as_json) result = spdx_output.getvalue() if as_rdf: # rdf is utf-encoded bytes result = result.decode('utf-8') + if as_json: + spdx_json = json.loads(result) + spdx_json = update_json_package_files(spdx_json) + result = json.dumps(spdx_json, indent=4, ensure_ascii=False) + output_file.write(result) diff --git a/tests/formattedcode/test_output_spdx.py b/tests/formattedcode/test_output_spdx.py index 8aa1fda02f3..ceb2f1cc62b 100644 --- a/tests/formattedcode/test_output_spdx.py +++ b/tests/formattedcode/test_output_spdx.py @@ -9,6 +9,7 @@ # import io +import json import os import re @@ -178,6 +179,20 @@ def check_tv_scan(expected_file, result_file, regen=REGEN_TEST_FIXTURES): assert result == expected +def check_json_spdx_scan(result_file): + """ + Return SPDX JSON data after basic structural validation. 
+ """ + with io.open(result_file, encoding='utf-8') as co: + data = json.load(co) + + assert data.get('spdxVersion', '').startswith('SPDX-') + assert data.get('SPDXID') == 'SPDXRef-DOCUMENT' + assert isinstance(data.get('packages'), list) + assert 'creationInfo' in data + return data + + def test_spdx_rdf_basic(): test_file = test_env.get_test_loc('spdx/simple/test.txt') result_file = test_env.get_temp_file('rdf') @@ -194,6 +209,14 @@ def test_spdx_tv_basic(): check_tv_scan(expected_file, result_file) +def test_spdx_json_basic(): + test_file = test_env.get_test_loc('spdx/simple/test.txt') + result_file = test_env.get_temp_file('spdx.json') + run_scan_click([test_file, '-clip', '--spdx-json', result_file]) + data = check_json_spdx_scan(result_file) + assert data['packages'] + + @pytest.mark.scanslow def test_spdx_rdf_with_known_licenses(): test_dir = test_env.get_test_loc('spdx/license_known/scan') @@ -221,6 +244,15 @@ def test_spdx_tv_with_known_licenses(): check_tv_scan(expected_file, result_file) +@pytest.mark.scanslow +def test_spdx_json_with_known_licenses(): + test_dir = test_env.get_test_loc('spdx/license_known/scan') + result_file = test_env.get_temp_file('spdx.json') + run_scan_click([test_dir, '-clip', '--spdx-json', result_file]) + data = check_json_spdx_scan(result_file) + assert data['files'] + + @pytest.mark.scanslow def test_spdx_tv_with_license_ref(): test_dir = test_env.get_test_loc('spdx/license_ref/scan') @@ -230,6 +262,15 @@ def test_spdx_tv_with_license_ref(): check_tv_scan(expected_file, result_file) +@pytest.mark.scanslow +def test_spdx_json_with_license_ref(): + test_dir = test_env.get_test_loc('spdx/license_ref/scan') + result_file = test_env.get_temp_file('spdx.json') + run_scan_click([test_dir, '-clip', '--spdx-json', result_file]) + data = check_json_spdx_scan(result_file) + assert data['hasExtractedLicensingInfos'] + + @pytest.mark.scanslow def test_spdx_rdf_with_known_licenses_with_text(): test_dir = 
test_env.get_test_loc('spdx/license_known/scan') @@ -257,6 +298,15 @@ def test_spdx_tv_with_known_licenses_with_text(): check_tv_scan(expected_file, result_file) +@pytest.mark.scanslow +def test_spdx_json_with_known_licenses_with_text(): + test_dir = test_env.get_test_loc('spdx/license_known/scan') + result_file = test_env.get_temp_file('spdx.json') + run_scan_click(['-clip', '--license-text', test_dir, '--spdx-json', result_file]) + data = check_json_spdx_scan(result_file) + assert data['files'] + + @pytest.mark.scanslow def test_spdx_tv_with_license_ref_with_text(): test_dir = test_env.get_test_loc('spdx/license_ref/scan') @@ -266,6 +316,15 @@ def test_spdx_tv_with_license_ref_with_text(): check_tv_scan(expected_file, result_file) +@pytest.mark.scanslow +def test_spdx_json_with_license_ref_with_text(): + test_dir = test_env.get_test_loc('spdx/license_ref/scan') + result_file = test_env.get_temp_file('spdx.json') + run_scan_click(['-clip', '--license-text', test_dir, '--spdx-json', result_file]) + data = check_json_spdx_scan(result_file) + assert data['hasExtractedLicensingInfos'] + + @pytest.mark.scanslow def test_spdx_tv_tree(): test_dir = test_env.get_test_loc('spdx/tree/scan') @@ -284,6 +343,15 @@ def test_spdx_rdf_tree(): check_rdf_scan(expected_file, result_file) +@pytest.mark.scanslow +def test_spdx_json_tree(): + test_dir = test_env.get_test_loc('spdx/tree/scan') + result_file = test_env.get_temp_file('spdx.json') + run_scan_click(['-clip', test_dir, '--spdx-json', result_file]) + data = check_json_spdx_scan(result_file) + assert len(data['files']) > 1 + + @pytest.mark.scanslow def test_spdx_tv_with_unicode_license_text_does_not_fail(): test_file = test_env.get_test_loc('spdx/unicode/et131x.h') @@ -306,6 +374,17 @@ def test_spdx_rdf_with_unicode_license_text_does_not_fail(): check_rdf_scan(expected_file, result_file) +@pytest.mark.scanslow +def test_spdx_json_with_unicode_license_text_does_not_fail(): + test_file = 
test_env.get_test_loc('spdx/unicode/et131x.h') + result_file = test_env.get_temp_file('spdx.json') + args = ['--license', '--copyright', '--info', '--strip-root', + '--license-text', test_file, '--spdx-json', result_file] + run_scan_plain(args) + data = check_json_spdx_scan(result_file) + assert data['files'] + + @pytest.mark.scanslow def test_spdx_rdf_with_or_later_license_does_not_fail(): test_file = test_env.get_test_loc('spdx/or_later/test.java') @@ -338,6 +417,16 @@ def test_spdx_rdf_with_empty_scan(): assert results == expected +@pytest.mark.scanslow +def test_spdx_json_with_empty_scan(): + test_file = test_env.get_test_loc('spdx/empty/scan') + result_file = test_env.get_temp_file('spdx.json') + args = ['--license', '--strip-root', '--info', '--only-findings', test_file, '--spdx-json', result_file] + run_scan_plain(args) + data = check_json_spdx_scan(result_file) + assert data['packages'] + + @pytest.mark.scanslow def test_output_spdx_rdf_can_handle_non_ascii_paths(): test_file = test_env.get_test_loc('spdx/unicode.json') @@ -355,6 +444,29 @@ def test_output_spdx_tv_can_handle_non_ascii_paths(): results = res.read() assert 'han/据.svg' in results + +def test_output_spdx_json_can_handle_non_ascii_paths(): + test_file = test_env.get_test_loc('spdx/unicode.json') + result_file = test_env.get_temp_file(extension='spdx', file_name='test_spdx') + run_scan_click(['--from-json', test_file, '--spdx-json', result_file]) + with io.open(result_file, encoding='utf-8') as res: + results = res.read() + assert 'han/据.svg' in results + + +def test_output_can_create_multiple_spdx_outputs_in_a_single_scan(): + test_file = test_env.get_test_loc('spdx/simple/test.txt') + result_tv = test_env.get_temp_file('tv') + result_rdf = test_env.get_temp_file('rdf') + result_json = test_env.get_temp_file('json') + run_scan_click([test_file, '-clip', '--spdx-tv', result_tv, '--spdx-rdf', result_rdf, '--spdx-json', result_json]) + assert os.path.exists(result_tv) + assert 
os.path.exists(result_rdf) + assert os.path.exists(result_json) + assert os.path.getsize(result_tv) > 0 + assert os.path.getsize(result_rdf) > 0 + assert os.path.getsize(result_json) > 0 + def test_output_spdx_tv_sh1_of_empty_file(): test_dir = test_env.get_test_loc('spdx/empty/scan/somefile') result_file = test_env.get_temp_file(extension='spdx', file_name='test_spdx') diff --git a/tests/formattedcode/test_reuse_output_plugins.py b/tests/formattedcode/test_reuse_output_plugins.py index 90236ae0450..2fe47b64f98 100644 --- a/tests/formattedcode/test_reuse_output_plugins.py +++ b/tests/formattedcode/test_reuse_output_plugins.py @@ -71,6 +71,11 @@ def test_can_call_spdxrdf_output_from_regular_code_with_virtualcodebase(): check_plugin(plug, 'reuse/vb.json', force_text=True) +def test_can_call_spdxjson_output_from_regular_code_with_virtualcodebase(): + from formattedcode.output_spdx import SpdxJsonOutput as plug + check_plugin(plug, 'reuse/vb.json', force_text=True) + + def test_can_call_html_output_from_regular_code_with_virtualcodebase(): from formattedcode.output_html import HtmlOutput as plug check_plugin(plug, 'reuse/vb.json', force_text=True) diff --git a/tests/scancode/data/help/help.txt b/tests/scancode/data/help/help.txt index e725888ead4..7a002ad2308 100644 --- a/tests/scancode/data/help/help.txt +++ b/tests/scancode/data/help/help.txt @@ -65,6 +65,7 @@ Options: --custom-template FILE Use this Jinja template FILE as a custom template. --cyclonedx FILE Write scan output in CycloneDX JSON format to FILE. --cyclonedx-xml FILE Write scan output in CycloneDX XML format to FILE. + --spdx-json FILE Write scan output as SPDX JSON to FILE. --spdx-rdf FILE Write scan output as SPDX RDF to FILE. --spdx-tv FILE Write scan output as SPDX Tag/Value to FILE. 
diff --git a/tests/scancode/data/help/help_linux.txt b/tests/scancode/data/help/help_linux.txt index 6794b19d602..08556bdc956 100644 --- a/tests/scancode/data/help/help_linux.txt +++ b/tests/scancode/data/help/help_linux.txt @@ -67,6 +67,7 @@ Options: --custom-template FILE Use this Jinja template FILE as a custom template. --cyclonedx FILE Write scan output in CycloneDX JSON format to FILE. --cyclonedx-xml FILE Write scan output in CycloneDX XML format to FILE. + --spdx-json FILE Write scan output as SPDX JSON to FILE. --spdx-rdf FILE Write scan output as SPDX RDF to FILE. --spdx-tv FILE Write scan output as SPDX Tag/Value to FILE.