Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ The following organizations or individuals have contributed to ScanCode:
- Mike Rombout @mrombout
- Mrinal Paliwal @mnpw
- nexB Inc. @nexB
- Niklas Lingenauber @linge3011
- Nirmal Sarswat @vivonk
- Nisha Kumar @nishakm
- Nishchith Shetty @inishchith
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ Changelog

Next release
--------------
- Added the ``--spdx-json`` CLI option to create SPDX output in JSON format.
https://github.com/aboutcode-org/scancode-toolkit/issues/3698

v3.5.0 - 2026-01-15
-------------------
Expand Down
12 changes: 12 additions & 0 deletions docs/source/reference/scancode-cli/cli-help-text-options.rst
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ The following help text is displayed for ScanCode version 32.0.0:
--custom-template FILE Use this Jinja template FILE as a custom template.
--cyclonedx FILE Write scan output in CycloneDX JSON format to FILE.
--cyclonedx-xml FILE Write scan output in CycloneDX XML format to FILE.
--spdx-json FILE Write scan output as SPDX JSON to FILE.
--spdx-rdf FILE Write scan output as SPDX RDF to FILE.
--spdx-tv FILE Write scan output as SPDX Tag/Value to FILE.
--html-app FILE (DEPRECATED: use the ScanCode Workbench app instead)
Expand Down Expand Up @@ -434,6 +435,17 @@ for ScanCode Version 32.0.0.
help: Write scan output as JSON Lines to FILE.
doc: None

--------------------------------------------
Plugin: scancode_output:spdx-json class: formattedcode.output_spdx:SpdxJsonOutput
codebase_attributes:
resource_attributes:
sort_order: 100
required_plugins:
options:
help_group: output formats, name: spdx_json: --spdx-json
help: Write scan output as SPDX JSON to FILE.
doc: None

--------------------------------------------
Plugin: scancode_output:spdx-rdf class: formattedcode.output_spdx:SpdxRdfOutput
codebase_attributes:
Expand Down
19 changes: 19 additions & 0 deletions docs/source/reference/scancode-cli/cli-output-format-options.rst
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,25 @@ Comparing different ``json`` output formats

----

.. _cli-json-spdx-option:

``--spdx-json FILE``
--------------------

SPDX JSON output writes a Software Bill of Materials in the SPDX JSON format
using the same scan data model as other SPDX outputs.

**Example**

The following command scans the ``samples`` directory and writes the results in
SPDX JSON format:

.. code-block:: shell

scancode -clpieu --spdx-json output.spdx.json samples

----

.. _cli-rdf-option:

``--spdx-rdf FILE``
Expand Down
2 changes: 2 additions & 0 deletions docs/source/rst-snippets/cli-output-format-options.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@

--spdx-rdf FILE Write scan output as SPDX RDF to FILE.

--spdx-json FILE Write scan output as SPDX JSON to FILE.

--spdx-tv FILE Write scan output as SPDX Tag/Value to FILE.

--html-app FILE [DEPRECATED] Use ``scancode-workbench``
Expand Down
1 change: 1 addition & 0 deletions setup-mini.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ scancode_output =
json-pp = formattedcode.output_json:JsonPrettyOutput
spdx-tv = formattedcode.output_spdx:SpdxTvOutput
spdx-rdf = formattedcode.output_spdx:SpdxRdfOutput
spdx-json = formattedcode.output_spdx:SpdxJsonOutput
csv = formattedcode.output_csv:CsvOutput
jsonlines = formattedcode.output_jsonlines:JsonLinesOutput
template = formattedcode.output_html:CustomTemplateOutput
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ scancode_output =
json-pp = formattedcode.output_json:JsonPrettyOutput
spdx-tv = formattedcode.output_spdx:SpdxTvOutput
spdx-rdf = formattedcode.output_spdx:SpdxRdfOutput
spdx-json = formattedcode.output_spdx:SpdxJsonOutput
csv = formattedcode.output_csv:CsvOutput
jsonlines = formattedcode.output_jsonlines:JsonLinesOutput
template = formattedcode.output_html:CustomTemplateOutput
Expand Down
105 changes: 101 additions & 4 deletions src/formattedcode/output_spdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import os
import sys
import uuid
import json
from datetime import datetime
from io import BytesIO
from io import StringIO
Expand Down Expand Up @@ -119,6 +120,36 @@ def process_codebase(self, codebase, spdx_rdf, **kwargs):
input_path=kwargs.get('input', ''),
output_file=spdx_rdf,
as_tagvalue=False,
as_json=False,
**kwargs
)


@output_impl
class SpdxJsonOutput(OutputPlugin):
    """
    Output plugin writing the scan results as an SPDX JSON document to the
    FILE given with the ``--spdx-json`` command line option.
    """

    options = [
        PluggableCommandLineOption(
            ('--spdx-json',),
            type=FileOptionType(mode='w', encoding='utf-8', lazy=True),
            metavar='FILE',
            default=None,
            help='Write scan output as SPDX JSON to FILE.',
            help_group=OUTPUT_GROUP,
            sort_order=70,
        ),
    ]

    def is_enabled(self, spdx_json, **kwargs):
        """
        Return the ``spdx_json`` option value: the plugin is enabled when it
        is truthy.
        """
        return spdx_json

    def process_codebase(self, codebase, spdx_json, **kwargs):
        """
        Write the ``codebase`` to the ``spdx_json`` output file by delegating
        to the shared ``_process_codebase`` with the JSON flavor selected.
        """
        scanned_input = kwargs.get('input', '')
        _process_codebase(
            spdx_plugin=self,
            codebase=codebase,
            input_path=scanned_input,
            output_file=spdx_json,
            # Neither Tag/Value nor RDF: select the JSON serialization.
            as_tagvalue=False,
            as_json=True,
            **kwargs
        )

Expand All @@ -129,6 +160,7 @@ def _process_codebase(
input_path,
output_file,
as_tagvalue=True,
as_json=False,
**kwargs,
):
check_sha1(codebase)
Expand All @@ -148,6 +180,7 @@ def _process_codebase(
notice=notice,
package_name=package_name,
as_tagvalue=as_tagvalue,
as_json=as_json,
)


Expand Down Expand Up @@ -178,6 +211,49 @@ def check_sha1(codebase):
)


def update_json_package_files(spdx_json):
    """
    Ensure SPDX JSON packages list their file members explicitly.

    ``spdx_json`` is the mapping loaded from a serialized SPDX JSON document.
    It is updated in place and also returned:

    - Each package gets a sorted ``hasFiles`` list built from the ``CONTAINS``
      relationships where the package is the ``spdxElementId``. Only ids of
      entries of the document ``files`` are listed, so that a package
      containing another package does not leak a package id in ``hasFiles``.
    - When there is no usable ``CONTAINS`` relationship at all and the
      document has a single package, all the files are attributed to it.
    - ``documentDescribes`` is set to the package ids when missing or empty.

    The mapping is returned untouched when it has no packages or no files.
    """
    packages = spdx_json.get('packages') or []
    files = spdx_json.get('files') or []
    if not packages or not files:
        return spdx_json

    # ``hasFiles`` may only reference File elements: collect their ids once.
    known_file_ids = {f.get('SPDXID') for f in files if f.get('SPDXID')}

    # Map of {container SPDX id: set of contained SPDX ids}.
    package_file_map = {}
    for relationship in spdx_json.get('relationships') or []:
        if relationship.get('relationshipType') != 'CONTAINS':
            continue
        package_id = relationship.get('spdxElementId')
        related_id = relationship.get('relatedSpdxElement')
        if not package_id or not related_id:
            continue
        package_file_map.setdefault(package_id, set()).add(related_id)

    # Without any CONTAINS relationship, a lone package owns every file.
    if not package_file_map and len(packages) == 1:
        package_id = packages[0].get('SPDXID')
        if package_id and known_file_ids:
            package_file_map[package_id] = set(known_file_ids)

    for package in packages:
        package_id = package.get('SPDXID')
        if not package_id:
            continue
        # Drop contained elements that are not files (e.g. nested packages).
        file_ids = package_file_map.get(package_id, set()) & known_file_ids
        if file_ids:
            package['hasFiles'] = sorted(file_ids)

    if not spdx_json.get('documentDescribes'):
        described = [p.get('SPDXID') for p in packages if p.get('SPDXID')]
        if described:
            spdx_json['documentDescribes'] = described

    return spdx_json


def write_spdx(
codebase,
output_file,
Expand All @@ -188,6 +264,7 @@ def write_spdx(
package_name='',
download_location=SpdxNoAssertion(),
as_tagvalue=True,
as_json=False,
spdx_version = (2, 2),
with_notice_text=False,
):
Expand All @@ -205,7 +282,7 @@ def write_spdx(
licenses = cache.get_licenses_db()
licensing = Licensing()

as_rdf = not as_tagvalue
as_rdf = not as_tagvalue and not as_json

ns_prefix = '_'.join(package_name.lower().split())
comment = notice + f'\nSPDX License List: {scancode_config.spdx_license_list_version}'
Expand Down Expand Up @@ -241,6 +318,15 @@ def write_spdx(
packages=[package],
)

if as_json:
doc.relationships.append(
Relationship(
spdx_element_id=creation_info.spdx_id,
relationship_type=RelationshipType.DESCRIBES,
related_spdx_element_id=package.spdx_id,
)
)

# Use a set of unique copyrights for the package.
package_copyright_texts = set()

Expand Down Expand Up @@ -347,7 +433,7 @@ def write_spdx(
relationship = Relationship(package.spdx_id, RelationshipType.CONTAINS, file_entry.spdx_id)
doc.relationships.append(relationship)

if not doc.files:
if not doc.files and not as_json:
if as_tagvalue:
msg = "# No results for package '{}'.\n".format(package.name)
else:
Expand Down Expand Up @@ -388,20 +474,31 @@ def write_spdx(
# one case we do need to deal with bytes and decode before writing (rdf) and
# in the other case we deal with text all the way.

if doc.files:
if doc.files or as_json:
if as_tagvalue:
from spdx_tools.spdx.writer.tagvalue.tagvalue_writer import write_document_to_stream # NOQA
spdx_output = StringIO()
elif as_rdf:
from spdx_tools.spdx.writer.rdf.rdf_writer import write_document_to_stream # NOQA
# rdf is utf-encoded bytes
spdx_output = BytesIO()
elif as_json:
try:
from spdx_tools.spdx.writer.json.json_writer import write_document_to_stream # NOQA
except ImportError:
from spdx_tools.spdx.writer.json_writer import write_document_to_stream # NOQA
spdx_output = StringIO()

write_document_to_stream(doc, spdx_output, validate=False)
write_document_to_stream(doc, spdx_output, validate=as_json)
result = spdx_output.getvalue()

if as_rdf:
# rdf is utf-encoded bytes
result = result.decode('utf-8')

if as_json:
spdx_json = json.loads(result)
spdx_json = update_json_package_files(spdx_json)
result = json.dumps(spdx_json, indent=4, ensure_ascii=False)

output_file.write(result)
Loading