From a830f08e919847cb37f234ed4764cab7b5349226 Mon Sep 17 00:00:00 2001
From: Kaushik
Date: Tue, 21 Apr 2026 15:50:43 +0000
Subject: [PATCH] Fix strip_root not stripping root from Resource paths in
 return_codebase mode

When using cli.run_scan() with strip_root=True and return_codebase=True,
the root directory was not stripped from Resource paths.

This adds path stripping logic for the return_codebase branch and patches
parent() for direct children of root. The root itself keeps no parent, and
the parent() patch is installed at most once per resource class so that
repeated run_scan() calls do not stack wrappers.

Adds tests for stripped paths, single file behavior, parent traversal
(including the root having no parent), and regression guard.

Fixes: https://github.com/aboutcode-org/scancode-toolkit/issues/2985

Signed-off-by: Kaushik
---
 src/scancode/cli.py        | 27 ++++++++++++++
 tests/scancode/test_cli.py | 76 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+)

diff --git a/src/scancode/cli.py b/src/scancode/cli.py
index 1376c6cfee9..bcd25f65888 100644
--- a/src/scancode/cli.py
+++ b/src/scancode/cli.py
@@ -1070,6 +1070,33 @@ def echo_func(*_args, **_kwargs):
             results = get_results(codebase, as_list=True, **requested_options)
         elif return_codebase:
             results = codebase
+            # Strip root from Resource paths. See #2985
+            if strip_root and not codebase.has_single_resource:
+                from commoncode.resource import strip_first_path_segment
+                new_resources_by_path = {}
+                for old_path, resource in list(codebase.resources_by_path.items()):
+                    stripped_path = strip_first_path_segment(old_path)
+                    resource.path = stripped_path
+                    new_resources_by_path[stripped_path] = resource
+                codebase.resources_by_path = new_resources_by_path
+
+                # Patch parent() for direct children of root with empty parent path.
+                # The patch lives on the resource class, so install it at most once
+                # per class: repeated run_scan() calls must not stack wrappers.
+                resource_class = codebase.resource_class
+                original_parent = resource_class.parent
+                if not getattr(original_parent, '_strip_root_aware', False):
+                    def patched_parent(self, codebase_arg):
+                        # The root has no parent. Without this guard its empty
+                        # parent path would make the root its own parent.
+                        if self.is_root:
+                            return None
+                        parent_path = self.parent_path()
+                        if parent_path == '':
+                            return codebase_arg.root
+                        return original_parent(self, codebase_arg)
+                    patched_parent._strip_root_aware = True
+                    resource_class.parent = patched_parent
 
     finally:
         # remove temporary files
diff --git a/tests/scancode/test_cli.py b/tests/scancode/test_cli.py
index 858baf68ca7..3418e7ec5f8 100644
--- a/tests/scancode/test_cli.py
+++ b/tests/scancode/test_cli.py
@@ -120,6 +120,82 @@ def test_run_scan_includes_outdated_in_extra():
     assert results['headers'][0]['extra_data']['OUTDATED'] == 'out of date'
 
 
+def test_run_scan_return_codebase_with_strip_root_strips_paths():
+    from scancode.cli import run_scan
+    test_dir = test_env.extract_test_tar('info/basic.tgz')
+    rc, codebase = run_scan(
+        test_dir,
+        info=True,
+        strip_root=True,
+        return_results=False,
+        return_codebase=True,
+    )
+    assert rc
+    assert codebase.root.path == ''
+    paths = [r.path for r in codebase.walk(skip_root=True)]
+    root_dir_name = os.path.basename(test_dir)
+    assert all(not p.startswith(root_dir_name) for p in paths)
+    assert 'basic' in paths
+    assert 'basic/main.c' in paths
+
+
+def test_run_scan_return_codebase_with_strip_root_single_file_does_not_strip():
+    from scancode.cli import run_scan
+    test_file = test_env.get_test_loc('single/iproute.c')
+    rc, codebase = run_scan(
+        test_file,
+        info=True,
+        strip_root=True,
+        return_results=False,
+        return_codebase=True,
+    )
+    assert rc
+    assert codebase.root.path != ''
+    assert 'iproute.c' in codebase.root.path
+
+
+def test_run_scan_return_codebase_with_strip_root_parent_traversal_works():
+    from scancode.cli import run_scan
+    test_dir = test_env.extract_test_tar('info/basic.tgz')
+    rc, codebase = run_scan(
+        test_dir,
+        info=True,
+        strip_root=True,
+        return_results=False,
+        return_codebase=True,
+    )
+    assert rc
+    basic_resource = codebase.get_resource('basic')
+    assert basic_resource is not None
+    parent = basic_resource.parent(codebase)
+    assert parent is not None
+    assert parent.is_root
+    # the root must never be reported as its own parent
+    assert codebase.root.parent(codebase) is None
+
+    main_c = codebase.get_resource('basic/main.c')
+    assert main_c is not None
+    main_parent = main_c.parent(codebase)
+    assert main_parent is not None
+    assert main_parent.path == 'basic'
+
+
+def test_run_scan_return_codebase_without_strip_root_keeps_original_paths():
+    from scancode.cli import run_scan
+    test_dir = test_env.extract_test_tar('info/basic.tgz')
+    rc, codebase = run_scan(
+        test_dir,
+        info=True,
+        return_results=False,
+        return_codebase=True,
+    )
+    assert rc
+    root_path = codebase.root.path
+    assert root_path != ''
+    paths = [r.path for r in codebase.walk(skip_root=True)]
+    assert all(p.startswith(root_path) for p in paths)
+
+
 def test_no_version_check_run_is_successful():
     test_file = test_env.get_test_loc('single/iproute.c')
     result_file = test_env.get_temp_file('json')