From c77ae2f5f6608d718d572383000d4f0ba31dbae2 Mon Sep 17 00:00:00 2001 From: kumarasantosh Date: Fri, 13 Mar 2026 08:45:17 +0530 Subject: [PATCH 1/7] packagedcode: fix gemspec version constants being stored as-is When a gemspec uses a Ruby constant for the version field like: s.version = Elasticsearch::API::VERSION s.version = Faraday::VERSION scancode was storing the constant name as the version string. These constants cannot be resolved without executing Ruby code. Add is_ruby_version_constant() to detect Ruby constant expressions (containing :: namespace separator or bare uppercase constant names) and return None for the version instead of storing an unresolvable constant string. Also fixes the download_url and api_data_url which were generating invalid URLs with the constant name embedded. Fixes #3129 Signed-off-by: kumarasantosh --- src/packagedcode/rubygems.py | 1 + src/packagedcode/spec.py | 38 +++++++++ .../elasticsearch-api.gemspec | 83 +++++++++++++++++++ .../rubygems/version-constant/excon.gemspec | 45 ++++++++++ .../rubygems/version-constant/faraday.gemspec | 1 + .../version-constant/simple-constant.gemspec | 6 ++ .../version-constant/simple-version.gemspec | 6 ++ tests/packagedcode/test_rubygems.py | 42 ++++++++++ 8 files changed, 222 insertions(+) create mode 100644 tests/packagedcode/data/rubygems/version-constant/elasticsearch-api.gemspec create mode 100644 tests/packagedcode/data/rubygems/version-constant/excon.gemspec create mode 100644 tests/packagedcode/data/rubygems/version-constant/faraday.gemspec create mode 100644 tests/packagedcode/data/rubygems/version-constant/simple-constant.gemspec create mode 100644 tests/packagedcode/data/rubygems/version-constant/simple-version.gemspec diff --git a/src/packagedcode/rubygems.py b/src/packagedcode/rubygems.py index e80295c48af..dc13b1560f9 100644 --- a/src/packagedcode/rubygems.py +++ b/src/packagedcode/rubygems.py @@ -706,6 +706,7 @@ def party_mapper(role, names=[], emails=[]): models.Party(type=models.party_person, email=email, role=role) for email in emails ) + return () def get_parties(gem_data): diff --git a/src/packagedcode/spec.py b/src/packagedcode/spec.py index 95dfdba7dd9..4c4154485fd 100644 --- a/src/packagedcode/spec.py +++ b/src/packagedcode/spec.py @@ -133,6 +133,40 @@ def get_authors(line): } +def is_ruby_version_constant(value): + """ + Return True if value looks like a Ruby constant expression + that cannot be resolved statically, such as: + Elasticsearch::API::VERSION or MyGem::VERSION + + These are dynamic values that reference Ruby constants + and cannot be determined without executing the Ruby code. + + For example: + >>> is_ruby_version_constant('Elasticsearch::API::VERSION') + True + >>> is_ruby_version_constant('MyGem::VERSION') + True + >>> is_ruby_version_constant('1.0.0') + False + >>> is_ruby_version_constant("'2.3.4'") + False + >>> is_ruby_version_constant(None) + False + """ + if not value: + return False + # Ruby constants use :: as namespace separator + if '::' in value: + return True + # A bare constant starts with uppercase and has no dots/quotes + # e.g. VERSION (unlikely but possible) + stripped = value.strip('\'"') + if stripped and stripped[0].isupper() and '.' not in stripped: + return True + return False + + def parse_spec(location, package_type): """ Return a mapping of data parsed from a podspec/gemspec/Pofile/Gemfile file @@ -151,6 +185,10 @@ def parse_spec(location, package_type): parsed = parser(line=line) if parsed: spec_data[attribute_name] = parsed + + version = spec_data.get('version') + if is_ruby_version_constant(version): + spec_data['version'] = None # description can be in single or multi-lines # There are many different ways to write description. diff --git a/tests/packagedcode/data/rubygems/version-constant/elasticsearch-api.gemspec b/tests/packagedcode/data/rubygems/version-constant/elasticsearch-api.gemspec new file mode 100644 index 00000000000..100a7d6e705 --- /dev/null +++ b/tests/packagedcode/data/rubygems/version-constant/elasticsearch-api.gemspec @@ -0,0 +1,83 @@ +# Licensed to Elasticsearch B.V. under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch B.V. licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# coding: utf-8 +lib = File.expand_path('../lib', __FILE__) +$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) +require 'elasticsearch/api/version' + +Gem::Specification.new do |s| + s.name = 'elasticsearch-api' + s.version = Elasticsearch::API::VERSION + s.authors = ['Karel Minarik'] + s.email = ['karel.minarik@elasticsearch.org'] + s.summary = 'Ruby API for Elasticsearch.' + s.homepage = 'https://www.elastic.co/guide/en/elasticsearch/client/ruby-api/current/index.html' + s.license = 'Apache-2.0' + s.metadata = { + 'homepage_uri' => 'https://www.elastic.co/guide/en/elasticsearch/client/ruby-api/current/index.html', + 'changelog_uri' => 'https://github.com/elastic/elasticsearch-ruby/blob/main/CHANGELOG.md', + 'source_code_uri' => 'https://github.com/elastic/elasticsearch-ruby/tree/main/elasticsearch-api', + 'bug_tracker_uri' => 'https://github.com/elastic/elasticsearch-ruby/issues' + } + s.files = `git ls-files`.split($/) + s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) } + s.test_files = s.files.grep(%r{^(test|spec|features)/}) + s.require_paths = ['lib'] + + s.extra_rdoc_files = ['README.md', 'LICENSE.txt'] + s.rdoc_options = ['--charset=UTF-8'] + + s.required_ruby_version = '>= 2.5' + + s.add_dependency 'multi_json' + + s.add_development_dependency 'ansi' + s.add_development_dependency 'bundler' + s.add_development_dependency 'elasticsearch' + s.add_development_dependency 'minitest' + s.add_development_dependency 'minitest-reporters' + s.add_development_dependency 'mocha' + s.add_development_dependency 'pry' + s.add_development_dependency 'rake' + s.add_development_dependency 'shoulda-context' + s.add_development_dependency 'yard' + + # Gems for testing integrations + s.add_development_dependency 'jsonify' + s.add_development_dependency 'hashie' + # Temporary support for Ruby 2.6, since it's EOL March 2022: + if RUBY_VERSION < '2.7.0' + s.add_development_dependency 'jbuilder', '< 7.0.0' + else + s.add_development_dependency 'activesupport' + s.add_development_dependency 'jbuilder' + end + + s.add_development_dependency 'cane' + s.add_development_dependency 'escape_utils' unless defined? JRUBY_VERSION + + s.add_development_dependency 'require-prof' unless defined?(JRUBY_VERSION) || defined?(Rubinius) + s.add_development_dependency 'ruby-prof' unless defined?(JRUBY_VERSION) || defined?(Rubinius) + s.add_development_dependency 'simplecov' + + s.add_development_dependency 'test-unit', '~> 2' + + s.description = <<-DESC.gsub(/^ /, '') + Ruby API for Elasticsearch. See the `elasticsearch` gem for full integration. + DESC +end diff --git a/tests/packagedcode/data/rubygems/version-constant/excon.gemspec b/tests/packagedcode/data/rubygems/version-constant/excon.gemspec new file mode 100644 index 00000000000..161eb9bc94d --- /dev/null +++ b/tests/packagedcode/data/rubygems/version-constant/excon.gemspec @@ -0,0 +1,45 @@ +$LOAD_PATH.unshift File.join(File.dirname(__FILE__), 'lib') +require 'excon/version' + +Gem::Specification.new do |s| + s.name = 'excon' + s.version = Excon::VERSION + s.summary = "speed, persistence, http(s)" + s.description = "EXtended http(s) CONnections" + s.authors = ["dpiddy (Dan Peterson)", "geemus (Wesley Beary)", "nextmat (Matt Sanders)"] + s.email = 'geemus@gmail.com' + s.homepage = 'https://github.com/excon/excon' + s.license = 'MIT' + s.rdoc_options = ["--charset=UTF-8"] + s.extra_rdoc_files = %w[README.md CONTRIBUTORS.md CONTRIBUTING.md] + s.files = `git ls-files -- data/* lib/*`.split("\n") + [ + "CONTRIBUTING.md", + "CONTRIBUTORS.md", + "LICENSE.md", + "README.md", + "excon.gemspec" + ] + + s.add_development_dependency('rspec', '>= 3.5.0') + s.add_development_dependency('activesupport') + s.add_development_dependency('delorean') + s.add_development_dependency('eventmachine', '>= 1.0.4') + s.add_development_dependency('open4') + s.add_development_dependency('rake') + s.add_development_dependency('rdoc') + s.add_development_dependency('shindo') + s.add_development_dependency('sinatra') + s.add_development_dependency('sinatra-contrib') + s.add_development_dependency('json', '>= 1.8.5') + s.add_development_dependency('puma') + s.add_development_dependency('webrick') + + s.metadata = { + 'homepage_uri' => 'https://github.com/excon/excon', + 'bug_tracker_uri' => 'https://github.com/excon/excon/issues', + 'changelog_uri' => 'https://github.com/excon/excon/blob/master/changelog.txt', + 'documentation_uri' => 'https://github.com/excon/excon/blob/master/README.md', + 'source_code_uri' => 'https://github.com/excon/excon', + 'wiki_uri' => 'https://github.com/excon/excon/wiki' + } +end diff --git a/tests/packagedcode/data/rubygems/version-constant/faraday.gemspec b/tests/packagedcode/data/rubygems/version-constant/faraday.gemspec new file mode 100644 index 00000000000..1becba2bb0a --- /dev/null +++ b/tests/packagedcode/data/rubygems/version-constant/faraday.gemspec @@ -0,0 +1 @@ +404: Not Found \ No newline at end of file diff --git a/tests/packagedcode/data/rubygems/version-constant/simple-constant.gemspec b/tests/packagedcode/data/rubygems/version-constant/simple-constant.gemspec new file mode 100644 index 00000000000..e885a9453cd --- /dev/null +++ b/tests/packagedcode/data/rubygems/version-constant/simple-constant.gemspec @@ -0,0 +1,6 @@ +Gem::Specification.new do |s| + s.name = 'my-gem' + s.version = MyGem::VERSION + s.summary = 'A gem with a version constant' + s.license = 'MIT' +end diff --git a/tests/packagedcode/data/rubygems/version-constant/simple-version.gemspec b/tests/packagedcode/data/rubygems/version-constant/simple-version.gemspec new file mode 100644 index 00000000000..3fb26f707c7 --- /dev/null +++ b/tests/packagedcode/data/rubygems/version-constant/simple-version.gemspec @@ -0,0 +1,6 @@ +Gem::Specification.new do |s| + s.name = 'my-gem' + s.version = '1.2.3' + s.summary = 'A gem with a real version' + s.license = 'MIT' +end diff --git a/tests/packagedcode/test_rubygems.py b/tests/packagedcode/test_rubygems.py index af55f355ab7..c0e25a684ab 100644 --- a/tests/packagedcode/test_rubygems.py +++ b/tests/packagedcode/test_rubygems.py @@ -15,14 +15,56 @@ from commoncode.testcase import FileBasedTesting from packagedcode import rubygems +from packagedcode import spec from packages_test_utils import PackageTester from scancode_config import REGEN_TEST_FIXTURES +REGEN_TEST_FIXTURES = False + # TODO: Add test with https://rubygems.org/gems/pbox2d/versions/1.0.3-java # this is a multiple personality package (Java and Ruby) # see also https://rubygems.org/downloads/jaro_winkler-1.5.1-java.gem +class TestGemspecVersionConstant(PackageTester): + test_data_dir = os.path.join(os.path.dirname(__file__), 'data') + + def test_version_constant_returns_none_for_elasticsearch(self): + test_file = self.get_test_loc('rubygems/version-constant/elasticsearch-api.gemspec') + packages = list(rubygems.GemspecHandler.parse(test_file)) + assert packages + pkg = packages[0] + assert pkg.name == 'elasticsearch-api' + assert pkg.version is None + assert 'Elasticsearch' not in str(pkg.version) + assert pkg.download_url is None + + def test_version_constant_returns_none_for_simple_constant(self): + test_file = self.get_test_loc('rubygems/version-constant/simple-constant.gemspec') + packages = list(rubygems.GemspecHandler.parse(test_file)) + assert packages + pkg = packages[0] + assert pkg.name == 'my-gem' + assert pkg.version is None + + def test_real_version_is_preserved(self): + test_file = self.get_test_loc('rubygems/version-constant/simple-version.gemspec') + packages = list(rubygems.GemspecHandler.parse(test_file)) + assert packages + pkg = packages[0] + assert pkg.name == 'my-gem' + assert pkg.version == '1.2.3' + + def test_is_ruby_version_constant_function(self): + assert spec.is_ruby_version_constant('Elasticsearch::API::VERSION') is True + assert spec.is_ruby_version_constant('MyGem::VERSION') is True + assert spec.is_ruby_version_constant('Faraday::VERSION') is True + assert spec.is_ruby_version_constant('1.0.0') is False + assert spec.is_ruby_version_constant("'2.3.4'") is False + assert spec.is_ruby_version_constant(None) is False + assert spec.is_ruby_version_constant('') is False + + class TestGemSpec(PackageTester): test_data_dir = os.path.join(os.path.dirname(__file__), 'data') From f8d1c477b1d4efe080bb0db715feca258856ae1b Mon Sep 17 00:00:00 2001 From: kumarasantosh Date: Fri, 13 Mar 2026 09:01:36 +0530 Subject: [PATCH 2/7] packagedcode: add gemspec version constant coverage Signed-off-by: kumarasantosh --- .../gemspec/github.gemspec.expected.json | 12 +++--- .../oj.gemspec-package-only.expected.json | 12 +++--- .../rubygems/gemspec/oj.gemspec.expected.json | 12 +++--- .../gemspec/rubocop.gemspec.expected.json | 12 +++--- .../with_variables.gemspec.expected.json | 12 +++--- .../rubygems/version-constant/faraday.gemspec | 38 +++++++++++++++++- tests/packagedcode/test_rubygems.py | 40 +++++++++++++------ 7 files changed, 95 insertions(+), 43 deletions(-) diff --git a/tests/packagedcode/data/rubygems/gemspec/github.gemspec.expected.json b/tests/packagedcode/data/rubygems/gemspec/github.gemspec.expected.json index 51a94b6f65f..9e64c87ef14 100644 --- a/tests/packagedcode/data/rubygems/gemspec/github.gemspec.expected.json +++ b/tests/packagedcode/data/rubygems/gemspec/github.gemspec.expected.json @@ -3,7 +3,7 @@ "type": "gem", "namespace": null, "name": "github", - "version": "GitHub::VERSION", + "version": null, "qualifiers": {}, "subpath": null, "primary_language": "Ruby", @@ -48,7 +48,7 @@ ], "keywords": [], "homepage_url": "https://github.com/defunkt/github-gem", - "download_url": "https://rubygems.org/downloads/github-GitHub::VERSION.gem", + "download_url": null, "size": null, "sha1": null, "md5": null, @@ -162,10 +162,10 @@ "extra_data": {} } ], - "repository_homepage_url": "https://rubygems.org/gems/github/versions/GitHub::VERSION", - "repository_download_url": "https://rubygems.org/downloads/github-GitHub::VERSION.gem", - "api_data_url": "https://rubygems.org/api/v2/rubygems/github/versions/GitHub::VERSION.json", + "repository_homepage_url": "https://rubygems.org/gems/github", + "repository_download_url": null, + "api_data_url": "https://rubygems.org/api/v1/versions/github.json", "datasource_id": "gemspec", - "purl": "pkg:gem/github@GitHub::VERSION" + "purl": "pkg:gem/github" } ] \ No newline at end of file diff --git a/tests/packagedcode/data/rubygems/gemspec/oj.gemspec-package-only.expected.json b/tests/packagedcode/data/rubygems/gemspec/oj.gemspec-package-only.expected.json index 41c2396b66f..2094705b872 100644 --- a/tests/packagedcode/data/rubygems/gemspec/oj.gemspec-package-only.expected.json +++ b/tests/packagedcode/data/rubygems/gemspec/oj.gemspec-package-only.expected.json @@ -3,7 +3,7 @@ "type": "gem", "namespace": null, "name": "oj", - "version": "::Oj::VERSION", + "version": null, "qualifiers": {}, "subpath": null, "primary_language": "Ruby", @@ -27,7 +27,7 @@ ], "keywords": [], "homepage_url": "http://www.ohler.com/oj", - "download_url": "https://rubygems.org/downloads/oj-::Oj::VERSION.gem", + "download_url": null, "size": null, "sha1": null, "md5": null, @@ -97,10 +97,10 @@ "extra_data": {} } ], - "repository_homepage_url": "https://rubygems.org/gems/oj/versions/::Oj::VERSION", - "repository_download_url": "https://rubygems.org/downloads/oj-::Oj::VERSION.gem", - "api_data_url": "https://rubygems.org/api/v2/rubygems/oj/versions/::Oj::VERSION.json", + "repository_homepage_url": "https://rubygems.org/gems/oj", + "repository_download_url": null, + "api_data_url": "https://rubygems.org/api/v1/versions/oj.json", "datasource_id": "gemspec", - "purl": "pkg:gem/oj@::Oj::VERSION" + "purl": "pkg:gem/oj" } ] \ No newline at end of file diff --git a/tests/packagedcode/data/rubygems/gemspec/oj.gemspec.expected.json b/tests/packagedcode/data/rubygems/gemspec/oj.gemspec.expected.json index 41c2396b66f..2094705b872 100644 --- a/tests/packagedcode/data/rubygems/gemspec/oj.gemspec.expected.json +++ b/tests/packagedcode/data/rubygems/gemspec/oj.gemspec.expected.json @@ -3,7 +3,7 @@ "type": "gem", "namespace": null, "name": "oj", - "version": "::Oj::VERSION", + "version": null, "qualifiers": {}, "subpath": null, "primary_language": "Ruby", @@ -27,7 +27,7 @@ ], "keywords": [], "homepage_url": "http://www.ohler.com/oj", - "download_url": "https://rubygems.org/downloads/oj-::Oj::VERSION.gem", + "download_url": null, "size": null, "sha1": null, "md5": null, @@ -97,10 +97,10 @@ "extra_data": {} } ], - "repository_homepage_url": "https://rubygems.org/gems/oj/versions/::Oj::VERSION", - "repository_download_url": "https://rubygems.org/downloads/oj-::Oj::VERSION.gem", - "api_data_url": "https://rubygems.org/api/v2/rubygems/oj/versions/::Oj::VERSION.json", + "repository_homepage_url": "https://rubygems.org/gems/oj", + "repository_download_url": null, + "api_data_url": "https://rubygems.org/api/v1/versions/oj.json", "datasource_id": "gemspec", - "purl": "pkg:gem/oj@::Oj::VERSION" + "purl": "pkg:gem/oj" } ] \ No newline at end of file diff --git a/tests/packagedcode/data/rubygems/gemspec/rubocop.gemspec.expected.json b/tests/packagedcode/data/rubygems/gemspec/rubocop.gemspec.expected.json index 0c9b94323f8..c47e41a3c7a 100644 --- a/tests/packagedcode/data/rubygems/gemspec/rubocop.gemspec.expected.json +++ b/tests/packagedcode/data/rubygems/gemspec/rubocop.gemspec.expected.json @@ -3,7 +3,7 @@ "type": "gem", "namespace": null, "name": "rubocop", - "version": "RuboCop::Version::STRING", + "version": null, "qualifiers": {}, "subpath": null, "primary_language": "Ruby", @@ -41,7 +41,7 @@ ], "keywords": [], "homepage_url": "https://github.com/rubocop-hq/rubocop", - "download_url": "https://rubygems.org/downloads/rubocop-RuboCop::Version::STRING.gem", + "download_url": null, "size": null, "sha1": null, "md5": null, @@ -166,10 +166,10 @@ "extra_data": {} } ], - "repository_homepage_url": "https://rubygems.org/gems/rubocop/versions/RuboCop::Version::STRING", - "repository_download_url": "https://rubygems.org/downloads/rubocop-RuboCop::Version::STRING.gem", - "api_data_url": "https://rubygems.org/api/v2/rubygems/rubocop/versions/RuboCop::Version::STRING.json", + "repository_homepage_url": "https://rubygems.org/gems/rubocop", + "repository_download_url": null, + "api_data_url": "https://rubygems.org/api/v1/versions/rubocop.json", "datasource_id": "gemspec", - "purl": "pkg:gem/rubocop@RuboCop::Version::STRING" + "purl": "pkg:gem/rubocop" } ] \ No newline at end of file diff --git a/tests/packagedcode/data/rubygems/gemspec/with_variables.gemspec.expected.json b/tests/packagedcode/data/rubygems/gemspec/with_variables.gemspec.expected.json index 9d706386763..d402d106eb9 100644 --- a/tests/packagedcode/data/rubygems/gemspec/with_variables.gemspec.expected.json +++ b/tests/packagedcode/data/rubygems/gemspec/with_variables.gemspec.expected.json @@ -3,7 +3,7 @@ "type": "gem", "namespace": null, "name": "ProviderDSL::GemDescription::NAME", - "version": "ProviderDSL::GemDescription::VERSION", + "version": null, "qualifiers": {}, "subpath": null, "primary_language": "Ruby", @@ -27,7 +27,7 @@ ], "keywords": [], "homepage_url": "ProviderDSL::GemDescription::PAGE", - "download_url": "https://rubygems.org/downloads/ProviderDSL::GemDescription::NAME-ProviderDSL::GemDescription::VERSION.gem", + "download_url": null, "size": null, "sha1": null, "md5": null, @@ -152,10 +152,10 @@ "extra_data": {} } ], - "repository_homepage_url": "https://rubygems.org/gems/ProviderDSL::GemDescription::NAME/versions/ProviderDSL::GemDescription::VERSION", - "repository_download_url": "https://rubygems.org/downloads/ProviderDSL::GemDescription::NAME-ProviderDSL::GemDescription::VERSION.gem", - "api_data_url": "https://rubygems.org/api/v2/rubygems/ProviderDSL::GemDescription::NAME/versions/ProviderDSL::GemDescription::VERSION.json", + "repository_homepage_url": "https://rubygems.org/gems/ProviderDSL::GemDescription::NAME", + "repository_download_url": null, + "api_data_url": "https://rubygems.org/api/v1/versions/ProviderDSL::GemDescription::NAME.json", "datasource_id": "gemspec", - "purl": "pkg:gem/ProviderDSL::GemDescription::NAME@ProviderDSL::GemDescription::VERSION" + "purl": "pkg:gem/ProviderDSL::GemDescription::NAME" } ] \ No newline at end of file diff --git a/tests/packagedcode/data/rubygems/version-constant/faraday.gemspec b/tests/packagedcode/data/rubygems/version-constant/faraday.gemspec index 1becba2bb0a..1127632356c 100644 --- a/tests/packagedcode/data/rubygems/version-constant/faraday.gemspec +++ b/tests/packagedcode/data/rubygems/version-constant/faraday.gemspec @@ -1 +1,37 @@ -404: Not Found \ No newline at end of file +# frozen_string_literal: true + +require_relative 'lib/faraday/version' + +Gem::Specification.new do |spec| + spec.name = 'faraday' + spec.version = Faraday::VERSION + + spec.summary = 'HTTP/REST API client library.' + + spec.authors = ['@technoweenie', '@iMacTia', '@olleolleolle'] + spec.email = 'technoweenie@gmail.com' + spec.homepage = 'https://lostisland.github.io/faraday' + spec.licenses = ['MIT'] + + spec.required_ruby_version = '>= 2.6' + + # faraday-net_http is the "default adapter", but being a Faraday dependency it can't + # control which version of faraday it will be pulled from. + # To avoid releasing a major version every time there's a new Faraday API, we should + # always fix its required version to the next MINOR version. + # This way, we can release minor versions of the adapter with "breaking" changes for older versions of Faraday + # and then bump the version requirement on the next compatible version of faraday. + spec.add_dependency 'faraday-net_http', '>= 2.0', '< 3.1' + spec.add_dependency 'ruby2_keywords', '>= 0.0.4' + + # Includes `examples` and `spec` to allow external adapter gems to run Faraday unit and integration tests + spec.files = Dir['CHANGELOG.md', '{examples,lib,spec}/**/*', 'LICENSE.md', 'Rakefile', 'README.md'] + spec.require_paths = %w[lib spec/external_adapters] + spec.metadata = { + 'homepage_uri' => 'https://lostisland.github.io/faraday', + 'changelog_uri' => + "https://github.com/lostisland/faraday/releases/tag/v#{spec.version}", + 'source_code_uri' => 'https://github.com/lostisland/faraday', + 'bug_tracker_uri' => 'https://github.com/lostisland/faraday/issues' + } +end diff --git a/tests/packagedcode/test_rubygems.py b/tests/packagedcode/test_rubygems.py index c0e25a684ab..ac89125a828 100644 --- a/tests/packagedcode/test_rubygems.py +++ b/tests/packagedcode/test_rubygems.py @@ -19,8 +19,6 @@ from packages_test_utils import PackageTester from scancode_config import REGEN_TEST_FIXTURES -REGEN_TEST_FIXTURES = False - # TODO: Add test with https://rubygems.org/gems/pbox2d/versions/1.0.3-java # this is a multiple personality package (Java and Ruby) # see also https://rubygems.org/downloads/jaro_winkler-1.5.1-java.gem @@ -29,23 +27,41 @@ class TestGemspecVersionConstant(PackageTester): test_data_dir = os.path.join(os.path.dirname(__file__), 'data') - def test_version_constant_returns_none_for_elasticsearch(self): - test_file = self.get_test_loc('rubygems/version-constant/elasticsearch-api.gemspec') + def _check_version_constant_package(self, test_path, expected_name): + test_file = self.get_test_loc(test_path) packages = list(rubygems.GemspecHandler.parse(test_file)) assert packages pkg = packages[0] - assert pkg.name == 'elasticsearch-api' + assert pkg.name == expected_name assert pkg.version is None - assert 'Elasticsearch' not in str(pkg.version) assert pkg.download_url is None + assert pkg.api_data_url == f'https://rubygems.org/api/v1/versions/{expected_name}.json' + return pkg + + def test_version_constant_returns_none_for_elasticsearch(self): + pkg = self._check_version_constant_package( + test_path='rubygems/version-constant/elasticsearch-api.gemspec', + expected_name='elasticsearch-api', + ) + assert 'Elasticsearch' not in str(pkg.version) + + def test_version_constant_returns_none_for_excon(self): + self._check_version_constant_package( + test_path='rubygems/version-constant/excon.gemspec', + expected_name='excon', + ) + + def test_version_constant_returns_none_for_faraday(self): + self._check_version_constant_package( + test_path='rubygems/version-constant/faraday.gemspec', + expected_name='faraday', + ) def test_version_constant_returns_none_for_simple_constant(self): - test_file = self.get_test_loc('rubygems/version-constant/simple-constant.gemspec') - packages = list(rubygems.GemspecHandler.parse(test_file)) - assert packages - pkg = packages[0] - assert pkg.name == 'my-gem' - assert pkg.version is None + self._check_version_constant_package( + test_path='rubygems/version-constant/simple-constant.gemspec', + expected_name='my-gem', + ) def test_real_version_is_preserved(self): test_file = self.get_test_loc('rubygems/version-constant/simple-version.gemspec') From 09a419792201f232f8a9ac3020507d60e17ac4f7 Mon Sep 17 00:00:00 2001 From: kumarasantosh Date: Tue, 24 Mar 2026 21:15:24 +0530 Subject: [PATCH 3/7] git push origin fix/ibpp-license-detection-issue-3553 --force licenses: add IBPP License v1.1 detection - Add ibpp_ref.RULE for reference-style detection - Add ibpp_intro.RULE for header/copyright line detection - Add test data for IBPP interference with passwdqc block Signed-off-by: kumarasantosh --- src/licensedcode/data/rules/ibpp_intro.RULE | 9 +++++++ src/licensedcode/data/rules/ibpp_ref.RULE | 6 +++++ .../datadriven/lic1/wt_ibpp_interference.md | 26 ++++++++++++++++++ .../lic1/wt_ibpp_interference.md.yml | 5 ++++ .../test_plugin_license_detection.py | 27 +++++++++++++++++++ 5 files changed, 73 insertions(+) create mode 100644 src/licensedcode/data/rules/ibpp_intro.RULE create mode 100644 src/licensedcode/data/rules/ibpp_ref.RULE create mode 100644 tests/licensedcode/data/datadriven/lic1/wt_ibpp_interference.md create mode 100644 tests/licensedcode/data/datadriven/lic1/wt_ibpp_interference.md.yml diff --git a/src/licensedcode/data/rules/ibpp_intro.RULE b/src/licensedcode/data/rules/ibpp_intro.RULE new file mode 100644 index 00000000000..b21ff2f76e7 --- /dev/null +++ b/src/licensedcode/data/rules/ibpp_intro.RULE @@ -0,0 +1,9 @@ +--- +license_expression: ibpp +is_license_intro: yes +relevance: 85 +--- +IBPP License v1.1 +----------------- + +(C) Copyright 2000-2006 T.I.P. Group S.A. and the IBPP Team (www.ibpp.org) diff --git a/src/licensedcode/data/rules/ibpp_ref.RULE b/src/licensedcode/data/rules/ibpp_ref.RULE new file mode 100644 index 00000000000..33b1b92c021 --- /dev/null +++ b/src/licensedcode/data/rules/ibpp_ref.RULE @@ -0,0 +1,6 @@ +--- +license_expression: ibpp +is_license_reference: yes +relevance: 90 +--- +IBPP License, see appendix diff --git a/tests/licensedcode/data/datadriven/lic1/wt_ibpp_interference.md b/tests/licensedcode/data/datadriven/lic1/wt_ibpp_interference.md new file mode 100644 index 00000000000..e2f3d9169b4 --- /dev/null +++ b/tests/licensedcode/data/datadriven/lic1/wt_ibpp_interference.md @@ -0,0 +1,26 @@ +passwdqc +Copyright (c) 2000-2002 by Solar Designer +Copyright (c) 2008,2009 by Dmitry V. Levin +Redistribution and use in source and binary forms, with or without +modification, are permitted. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. + +| IBPP | Wt::Dbo Firebird backend | IBPP License, see appendix | Copyright 2000-2006 T.I.P. Group S.A. and the IBPP Team | + +### IBPP + +IBPP License v1.1 +----------------- + +(C) Copyright 2000-2006 T.I.P. Group S.A. and the IBPP Team (www.ibpp.org) diff --git a/tests/licensedcode/data/datadriven/lic1/wt_ibpp_interference.md.yml b/tests/licensedcode/data/datadriven/lic1/wt_ibpp_interference.md.yml new file mode 100644 index 00000000000..b3f4bd6ed67 --- /dev/null +++ b/tests/licensedcode/data/datadriven/lic1/wt_ibpp_interference.md.yml @@ -0,0 +1,5 @@ +notes: Minimal Wt-derived regression fixture for issue #3553, keeping the passwdqc disclaimer immediately before the IBPP reference and appendix intro. +license_expressions: + - bsd-1-clause + - ibpp + - ibpp diff --git a/tests/licensedcode/test_plugin_license_detection.py b/tests/licensedcode/test_plugin_license_detection.py index ff249bc9c58..20f75d6a3c2 100644 --- a/tests/licensedcode/test_plugin_license_detection.py +++ b/tests/licensedcode/test_plugin_license_detection.py @@ -374,3 +374,30 @@ def test_match_reference_license(): must_exist=False, ) check_json_scan(expected_loc, result_file, regen=REGEN_TEST_FIXTURES) + + +def test_wt_ibpp_interference_is_detected_in_scan_output(): + test_file = test_env.get_test_loc('datadriven/lic1/wt_ibpp_interference.md') + result_file = test_env.get_temp_file('json') + args = [ + '--license', + '--license-text', + '--license-text-diagnostics', + '--license-diagnostics', + '--strip-root', + '--json', result_file, + test_file, + ] + run_scan_click(args, processes='1') + + from commoncode.resource import VirtualCodebase + + codebase = VirtualCodebase(result_file) + resource = codebase.get_resource(path='wt_ibpp_interference.md') + + assert resource.detected_license_expression == 'bsd-1-clause AND ibpp' + assert any( + match['license_expression'] == 'ibpp' + for detection in resource.license_detections + for match in detection['matches'] + ) From 2eae344cf9f537466420fa4221ccd74b60f6d6de Mon Sep 17 00:00:00 2001 From: kumarasantosh Date: Tue, 24 Mar 2026 21:48:24 +0530 Subject: [PATCH 4/7] licenses: add IBPP License v1.1 detection [fixes #3553] Signed-off-by: kumarasantosh --- src/licensedcode/data/rules/ibpp_intro.RULE | 6 +++++ .../data/datadriven/lic1/ibpp.txt | 22 +++++++++++++++++++ .../data/datadriven/lic1/ibpp.txt.yml | 2 ++ 3 files changed, 30 insertions(+) create mode 100644 tests/licensedcode/data/datadriven/lic1/ibpp.txt create mode 100644 tests/licensedcode/data/datadriven/lic1/ibpp.txt.yml diff --git a/src/licensedcode/data/rules/ibpp_intro.RULE b/src/licensedcode/data/rules/ibpp_intro.RULE index b21ff2f76e7..7f393ad6b2c 100644 --- a/src/licensedcode/data/rules/ibpp_intro.RULE +++ b/src/licensedcode/data/rules/ibpp_intro.RULE @@ -2,6 +2,12 @@ license_expression: ibpp is_license_intro: yes relevance: 85 +ignorable_copyrights: + - (c) Copyright 2000-2006 T.I.P. Group S.A. and the IBPP Team (www.ibpp.org) +ignorable_holders: + - T.I.P. Group S.A. and the IBPP Team +ignorable_urls: + - http://www.ibpp.org/ --- IBPP License v1.1 ----------------- diff --git a/tests/licensedcode/data/datadriven/lic1/ibpp.txt b/tests/licensedcode/data/datadriven/lic1/ibpp.txt new file mode 100644 index 00000000000..99ef9e6b258 --- /dev/null +++ b/tests/licensedcode/data/datadriven/lic1/ibpp.txt @@ -0,0 +1,22 @@ +IBPP License v1.1 +----------------- + +(C) Copyright 2000-2006 T.I.P. Group S.A. and the IBPP Team (www.ibpp.org) + +Permission is hereby granted, free of charge, to any person or organization +("You") obtaining a copy of this software and associated documentation files +covered by this license (the "Software") to use the Software as part of another +work; to modify it for that purpose; to publish or distribute it, modified or +not, for that same purpose; to permit persons to whom the other work using the +Software is furnished to do so; subject to the following conditions: the above +copyright notice and this complete and unmodified permission notice shall be +included in all copies or substantial portions of the Software; You will not +misrepresent modified versions of the Software as being the original. + +The Software is provided "as is", without warranty of any kind, express or +implied, including but not limited to the warranties of merchantability, +fitness for a particular purpose and noninfringement. In no event shall +the authors or copyright holders be liable for any claim, damages or other +liability, whether in an action of contract, tort or otherwise, arising from, +out of or in connection with the software or the use of other dealings in +the Software. diff --git a/tests/licensedcode/data/datadriven/lic1/ibpp.txt.yml b/tests/licensedcode/data/datadriven/lic1/ibpp.txt.yml new file mode 100644 index 00000000000..478f3b1a5f0 --- /dev/null +++ b/tests/licensedcode/data/datadriven/lic1/ibpp.txt.yml @@ -0,0 +1,2 @@ +license_expressions: + - ibpp From ce55547199d42f7d0b97fc49796964d70d3b1f22 Mon Sep 17 00:00:00 2001 From: kumarasantosh Date: Wed, 25 Mar 2026 08:46:21 +0530 Subject: [PATCH 5/7] Add publiccode.yml package handler\n\nImplements a new DatafileHandler to parse publiccode.yml files.\npubliccode.yml is a metadata standard for public sector open source\nsoftware. See https://github.com/publiccodeyml/publiccode.yml\n\nResolves #2851 Signed-off-by: kumarasantosh --- src/packagedcode/__init__.py | 3 + src/packagedcode/publiccode.py | 133 ++++++++++++++++++ .../data/publiccode/publiccode.yml | 47 +++++++ tests/packagedcode/test_publiccode.py | 46 ++++++ 4 files changed, 229 insertions(+) create mode 100644 src/packagedcode/publiccode.py create mode 100644 tests/packagedcode/data/publiccode/publiccode.yml create mode 100644 tests/packagedcode/test_publiccode.py diff --git a/src/packagedcode/__init__.py b/src/packagedcode/__init__.py index d3c48b6e259..4e16aa97d4e 100644 --- a/src/packagedcode/__init__.py +++ b/src/packagedcode/__init__.py @@ -33,6 +33,7 @@ from packagedcode import opam from packagedcode import phpcomposer from packagedcode import pubspec +from packagedcode import publiccode from packagedcode import pypi from packagedcode import readme from packagedcode import rpm @@ -77,6 +78,8 @@ conda.CondaMetaYamlHandler, conda.CondaYamlHandler, + publiccode.PubliccodeYmlHandler, + conan.ConanFileHandler, conan.ConanDataHandler, diff --git a/src/packagedcode/publiccode.py b/src/packagedcode/publiccode.py new file mode 100644 index 00000000000..ba7d59fd2c8 --- /dev/null +++ b/src/packagedcode/publiccode.py @@ -0,0 +1,133 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/scancode-toolkit for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging +import os + +import saneyaml + +from packagedcode import models + +""" +Handle publiccode.yml metadata files. +publiccode.yml is a metadata standard for public sector open source software. +See https://github.com/publiccodeyml/publiccode.yml +""" + +TRACE = os.environ.get('SCANCODE_DEBUG_PACKAGE', False) + +logger = logging.getLogger(__name__) + + +class PubliccodeYmlHandler(models.DatafileHandler): + datasource_id = 'publiccode_yml' + path_patterns = ('*/publiccode.yml', '*/publiccode.yaml') + default_package_type = 'publiccode' + default_primary_language = None + description = 'publiccode.yml metadata file' + documentation_url = 'https://github.com/publiccodeyml/publiccode.yml' + + @classmethod + def parse(cls, location, package_only=False): + with open(location, 'rb') as f: + data = saneyaml.load(f.read()) + + if not data or not isinstance(data, dict): + return + + # Validate: a publiccode.yml must have 'publiccodeYmlVersion' + if 'publiccodeYmlVersion' not in data: + return + + name = data.get('name') + version = data.get('softwareVersion') + vcs_url = data.get('url') + homepage_url = data.get('landingURL') or vcs_url + + # License is under legal.license (SPDX expression) + legal = data.get('legal') or {} + declared_license = legal.get('license') + copyright_statement = legal.get('mainCopyrightOwner') or legal.get('repoOwner') + + # Description: prefer English, fall back to first available language + description = _get_description(data) + + # Keywords from categories + categories = data.get('categories') or [] + keywords = ', '.join(categories) if categories else None + + # Parties from maintenance.contacts + parties = [] + maintenance = data.get('maintenance') or {} + for contact in maintenance.get('contacts') or []: + contact_name = contact.get('name') + contact_email = contact.get('email') + if contact_name or contact_email: + parties.append( + models.Party( + type=models.party_person, + name=contact_name, + email=contact_email, + role='maintainer', + ) + ) + + # Extra data + extra_data = {} + schema_version = data.get('publiccodeYmlVersion') + if schema_version: + extra_data['publiccodeYmlVersion'] = schema_version + platforms = data.get('platforms') + if platforms: + extra_data['platforms'] = platforms + development_status = data.get('developmentStatus') + if development_status: + extra_data['developmentStatus'] = development_status + software_type = data.get('softwareType') + if software_type: + extra_data['softwareType'] = software_type + + yield models.PackageData( + datasource_id=cls.datasource_id, + type=cls.default_package_type, + name=name, + version=version, + vcs_url=vcs_url, + homepage_url=homepage_url, + description=description, + declared_license_expression=declared_license, + copyright=copyright_statement, + keywords=keywords, + parties=parties, + extra_data=extra_data or None, + ) + + +def _get_description(data): + """ + Extract the best available description from publiccode.yml's + multilingual 'description' block. Prefer English, fall back to + any available language. Returns longDescription, else shortDescription. + """ + description_block = data.get('description') or {} + if not description_block: + return + + lang_data = ( + description_block.get('en') + or description_block.get('eng') + or next(iter(description_block.values()), None) + ) + if not lang_data: + return + + long_desc = lang_data.get('longDescription', '').strip() + short_desc = lang_data.get('shortDescription', '').strip() + + return long_desc or short_desc or None diff --git a/tests/packagedcode/data/publiccode/publiccode.yml b/tests/packagedcode/data/publiccode/publiccode.yml new file mode 100644 index 00000000000..10728af5dd6 --- /dev/null +++ b/tests/packagedcode/data/publiccode/publiccode.yml @@ -0,0 +1,47 @@ +publiccodeYmlVersion: "0.4" + +name: Medusa +url: "https://example.com/italia/medusa.git" +landingURL: "https://example.com/medusa" +softwareVersion: "1.0.3" + +platforms: + - web + - linux + +categories: + - financial-reporting + - accounting + +developmentStatus: stable +softwareType: "standalone/desktop" + +description: + en: + shortDescription: > + A short description of this software. + longDescription: > + A very long description of this software. It explains what it does, + who it is for, and why you might want to use it in a public + administration context. + features: + - Feature one + - Feature two + +legal: + license: AGPL-3.0-or-later + mainCopyrightOwner: City of Example + repoOwner: City of Example + +maintenance: + type: "contract" + contacts: + - name: Francesco Rossi + email: f.rossi@example.com + affiliation: City of Example + +localisation: + localisationReady: true + availableLanguages: + - en + - it diff --git a/tests/packagedcode/test_publiccode.py b/tests/packagedcode/test_publiccode.py new file mode 100644 index 00000000000..e4755e27132 --- /dev/null +++ b/tests/packagedcode/test_publiccode.py @@ -0,0 +1,46 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# + +import os +import pytest + +from packagedcode.publiccode import PubliccodeYmlHandler + +TESTDATA_DIR = os.path.join(os.path.dirname(__file__), 'data', 'publiccode') + + +def test_publiccode_yml_basic(): + location = os.path.join(TESTDATA_DIR, 'publiccode.yml') + packages = list(PubliccodeYmlHandler.parse(location)) + assert len(packages) == 1 + pkg = packages[0] + + assert pkg.name == 'Medusa' + assert pkg.version == '1.0.3' + assert pkg.vcs_url == 'https://example.com/italia/medusa.git' + assert pkg.homepage_url == 'https://example.com/medusa' + assert pkg.declared_license_expression == 'AGPL-3.0-or-later' + assert pkg.copyright == 'City of Example' + assert 'financial-reporting' in pkg.keywords + assert len(pkg.parties) == 1 + assert pkg.parties[0].name == 'Francesco Rossi' + assert pkg.parties[0].email == 'f.rossi@example.com' + assert pkg.parties[0].role == 'maintainer' + + +def test_publiccode_yml_no_version_key_returns_nothing(tmp_path): + """A YAML file without publiccodeYmlVersion should yield nothing.""" + f = tmp_path / 'publiccode.yml' + f.write_text('name: something\nversion: 1.0\n') + packages = list(PubliccodeYmlHandler.parse(str(f))) + assert packages == [] + + +def test_publiccode_yml_path_patterns(): + assert PubliccodeYmlHandler.path_patterns == ( + '*/publiccode.yml', + '*/publiccode.yaml', + ) From 98a1fc0b8e75b1e7b6ac8e1bf254a546cbc075d0 Mon Sep 17 00:00:00 2001 From: kumarasantosh Date: Wed, 15 Apr 2026 21:39:37 +0530 Subject: [PATCH 6/7] packagedcode: remove unrelated changes from publiccode PR Signed-off-by: kumarasantosh --- src/licensedcode/data/rules/ibpp_intro.RULE | 15 ---- src/licensedcode/data/rules/ibpp_ref.RULE | 6 -- src/packagedcode/rubygems.py | 1 - src/packagedcode/spec.py | 38 --------- .../data/datadriven/lic1/ibpp.txt | 22 ----- .../data/datadriven/lic1/ibpp.txt.yml | 2 - .../datadriven/lic1/wt_ibpp_interference.md | 26 ------ .../lic1/wt_ibpp_interference.md.yml | 5 -- .../test_plugin_license_detection.py | 27 ------ .../gemspec/github.gemspec.expected.json | 12 +-- .../oj.gemspec-package-only.expected.json | 12 +-- .../rubygems/gemspec/oj.gemspec.expected.json | 12 +-- .../gemspec/rubocop.gemspec.expected.json | 12 +-- .../with_variables.gemspec.expected.json | 12 +-- .../elasticsearch-api.gemspec | 83 ------------------- .../rubygems/version-constant/excon.gemspec | 45 ---------- .../rubygems/version-constant/faraday.gemspec | 37 --------- .../version-constant/simple-constant.gemspec | 6 -- .../version-constant/simple-version.gemspec | 6 -- tests/packagedcode/test_rubygems.py | 58 ------------- 20 files changed, 30 insertions(+), 407 deletions(-) delete mode 100644 src/licensedcode/data/rules/ibpp_intro.RULE delete mode 100644 src/licensedcode/data/rules/ibpp_ref.RULE delete mode 100644 tests/licensedcode/data/datadriven/lic1/ibpp.txt delete mode 100644 tests/licensedcode/data/datadriven/lic1/ibpp.txt.yml delete mode 100644 tests/licensedcode/data/datadriven/lic1/wt_ibpp_interference.md delete mode 100644 tests/licensedcode/data/datadriven/lic1/wt_ibpp_interference.md.yml delete mode 100644 tests/packagedcode/data/rubygems/version-constant/elasticsearch-api.gemspec delete mode 100644 tests/packagedcode/data/rubygems/version-constant/excon.gemspec delete mode 100644 tests/packagedcode/data/rubygems/version-constant/faraday.gemspec delete mode 100644 tests/packagedcode/data/rubygems/version-constant/simple-constant.gemspec delete mode 100644 tests/packagedcode/data/rubygems/version-constant/simple-version.gemspec diff --git a/src/licensedcode/data/rules/ibpp_intro.RULE b/src/licensedcode/data/rules/ibpp_intro.RULE deleted file mode 100644 index 7f393ad6b2c..00000000000 --- a/src/licensedcode/data/rules/ibpp_intro.RULE +++ /dev/null @@ -1,15 +0,0 @@ ---- -license_expression: ibpp -is_license_intro: yes -relevance: 85 -ignorable_copyrights: - - (c) Copyright 2000-2006 T.I.P. Group S.A. and the IBPP Team (www.ibpp.org) -ignorable_holders: - - T.I.P. Group S.A. and the IBPP Team -ignorable_urls: - - http://www.ibpp.org/ ---- -IBPP License v1.1 ------------------ - -(C) Copyright 2000-2006 T.I.P. Group S.A. and the IBPP Team (www.ibpp.org) diff --git a/src/licensedcode/data/rules/ibpp_ref.RULE b/src/licensedcode/data/rules/ibpp_ref.RULE deleted file mode 100644 index 33b1b92c021..00000000000 --- a/src/licensedcode/data/rules/ibpp_ref.RULE +++ /dev/null @@ -1,6 +0,0 @@ ---- -license_expression: ibpp -is_license_reference: yes -relevance: 90 ---- -IBPP License, see appendix diff --git a/src/packagedcode/rubygems.py b/src/packagedcode/rubygems.py index dc13b1560f9..e80295c48af 100644 --- a/src/packagedcode/rubygems.py +++ b/src/packagedcode/rubygems.py @@ -706,7 +706,6 @@ def party_mapper(role, names=[], emails=[]): models.Party(type=models.party_person, email=email, role=role) for email in emails ) - return () def get_parties(gem_data): diff --git a/src/packagedcode/spec.py b/src/packagedcode/spec.py index 4c4154485fd..95dfdba7dd9 100644 --- a/src/packagedcode/spec.py +++ b/src/packagedcode/spec.py @@ -133,40 +133,6 @@ def get_authors(line): } -def is_ruby_version_constant(value): - """ - Return True if value looks like a Ruby constant expression - that cannot be resolved statically, such as: - Elasticsearch::API::VERSION or MyGem::VERSION - - These are dynamic values that reference Ruby constants - and cannot be determined without executing the Ruby code. - - For example: - >>> is_ruby_version_constant('Elasticsearch::API::VERSION') - True - >>> is_ruby_version_constant('MyGem::VERSION') - True - >>> is_ruby_version_constant('1.0.0') - False - >>> is_ruby_version_constant("'2.3.4'") - False - >>> is_ruby_version_constant(None) - False - """ - if not value: - return False - # Ruby constants use :: as namespace separator - if '::' in value: - return True - # A bare constant starts with uppercase and has no dots/quotes - # e.g. VERSION (unlikely but possible) - stripped = value.strip('\'"') - if stripped and stripped[0].isupper() and '.' not in stripped: - return True - return False - - def parse_spec(location, package_type): """ Return a mapping of data parsed from a podspec/gemspec/Pofile/Gemfile file @@ -185,10 +151,6 @@ def parse_spec(location, package_type): parsed = parser(line=line) if parsed: spec_data[attribute_name] = parsed - - version = spec_data.get('version') - if is_ruby_version_constant(version): - spec_data['version'] = None # description can be in single or multi-lines # There are many different ways to write description. diff --git a/tests/licensedcode/data/datadriven/lic1/ibpp.txt b/tests/licensedcode/data/datadriven/lic1/ibpp.txt deleted file mode 100644 index 99ef9e6b258..00000000000 --- a/tests/licensedcode/data/datadriven/lic1/ibpp.txt +++ /dev/null @@ -1,22 +0,0 @@ -IBPP License v1.1 ------------------ - -(C) Copyright 2000-2006 T.I.P. Group S.A. and the IBPP Team (www.ibpp.org) - -Permission is hereby granted, free of charge, to any person or organization -("You") obtaining a copy of this software and associated documentation files -covered by this license (the "Software") to use the Software as part of another -work; to modify it for that purpose; to publish or distribute it, modified or -not, for that same purpose; to permit persons to whom the other work using the -Software is furnished to do so; subject to the following conditions: the above -copyright notice and this complete and unmodified permission notice shall be -included in all copies or substantial portions of the Software; You will not -misrepresent modified versions of the Software as being the original. - -The Software is provided "as is", without warranty of any kind, express or -implied, including but not limited to the warranties of merchantability, -fitness for a particular purpose and noninfringement. In no event shall -the authors or copyright holders be liable for any claim, damages or other -liability, whether in an action of contract, tort or otherwise, arising from, -out of or in connection with the software or the use of other dealings in -the Software. diff --git a/tests/licensedcode/data/datadriven/lic1/ibpp.txt.yml b/tests/licensedcode/data/datadriven/lic1/ibpp.txt.yml deleted file mode 100644 index 478f3b1a5f0..00000000000 --- a/tests/licensedcode/data/datadriven/lic1/ibpp.txt.yml +++ /dev/null @@ -1,2 +0,0 @@ -license_expressions: - - ibpp diff --git a/tests/licensedcode/data/datadriven/lic1/wt_ibpp_interference.md b/tests/licensedcode/data/datadriven/lic1/wt_ibpp_interference.md deleted file mode 100644 index e2f3d9169b4..00000000000 --- a/tests/licensedcode/data/datadriven/lic1/wt_ibpp_interference.md +++ /dev/null @@ -1,26 +0,0 @@ -passwdqc -Copyright (c) 2000-2002 by Solar Designer -Copyright (c) 2008,2009 by Dmitry V. Levin -Redistribution and use in source and binary forms, with or without -modification, are permitted. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -SUCH DAMAGE. - -| IBPP | Wt::Dbo Firebird backend | IBPP License, see appendix | Copyright 2000-2006 T.I.P. Group S.A. and the IBPP Team | - -### IBPP - -IBPP License v1.1 ------------------ - -(C) Copyright 2000-2006 T.I.P. Group S.A. and the IBPP Team (www.ibpp.org) diff --git a/tests/licensedcode/data/datadriven/lic1/wt_ibpp_interference.md.yml b/tests/licensedcode/data/datadriven/lic1/wt_ibpp_interference.md.yml deleted file mode 100644 index b3f4bd6ed67..00000000000 --- a/tests/licensedcode/data/datadriven/lic1/wt_ibpp_interference.md.yml +++ /dev/null @@ -1,5 +0,0 @@ -notes: Minimal Wt-derived regression fixture for issue #3553, keeping the passwdqc disclaimer immediately before the IBPP reference and appendix intro. -license_expressions: - - bsd-1-clause - - ibpp - - ibpp diff --git a/tests/licensedcode/test_plugin_license_detection.py b/tests/licensedcode/test_plugin_license_detection.py index 20f75d6a3c2..ff249bc9c58 100644 --- a/tests/licensedcode/test_plugin_license_detection.py +++ b/tests/licensedcode/test_plugin_license_detection.py @@ -374,30 +374,3 @@ def test_match_reference_license(): must_exist=False, ) check_json_scan(expected_loc, result_file, regen=REGEN_TEST_FIXTURES) - - -def test_wt_ibpp_interference_is_detected_in_scan_output(): - test_file = test_env.get_test_loc('datadriven/lic1/wt_ibpp_interference.md') - result_file = test_env.get_temp_file('json') - args = [ - '--license', - '--license-text', - '--license-text-diagnostics', - '--license-diagnostics', - '--strip-root', - '--json', result_file, - test_file, - ] - run_scan_click(args, processes='1') - - from commoncode.resource import VirtualCodebase - - codebase = VirtualCodebase(result_file) - resource = codebase.get_resource(path='wt_ibpp_interference.md') - - assert resource.detected_license_expression == 'bsd-1-clause AND ibpp' - assert any( - match['license_expression'] == 'ibpp' - for detection in resource.license_detections - for match in detection['matches'] - ) diff --git a/tests/packagedcode/data/rubygems/gemspec/github.gemspec.expected.json b/tests/packagedcode/data/rubygems/gemspec/github.gemspec.expected.json index 9e64c87ef14..51a94b6f65f 100644 --- a/tests/packagedcode/data/rubygems/gemspec/github.gemspec.expected.json +++ b/tests/packagedcode/data/rubygems/gemspec/github.gemspec.expected.json @@ -3,7 +3,7 @@ "type": "gem", "namespace": null, "name": "github", - "version": null, + "version": "GitHub::VERSION", "qualifiers": {}, "subpath": null, "primary_language": "Ruby", @@ -48,7 +48,7 @@ ], "keywords": [], "homepage_url": "https://github.com/defunkt/github-gem", - "download_url": null, + "download_url": "https://rubygems.org/downloads/github-GitHub::VERSION.gem", "size": null, "sha1": null, "md5": null, @@ -162,10 +162,10 @@ "extra_data": {} } ], - "repository_homepage_url": "https://rubygems.org/gems/github", - "repository_download_url": null, - "api_data_url": "https://rubygems.org/api/v1/versions/github.json", + "repository_homepage_url": "https://rubygems.org/gems/github/versions/GitHub::VERSION", + "repository_download_url": "https://rubygems.org/downloads/github-GitHub::VERSION.gem", + "api_data_url": "https://rubygems.org/api/v2/rubygems/github/versions/GitHub::VERSION.json", "datasource_id": "gemspec", - "purl": "pkg:gem/github" + "purl": "pkg:gem/github@GitHub::VERSION" } ] \ No newline at end of file diff --git a/tests/packagedcode/data/rubygems/gemspec/oj.gemspec-package-only.expected.json b/tests/packagedcode/data/rubygems/gemspec/oj.gemspec-package-only.expected.json index 2094705b872..41c2396b66f 100644 --- a/tests/packagedcode/data/rubygems/gemspec/oj.gemspec-package-only.expected.json +++ b/tests/packagedcode/data/rubygems/gemspec/oj.gemspec-package-only.expected.json @@ -3,7 +3,7 @@ "type": "gem", "namespace": null, "name": "oj", - "version": null, + "version": "::Oj::VERSION", "qualifiers": {}, "subpath": null, "primary_language": "Ruby", @@ -27,7 +27,7 @@ ], "keywords": [], "homepage_url": "http://www.ohler.com/oj", - "download_url": null, + "download_url": "https://rubygems.org/downloads/oj-::Oj::VERSION.gem", "size": null, "sha1": null, "md5": null, @@ -97,10 +97,10 @@ "extra_data": {} } ], - "repository_homepage_url": "https://rubygems.org/gems/oj", - "repository_download_url": null, - "api_data_url": "https://rubygems.org/api/v1/versions/oj.json", + "repository_homepage_url": "https://rubygems.org/gems/oj/versions/::Oj::VERSION", + "repository_download_url": "https://rubygems.org/downloads/oj-::Oj::VERSION.gem", + "api_data_url": "https://rubygems.org/api/v2/rubygems/oj/versions/::Oj::VERSION.json", "datasource_id": "gemspec", - "purl": "pkg:gem/oj" + "purl": "pkg:gem/oj@::Oj::VERSION" } ] \ No newline at end of file diff --git a/tests/packagedcode/data/rubygems/gemspec/oj.gemspec.expected.json b/tests/packagedcode/data/rubygems/gemspec/oj.gemspec.expected.json index 2094705b872..41c2396b66f 100644 --- a/tests/packagedcode/data/rubygems/gemspec/oj.gemspec.expected.json +++ b/tests/packagedcode/data/rubygems/gemspec/oj.gemspec.expected.json @@ -3,7 +3,7 @@ "type": "gem", "namespace": null, "name": "oj", - "version": null, + "version": "::Oj::VERSION", "qualifiers": {}, "subpath": null, "primary_language": "Ruby", @@ -27,7 +27,7 @@ ], "keywords": [], "homepage_url": "http://www.ohler.com/oj", - "download_url": null, + "download_url": "https://rubygems.org/downloads/oj-::Oj::VERSION.gem", "size": null, "sha1": null, "md5": null, @@ -97,10 +97,10 @@ "extra_data": {} } ], - "repository_homepage_url": "https://rubygems.org/gems/oj", - "repository_download_url": null, - "api_data_url": "https://rubygems.org/api/v1/versions/oj.json", + "repository_homepage_url": "https://rubygems.org/gems/oj/versions/::Oj::VERSION", + "repository_download_url": "https://rubygems.org/downloads/oj-::Oj::VERSION.gem", + "api_data_url": "https://rubygems.org/api/v2/rubygems/oj/versions/::Oj::VERSION.json", "datasource_id": "gemspec", - "purl": "pkg:gem/oj" + "purl": "pkg:gem/oj@::Oj::VERSION" } ] \ No newline at end of file diff --git a/tests/packagedcode/data/rubygems/gemspec/rubocop.gemspec.expected.json b/tests/packagedcode/data/rubygems/gemspec/rubocop.gemspec.expected.json index c47e41a3c7a..0c9b94323f8 100644 --- a/tests/packagedcode/data/rubygems/gemspec/rubocop.gemspec.expected.json +++ b/tests/packagedcode/data/rubygems/gemspec/rubocop.gemspec.expected.json @@ -3,7 +3,7 @@ "type": "gem", "namespace": null, "name": "rubocop", - "version": null, + "version": "RuboCop::Version::STRING", "qualifiers": {}, "subpath": null, "primary_language": "Ruby", @@ -41,7 +41,7 @@ ], "keywords": [], "homepage_url": "https://github.com/rubocop-hq/rubocop", - "download_url": null, + "download_url": "https://rubygems.org/downloads/rubocop-RuboCop::Version::STRING.gem", "size": null, "sha1": null, "md5": null, @@ -166,10 +166,10 @@ "extra_data": {} } ], - "repository_homepage_url": "https://rubygems.org/gems/rubocop", - "repository_download_url": null, - "api_data_url": "https://rubygems.org/api/v1/versions/rubocop.json", + "repository_homepage_url": "https://rubygems.org/gems/rubocop/versions/RuboCop::Version::STRING", + "repository_download_url": "https://rubygems.org/downloads/rubocop-RuboCop::Version::STRING.gem", + "api_data_url": "https://rubygems.org/api/v2/rubygems/rubocop/versions/RuboCop::Version::STRING.json", "datasource_id": "gemspec", - "purl": "pkg:gem/rubocop" + "purl": "pkg:gem/rubocop@RuboCop::Version::STRING" } ] \ No newline at end of file diff --git a/tests/packagedcode/data/rubygems/gemspec/with_variables.gemspec.expected.json b/tests/packagedcode/data/rubygems/gemspec/with_variables.gemspec.expected.json index d402d106eb9..9d706386763 100644 --- a/tests/packagedcode/data/rubygems/gemspec/with_variables.gemspec.expected.json +++ b/tests/packagedcode/data/rubygems/gemspec/with_variables.gemspec.expected.json @@ -3,7 +3,7 @@ "type": "gem", "namespace": null, "name": "ProviderDSL::GemDescription::NAME", - "version": null, + "version": "ProviderDSL::GemDescription::VERSION", "qualifiers": {}, "subpath": null, "primary_language": "Ruby", @@ -27,7 +27,7 @@ ], "keywords": [], "homepage_url": "ProviderDSL::GemDescription::PAGE", - "download_url": null, + "download_url": "https://rubygems.org/downloads/ProviderDSL::GemDescription::NAME-ProviderDSL::GemDescription::VERSION.gem", "size": null, "sha1": null, "md5": null, @@ -152,10 +152,10 @@ "extra_data": {} } ], - "repository_homepage_url": "https://rubygems.org/gems/ProviderDSL::GemDescription::NAME", - "repository_download_url": null, - "api_data_url": "https://rubygems.org/api/v1/versions/ProviderDSL::GemDescription::NAME.json", + "repository_homepage_url": "https://rubygems.org/gems/ProviderDSL::GemDescription::NAME/versions/ProviderDSL::GemDescription::VERSION", + "repository_download_url": "https://rubygems.org/downloads/ProviderDSL::GemDescription::NAME-ProviderDSL::GemDescription::VERSION.gem", + "api_data_url": "https://rubygems.org/api/v2/rubygems/ProviderDSL::GemDescription::NAME/versions/ProviderDSL::GemDescription::VERSION.json", "datasource_id": "gemspec", - "purl": "pkg:gem/ProviderDSL::GemDescription::NAME" + "purl": "pkg:gem/ProviderDSL::GemDescription::NAME@ProviderDSL::GemDescription::VERSION" } ] \ No newline at end of file diff --git a/tests/packagedcode/data/rubygems/version-constant/elasticsearch-api.gemspec b/tests/packagedcode/data/rubygems/version-constant/elasticsearch-api.gemspec deleted file mode 100644 index 100a7d6e705..00000000000 --- a/tests/packagedcode/data/rubygems/version-constant/elasticsearch-api.gemspec +++ /dev/null @@ -1,83 +0,0 @@ -# Licensed to Elasticsearch B.V. under one or more contributor -# license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright -# ownership. Elasticsearch B.V. licenses this file to you under -# the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# coding: utf-8 -lib = File.expand_path('../lib', __FILE__) -$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) -require 'elasticsearch/api/version' - -Gem::Specification.new do |s| - s.name = 'elasticsearch-api' - s.version = Elasticsearch::API::VERSION - s.authors = ['Karel Minarik'] - s.email = ['karel.minarik@elasticsearch.org'] - s.summary = 'Ruby API for Elasticsearch.' - s.homepage = 'https://www.elastic.co/guide/en/elasticsearch/client/ruby-api/current/index.html' - s.license = 'Apache-2.0' - s.metadata = { - 'homepage_uri' => 'https://www.elastic.co/guide/en/elasticsearch/client/ruby-api/current/index.html', - 'changelog_uri' => 'https://github.com/elastic/elasticsearch-ruby/blob/main/CHANGELOG.md', - 'source_code_uri' => 'https://github.com/elastic/elasticsearch-ruby/tree/main/elasticsearch-api', - 'bug_tracker_uri' => 'https://github.com/elastic/elasticsearch-ruby/issues' - } - s.files = `git ls-files`.split($/) - s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) } - s.test_files = s.files.grep(%r{^(test|spec|features)/}) - s.require_paths = ['lib'] - - s.extra_rdoc_files = ['README.md', 'LICENSE.txt'] - s.rdoc_options = ['--charset=UTF-8'] - - s.required_ruby_version = '>= 2.5' - - s.add_dependency 'multi_json' - - s.add_development_dependency 'ansi' - s.add_development_dependency 'bundler' - s.add_development_dependency 'elasticsearch' - s.add_development_dependency 'minitest' - s.add_development_dependency 'minitest-reporters' - s.add_development_dependency 'mocha' - s.add_development_dependency 'pry' - s.add_development_dependency 'rake' - s.add_development_dependency 'shoulda-context' - s.add_development_dependency 'yard' - - # Gems for testing integrations - s.add_development_dependency 'jsonify' - s.add_development_dependency 'hashie' - # Temporary support for Ruby 2.6, since it's EOL March 2022: - if RUBY_VERSION < '2.7.0' - s.add_development_dependency 'jbuilder', '< 7.0.0' - else - s.add_development_dependency 'activesupport' - s.add_development_dependency 'jbuilder' - end - - s.add_development_dependency 'cane' - s.add_development_dependency 'escape_utils' unless defined? JRUBY_VERSION - - s.add_development_dependency 'require-prof' unless defined?(JRUBY_VERSION) || defined?(Rubinius) - s.add_development_dependency 'ruby-prof' unless defined?(JRUBY_VERSION) || defined?(Rubinius) - s.add_development_dependency 'simplecov' - - s.add_development_dependency 'test-unit', '~> 2' - - s.description = <<-DESC.gsub(/^ /, '') - Ruby API for Elasticsearch. See the `elasticsearch` gem for full integration. - DESC -end diff --git a/tests/packagedcode/data/rubygems/version-constant/excon.gemspec b/tests/packagedcode/data/rubygems/version-constant/excon.gemspec deleted file mode 100644 index 161eb9bc94d..00000000000 --- a/tests/packagedcode/data/rubygems/version-constant/excon.gemspec +++ /dev/null @@ -1,45 +0,0 @@ -$LOAD_PATH.unshift File.join(File.dirname(__FILE__), 'lib') -require 'excon/version' - -Gem::Specification.new do |s| - s.name = 'excon' - s.version = Excon::VERSION - s.summary = "speed, persistence, http(s)" - s.description = "EXtended http(s) CONnections" - s.authors = ["dpiddy (Dan Peterson)", "geemus (Wesley Beary)", "nextmat (Matt Sanders)"] - s.email = 'geemus@gmail.com' - s.homepage = 'https://github.com/excon/excon' - s.license = 'MIT' - s.rdoc_options = ["--charset=UTF-8"] - s.extra_rdoc_files = %w[README.md CONTRIBUTORS.md CONTRIBUTING.md] - s.files = `git ls-files -- data/* lib/*`.split("\n") + [ - "CONTRIBUTING.md", - "CONTRIBUTORS.md", - "LICENSE.md", - "README.md", - "excon.gemspec" - ] - - s.add_development_dependency('rspec', '>= 3.5.0') - s.add_development_dependency('activesupport') - s.add_development_dependency('delorean') - s.add_development_dependency('eventmachine', '>= 1.0.4') - s.add_development_dependency('open4') - s.add_development_dependency('rake') - s.add_development_dependency('rdoc') - s.add_development_dependency('shindo') - s.add_development_dependency('sinatra') - s.add_development_dependency('sinatra-contrib') - s.add_development_dependency('json', '>= 1.8.5') - s.add_development_dependency('puma') - s.add_development_dependency('webrick') - - s.metadata = { - 'homepage_uri' => 'https://github.com/excon/excon', - 'bug_tracker_uri' => 'https://github.com/excon/excon/issues', - 'changelog_uri' => 'https://github.com/excon/excon/blob/master/changelog.txt', - 'documentation_uri' => 'https://github.com/excon/excon/blob/master/README.md', - 'source_code_uri' => 'https://github.com/excon/excon', - 'wiki_uri' => 'https://github.com/excon/excon/wiki' - } -end diff --git a/tests/packagedcode/data/rubygems/version-constant/faraday.gemspec b/tests/packagedcode/data/rubygems/version-constant/faraday.gemspec deleted file mode 100644 index 1127632356c..00000000000 --- a/tests/packagedcode/data/rubygems/version-constant/faraday.gemspec +++ /dev/null @@ -1,37 +0,0 @@ -# frozen_string_literal: true - -require_relative 'lib/faraday/version' - -Gem::Specification.new do |spec| - spec.name = 'faraday' - spec.version = Faraday::VERSION - - spec.summary = 'HTTP/REST API client library.' - - spec.authors = ['@technoweenie', '@iMacTia', '@olleolleolle'] - spec.email = 'technoweenie@gmail.com' - spec.homepage = 'https://lostisland.github.io/faraday' - spec.licenses = ['MIT'] - - spec.required_ruby_version = '>= 2.6' - - # faraday-net_http is the "default adapter", but being a Faraday dependency it can't - # control which version of faraday it will be pulled from. - # To avoid releasing a major version every time there's a new Faraday API, we should - # always fix its required version to the next MINOR version. - # This way, we can release minor versions of the adapter with "breaking" changes for older versions of Faraday - # and then bump the version requirement on the next compatible version of faraday. - spec.add_dependency 'faraday-net_http', '>= 2.0', '< 3.1' - spec.add_dependency 'ruby2_keywords', '>= 0.0.4' - - # Includes `examples` and `spec` to allow external adapter gems to run Faraday unit and integration tests - spec.files = Dir['CHANGELOG.md', '{examples,lib,spec}/**/*', 'LICENSE.md', 'Rakefile', 'README.md'] - spec.require_paths = %w[lib spec/external_adapters] - spec.metadata = { - 'homepage_uri' => 'https://lostisland.github.io/faraday', - 'changelog_uri' => - "https://github.com/lostisland/faraday/releases/tag/v#{spec.version}", - 'source_code_uri' => 'https://github.com/lostisland/faraday', - 'bug_tracker_uri' => 'https://github.com/lostisland/faraday/issues' - } -end diff --git a/tests/packagedcode/data/rubygems/version-constant/simple-constant.gemspec b/tests/packagedcode/data/rubygems/version-constant/simple-constant.gemspec deleted file mode 100644 index e885a9453cd..00000000000 --- a/tests/packagedcode/data/rubygems/version-constant/simple-constant.gemspec +++ /dev/null @@ -1,6 +0,0 @@ -Gem::Specification.new do |s| - s.name = 'my-gem' - s.version = MyGem::VERSION - s.summary = 'A gem with a version constant' - s.license = 'MIT' -end diff --git a/tests/packagedcode/data/rubygems/version-constant/simple-version.gemspec b/tests/packagedcode/data/rubygems/version-constant/simple-version.gemspec deleted file mode 100644 index 3fb26f707c7..00000000000 --- a/tests/packagedcode/data/rubygems/version-constant/simple-version.gemspec +++ /dev/null @@ -1,6 +0,0 @@ -Gem::Specification.new do |s| - s.name = 'my-gem' - s.version = '1.2.3' - s.summary = 'A gem with a real version' - s.license = 'MIT' -end diff --git a/tests/packagedcode/test_rubygems.py b/tests/packagedcode/test_rubygems.py index ac89125a828..af55f355ab7 100644 --- a/tests/packagedcode/test_rubygems.py +++ b/tests/packagedcode/test_rubygems.py @@ -15,7 +15,6 @@ from commoncode.testcase import FileBasedTesting from packagedcode import rubygems -from packagedcode import spec from packages_test_utils import PackageTester from scancode_config import REGEN_TEST_FIXTURES @@ -24,63 +23,6 @@ # see also https://rubygems.org/downloads/jaro_winkler-1.5.1-java.gem -class TestGemspecVersionConstant(PackageTester): - test_data_dir = os.path.join(os.path.dirname(__file__), 'data') - - def _check_version_constant_package(self, test_path, expected_name): - test_file = self.get_test_loc(test_path) - packages = list(rubygems.GemspecHandler.parse(test_file)) - assert packages - pkg = packages[0] - assert pkg.name == expected_name - assert pkg.version is None - assert pkg.download_url is None - assert pkg.api_data_url == f'https://rubygems.org/api/v1/versions/{expected_name}.json' - return pkg - - def test_version_constant_returns_none_for_elasticsearch(self): - pkg = self._check_version_constant_package( - test_path='rubygems/version-constant/elasticsearch-api.gemspec', - expected_name='elasticsearch-api', - ) - assert 'Elasticsearch' not in str(pkg.version) - - def test_version_constant_returns_none_for_excon(self): - self._check_version_constant_package( - test_path='rubygems/version-constant/excon.gemspec', - expected_name='excon', - ) - - def test_version_constant_returns_none_for_faraday(self): - self._check_version_constant_package( - test_path='rubygems/version-constant/faraday.gemspec', - expected_name='faraday', - ) - - def test_version_constant_returns_none_for_simple_constant(self): - self._check_version_constant_package( - test_path='rubygems/version-constant/simple-constant.gemspec', - expected_name='my-gem', - ) - - def test_real_version_is_preserved(self): - test_file = self.get_test_loc('rubygems/version-constant/simple-version.gemspec') - packages = list(rubygems.GemspecHandler.parse(test_file)) - assert packages - pkg = packages[0] - assert pkg.name == 'my-gem' - assert pkg.version == '1.2.3' - - def test_is_ruby_version_constant_function(self): - assert spec.is_ruby_version_constant('Elasticsearch::API::VERSION') is True - assert spec.is_ruby_version_constant('MyGem::VERSION') is True - assert spec.is_ruby_version_constant('Faraday::VERSION') is True - assert spec.is_ruby_version_constant('1.0.0') is False - assert spec.is_ruby_version_constant("'2.3.4'") is False - assert spec.is_ruby_version_constant(None) is False - assert spec.is_ruby_version_constant('') is False - - class TestGemSpec(PackageTester): test_data_dir = os.path.join(os.path.dirname(__file__), 'data') From 1229323d3059cbcc04c44eebebe4e20b13660d8a Mon Sep 17 00:00:00 2001 From: kumarasantosh Date: Wed, 15 Apr 2026 21:42:22 +0530 Subject: [PATCH 7/7] packagedcode: fix publiccode license extraction Signed-off-by: kumarasantosh --- src/packagedcode/publiccode.py | 174 +++++++++------- .../data/publiccode/publiccode.yml | 2 + .../publiccode/publiccode.yml-expected.json | 88 ++++++++ .../publiccode/publiccode.yml-scancode.json | 193 ++++++++++++++++++ tests/packagedcode/test_publiccode.py | 78 ++++--- 5 files changed, 427 insertions(+), 108 deletions(-) create mode 100644 tests/packagedcode/data/publiccode/publiccode.yml-expected.json create mode 100644 tests/packagedcode/data/publiccode/publiccode.yml-scancode.json diff --git a/src/packagedcode/publiccode.py b/src/packagedcode/publiccode.py index ba7d59fd2c8..5a1d299da85 100644 --- a/src/packagedcode/publiccode.py +++ b/src/packagedcode/publiccode.py @@ -7,8 +7,7 @@ # See https://aboutcode.org for more information about nexB OSS projects. # -import logging -import os +import io import saneyaml @@ -20,14 +19,17 @@ See https://github.com/publiccodeyml/publiccode.yml """ -TRACE = os.environ.get('SCANCODE_DEBUG_PACKAGE', False) - -logger = logging.getLogger(__name__) +EXTRA_DATA_KEYS = ( + 'publiccodeYmlVersion', + 'platforms', + 'developmentStatus', + 'softwareType', +) class PubliccodeYmlHandler(models.DatafileHandler): datasource_id = 'publiccode_yml' - path_patterns = ('*/publiccode.yml', '*/publiccode.yaml') + path_patterns = ('*publiccode.yml', '*publiccode.yaml') default_package_type = 'publiccode' default_primary_language = None description = 'publiccode.yml metadata file' @@ -35,81 +37,34 @@ class PubliccodeYmlHandler(models.DatafileHandler): @classmethod def parse(cls, location, package_only=False): - with open(location, 'rb') as f: - data = saneyaml.load(f.read()) - - if not data or not isinstance(data, dict): - return + with io.open(location, encoding='utf-8') as loc: + data = saneyaml.load(loc.read()) - # Validate: a publiccode.yml must have 'publiccodeYmlVersion' - if 'publiccodeYmlVersion' not in data: + if not is_publiccode_yml_data(data): return - name = data.get('name') - version = data.get('softwareVersion') - vcs_url = data.get('url') - homepage_url = data.get('landingURL') or vcs_url - - # License is under legal.license (SPDX expression) - legal = data.get('legal') or {} - declared_license = legal.get('license') - copyright_statement = legal.get('mainCopyrightOwner') or legal.get('repoOwner') - - # Description: prefer English, fall back to first available language - description = _get_description(data) - - # Keywords from categories - categories = data.get('categories') or [] - keywords = ', '.join(categories) if categories else None - - # Parties from maintenance.contacts - parties = [] - maintenance = data.get('maintenance') or {} - for contact in maintenance.get('contacts') or []: - contact_name = contact.get('name') - contact_email = contact.get('email') - if contact_name or contact_email: - parties.append( - models.Party( - type=models.party_person, - name=contact_name, - email=contact_email, - role='maintainer', - ) - ) - - # Extra data - extra_data = {} - schema_version = data.get('publiccodeYmlVersion') - if schema_version: - extra_data['publiccodeYmlVersion'] = schema_version - platforms = data.get('platforms') - if platforms: - extra_data['platforms'] = platforms - development_status = data.get('developmentStatus') - if development_status: - extra_data['developmentStatus'] = development_status - software_type = data.get('softwareType') - if software_type: - extra_data['softwareType'] = software_type - - yield models.PackageData( + package_data = dict( datasource_id=cls.datasource_id, type=cls.default_package_type, - name=name, - version=version, - vcs_url=vcs_url, - homepage_url=homepage_url, - description=description, - declared_license_expression=declared_license, - copyright=copyright_statement, - keywords=keywords, - parties=parties, - extra_data=extra_data or None, + name=data.get('name'), + version=data.get('softwareVersion'), + vcs_url=data.get('url'), + homepage_url=data.get('landingURL') or data.get('url'), + description=get_description(data), + extracted_license_statement=get_extracted_license_statement(data), + copyright=get_copyright_statement(data), + keywords=get_categories(data), + parties=get_parties(data), + extra_data=get_extra_data(data) or None, ) + yield models.PackageData.from_data(package_data, package_only) + +def is_publiccode_yml_data(data): + return isinstance(data, dict) and 'publiccodeYmlVersion' in data -def _get_description(data): + +def get_description(data): """ Extract the best available description from publiccode.yml's multilingual 'description' block. Prefer English, fall back to @@ -119,11 +74,16 @@ def _get_description(data): if not description_block: return - lang_data = ( - description_block.get('en') - or description_block.get('eng') - or next(iter(description_block.values()), None) - ) + lang_data = None + for language, localized_description in description_block.items(): + primary_language = language.lower().split('-')[0] + if primary_language == 'en': + lang_data = localized_description + break + + if not lang_data: + lang_data = next(iter(description_block.values()), None) + if not lang_data: return @@ -131,3 +91,61 @@ def _get_description(data): short_desc = lang_data.get('shortDescription', '').strip() return long_desc or short_desc or None + + +def get_extracted_license_statement(data): + legal = data.get('legal') or {} + return legal.get('license') + + +def get_copyright_statement(data): + legal = data.get('legal') or {} + copyright_holders = [] + + for key in ('mainCopyrightOwner', 'repoOwner'): + value = legal.get(key) + if value and value not in copyright_holders: + copyright_holders.append(value) + + return '\n'.join(copyright_holders) or None + + +def get_categories(data): + categories = data.get('categories') or [] + if isinstance(categories, str): + return [categories] + return categories + + +def get_parties(data): + parties = [] + maintenance = data.get('maintenance') or {} + + for contact in maintenance.get('contacts') or []: + contact_name = contact.get('name') + contact_email = contact.get('email') + + if not (contact_name or contact_email): + continue + + parties.append( + models.Party( + type=models.party_person, + name=contact_name, + email=contact_email, + role='maintainer', + ) + ) + + return parties + + +def get_extra_data(data): + extra_data = {} + + for key in EXTRA_DATA_KEYS: + value = data.get(key) + if value: + extra_data[key] = value + + return extra_data diff --git a/tests/packagedcode/data/publiccode/publiccode.yml b/tests/packagedcode/data/publiccode/publiccode.yml index 10728af5dd6..d7b7a28e768 100644 --- a/tests/packagedcode/data/publiccode/publiccode.yml +++ b/tests/packagedcode/data/publiccode/publiccode.yml @@ -1,3 +1,5 @@ +# Hand-crafted publiccode.yml test fixture based on examples from: +# https://github.com/publiccodeyml/publiccode.yml/blob/main/docs/standard/schema.core.rst publiccodeYmlVersion: "0.4" name: Medusa diff --git a/tests/packagedcode/data/publiccode/publiccode.yml-expected.json b/tests/packagedcode/data/publiccode/publiccode.yml-expected.json new file mode 100644 index 00000000000..f1c4811efda --- /dev/null +++ b/tests/packagedcode/data/publiccode/publiccode.yml-expected.json @@ -0,0 +1,88 @@ +[ + { + "type": "publiccode", + "namespace": null, + "name": "Medusa", + "version": "1.0.3", + "qualifiers": {}, + "subpath": null, + "primary_language": null, + "description": "A very long description of this software. It explains what it does, who it is for, and why you might want to use it in a public administration context.", + "release_date": null, + "parties": [ + { + "type": "person", + "role": "maintainer", + "name": "Francesco Rossi", + "email": "f.rossi@example.com", + "url": null + } + ], + "keywords": [ + "financial-reporting", + "accounting" + ], + "homepage_url": "https://example.com/medusa", + "download_url": null, + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": "https://example.com/italia/medusa.git", + "copyright": "City of Example", + "holder": "City of Example", + "declared_license_expression": "agpl-3.0-plus", + "declared_license_expression_spdx": "AGPL-3.0-or-later", + "license_detections": [ + { + "license_expression": "agpl-3.0-plus", + "license_expression_spdx": "AGPL-3.0-or-later", + "matches": [ + { + "license_expression": "agpl-3.0-plus", + "license_expression_spdx": "AGPL-3.0-or-later", + "from_file": null, + "start_line": 1, + "end_line": 1, + "matcher": "1-hash", + "score": 100.0, + "matched_length": 5, + "match_coverage": 100.0, + "rule_relevance": 100, + "rule_identifier": "spdx_license_id_agpl-3.0-or-later_for_agpl-3.0-plus.RULE", + "rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/spdx_license_id_agpl-3.0-or-later_for_agpl-3.0-plus.RULE", + "matched_text": "AGPL-3.0-or-later" + } + ], + "identifier": "agpl_3_0_plus-a0f62d44-7e99-852b-0b1c-0bc5e1c9f6d0" + } + ], + "other_license_expression": null, + "other_license_expression_spdx": null, + "other_license_detections": [], + "extracted_license_statement": "AGPL-3.0-or-later", + "notice_text": null, + "source_packages": [], + "file_references": [], + "is_private": false, + "is_virtual": false, + "extra_data": { + "publiccodeYmlVersion": "0.4", + "platforms": [ + "web", + "linux" + ], + "developmentStatus": "stable", + "softwareType": "standalone/desktop" + }, + "dependencies": [], + "repository_homepage_url": null, + "repository_download_url": null, + "api_data_url": null, + "datasource_id": "publiccode_yml", + "purl": "pkg:publiccode/Medusa@1.0.3" + } +] \ No newline at end of file diff --git a/tests/packagedcode/data/publiccode/publiccode.yml-scancode.json b/tests/packagedcode/data/publiccode/publiccode.yml-scancode.json new file mode 100644 index 00000000000..b5c9f8359e0 --- /dev/null +++ b/tests/packagedcode/data/publiccode/publiccode.yml-scancode.json @@ -0,0 +1,193 @@ +{ + "packages": [ + { + "type": "publiccode", + "namespace": null, + "name": "Medusa", + "version": "1.0.3", + "qualifiers": {}, + "subpath": null, + "primary_language": null, + "description": "A very long description of this software. It explains what it does, who it is for, and why you might want to use it in a public administration context.", + "release_date": null, + "parties": [ + { + "type": "person", + "role": "maintainer", + "name": "Francesco Rossi", + "email": "f.rossi@example.com", + "url": null + } + ], + "keywords": [ + "financial-reporting", + "accounting" + ], + "homepage_url": "https://example.com/medusa", + "download_url": null, + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": "https://example.com/italia/medusa.git", + "copyright": "City of Example", + "holder": "City of Example", + "declared_license_expression": "agpl-3.0-plus", + "declared_license_expression_spdx": "AGPL-3.0-or-later", + "license_detections": [ + { + "license_expression": "agpl-3.0-plus", + "license_expression_spdx": "AGPL-3.0-or-later", + "matches": [ + { + "license_expression": "agpl-3.0-plus", + "license_expression_spdx": "AGPL-3.0-or-later", + "from_file": "publiccode.yml", + "start_line": 1, + "end_line": 1, + "matcher": "1-hash", + "score": 100.0, + "matched_length": 5, + "match_coverage": 100.0, + "rule_relevance": 100, + "rule_identifier": "spdx_license_id_agpl-3.0-or-later_for_agpl-3.0-plus.RULE", + "rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/spdx_license_id_agpl-3.0-or-later_for_agpl-3.0-plus.RULE", + "matched_text": "AGPL-3.0-or-later" + } + ], + "identifier": "agpl_3_0_plus-a0f62d44-7e99-852b-0b1c-0bc5e1c9f6d0" + } + ], + "other_license_expression": null, + "other_license_expression_spdx": null, + "other_license_detections": [], + "extracted_license_statement": "AGPL-3.0-or-later", + "notice_text": null, + "source_packages": [], + "is_private": false, + "is_virtual": false, + "extra_data": { + "publiccodeYmlVersion": "0.4", + "platforms": [ + "web", + "linux" + ], + "developmentStatus": "stable", + "softwareType": "standalone/desktop" + }, + "repository_homepage_url": null, + "repository_download_url": null, + "api_data_url": null, + "package_uid": "pkg:publiccode/Medusa@1.0.3?uuid=fixed-uid-done-for-testing-5642512d1758", + "datafile_paths": [ + "publiccode.yml" + ], + "datasource_ids": [ + "publiccode_yml" + ], + "purl": "pkg:publiccode/Medusa@1.0.3" + } + ], + "dependencies": [], + "files": [ + { + "path": "publiccode.yml", + "type": "file", + "package_data": [ + { + "type": "publiccode", + "namespace": null, + "name": "Medusa", + "version": "1.0.3", + "qualifiers": {}, + "subpath": null, + "primary_language": null, + "description": "A very long description of this software. It explains what it does, who it is for, and why you might want to use it in a public administration context.", + "release_date": null, + "parties": [ + { + "type": "person", + "role": "maintainer", + "name": "Francesco Rossi", + "email": "f.rossi@example.com", + "url": null + } + ], + "keywords": [ + "financial-reporting", + "accounting" + ], + "homepage_url": "https://example.com/medusa", + "download_url": null, + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": "https://example.com/italia/medusa.git", + "copyright": "City of Example", + "holder": "City of Example", + "declared_license_expression": "agpl-3.0-plus", + "declared_license_expression_spdx": "AGPL-3.0-or-later", + "license_detections": [ + { + "license_expression": "agpl-3.0-plus", + "license_expression_spdx": "AGPL-3.0-or-later", + "matches": [ + { + "license_expression": "agpl-3.0-plus", + "license_expression_spdx": "AGPL-3.0-or-later", + "from_file": "publiccode.yml", + "start_line": 1, + "end_line": 1, + "matcher": "1-hash", + "score": 100.0, + "matched_length": 5, + "match_coverage": 100.0, + "rule_relevance": 100, + "rule_identifier": "spdx_license_id_agpl-3.0-or-later_for_agpl-3.0-plus.RULE", + "rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/spdx_license_id_agpl-3.0-or-later_for_agpl-3.0-plus.RULE", + "matched_text": "AGPL-3.0-or-later" + } + ], + "identifier": "agpl_3_0_plus-a0f62d44-7e99-852b-0b1c-0bc5e1c9f6d0" + } + ], + "other_license_expression": null, + "other_license_expression_spdx": null, + "other_license_detections": [], + "extracted_license_statement": "AGPL-3.0-or-later", + "notice_text": null, + "source_packages": [], + "file_references": [], + "is_private": false, + "is_virtual": false, + "extra_data": { + "publiccodeYmlVersion": "0.4", + "platforms": [ + "web", + "linux" + ], + "developmentStatus": "stable", + "softwareType": "standalone/desktop" + }, + "dependencies": [], + "repository_homepage_url": null, + "repository_download_url": null, + "api_data_url": null, + "datasource_id": "publiccode_yml", + "purl": "pkg:publiccode/Medusa@1.0.3" + } + ], + "for_packages": [ + "pkg:publiccode/Medusa@1.0.3?uuid=fixed-uid-done-for-testing-5642512d1758" + ], + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/tests/packagedcode/test_publiccode.py b/tests/packagedcode/test_publiccode.py index e4755e27132..c1b09a3b9fd 100644 --- a/tests/packagedcode/test_publiccode.py +++ b/tests/packagedcode/test_publiccode.py @@ -5,42 +5,60 @@ # import os -import pytest -from packagedcode.publiccode import PubliccodeYmlHandler +from packagedcode import publiccode +from packages_test_utils import PackageTester +from scancode.cli_test_utils import check_json_scan +from scancode.cli_test_utils import run_scan_click +from scancode_config import REGEN_TEST_FIXTURES -TESTDATA_DIR = os.path.join(os.path.dirname(__file__), 'data', 'publiccode') +class TestPubliccode(PackageTester): + test_data_dir = os.path.join(os.path.dirname(__file__), 'data') -def test_publiccode_yml_basic(): - location = os.path.join(TESTDATA_DIR, 'publiccode.yml') - packages = list(PubliccodeYmlHandler.parse(location)) - assert len(packages) == 1 - pkg = packages[0] + def test_publiccode_yml_is_datafile(self): + test_file = self.get_test_loc('publiccode/publiccode.yml') + assert publiccode.PubliccodeYmlHandler.is_datafile(test_file) - assert pkg.name == 'Medusa' - assert pkg.version == '1.0.3' - assert pkg.vcs_url == 'https://example.com/italia/medusa.git' - assert pkg.homepage_url == 'https://example.com/medusa' - assert pkg.declared_license_expression == 'AGPL-3.0-or-later' - assert pkg.copyright == 'City of Example' - assert 'financial-reporting' in pkg.keywords - assert len(pkg.parties) == 1 - assert pkg.parties[0].name == 'Francesco Rossi' - assert pkg.parties[0].email == 'f.rossi@example.com' - assert pkg.parties[0].role == 'maintainer' + def test_parse_publiccode_yml(self): + test_file = self.get_test_loc('publiccode/publiccode.yml') + packages = publiccode.PubliccodeYmlHandler.parse(test_file) + expected_loc = self.get_test_loc( + 'publiccode/publiccode.yml-expected.json', + must_exist=False, + ) + self.check_packages_data( + packages_data=packages, + expected_loc=expected_loc, + must_exist=False, + regen=REGEN_TEST_FIXTURES, + ) + def test_scan_cli_works(self): + test_file = self.get_test_loc('publiccode/publiccode.yml') + expected_file = self.get_test_loc( + 'publiccode/publiccode.yml-scancode.json', + must_exist=False, + ) + result_file = self.get_temp_file('results.json') + run_scan_click(['--package', test_file, '--json', result_file]) + check_json_scan( + expected_file=expected_file, + result_file=result_file, + remove_uuid=True, + regen=REGEN_TEST_FIXTURES, + ) -def test_publiccode_yml_no_version_key_returns_nothing(tmp_path): - """A YAML file without publiccodeYmlVersion should yield nothing.""" - f = tmp_path / 'publiccode.yml' - f.write_text('name: something\nversion: 1.0\n') - packages = list(PubliccodeYmlHandler.parse(str(f))) - assert packages == [] + def test_publiccode_yml_no_version_key_returns_nothing(self): + test_file = self.get_temp_file(extension='yml', file_name='publiccode') + with open(test_file, 'w') as temp_file: + temp_file.write('name: something\nversion: 1.0\n') + packages = list(publiccode.PubliccodeYmlHandler.parse(test_file)) + assert packages == [] -def test_publiccode_yml_path_patterns(): - assert PubliccodeYmlHandler.path_patterns == ( - '*/publiccode.yml', - '*/publiccode.yaml', - ) + def test_publiccode_yml_path_patterns(self): + assert publiccode.PubliccodeYmlHandler.path_patterns == ( + '*publiccode.yml', + '*publiccode.yaml', + )