Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 2ec6013

Browse files
committed
Merge branch '4190-license-licence' into improve-required
2 parents cd8389c + 1ff605e commit 2ec6013

File tree

10 files changed

+237
-59
lines changed

10 files changed

+237
-59
lines changed

src/licensedcode/detection.py

+3-13
Original file line numberDiff line numberDiff line change
@@ -338,20 +338,10 @@ def identifier_with_expression(self):
338338
def is_unknown(self):
339339
"""
340340
Return True if there are unknown license keys in the license expression
341-
for this detection, return False otherwise.
341+
for this detection, return False otherwise. By design these are licenses with "unknown" in
342+
their key.
342343
"""
343-
unknown_license_keys = [
344-
"unknown-license-reference",
345-
"unknown-spdx",
346-
"unknown",
347-
"free-unknown"
348-
]
349-
350-
for license_key in unknown_license_keys:
351-
if license_key in self.license_expression:
352-
return True
353-
354-
return False
344+
return 'unknown' in self.license_expression
355345

356346
def get_start_end_line(self):
357347
"""

src/licensedcode/models.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2338,7 +2338,7 @@ def _set_continuous(self):
23382338

23392339
def build_required_phrase_spans(self):
23402340
"""
2341-
Return a list of Spans marking key phrases token positions of that must
2341+
Return a list of Spans marking the token positions of required phrases that must
23422342
be present for this rule to be matched.
23432343
"""
23442344
if self.is_from_license:

src/packagedcode/alpine.py

+61-22
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#
99
import base64
1010
import codecs
11+
import dataclasses
1112
import email
1213
import posixpath
1314
import re
@@ -1362,32 +1363,76 @@ def source_handler(value, **kwargs):
13621363
}
13631364

13641365

1365-
def detect_declared_license(declared):
1366+
@dataclasses.dataclass
1367+
class ApkLicenseDetection:
1368+
"""
1369+
Represent the results of an Alpine license detection, including intermediate steps.
13661370
"""
1367-
Return a tuple of (cleaned declared license, detected license expression)
1368-
strings from a ``declared`` license text. Both can be None.
1371+
declared_license: str
1372+
cleaned_license: str
1373+
mapped_license: str
1374+
license_detections: list
1375+
license_expression: str
1376+
1377+
def to_dict(self):
1378+
return dict(
1379+
declared_license=self.declared_license ,
1380+
cleaned_license=self.cleaned_license ,
1381+
mapped_license=self.mapped_license ,
1382+
license_detections=self.license_detections ,
1383+
license_expression=self.license_expression ,
1384+
)
1385+
1386+
1387+
def get_alpine_license_detection(declared):
1388+
"""
1389+
Return an ApkLicenseDetection from a ``declared`` license text
13691390
"""
13701391
# cleaning first to fix syntax quirks and try to get something we can parse
1371-
cleaned = normalize_and_cleanup_declared_license(declared)
1372-
if not cleaned:
1373-
return None, None
1392+
cleaned_license = normalize_and_cleanup_declared_license(declared)
1393+
if not cleaned_license:
1394+
return None
13741395

1375-
# then we apply mappings for known non-standard symbols
1396+
# then we apply mappings for known non-standard symbols.
13761397
# the output should be a proper SPDX expression
1377-
mapped = apply_expressions_mapping(cleaned)
1398+
mapped_license = apply_expressions_mapping(cleaned_license)
13781399

13791400
# Finally perform SPDX expressions detection: Alpine uses mostly SPDX, but
13801401
# with some quirks such as some non standard symbols (in addition to the
13811402
# non-standard syntax)
13821403
extra_licenses = {}
13831404
expression_symbols = get_license_symbols(extra_licenses=extra_licenses)
13841405

1385-
license_detections, detected_license_expression = get_license_detections_and_expression(
1386-
extracted_license_statement=mapped,
1406+
license_detections, license_expression = get_license_detections_and_expression(
1407+
extracted_license_statement=mapped_license,
13871408
expression_symbols=expression_symbols,
13881409
)
13891410

1390-
return cleaned, detected_license_expression, license_detections
1411+
return ApkLicenseDetection(
1412+
declared_license=declared,
1413+
cleaned_license=cleaned_license,
1414+
mapped_license=mapped_license,
1415+
license_expression=license_expression,
1416+
license_detections=license_detections,
1417+
)
1418+
1419+
1420+
def detect_declared_license(declared):
1421+
"""
1422+
Return a three-tuple of detected license data from a ``declared`` license text, with this shape:
1423+
(cleaned declared license, detected license expression, license_detections)
1424+
- cleaned declared license and detected license expression are strings.
1425+
- license_detections is a list of LicenseDetection.
1426+
- Any of these can be None.
1427+
"""
1428+
if alpine_detection := get_alpine_license_detection(declared):
1429+
return (
1430+
alpine_detection.cleaned_license,
1431+
alpine_detection.license_expression,
1432+
alpine_detection.license_detections,
1433+
)
1434+
else:
1435+
return None, None, None
13911436

13921437

13931438
def get_license_symbols(extra_licenses):
@@ -1416,25 +1461,19 @@ def get_license_symbols(extra_licenses):
14161461
def normalize_and_cleanup_declared_license(declared):
14171462
"""
14181463
Return a cleaned and normalized declared license.
1419-
1420-
The expression should be valida SPDX but are far from this in practice.
1421-
1464+
The expressions should be valid SPDX license expressions but they are far from this in practice.
14221465
Several fixes are applied:
1423-
14241466
- plain text replacement aka. syntax fixes are plain text replacements
14251467
to make the expression parsable
1426-
14271468
- common fixes also include handling space-separated and comma-separated
14281469
lists of licenses
14291470
"""
14301471
declared = declared or ''
14311472

1432-
# normalize spaces
1473+
# normalize spaces and case
14331474
declared = ' '.join(declared.split())
1434-
14351475
declared = declared.lower()
14361476

1437-
# performa replacements
14381477
declared = apply_syntax_fixes(declared)
14391478

14401479
# comma-separated as in gpl-2.0+, lgpl-2.1+, zlib
@@ -1516,15 +1555,15 @@ def normalize_and_cleanup_declared_license(declared):
15161555

15171556
def apply_syntax_fixes(s):
15181557
"""
1519-
Fix the expression string s by aplying replacement for various quirks.
1558+
Fix the expression string ``s`` by applying replacements for various quirks to get clean license
1559+
expression syntax.
15201560
"""
15211561
for src, tgt in EXPRESSION_SYNTAX_FIXES.items():
15221562
s = s.replace(src, tgt)
15231563
return s
15241564

1525-
# These are parsed expression objects replacement that make the expression SPDX compliant
1526-
15271565

1566+
# These are replacements for parsed expression objects that make the expression SPDX compliant
15281567
# {alpine sub-expression: SPDX subexpression}
15291568
DECLARED_TO_SPDX = {
15301569
'openssl-exception': 'licenseref-scancode-generic-exception',

src/packagedcode/licensing.py

+10-11
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import logging
1111
import os
1212

13+
import saneyaml
1314
from license_expression import Licensing
1415

1516
from licensedcode.cache import build_spdx_license_expression
@@ -35,9 +36,6 @@
3536
from summarycode.classify import LEGAL_STARTS_ENDS
3637
from summarycode.classify import README_STARTS_ENDS
3738

38-
import saneyaml
39-
40-
4139
"""
4240
Detect and normalize licenses as found in package manifests data.
4341
"""
@@ -59,7 +57,6 @@ def logger_debug(*args):
5957
def logger_debug(*args):
6058
return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args))
6159

62-
6360
RESOURCE_TO_PACKAGE_LICENSE_FIELDS = {
6461
'detected_license_expression': 'declared_license_expression',
6562
'detected_license_expression_spdx': 'declared_license_expression_spdx',
@@ -255,7 +252,7 @@ def add_referenced_license_detection_from_package(resource, codebase):
255252
continue
256253

257254
for sibling_detection in sibling_license_detections:
258-
255+
259256
modified = True
260257
detection_modified = True
261258
license_match_mappings.extend(sibling_detection["matches"])
@@ -585,7 +582,9 @@ def get_license_expression_from_detection_mappings(
585582

586583
def matches_have_unknown(matches, licensing=Licensing()):
587584
"""
588-
Return True if any of the LicenseMatch in `matches` has an unknown license.
585+
Return True if any of the LicenseMatch in ``matches`` has an unknown license.
586+
Note that by construction and design, an unknown license must have the word "unknown" in its
587+
license key, but we only care about two specific license keys, and not all license keys.
589588
"""
590589
for match in matches:
591590
exp = match.rule.license_expression_object
@@ -719,8 +718,7 @@ def get_normalized_license_detections(
719718
expression_symbols=None,
720719
):
721720
"""
722-
Return a normalized license expression string detected from a list of
723-
declared license items.
721+
Return a list of LicenseDetection detected in ``extracted_license`` data.
724722
"""
725723
license_detections = []
726724

@@ -743,6 +741,7 @@ def get_normalized_license_detections(
743741
logger_debug(f'get_normalized_license_detections: str:')
744742

745743
elif isinstance(extracted_license, dict):
744+
# FIXME: why ignoring keys?
746745
for extracted_license_statement in extracted_license.values():
747746
detections = get_license_detections_for_extracted_license_statement(
748747
extracted_license_statement=extracted_license_statement,
@@ -757,6 +756,7 @@ def get_normalized_license_detections(
757756
license_detections.extend(detections)
758757

759758
if not license_detections:
759+
# FIXME: we should never detect on dict representation, but on a YAML dump instead
760760
unknown_dict_object = repr(dict(extracted_license.items()))
761761
unknown_detection = get_unknown_license_detection(query_string=unknown_dict_object)
762762
license_detections.append(unknown_detection)
@@ -828,7 +828,7 @@ def get_license_detections_and_expression(
828828
try_as_expression=True,
829829
approximate=True,
830830
expression_symbols=None,
831-
datasource_id = None,
831+
datasource_id=None,
832832
):
833833
"""
834834
Given a text `extracted_license_statement` return a list of LicenseDetection objects.
@@ -893,8 +893,7 @@ def get_license_detections_for_extracted_license_statement(
893893
expression_symbols=None,
894894
):
895895
"""
896-
Return a list of LicenseDetection objects after detecting licenses in
897-
the given `extracted_license_statement`.
896+
Return a list of LicenseDetection detected in the ``extracted_license_statement`` string.
898897
"""
899898
if not extracted_license_statement:
900899
return []

src/packagedcode/recognize.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ def _parse(
118118
# We should continue when an Exception has occurred when trying to
119119
# recognize a package
120120
if TRACE:
121-
logger_debug(f'_parse: Exception: {str(e)}')
121+
import traceback
122+
logger_debug(f'_parse: Exception: {str(e)} : {traceback.format_exc()}')
123+
raise Exception(f'_parse: error') from e
122124

123125
continue

0 commit comments

Comments
 (0)