|
8 | 8 | #
|
9 | 9 | import base64
|
10 | 10 | import codecs
|
| 11 | +import dataclasses |
11 | 12 | import email
|
12 | 13 | import posixpath
|
13 | 14 | import re
|
@@ -1362,32 +1363,76 @@ def source_handler(value, **kwargs):
|
1362 | 1363 | }
|
1363 | 1364 |
|
1364 | 1365 |
|
1365 |
| -def detect_declared_license(declared): |
| 1366 | +@dataclasses.dataclass |
| 1367 | +class ApkLicenseDetection: |
| 1368 | + """ |
| 1369 | + Represent the results of an Alpine license detection, including intermediate steps. |
1366 | 1370 | """
|
1367 |
| - Return a tuple of (cleaned declared license, detected license expression) |
1368 |
| - strings from a ``declared`` license text. Both can be None. |
| 1371 | + declared_license: str |
| 1372 | + cleaned_license: str |
| 1373 | + mapped_license: str |
| 1374 | + license_detections: list |
| 1375 | + license_expression: str |
| 1376 | + |
| 1377 | + def to_dict(self): |
| 1378 | + return dict( |
| 1379 | + declared_license=self.declared_license , |
| 1380 | + cleaned_license=self.cleaned_license , |
| 1381 | + mapped_license=self.mapped_license , |
| 1382 | + license_detections=self.license_detections , |
| 1383 | + license_expression=self.license_expression , |
| 1384 | + ) |
| 1385 | + |
| 1386 | + |
| 1387 | +def get_alpine_license_detection(declared): |
| 1388 | + """ |
| 1389 | + Return an ApkLicenseDetection from a ``declared`` license text, or None if the cleaned license is empty. |
1369 | 1390 | """
|
1370 | 1391 | # cleaning first to fix syntax quirks and try to get something we can parse
|
1371 |
| - cleaned = normalize_and_cleanup_declared_license(declared) |
1372 |
| - if not cleaned: |
1373 |
| - return None, None |
| 1392 | + cleaned_license = normalize_and_cleanup_declared_license(declared) |
| 1393 | + if not cleaned_license: |
| 1394 | + return None |
1374 | 1395 |
|
1375 |
| - # then we apply mappings for known non-standard symbols |
| 1396 | + # then we apply mappings for known non-standard symbols. |
1376 | 1397 | # the output should be a proper SPDX expression
|
1377 |
| - mapped = apply_expressions_mapping(cleaned) |
| 1398 | + mapped_license = apply_expressions_mapping(cleaned_license) |
1378 | 1399 |
|
1379 | 1400 | # Finally perform SPDX expressions detection: Alpine uses mostly SPDX, but
|
1380 | 1401 | # with some quirks such as some non standard symbols (in addition to the
|
1381 | 1402 | # non-standard syntax)
|
1382 | 1403 | extra_licenses = {}
|
1383 | 1404 | expression_symbols = get_license_symbols(extra_licenses=extra_licenses)
|
1384 | 1405 |
|
1385 |
| - license_detections, detected_license_expression = get_license_detections_and_expression( |
1386 |
| - extracted_license_statement=mapped, |
| 1406 | + license_detections, license_expression = get_license_detections_and_expression( |
| 1407 | + extracted_license_statement=mapped_license, |
1387 | 1408 | expression_symbols=expression_symbols,
|
1388 | 1409 | )
|
1389 | 1410 |
|
1390 |
| - return cleaned, detected_license_expression, license_detections |
| 1411 | + return ApkLicenseDetection( |
| 1412 | + declared_license=declared, |
| 1413 | + cleaned_license=cleaned_license, |
| 1414 | + mapped_license=mapped_license, |
| 1415 | + license_expression=license_expression, |
| 1416 | + license_detections=license_detections, |
| 1417 | + ) |
| 1418 | + |
| 1419 | + |
| 1420 | +def detect_declared_license(declared): |
| 1421 | + """ |
| 1422 | + Return a three-tuple of detected license data from a ``declared`` license text, with this shape: |
| 1423 | + (cleaned declared license, detected license expression, license_detections) |
| 1424 | + - cleaned declared license and detected license expression are strings. |
| 1425 | + - license_detections is a list of LicenseDetection. |
| 1426 | + - Any of these can be None. |
| 1427 | + """ |
| 1428 | + if alpine_detection := get_alpine_license_detection(declared): |
| 1429 | + return ( |
| 1430 | + alpine_detection.cleaned_license, |
| 1431 | + alpine_detection.license_expression, |
| 1432 | + alpine_detection.license_detections, |
| 1433 | + ) |
| 1434 | + else: |
| 1435 | + return None, None, None |
1391 | 1436 |
|
1392 | 1437 |
|
1393 | 1438 | def get_license_symbols(extra_licenses):
|
@@ -1416,25 +1461,19 @@ def get_license_symbols(extra_licenses):
|
1416 | 1461 | def normalize_and_cleanup_declared_license(declared):
|
1417 | 1462 | """
|
1418 | 1463 | Return a cleaned and normalized declared license.
|
1419 |
| -
|
1420 |
| - The expression should be valida SPDX but are far from this in practice. |
1421 |
| -
|
| 1464 | + The expressions should be valid SPDX license expressions but they are far from this in practice. |
1422 | 1465 | Several fixes are applied:
|
1423 |
| -
|
1424 | 1466 | - plain text replacement aka. syntax fixes are plain text replacements
|
1425 | 1467 | to make the expression parsable
|
1426 |
| -
|
1427 | 1468 | - common fixes also include handling space-separated and comma-separated
|
1428 | 1469 | lists of licenses
|
1429 | 1470 | """
|
1430 | 1471 | declared = declared or ''
|
1431 | 1472 |
|
1432 |
| - # normalize spaces |
| 1473 | + # normalize spaces and case |
1433 | 1474 | declared = ' '.join(declared.split())
|
1434 |
| - |
1435 | 1475 | declared = declared.lower()
|
1436 | 1476 |
|
1437 |
| - # performa replacements |
1438 | 1477 | declared = apply_syntax_fixes(declared)
|
1439 | 1478 |
|
1440 | 1479 | # comma-separated as in gpl-2.0+, lgpl-2.1+, zlib
|
@@ -1516,15 +1555,15 @@ def normalize_and_cleanup_declared_license(declared):
|
1516 | 1555 |
|
1517 | 1556 | def apply_syntax_fixes(s):
|
1518 | 1557 | """
|
1519 |
| - Fix the expression string s by aplying replacement for various quirks. |
| 1558 | + Fix the expression string ``s`` by applying replacements for various quirks to get clean license
| 1559 | + expression syntax. |
1520 | 1560 | """
|
1521 | 1561 | for src, tgt in EXPRESSION_SYNTAX_FIXES.items():
|
1522 | 1562 | s = s.replace(src, tgt)
|
1523 | 1563 | return s
|
1524 | 1564 |
|
1525 |
| -# These are parsed expression objects replacement that make the expression SPDX compliant |
1526 |
| - |
1527 | 1565 |
|
| 1566 | +# These are replacements for parsed expression objects that make the expression SPDX compliant |
1528 | 1567 | # {alpine sub-expression: SPDX subexpression}
|
1529 | 1568 | DECLARED_TO_SPDX = {
|
1530 | 1569 | 'openssl-exception': 'licenseref-scancode-generic-exception',
|
|
0 commit comments