diff --git a/Mk/Scripts/check_spdx.lua b/Mk/Scripts/check_spdx.lua
new file mode 100755
--- /dev/null
+++ b/Mk/Scripts/check_spdx.lua
@@ -0,0 +1,559 @@
+#!/usr/libexec/flua
+--
+-- check_spdx.lua - match a LICENSE_FILE against the SPDX license list, or
+-- scan WRKSRC for license files and SPDX-License-Identifier headers.
+--
+-- Normalized SPDX license texts are cached below /var/db/ports-licenses; the
+-- cache is refreshed once its timestamp is older than DAYS_VALID days.
+--
+
+local args = {...}
+-- Not named "debug": that would shadow the standard debug library.
+local debug_enabled = false
+local license_file = nil
+local wrksrc = nil
+local localbase = os.getenv("LOCALBASE") or "/usr/local"
+-- Only used to print a ready-to-paste command line in scan_wrksrc().
+local scriptsdir = os.getenv("SCRIPTSDIR") or "."
+
+local BASEDIR = "/var/db/ports-licenses"
+local NORMDIR = BASEDIR .. "/normalized"
+local TIMESTAMP = BASEDIR .. "/.timestamp"
+-- NOTE(review): the data comes from a mirror instead of spdx.org; confirm
+-- that this is intended before this lands.
+-- local LICENSE_LIST_URL = "https://spdx.org/licenses/licenses.json"
+-- local LICENSE_BASE_URL = "https://spdx.org/licenses/"
+local LICENSE_LIST_URL = "https://fdp.bofh.network/licenses/licenses.json"
+local LICENSE_BASE_URL = "https://fdp.bofh.network/licenses/"
+local DAYS_VALID = 30
+local USER_AGENT = "spdx-checker/1.0"
+local FETCH = "fetch -q -o %s --user-agent=%s %s"
+local colors = {
+	reset = "\27[0m",
+	red = "\27[31m",
+	green = "\27[32m",
+	yellow = "\27[33m",
+	blue = "\27[34m",
+	magenta = "\27[35m",
+	cyan = "\27[36m",
+	bold = "\27[1m",
+}
+
+-- Load the libucl binding from the base system or from the port.
+local ucl
+local ucl_paths = {
+	"/usr/lib/flua/ucl.so",
+	localbase .. "/lib/lua/5.4/ucl.so",
+}
+
+for _, path in ipairs(ucl_paths) do
+	local probe = io.open(path, "r")
+	if probe then
+		probe:close()
+		package.cpath = path .. ";" .. package.cpath
+		local ok, mod = pcall(require, "ucl")
+		if ok then
+			ucl = mod
+			break
+		end
+	end
+end
+
+if not ucl then
+	io.stderr:write("[!] Could not load ucl module from known paths\n")
+	os.exit(1)
+end
+
+-- Print msg wrapped in the given ANSI color sequence.
+local function cprint(color, msg)
+	print(color .. msg .. colors.reset)
+end
+
+-- Quote s so that /bin/sh treats it as a single literal word.
+local function shell_quote(s)
+	return "'" .. (tostring(s):gsub("'", "'\\''")) .. "'"
+end
+
+-- Print the usage text and exit with the given status (default 0).
+local function usage(code)
+	print("Usage: check_spdx.lua [-h] [-d] [-f <file>] [-s <dir>]")
+	print("  -h         Show help and exit")
+	print("  -d         Enable debug output")
+	print("  -f <file>  Path to LICENSE_FILE to match against SPDX database")
+	print("  -s <dir>   Scan <dir> (WRKSRC) for license files and SPDX headers")
+	os.exit(code or 0)
+end
+
+-- Print a debug message when -d was given.
+local function dprint(msg)
+	if debug_enabled then
+		cprint(colors.cyan, "[D] " .. msg)
+	end
+end
+
+-- Return true if path can be opened for reading.
+local function file_exists(path)
+	local f = io.open(path, "r")
+	if f then
+		f:close()
+		return true
+	end
+	return false
+end
+
+-- Return true if the mtime of path is more than `days` days in the past, or
+-- if it cannot be determined.
+local function is_stale(path, days)
+	local p = io.popen("stat -f %m " .. shell_quote(path) .. " 2>/dev/null")
+	if not p then
+		return true
+	end
+	local mtime = tonumber(p:read("a")) or 0
+	p:close()
+	return os.time() - mtime > days * 86400
+end
+
+-- Return the whole content of path, or nil if it cannot be read.
+local function read_file(path)
+	local f = io.open(path, "r")
+	if not f then
+		return nil
+	end
+	local content = f:read("a")
+	f:close()
+	return content
+end
+
+-- Parse JSON text with libucl.  Returns the parsed object, or nil plus an
+-- error message.
+local function parse_json_ucl(text)
+	local parser = ucl.parser()
+	local ok, err = parser:parse_string(text)
+	if not ok then
+		return nil, "Failed to parse JSON with libucl" ..
+		    (err and (": " .. tostring(err)) or "")
+	end
+	return parser:get_object()
+end
+
+-- Equivalent spellings, written as Lua patterns.  This is an ordered list on
+-- purpose: pairs() has no defined order, and "licence" must be rewritten
+-- before the "sub-license" rules run.
+local EQUIVALENT_WORDS = {
+	{ "acknowledgment", "acknowledgement" },
+	{ "analogue", "analog" },
+	{ "analyse", "analyze" },
+	{ "artefact", "artifact" },
+	{ "authorisation", "authorization" },
+	{ "authorised", "authorized" },
+	{ "calibre", "caliber" },
+	{ "cancelled", "canceled" },
+	{ "capitalisations", "capitalizations" },
+	{ "catalogue", "catalog" },
+	{ "categorise", "categorize" },
+	{ "centre", "center" },
+	{ "emphasised", "emphasized" },
+	{ "favour", "favor" },
+	{ "favourite", "favorite" },
+	{ "fulfiling", "fulfilling" },
+	{ "fulfilment", "fulfillment" },
+	{ "initialise", "initialize" },
+	{ "judgment", "judgement" },
+	{ "labelling", "labeling" },
+	{ "labour", "labor" },
+	{ "licence", "license" },
+	{ "maximise", "maximize" },
+	{ "modelled", "modeled" },
+	{ "modelling", "modeling" },
+	{ "offence", "offense" },
+	{ "optimise", "optimize" },
+	{ "organisation", "organization" },
+	{ "organise", "organize" },
+	{ "practise", "practice" },
+	{ "programme", "program" },
+	{ "realise", "realize" },
+	{ "recognise", "recognize" },
+	{ "signalling", "signaling" },
+	{ "sub%-license", "sublicense" },
+	{ "sub license", "sublicense" },
+	{ "utilisation", "utilization" },
+	{ "whilst", "while" },
+	-- The frontier keeps an existing "wilfull" from growing a third "l".
+	{ "wilful%f[^l]", "wilfull" },
+	{ "non%-commercial", "noncommercial" },
+	{ "per cent", "percent" },
+	{ "copyright owner", "copyright holder" },
+}
+
+-- Normalize license text for comparison, loosely following the SPDX matching
+-- guidelines.  The cached SPDX texts and the file under test both go through
+-- this function, so both sides are reduced in the same way.
+local function normalize(text)
+	-- Drop copyright notice lines
+	text = text:gsub("[^\n]*\n?", function(line)
+		if line:match("^%s*Copyright") then
+			return ""
+		end
+	end)
+
+	text = text:lower()
+
+	-- Remove URLs together with enclosing <>, () or []
+	text = text:gsub("[<%(%[]?https?://[%w%p]+[>%)%]]?", "")
+
+	-- Strip comment indicators (//, #, /*) at the start of a line.  Lua
+	-- patterns have no "|" alternation, so every marker gets its own pass.
+	text = ("\n" .. text)
+	    :gsub("\n[ \t]*//+", "\n")
+	    :gsub("\n[ \t]*#+", "\n")
+	    :gsub("\n[ \t]*/%*+", "\n")
+	    :sub(2)
+
+	-- Ignore everything from "END OF TERMS AND CONDITIONS" on
+	text = text:gsub("end of terms and conditions.*", "")
+
+	-- Cut appendix / addendum / exhibit sections.  Only a match at the
+	-- start of a line that is preceded by other text counts, so that a
+	-- mention in running text does not wipe out the license body.
+	for _, heading in ipairs({ "appendix", "addendum", "exhibit" }) do
+		local pos = text:find("\n[ \t]*" .. heading)
+		if pos and text:sub(1, pos):find("%S") then
+			text = text:sub(1, pos - 1)
+		end
+	end
+
+	-- Remove typographic and ASCII double quotes.  Multi-byte UTF-8
+	-- characters are replaced as whole strings: inside a [...] class Lua
+	-- would match (and strip) their individual bytes.
+	for _, quote in ipairs({ "“", "”", "„", "«", "»", '"' }) do
+		text = text:gsub(quote, "")
+	end
+
+	-- Replace copyright symbols
+	for _, symbol in ipairs({ "©", "Ⓒ", "ⓒ" }) do
+		text = text:gsub(symbol, "(c)")
+	end
+
+	-- Remove bullets and numbering like 1., a), (i), *, etc.
+	text = text
+	    :gsub("%f[%w][0-9a-z]%.%s", "")	-- 1. or a. at word boundary
+	    :gsub("%([0-9a-zA-Z]+%)%s", "")	-- (1) or (a)
+	    :gsub("%*%s", "")			-- * bullet
+	    :gsub("%([ivxlcdmIVXLCDM]+%)%s", "")	-- (ii), (IV) roman numerals
+
+	-- Drop a leading title line such as "MIT License"
+	text = text:gsub("^%s*[^\n]*license[ \t\r]*\n", "")
+
+	-- Collapse whitespace before the word rules run, so that two-word
+	-- entries also match across line breaks
+	text = text:gsub("%s+", " "):gsub("^ ", ""):gsub(" $", "")
+
+	for _, pair in ipairs(EQUIVALENT_WORDS) do
+		text = text:gsub(pair[1], pair[2])
+	end
+	return text
+end
+
+-- Sorensen-Dice coefficient over the sets of character bigrams of a and b.
+-- Returns a number in [0, 1]; two strings without any bigram score 1.
+local function dice(a, b)
+	local function bigrams(s)
+		local set, count = {}, 0
+		for i = 1, #s - 1 do
+			local bg = s:sub(i, i + 1)
+			if not set[bg] then
+				set[bg] = true
+				count = count + 1
+			end
+		end
+		return set, count
+	end
+
+	local set_a, total_a = bigrams(a)
+	local set_b, total_b = bigrams(b)
+	local overlap = 0
+	for bg in pairs(set_b) do
+		if set_a[bg] then
+			overlap = overlap + 1
+		end
+	end
+
+	local total = total_a + total_b
+	if total == 0 then
+		return 1
+	end
+	return (2 * overlap) / total
+end
+
+-- Create path and missing parents; returns the os.execute() status.
+local function mkdir(path)
+	return os.execute("mkdir -p " .. shell_quote(path))
+end
+
+-- Download url into the file dest with fetch(1); returns true on success.
+local function fetch(url, dest)
+	local cmd = FETCH:format(shell_quote(dest), shell_quote(USER_AGENT),
+	    shell_quote(url))
+	local ok, _, code = os.execute(cmd)
+	return ok == true and code == 0
+end
+
+-- Download one license by SPDX id, normalize its text and store it in
+-- NORMDIR.  Returns true if the normalized text is present afterwards.
+local function fetch_license(id)
+	local outpath = NORMDIR .. "/" .. id .. ".txt"
+	if file_exists(outpath) then
+		dprint("[=] Already exists: " .. id)
+		return true
+	end
+
+	-- os.tmpname() avoids predictable names in a world-writable directory
+	local tmp = os.tmpname()
+	local raw = fetch(LICENSE_BASE_URL .. id .. ".json", tmp) and read_file(tmp)
+	os.remove(tmp)
+	if not raw then
+		print("[!] Failed to fetch: " .. id)
+		return false
+	end
+
+	local parsed = parse_json_ucl(raw)
+	if not parsed or type(parsed.licenseText) ~= "string" then
+		print("[!] licenseText missing or invalid for: " .. id)
+		return false
+	end
+
+	local outf = io.open(outpath, "w")
+	if not outf then
+		print("[!] Failed to write: " .. id)
+		return false
+	end
+	local text = parsed.licenseText:gsub("\\n", "\n"):gsub('\\"', '"')
+	outf:write(normalize(text))
+	outf:close()
+	print("[+] Fetched license: " .. id)
+	return true
+end
+
+-- Refresh the cache of normalized SPDX license texts.  Licenses that are
+-- already cached are kept as they are; only missing ones are downloaded.
+-- Returns true if the license list could be processed.
+local function update_spdx()
+	if not file_exists(NORMDIR) and not mkdir(NORMDIR) then
+		cprint(colors.red, "[!] Cannot create " .. NORMDIR)
+		return false
+	end
+
+	print("[*] Downloading SPDX license list...")
+	local list_tmp = os.tmpname()
+	local fetched = fetch(LICENSE_LIST_URL, list_tmp)
+	local json_text = fetched and read_file(list_tmp)
+	os.remove(list_tmp)
+	if not fetched then
+		cprint(colors.red, "[!] Failed to download license list")
+		return false
+	end
+	if not json_text then
+		cprint(colors.red, "[!] Could not read downloaded SPDX license list")
+		return false
+	end
+
+	local licenses_obj, err = parse_json_ucl(json_text)
+	if type(licenses_obj) ~= "table" or not licenses_obj.licenses then
+		cprint(colors.red, "[!] Failed to parse SPDX license list: " ..
+		    (err or "unknown error"))
+		return false
+	end
+
+	for _, lic in ipairs(licenses_obj.licenses) do
+		local id = lic.licenseId
+		-- The id comes from the network and ends up in file names and
+		-- URLs, so only accept the characters SPDX identifiers use.
+		if type(id) ~= "string" or not id:match("^[%w%.%-%+]+$") then
+			dprint("[!] Skipping license entry with missing or invalid ID")
+		elseif lic.isDeprecatedLicenseId then
+			dprint("[-] Skipping deprecated license: " .. id)
+		else
+			fetch_license(id)
+		end
+	end
+
+	-- Record the refresh.  Without this file the caller would consider the
+	-- cache stale on every run and download the list again each time.
+	local stamp = io.open(TIMESTAMP, "w")
+	if stamp then
+		stamp:write(os.date("!%Y-%m-%dT%H:%M:%SZ"), "\n")
+		stamp:close()
+	end
+	return true
+end
+
+-- Save normalized input for inspection/debug
+local function save_normalized_input(input_path, normalized_text)
+	local dir = "normalized-input"
+	mkdir(dir)
+
+	local input_basename = input_path:match("^.+/(.+)$") or input_path
+	local output_path = dir .. "/" .. input_basename .. ".normalized"
+
+	local f = io.open(output_path, "w")
+	if f then
+		f:write(normalized_text)
+		f:close()
+		cprint(colors.green, "[*] Saved normalized input to: " .. output_path)
+	else
+		cprint(colors.red, "[!] Failed to save normalized input to: " .. output_path)
+	end
+end
+
+-- Compare a LICENSE_FILE with every cached SPDX text and print the best
+-- matches.  Exits with status 1 if the file or the cache is unusable.
+local function compare(file)
+	local raw = read_file(file)
+	if not raw then
+		cprint(colors.red, "[!] LICENSE file not found: " .. file)
+		os.exit(1)
+	end
+	local input = normalize(raw)
+	if debug_enabled then
+		save_normalized_input(file, input)
+	end
+
+	local results = {}
+	local listing = io.popen("ls " .. shell_quote(NORMDIR) .. " 2>/dev/null")
+	if listing then
+		for entry in listing:lines() do
+			local lic_id = entry:match("^(.+)%.txt$")
+			local spdx = lic_id and read_file(NORMDIR .. "/" .. entry)
+			if spdx then
+				results[#results + 1] = { lic_id, dice(input, spdx) }
+			end
+		end
+		listing:close()
+	end
+	if #results == 0 then
+		cprint(colors.red, "[!] No SPDX license texts found in " .. NORMDIR)
+		os.exit(1)
+	end
+
+	-- Best score first; ties are ordered by id to keep the output stable
+	table.sort(results, function(a, b)
+		if a[2] ~= b[2] then
+			return a[2] > b[2]
+		end
+		return a[1] < b[1]
+	end)
+
+	print("\nTop SPDX license matches for: " .. file)
+	print(string.rep("-", 60))
+	local max_display = 10
+	for i = 1, math.min(max_display, #results) do
+		print(string.format("%-30s Dice=%.4f", results[i][1], results[i][2]))
+	end
+	print(string.rep("-", 60))
+	cprint(colors.red, "[!] This SPDX checker is a helper. Manual verification is still required.")
+	cprint(colors.red, "    Read the LICENSE_FILE and matched SPDX license for due diligence.")
+end
+
+-- SPDX scan function: finds license files or SPDX headers in source code
+local function scan_wrksrc(wrksrc)
+	local found = false
+
+	-- find(1) globs, matched case-insensitively.  The first entry already
+	-- covers every file name that contains "license" or "licence".
+	local license_patterns = {
+		"*LICEN[CS]E*", "*COPYING*", "COPYRIGHT",
+		"NOTICE", "UNLICENSE", "PATENTS", "LEGAL", "AUTHORS",
+	}
+	local tests = {}
+	for i, pattern in ipairs(license_patterns) do
+		tests[i] = "-iname " .. shell_quote(pattern)
+	end
+
+	cprint(colors.yellow, "[*] Scanning for known license files...")
+	-- One find(1) run for all patterns, so that no file is reported twice
+	local cmd = string.format("find %s -type f \\( %s \\)",
+	    shell_quote(wrksrc), table.concat(tests, " -o "))
+	local pipe = io.popen(cmd)
+	if pipe then
+		for line in pipe:lines() do
+			found = true
+			cprint(colors.green, "[+] Found license file: " .. line)
+			print(string.format("    To check SPDX match: %s/check_spdx.lua -f '%s'",
+			    scriptsdir, line))
+		end
+		pipe:close()
+	end
+	if found then
+		return
+	end
+
+	cprint(colors.red, "[-] No license files found. Scanning source files for SPDX headers...\n")
+	pipe = io.popen(string.format("find %s -type f", shell_quote(wrksrc)))
+	if pipe then
+		for file in pipe:lines() do
+			local f = io.open(file, "r")
+			if f then
+				for line in f:lines() do
+					if line:match("[Ss][Pp][Dd][Xx]%s*%-?License%-Identifier") then
+						cprint(colors.green, "[+] Found SPDX header in: " .. file)
+						print("    Line: " .. line)
+						found = true
+						break
+					end
+				end
+				f:close()
+			end
+		end
+		pipe:close()
+	end
+	if not found then
+		cprint(colors.red, "[-] No SPDX-License-Identifier headers found either.")
+	end
+end
+
+-- Entrypoint
+local i = 1
+while i <= #args do
+	local opt = args[i]
+	if opt == "-h" then
+		usage(0)
+	elseif opt == "-d" then
+		debug_enabled = true
+	elseif opt == "-f" or opt == "-s" then
+		i = i + 1
+		if not args[i] then
+			cprint(colors.red, "[-] Missing argument after " .. opt)
+			os.exit(1)
+		end
+		if opt == "-f" then
+			license_file = args[i]
+		else
+			wrksrc = args[i]
+		end
+	else
+		cprint(colors.red, "[-] Unknown argument: " .. opt)
+		usage(1)
+	end
+	i = i + 1
+end
+
+-- The cache is only needed for matching; a plain -s scan works offline.
+if license_file or not wrksrc then
+	if not file_exists(NORMDIR) or not file_exists(TIMESTAMP) or
+	    is_stale(TIMESTAMP, DAYS_VALID) then
+		update_spdx()
+	end
+end
+
+if license_file then
+	dprint("Analyzing LICENSE file: " .. license_file)
+	compare(license_file)
+end
+
+if wrksrc then
+	if not file_exists(wrksrc) then
+		cprint(colors.red, "[-] WRKSRC does not exist: " .. wrksrc)
+		os.exit(1)
+	end
+	scan_wrksrc(wrksrc)
+end
diff --git a/Mk/bsd.licenses.mk b/Mk/bsd.licenses.mk
--- a/Mk/bsd.licenses.mk
+++ b/Mk/bsd.licenses.mk
@@ -332,6 +332,7 @@
 . endif
 . else
 _LICENSE_FILE_${lic}= ${LICENSE_FILE_${lic}}
+_LICENSE_FILE_FOUND=	yes
 . endif
 . else
@@ -622,6 +623,34 @@
 	@${ECHO_MSG} "===> License ${_LICENSE} needs confirmation, will ask later"
 . endif
+# Check and try to match possible license names from SPDX
+check-spdx-license: extract
+.  if !exists(/usr/lib/flua/ucl.so) && !exists(${LOCALBASE}/lib/lua/5.4/ucl.so)
+	@${ECHO_MSG} "===> Lua UCL library not found, cannot check SPDX license match."
+	@${ECHO_MSG} "===> Please install the textproc/libucl port."
+.  else
+.    if defined(LICENSE_FILE) && defined(LICENSE_COMB) && ${LICENSE_COMB} != "single"
+	@${ECHO_MSG} "===> Cannot match SPDX license from combined license file ${LICENSE_FILE}"
+.    elif defined(LICENSE_FILE)
+	@${ECHO_MSG} "===> Checking SPDX match for: ${LICENSE_FILE}"
+	@${SETENV} SCRIPTSDIR=${SCRIPTSDIR} LOCALBASE=${LOCALBASE} ${SCRIPTSDIR}/check_spdx.lua -f ${LICENSE_FILE}
+.    elif defined(LICENSE)
+.      for lic in ${LICENSE:O:u}
+.        if defined(LICENSE_FILE_${lic})
+	@${ECHO_MSG} "===> Checking SPDX match for: ${LICENSE_FILE_${lic}}"
+	@${SETENV} SCRIPTSDIR=${SCRIPTSDIR} LOCALBASE=${LOCALBASE} ${SCRIPTSDIR}/check_spdx.lua -f ${LICENSE_FILE_${lic}}
+.        endif
+.      endfor
+.      if !defined(_LICENSE_FILE_FOUND)
+	@${ECHO_MSG} "===> No LICENSE_FILE_<lic> defined. Falling back to source scan."
+	@${SETENV} SCRIPTSDIR=${SCRIPTSDIR} LOCALBASE=${LOCALBASE} ${SCRIPTSDIR}/check_spdx.lua -s ${WRKSRC}
+.      endif
+.    else
+	@${ECHO_MSG} "===> No LICENSE or LICENSE_FILE defined. Running source scan."
+	@${SETENV} SCRIPTSDIR=${SCRIPTSDIR} LOCALBASE=${LOCALBASE} ${SCRIPTSDIR}/check_spdx.lua -s ${WRKSRC}
+.    endif
+.  endif
+
+# Display, ask and save preference if requested
 ask-license: ${_LICENSE_COOKIE}