Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F132374504
D51414.id.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
15 KB
Referenced Files
None
Subscribers
None
D51414.id.diff
View Options
diff --git a/Mk/Scripts/check_spdx.lua b/Mk/Scripts/check_spdx.lua
new file mode 100755
--- /dev/null
+++ b/Mk/Scripts/check_spdx.lua
@@ -0,0 +1,490 @@
+#!/usr/libexec/flua
+
+-- Locate and load the libucl Lua binding.
+-- Each known install location is probed in order; the first one that both
+-- exists and loads successfully is used. The script exits with status 1 when
+-- none of them works.
+local ucl
+local ucl_paths = {
+    "/usr/lib/flua/ucl.so",
+    "/usr/local/lib/lua/5.4/ucl.so"
+}
+
+for _, path in ipairs(ucl_paths) do
+    local probe = io.open(path)
+    if probe then
+        -- The handle is only an existence probe; close it so it is not leaked.
+        probe:close()
+        package.cpath = path .. ";" .. package.cpath
+        -- A module that is present but cannot be loaded must not abort the
+        -- script with a traceback; fall through to the next candidate.
+        local ok, mod = pcall(require, "ucl")
+        if ok then
+            ucl = mod
+            break
+        end
+    end
+end
+
+if not ucl then
+    io.stderr:write("[!] Could not load ucl module from known paths\n")
+    os.exit(1)
+end
+
+-- Command-line state. `args` holds the script arguments; the remaining flags
+-- are filled in by the option parser at the bottom of the file.
+local args = {...}
+-- NOTE: this local shadows Lua's standard `debug` library for the rest of the file.
+local debug = false
+local license_file = nil
+local scan_tree = false
+local scriptsdir = os.getenv("SCRIPTSDIR")
+local wrksrc = nil
+
+-- Cache locations: normalized license texts live under NORMDIR, and the
+-- modification time of TIMESTAMP is compared against DAYS_VALID.
+local BASEDIR = "/var/db/ports-licenses"
+local NORMDIR = BASEDIR .. "/normalized"
+local TIMESTAMP = BASEDIR .. "/.timestamp"
+-- The spdx.org endpoints are kept for reference; the active URLs below point
+-- at fdp.bofh.network instead.
+-- local LICENSE_LIST_URL = "https://spdx.org/licenses/licenses.json"
+-- local LICENSE_BASE_URL = "https://spdx.org/licenses/"
+local LICENSE_LIST_URL = "https://fdp.bofh.network/licenses/licenses.json"
+local LICENSE_BASE_URL = "https://fdp.bofh.network/licenses/"
+local TMP_LICENSE_LIST = "/tmp/licenses.json"
+-- NOTE(review): TMP_LICENSE_JSON is not referenced anywhere else in this file.
+local TMP_LICENSE_JSON = "/tmp/license.json"
+local DAYS_VALID = 30
+-- fetch(1) command template: first %s is the output file, second %s the URL.
+-- --user-agent takes the agent string itself; the former "User-Agent: " prefix
+-- ended up duplicated inside the header value.
+local FETCH = 'fetch -q -o %s --user-agent="spdx-checker/1.0" %s'
+local TMPDIR = "/tmp"
+-- ANSI escape sequences used to colorize terminal output.
+local colors = {
+    reset = "\27[0m",
+    red = "\27[31m",
+    green = "\27[32m",
+    yellow = "\27[33m",
+    blue = "\27[34m",
+    magenta = "\27[35m",
+    cyan = "\27[36m",
+    bold = "\27[1m"
+}
+
+-- Print the command-line synopsis to stdout.
+-- Does not terminate the process: both call sites exit right after calling
+-- this function with their own status (0 for -h, 1 for an unknown option).
+-- A hard-coded os.exit(0) here used to turn the error path into a success.
+local function usage()
+    print("Usage: check_spdx.lua [-h] [-d] [-f <license_file>] [-s <wrksrc>]")
+    print(" -h Show help and exit")
+    print(" -d Enable debug output")
+    print(" -f Path to LICENSE_FILE to match against SPDX database")
+    print(" -s Path to WRKSRC to scan for license files and SPDX headers")
+end
+
+-- Print a cyan "[D] "-prefixed diagnostic line to stdout.
+-- Does nothing unless the file-level `debug` flag is set.
+local function dprint(msg)
+    if not debug then
+        return
+    end
+    print(colors.cyan .. "[D] " .. msg .. colors.reset)
+end
+
+-- Report whether `path` can be opened for reading.
+-- Returns true when io.open succeeds (the handle is closed again at once),
+-- false otherwise.
+local function file_exists(path)
+    local handle = io.open(path, "r")
+    if handle == nil then
+        return false
+    end
+    handle:close()
+    return true
+end
+
+-- Remove a file, or every regular file matching a "dir/glob" path.
+--
+-- If `path` can be opened it is removed directly. Otherwise the last path
+-- component is used as a find(1) -name glob below the leading directory;
+-- a path without "/" is treated as a directory whose regular files are all
+-- removed.
+--
+-- Returns the number of files actually removed.
+local function cleanup(path)
+    -- Quote a value as a single /bin/sh word so spaces, quotes and glob
+    -- characters reach find(1) unmodified.
+    local function quote(s)
+        return "'" .. s:gsub("'", "'\\''") .. "'"
+    end
+
+    -- If it's a direct file path, try to delete it
+    if file_exists(path) then
+        return os.remove(path) and 1 or 0
+    end
+
+    -- Otherwise treat it as a directory or glob pattern
+    local dir, pattern = path:match("^(.-)/([^/]-)$")
+    if not dir then
+        dir = path
+        -- find -name takes a shell glob, not a regex: "*" matches every
+        -- name, whereas the former ".*" only matched dotfiles.
+        pattern = "*"
+    end
+
+    local cmd = string.format("find %s -type f -name %s", quote(dir), quote(pattern))
+    local pipe = io.popen(cmd)
+    if not pipe then return 0 end
+
+    local removed = 0
+    for fname in pipe:lines() do
+        -- Only report and count files that were really deleted.
+        if os.remove(fname) then
+            print(colors.red .. "[*] Removed: " .. fname .. colors.reset)
+            removed = removed + 1
+        end
+    end
+
+    pipe:close()
+    return removed
+end
+
+-- Return true if the modification time of `path` is more than `days` days
+-- in the past.
+-- The mtime is read with `stat -f %m`; when it cannot be determined (missing
+-- file, stat failure, popen failure) it counts as 0, so the result is true.
+local function is_stale(path, days)
+    -- Quote the path as one shell word so special characters are harmless.
+    local quoted = "'" .. path:gsub("'", "'\\''") .. "'"
+    local p = io.popen("stat -f %m " .. quoted .. " 2>/dev/null")
+    if not p then
+        return true
+    end
+    local mtime = tonumber(p:read("*a") or "") or 0
+    p:close()
+    return os.time() - mtime > (days * 86400)
+end
+
+-- Read the whole file at `path`.
+-- Returns its content as a string, or nil when the file cannot be opened.
+local function read_file(path)
+    local handle = io.open(path, "r")
+    if handle == nil then
+        return nil
+    end
+    local data = handle:read("*a")
+    handle:close()
+    return data
+end
+
+-- Parse `text` with a fresh libucl parser.
+-- Returns whatever the parser's get_object() yields on success, or
+-- nil plus a fixed error message when parse_string() reports failure.
+local function parse_json_ucl(text)
+    local p = ucl.parser()
+    local parsed_ok = p:parse_string(text)
+    if parsed_ok then
+        return p:get_object()
+    end
+    return nil, "Failed to parse JSON with libucl"
+end
+
+-- Normalize license text so that two renderings of the same license compare
+-- as equal as possible.
+--
+-- Steps, in order: drop "Copyright" lines, lowercase, strip URLs, drop lines
+-- starting with a comment marker, cut everything after "end of terms and
+-- conditions" or a line starting with appendix/addendum/exhibit, remove
+-- quotes, map copyright symbols to "(c)", strip list bullets, drop a leading
+-- title line ending in "license", collapse whitespace, and unify spelling
+-- variants.
+--
+-- @param text  raw license text (string)
+-- @return      the normalized text (string)
+--
+-- NOTE(review): the comment-line, appendix and title rules never matched
+-- before (regex syntax in Lua patterns); they are now implemented as their
+-- comments describe. Cached normalized files written by the old code should
+-- be regenerated so both sides of a comparison use the same rules.
+local function normalize(text)
+    -- Run `is_unwanted` on every line (trailing newline included) and drop
+    -- the lines for which it returns a true value.
+    local function drop_lines(s, is_unwanted)
+        return (s:gsub("[^\n]*\n?", function(line)
+            if is_unwanted(line) then
+                return ""
+            end
+            return line
+        end))
+    end
+
+    -- Remove copyright lines (checked before lowercasing, as before)
+    text = drop_lines(text, function(line)
+        return line:match("^%s*Copyright")
+    end)
+
+    -- Convert to lowercase
+    text = text:lower()
+
+    -- Remove URLs, including an optional surrounding <>, () or []
+    text = text:gsub("[<%(%[]?https?://[%w%p]+[>%)%]]?", "")
+
+    -- Remove lines starting with a comment marker. Lua patterns have no
+    -- "|" alternation, so each marker is tested separately.
+    text = drop_lines(text, function(line)
+        return line:match("^%s*//") or line:match("^%s*#") or line:match("^%s*/%*")
+    end)
+
+    -- Remove end of terms and conditions and everything after it
+    text = text:gsub("end of terms and conditions.*", "")
+
+    -- Remove EXHIBIT / APPENDIX etc. sections: cut from the first line that
+    -- starts with one of the keywords. Anchoring at a line start keeps an
+    -- in-sentence mention from truncating the text; the text is already
+    -- lowercase, so no case-insensitive flag is needed.
+    for _, keyword in ipairs({ "appendix", "addendum", "exhibit" }) do
+        text = ("\n" .. text):gsub("\n[ \t]*" .. keyword .. ".*", ""):sub(2)
+    end
+
+    -- Remove typographic and ASCII quotes. Multibyte characters must be
+    -- replaced as whole strings: inside a [...] class Lua matches single
+    -- bytes, which would corrupt other UTF-8 sequences.
+    for _, quote_char in ipairs({
+        "\u{201C}", "\u{201D}", "\u{201E}", "\u{AB}", "\u{BB}", '"'
+    }) do
+        text = text:gsub(quote_char, "")
+    end
+
+    -- Replace copyright symbols (U+00A9, U+24B8, U+24D2) with "(c)"
+    for _, symbol in ipairs({ "\u{A9}", "\u{24B8}", "\u{24D2}" }) do
+        text = text:gsub(symbol, "(c)")
+    end
+    text = text:gsub("%(c%)%(c%)", "(c)")
+
+    -- Remove bullets and numbering like 1., a), (i), *, etc.
+    text = text
+        :gsub("%f[%w][0-9a-z]%.%s", "")       -- 1. or a. at word boundary
+        :gsub("%([0-9a-zA-Z]+%)%s", "")       -- (1) or (a)
+        :gsub("%*%s", "")                     -- * bullet
+        :gsub("%([ivxlcdmIVXLCDM]+%)%s", "")  -- (ii), (IV) roman numerals
+
+    -- Drop the first line when it is just a title ending in "license".
+    -- [^\n] keeps the match on that line; "\\n" used to match a literal
+    -- backslash followed by "n", never a newline.
+    text = text:gsub("^%s*[^\n]-license[ \t\r]*\n", "")
+
+    -- Collapse whitespace before the spelling pass so that multi-word
+    -- variants split across lines ("sub\nlicense") are still recognized.
+    text = text:gsub("%s+", " "):gsub("^%s+", ""):gsub("%s+$", "")
+
+    -- Spelling variants, applied in a fixed order. An array is used instead
+    -- of pairs() over a hash table because the result depends on the order
+    -- ("licence" must become "license" before "sub license" is merged).
+    local replacements = {
+        { 'acknowledgment', 'acknowledgement' },
+        { 'analogue', 'analog' },
+        { 'analyse', 'analyze' },
+        { 'artefact', 'artifact' },
+        { 'authorisation', 'authorization' },
+        { 'authorised', 'authorized' },
+        { 'calibre', 'caliber' },
+        { 'cancelled', 'canceled' },
+        { 'capitalisations', 'capitalizations' },
+        { 'catalogue', 'catalog' },
+        { 'categorise', 'categorize' },
+        { 'centre', 'center' },
+        { 'emphasised', 'emphasized' },
+        { 'favour', 'favor' },
+        { 'favourite', 'favorite' },
+        { 'fulfiling', 'fulfilling' },
+        { 'fulfilment', 'fulfillment' },
+        { 'initialise', 'initialize' },
+        { 'judgment', 'judgement' },
+        { 'labelling', 'labeling' },
+        { 'labour', 'labor' },
+        { 'licence', 'license' },
+        { 'maximise', 'maximize' },
+        { 'modelled', 'modeled' },
+        { 'modelling', 'modeling' },
+        { 'offence', 'offense' },
+        { 'optimise', 'optimize' },
+        { 'organisation', 'organization' },
+        { 'organise', 'organize' },
+        { 'practise', 'practice' },
+        { 'programme', 'program' },
+        { 'realise', 'realize' },
+        { 'recognise', 'recognize' },
+        { 'signalling', 'signaling' },
+        { 'sub%-license', 'sublicense' },
+        { 'sub license', 'sublicense' },
+        { 'utilisation', 'utilization' },
+        { 'whilst', 'while' },
+        -- "willful" does not contain "wilful", so this rule is idempotent
+        -- (the former target "wilfull" matched itself on a second pass).
+        { 'wilful', 'willful' },
+        { 'non%-commercial', 'noncommercial' },
+        { 'per cent', 'percent' },
+        { 'owner', 'holder' },
+    }
+
+    for _, pair in ipairs(replacements) do
+        text = text:gsub(pair[1], pair[2])
+    end
+
+    return text
+end
+
+-- Dice coefficient over the sets of distinct two-byte substrings of `a`
+-- and `b`: 2 * |A intersect B| / (|A| + |B|).
+-- Returns 1 when neither string yields any bigram.
+local function dice(a, b)
+    -- Collect the distinct bigrams of `s`; returns the set and its size.
+    local function bigram_set(s)
+        local seen, count = {}, 0
+        for pos = 2, #s do
+            local pair = s:sub(pos - 1, pos)
+            if not seen[pair] then
+                seen[pair] = true
+                count = count + 1
+            end
+        end
+        return seen, count
+    end
+
+    local set_a, size_a = bigram_set(a)
+    local set_b, size_b = bigram_set(b)
+
+    local shared = 0
+    for pair in pairs(set_b) do
+        if set_a[pair] then
+            shared = shared + 1
+        end
+    end
+
+    local total = size_a + size_b
+    if total > 0 then
+        return (2 * shared) / total
+    end
+    return 1
+end
+
+-- Create directory `path`, including missing parents, via `mkdir -p`.
+-- The path is passed as a single quoted shell word, so spaces and shell
+-- metacharacters are taken literally.
+-- Returns the results of os.execute so callers may check for failure.
+local function mkdir(path)
+    local quoted = "'" .. path:gsub("'", "'\\''") .. "'"
+    return os.execute("mkdir -p -- " .. quoted)
+end
+
+-- Refresh the local cache of normalized SPDX license texts.
+--
+-- Downloads the license list, then for every non-deprecated license that is
+-- not cached yet downloads its JSON, normalizes licenseText and stores it as
+-- NORMDIR/<id>.txt. When no download or write failed, TIMESTAMP is rewritten
+-- so the caller's staleness check can skip the next refresh; without that
+-- file the refresh ran on every invocation.
+local function update_spdx()
+    if not file_exists(NORMDIR) then
+        mkdir(NORMDIR)
+    end
+
+    print("[*] Downloading SPDX license list...")
+    local ok, _, code = os.execute(FETCH:format(TMP_LICENSE_LIST, LICENSE_LIST_URL))
+    if not ok or code ~= 0 then
+        print(colors.red .. "[!] Failed to download license list" .. colors.reset)
+        return
+    end
+
+    local json_text = read_file(TMP_LICENSE_LIST)
+    -- The list is only needed in memory from here on; do not leave it behind.
+    os.remove(TMP_LICENSE_LIST)
+    if not json_text then
+        print(colors.red .. "[!] Could not read downloaded SPDX license list" .. colors.reset)
+        return
+    end
+
+    local licenses_obj, err = parse_json_ucl(json_text)
+    if not licenses_obj or not licenses_obj.licenses then
+        print(colors.red .. "[!] Failed to parse SPDX license list: " .. (err or "unknown error") .. colors.reset)
+        return
+    end
+
+    -- Download, normalize and store one license; returns true on success
+    -- or when the failure will not go away by retrying (no licenseText).
+    local function fetch_license(id)
+        local lic_tmp = TMPDIR .. "/" .. id .. ".json"
+        local outpath = NORMDIR .. "/" .. id .. ".txt"
+        local url = LICENSE_BASE_URL .. id .. ".json"
+
+        local ok2, _, code2 = os.execute(FETCH:format(lic_tmp, url))
+        if not ok2 or code2 ~= 0 then
+            print("[!] Failed to fetch: " .. id)
+            return false
+        end
+
+        local raw = read_file(lic_tmp)
+        if not raw then
+            return false
+        end
+
+        local stored = true
+        local parsed = parse_json_ucl(raw)
+        if parsed and parsed.licenseText then
+            local text = parsed.licenseText:gsub("\\n", "\n"):gsub('\\"', '"')
+            local normtext = normalize(text)
+            local outf = io.open(outpath, "w")
+            if outf then
+                outf:write(normtext)
+                outf:close()
+                print("[+] Fetching license: " .. id)
+            else
+                print("[!] Failed to write: " .. id)
+                stored = false
+            end
+        else
+            print("[!] licenseText missing or invalid for: " .. id)
+        end
+        cleanup(lic_tmp)
+        return stored
+    end
+
+    local failures = 0
+    for _, lic in ipairs(licenses_obj.licenses) do
+        local id = lic.licenseId
+        if not id then
+            dprint("[!] Skipping license entry with missing ID")
+        elseif type(id) ~= "string" or not id:match("^[%w%.%+%-]+$") then
+            -- The ID comes from downloaded data and is spliced into a shell
+            -- command and into file paths; accept only letters, digits,
+            -- ".", "+" and "-".
+            dprint("[!] Skipping license entry with unsafe ID: " .. tostring(id))
+        elseif lic.isDeprecatedLicenseId then
+            dprint("[-] Skipping deprecated license: " .. id)
+        elseif file_exists(NORMDIR .. "/" .. id .. ".txt") then
+            dprint("[=] Already exists: " .. id)
+        elseif not fetch_license(id) then
+            failures = failures + 1
+        end
+    end
+
+    -- Record a successful refresh; is_stale() looks at this file's mtime.
+    if failures == 0 then
+        local stamp = io.open(TIMESTAMP, "w")
+        if stamp then
+            stamp:write(os.date("!%Y-%m-%dT%H:%M:%SZ"), "\n")
+            stamp:close()
+        else
+            print("[!] Failed to write: " .. TIMESTAMP)
+        end
+    end
+end
+
+-- Write `normalized_text` to normalized-input/<basename>.normalized (relative
+-- to the current directory) so the normalized form can be inspected.
+-- <basename> is the part of `input_path` after its last "/". Prints a green
+-- confirmation on success and a red error when the file cannot be opened.
+local function save_normalized_input(input_path, normalized_text)
+    local dir = "normalized-input"
+    os.execute("mkdir -p " .. dir)
+
+    local base = input_path:match("^.+/(.+)$")
+    if base == nil then
+        base = input_path
+    end
+    local output_path = dir .. "/" .. base .. ".normalized"
+
+    local out = io.open(output_path, "w")
+    if not out then
+        print(colors.red .. "[!] Failed to save normalized input to: " .. output_path .. colors.reset)
+        return
+    end
+
+    out:write(normalized_text)
+    out:close()
+    print(colors.green .. "[*] Saved normalized input to: " .. output_path .. colors.reset)
+end
+
+-- Compare a LICENSE_FILE against every cached normalized SPDX license.
+--
+-- Normalizes the file, scores it with dice() against each NORMDIR/*.txt and
+-- prints the ten best matches, highest score first (ties ordered by license
+-- ID so the output is stable). Exits with status 1 when `file` cannot be
+-- opened. In debug mode the normalized input is also saved for inspection.
+local function compare(file)
+    local input_fh = io.open(file, "r")
+    if not input_fh then
+        print(colors.red .. "[!] LICENSE file not found: " .. file .. colors.reset)
+        os.exit(1)
+    end
+    local input = normalize(input_fh:read("*a"))
+    input_fh:close()
+    if debug then save_normalized_input(file, input) end
+
+    local results = {}
+
+    local listing = io.popen("ls '" .. NORMDIR:gsub("'", "'\\''") .. "'")
+    if listing then
+        for entry in listing:lines() do
+            local lic_id = entry:match("^(.+)%.txt$")
+            if lic_id then
+                -- A cache file that vanished or is unreadable is skipped
+                -- instead of crashing on a nil handle.
+                local spdx = read_file(NORMDIR .. "/" .. entry)
+                if spdx then
+                    results[#results + 1] = { lic_id, dice(input, spdx) }
+                end
+            end
+        end
+        -- Close the pipe; it used to be leaked.
+        listing:close()
+    end
+
+    table.sort(results, function(a, b)
+        if a[2] ~= b[2] then
+            return a[2] > b[2]
+        end
+        return a[1] < b[1]
+    end)
+
+    print("\nTop SPDX license matches for: " .. file)
+    print(string.rep("-", 60))
+    local max_display = 10
+    for i = 1, math.min(max_display, #results) do
+        local lic_id, score = table.unpack(results[i])
+        print(string.format("%-30s Dice=%.4f", lic_id, score))
+    end
+    if #results == 0 then
+        print("[!] No normalized SPDX licenses found in: " .. NORMDIR)
+    end
+    print(string.rep("-", 60))
+    print(colors.red .. "[!] This SPDX checker is a helper. Manual verification is still required." .. colors.reset)
+    print(colors.red .. " Read the LICENSE_FILE and matched SPDX license for due diligence." .. colors.reset)
+end
+
+-- SPDX scan function: finds license files or SPDX headers in source code.
+--
+-- First looks below `wrksrc` for files whose name matches one of the known
+-- license-file globs (case-insensitive) and prints, for each, the command
+-- that checks it. Only when none is found, every regular file is read line
+-- by line and the first line carrying an SPDX-License-Identifier tag is
+-- reported.
+local function scan_wrksrc(wrksrc)
+    -- Quote a value as a single /bin/sh word.
+    local function quote(s)
+        return "'" .. s:gsub("'", "'\\''") .. "'"
+    end
+
+    local found = false
+
+    -- find(1) -iname takes shell globs. The former regex-style entries
+    -- (".*license.*" and friends) only matched names already covered by
+    -- "*LICEN[CS]E*" and are therefore dropped.
+    local license_patterns = {
+        "*LICEN[CS]E*", "*COPYING*", "COPYRIGHT",
+        "NOTICE", "UNLICENSE", "PATENTS", "LEGAL", "AUTHORS",
+    }
+
+    -- One find invocation with all globs OR-ed together: the tree is walked
+    -- once and a file matching several globs is reported once.
+    local name_tests = {}
+    for _, pattern in ipairs(license_patterns) do
+        name_tests[#name_tests + 1] = "-iname " .. quote(pattern)
+    end
+    local cmd = string.format("find %s -type f \\( %s \\)",
+        quote(wrksrc), table.concat(name_tests, " -o "))
+
+    -- SCRIPTSDIR may be unset; keep the hint readable in that case.
+    local script_dir = scriptsdir or "${SCRIPTSDIR}"
+
+    print(colors.yellow .. "[*] Scanning for known license files..." .. colors.reset)
+    local pipe = io.popen(cmd)
+    if pipe then
+        for line in pipe:lines() do
+            found = true
+            print(colors.green .. string.format("[+] Found license file: %s", line) .. colors.reset)
+            print(string.format(" To check SPDX match: %s/check_spdx.lua -f '%s'", script_dir, line))
+        end
+        pipe:close()
+    end
+
+    if found then
+        return
+    end
+
+    print(colors.red .. "[-] No license files found. Scanning source files for SPDX headers...\n" .. colors.reset)
+    local src_pipe = io.popen(string.format("find %s -type f", quote(wrksrc)))
+    if src_pipe then
+        for file in src_pipe:lines() do
+            local f = io.open(file, "r")
+            if f then
+                for line in f:lines() do
+                    if line:match("[Ss][Pp][Dd][Xx]%s*%-?License%-Identifier") then
+                        print(string.format(colors.green .. "[+] Found SPDX header in: %s", file) .. colors.reset)
+                        print(string.format(" Line: %s", line))
+                        found = true
+                        -- One hit per file is enough.
+                        break
+                    end
+                end
+                f:close()
+            end
+        end
+        src_pipe:close()
+    end
+    if not found then
+        print(colors.red .. "[-] No SPDX-License-Identifier headers found either." .. colors.reset)
+    end
+end
+-- Entrypoint: parse options, refresh the SPDX cache when it is missing or
+-- stale, then run the requested actions (-f compare, -s scan).
+local i = 1
+while i <= #args do
+    local opt = args[i]
+    if opt == "-h" then
+        usage()
+        os.exit(0)
+    elseif opt == "-d" then
+        debug = true
+        i = i + 1
+    elseif opt == "-f" then
+        -- -f consumes the following argument as the license file path.
+        i = i + 1
+        if not args[i] then
+            print(colors.red .. "[-] Missing argument after -f" .. colors.reset)
+            os.exit(1)
+        end
+        license_file = args[i]
+        i = i + 1
+    elseif opt == "-s" then
+        -- -s consumes the following argument as the directory to scan.
+        scan_tree = true
+        i = i + 1
+        if not args[i] then
+            print(colors.red .. "[-] Missing argument after -s" .. colors.reset)
+            os.exit(1)
+        end
+        wrksrc = args[i]
+        i = i + 1
+    else
+        print(colors.red .. "[-] Unknown argument: " .. opt .. colors.reset)
+        usage()
+        os.exit(1)
+    end
+end
+
+if not file_exists(NORMDIR) or not file_exists(TIMESTAMP) or is_stale(TIMESTAMP, DAYS_VALID) then
+    update_spdx()
+end
+
+if license_file then
+    dprint("Analyzing LICENSE file: " .. license_file)
+    compare(license_file)
+end
+
+if scan_tree then
+    -- The parser guarantees that wrksrc is set whenever scan_tree is, so the
+    -- former "not wrksrc" test could never fire; check that the directory
+    -- really exists instead.
+    local is_dir = wrksrc and os.execute("test -d '" .. wrksrc:gsub("'", "'\\''") .. "'")
+    if not is_dir then
+        print(colors.red .. "[-] WRKSRC does not exist" .. colors.reset)
+        os.exit(1)
+    end
+    scan_wrksrc(wrksrc)
+end
diff --git a/Mk/bsd.licenses.mk b/Mk/bsd.licenses.mk
--- a/Mk/bsd.licenses.mk
+++ b/Mk/bsd.licenses.mk
@@ -332,6 +332,7 @@
. endif
. else
_LICENSE_FILE_${lic}= ${LICENSE_FILE_${lic}}
+_LICENSE_FILE_FOUND= yes
. endif
. else
@@ -622,6 +623,34 @@
@${ECHO_MSG} "===> License ${_LICENSE} needs confirmation, will ask later"
. endif
+# Check and try to match possible license names from SPDX.
+# Needs the libucl Lua binding. Runs check_spdx.lua on the license file(s)
+# declared by the port; when none is declared it falls back to scanning
+# ${WRKSRC} for license files and SPDX headers.
+check-spdx-license: extract
+.  if !exists(/usr/lib/flua/ucl.so) && !exists(${LOCALBASE}/lib/lua/5.4/ucl.so)
+	@${ECHO_MSG} "===> Lua UCL library not found, cannot check SPDX license match."
+	@${ECHO_MSG} "===> Please install textproc/libucl ports."
+.  else
+.    if defined(LICENSE_FILE) && defined(LICENSE_COMB) && ${LICENSE_COMB} != "single"
+	@${ECHO_MSG} "Cannot match SPDX license from combined license file ${LICENSE_FILE}"
+.    elif defined(LICENSE_FILE)
+	@${ECHO_MSG} "==> Checking SPDX match for: ${_LICENSE_FILE}"
+	@${SETENV} SCRIPTSDIR=${SCRIPTSDIR} ${SCRIPTSDIR}/check_spdx.lua -f ${LICENSE_FILE}
+.    elif defined(LICENSE)
+.      for lic in ${LICENSE:O:u}
+.        if defined(LICENSE_FILE_${lic})
+	@${ECHO_MSG} "==> Checking SPDX match for: ${LICENSE_FILE_${lic}}"
+	@${SETENV} SCRIPTSDIR=${SCRIPTSDIR} ${SCRIPTSDIR}/check_spdx.lua -f ${LICENSE_FILE_${lic}}
+.        endif
+.      endfor
+# The marker set earlier in this file is _LICENSE_FILE_FOUND (leading
+# underscore); testing the unprefixed name made the fallback scan always run.
+.      if !defined(_LICENSE_FILE_FOUND)
+	@${ECHO_MSG} "==> No LICENSE_FILE_<LICENSE> defined. Falling back to source scan."
+	@${SETENV} SCRIPTSDIR=${SCRIPTSDIR} ${SCRIPTSDIR}/check_spdx.lua -s ${WRKSRC}
+.      endif
+.    else
+	@${ECHO_MSG} "==> No LICENSE or LICENSE_FILE defined. Running source scan."
+	@${SETENV} SCRIPTSDIR=${SCRIPTSDIR} ${SCRIPTSDIR}/check_spdx.lua -s ${WRKSRC}
+.    endif
+.  endif
+
# Display, ask and save preference if requested
ask-license: ${_LICENSE_COOKIE}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Oct 17, 8:35 AM (5 h, 50 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
23804221
Default Alt Text
D51414.id.diff (15 KB)
Attached To
Mode
D51414: Mk/Scripts: Add SPDX license file normalizer and matcher
Attached
Detach File
Event Timeline
Log In to Comment