Module:auto cat
Dokumentasie vir hierdie module kan geskep word by: Module:auto cat/doc
-- Table of functions this module exposes; it is filled in further down and
-- returned at the end of the file.
local export = {}
-- Split a category title into a label and a language object.
--
-- Only the LAST space-separated word of the title is tried as a canonical
-- language name (multi-word language names are therefore not recognised).
-- When that word is a known language, the label is the title with the language
-- name and the 4 characters in front of it removed (presumably a separator
-- such as " in " -- TODO confirm against the wiki's category naming), with its
-- first letter lowercased.
--
-- @param titleObject table with a string field `text` (the category name)
-- @return label (string), lang (language object, or nil when none matched).
--         When no language matched, label is the unmodified title text.
local function splitLabelLang(titleObject)
	local text = titleObject.text
	local words = mw.text.split(text, " ")
	local canonicalName = words[#words]
	local lang = require("Module:languages").getByCanonicalName(canonicalName)
	if not lang then
		return text, lang
	end

	-- A negative end index would make string.sub count from the END of the
	-- string (e.g. "Afrikaans" would give the bogus label "Afrika"), so clamp
	-- it: a title too short to hold a label yields the empty label.
	local labelEnd = #text - #canonicalName - 4
	if labelEnd < 0 then
		labelEnd = 0
	end

	local label = mw.getContentLanguage():lcfirst(text:sub(1, labelEnd))
	return label, lang
end
-- Look up an object by canonical name.
-- With a truthy `family`, only [[Module:families]] is consulted. Otherwise
-- [[Module:languages]] is tried first and [[Module:etymology languages]] is
-- the fallback.
local function getObj(name, family)
	if family then
		return require("Module:families").getByCanonicalName(name)
	end
	local regular = require("Module:languages").getByCanonicalName(name)
	if regular then
		return regular
	end
	-- Parentheses keep this to a single return value.
	return (require("Module:etymology languages").getByCanonicalName(name))
end
-- Ordered registry of handler functions that try to recognise the page name.
-- A handler receives the title object and returns either nil (page name not
-- recognised) or a table with a template title plus arguments, which is then
-- passed to frame:expandTemplate.
-- Handlers are kept in registration order, so that order matters!
local handlers = {}

-- Append `func` to the end of the handler registry.
local function add_handler(func)
	handlers[#handlers + 1] = func
end
-- request cat: fires on any title that begins with "Requests"
add_handler(function(titleObject)
	if titleObject.text:find("^Requests") then
		return {title = "request cat"}
	end
	return nil
end)
-- Personal-name category types, in the order the name handlers try them.
local personal_name_types = {
	"surnames",
	"given names",
	"male given names",
	"female given names",
	"unisex given names",
	"diminutives of male given names",
	"diminutives of female given names",
	"diminutives of unisex given names",
	"augmentatives of male given names",
	"augmentatives of female given names",
	"augmentatives of unisex given names",
}
-- topic name cat; has to be registered ahead of the general topic cat handler
add_handler(function(titleObject)
	local code, label = titleObject.text:match("^([a-z-]+):(.+)$")
	if code then
		-- Try every name type as a suffix of the label; whatever precedes
		-- it must resolve to a known language for the handler to fire.
		for _, nametype in ipairs(personal_name_types) do
			local sourcename = label:match("^(.+) " .. nametype .. "$")
			local source = sourcename and getObj(sourcename)
			if source then
				return {title = "topic name cat", args = {code, source:getCode(), nametype}}
			end
		end
	end
	return nil
end)
-- Topical categories of the form "<code>:<label>"
add_handler(function(titleObject)
	local code, label = titleObject.text:match("^([a-z-]+):(.+)$")
	if not code then
		return nil
	end
	return {title = "topic cat", args = {code, label}}
end)

-- Remember the handler that was just registered so it can be used on its own.
local topic_cat_with_lang = handlers[#handlers]
-- script cat
-- Needs to be registered before handlers such as "derived cat" that also look
-- at categories ending in "languages".
add_handler(function(titleObject)
	local text = titleObject.text

	local script_labels = {
		"appendices",
		"characters",
		"characters by language",
		"languages",
		"modules",
		"templates",
	}

	-- Hardcoded instead of testing every script whose name lacks the word
	-- "script": otherwise "Arabic languages" would trigger, since Arabic is
	-- a script and a family (and a language too).
	local scripts_without_script_in_category = {
		["Morse code"] = true,
		["Flag semaphore"] = true,
	}

	local script, label = text:match("^(.+) (script)$")
	if not script and scripts_without_script_in_category[text] then
		script, label = text, "script"
	end

	if not script then
		for _, lab in ipairs(script_labels) do
			script, label = text:match("^(.+) script (" .. lab .. ")$")
			if script then
				break
			end
			-- E.g. 'Morse code characters' or 'Flag semaphore templates'.
			script, label = text:match("^(.+) (" .. lab .. ")$")
			if script and scripts_without_script_in_category[script] then
				break
			end
			script = nil
		end
	end

	if not script then
		return nil
	end

	local getScript = require("Module:scripts").getByCanonicalName
	-- [[Category:Undetermined script languages]] vs. name of script = "undetermined"
	local scriptObj = getScript(script) or getScript(mw.ustring.lower(script))
	if not scriptObj then
		return nil
	end

	local templateArgs = { scriptObj:getCode() }
	if label ~= "script" then
		templateArgs[2] = label
	end
	return { title = "script cat", args = templateArgs }
end)
--[[ langcatboiler
Shouldn't be used because there are additional parameters, such as
countries where the language is or was spoken,
that should always be supplied.
add_handler(function(titleObject)
if titleObject.text:find(" by language$") or not titleObject.text:find("[lL]anguage$") then
return nil
end
local langName = titleObject.text:match("^(.+) language$")
-- Use the entire category name if it doesn't end in "language", to handle
-- cases where "language" is part of the name, e.g. ASL.
local lang = require("Module:languages").getByCanonicalName(langName) or require("Module:languages").getByCanonicalName(titleObject.text)
if not lang then
local lang2 = require("Module:languages").getByName(langName)
if lang2 then
error('"' .. langName .. '" is not a valid canonical name. Use "' .. lang2:getCanonicalName() .. '" instead.')
end
return nil
end
return { title = "langcatboiler", args = { lang:getCode() } }
end)
--]]
-- Letter names: titles ending in "letter names", optionally prefixed by
-- "<language code>:"
add_handler(function(titleObject)
	local text = titleObject.text
	if not text:find("letter names$") then
		return nil
	end

	local langCode = text:match("^([^:]+):")
	if not langCode then
		-- No language prefix: the whole title is the label.
		return {title = "topic cat", args = {nil, text}}
	end

	local lang = require("Module:languages").getByCode(langCode) or error('The language code "' .. langCode .. '" is not valid.')
	local cat = text:match(":(.+)$")
	return {title = "topic cat", args = {lang:getCode(), cat}}
end)
-- letter cat
add_handler(function(titleObject)
	-- Accept only an uppercase letter directly followed by its own lowercase
	-- form, either as the whole category name or followed by a colon (as in
	-- [[Category:Gg: ⠛]]). Anything else (e.g. Turkish [[Category:İi]] and
	-- [[Category:Iı]]) has to call {{letter cat}} directly. An earlier, far
	-- looser version of this handler fired on names such as
	-- [[Category:zh:]], [[Category:RFQ]], etc.
	local upper, lower = mw.ustring.match(titleObject.text, "^(%u)(%l)%f[:%z]")
	if upper and mw.ustring.upper(lower) == upper then
		return {title = "letter cat"}
	end
	return nil
end)
-- Japanese kanji reading cat: titles starting with "Japanese kanji"
add_handler(function(titleObject)
	if titleObject.text:find("^Japanese kanji") then
		return {title = "ja-readings-cat"}
	end
	return nil
end)
-- Okinawan kanji reading cat: titles starting with "Okinawan kanji"
add_handler(function(titleObject)
	if titleObject.text:find("^Okinawan kanji") then
		return {title = "ryu-readings-cat"}
	end
	return nil
end)
-- FIXME! This belongs in a general word-to-number converter.
-- Maps the English words "one" .. "twenty" to their numeric values.
local word_to_number = {
	one = 1, two = 2, three = 3, four = 4, five = 5,
	six = 6, seven = 7, eight = 8, nine = 9, ten = 10,
	eleven = 11, twelve = 12, thirteen = 13, fourteen = 14, fifteen = 15,
	sixteen = 16, seventeen = 17, eighteen = 18, nineteen = 19, twenty = 20,
}
-- Japanese/Okinawan character count cat
add_handler(function(titleObject)
	local langname, count = titleObject.text:match("^(.+) terms written with (.+) Han script characters?$")
	if not langname then
		return nil
	end

	-- Only these two languages are handled, and the count must be a
	-- spelled-out number known to word_to_number.
	local langcode = ({Japanese = "ja", Okinawan = "ryu"})[langname]
	local num = word_to_number[count]
	if not (langcode and num) then
		return nil
	end

	return {title = langcode .. "-cat-written with n kanji", args = {num}}
end)
-- Japanese/Okinawan read-as cat
add_handler(function(titleObject)
	local langname, reading = titleObject.text:match("^(.+) terms spelled with kanji read as (.+)$")
	if not langname then
		return nil
	end

	-- Any language other than these two is left to later handlers.
	local langcode = ({Japanese = "ja", Okinawan = "ryu"})[langname]
	if not langcode then
		return nil
	end

	return {title = langcode .. "-readascat", args = {reading}}
end)
-- Unicode block cat: titles ending in "block"
add_handler(function(titleObject)
	if titleObject.text:find("block$") then
		return {title = "Unicode block cat"}
	end
	return nil
end)
-- Proto-Indo-European xxx-shape roots
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	-- Restricted to the language whose code is "ine-pro".
	if lang and lang:getCode() == "ine-pro" then
		local shape = label:match("^(.+)-shape roots$")
		if shape then
			return {title = "ine-root shape cat", args = {shape}}
		end
	end
	return nil
end)
-- Dutch prefixed verbs
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	-- Restricted to language code "nl"; the captured prefix must end in "-".
	if lang and lang:getCode() == "nl" then
		local pref = label:match("^prefixed verbs with (.+%-)$")
		if pref then
			return {title = "nl-pref verb cat", args = {pref}}
		end
	end
	return nil
end)
-- Dutch separable verbs
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	-- Restricted to language code "nl".
	if lang and lang:getCode() == "nl" then
		local sep = label:match("^separable verbs with (.+)$")
		if sep then
			return {title = "nl-sep verb cat", args = {sep}}
		end
	end
	return nil
end)
-- PIE root cat: the phrase may occur anywhere in the title
add_handler(function(titleObject)
	if titleObject.text:find("[Tt]erms derived from the PIE root") then
		return {title = "PIE root cat"}
	end
	return nil
end)
-- PIE word cat
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	if label:find("^[tT]erms derived from the PIE word %*") then
		-- The word is whatever follows the asterisk; it is nil when nothing
		-- follows it, and the handler still fires in that case.
		local word = label:match("^[Tt]erms derived from the PIE word %*(.+)$")
		local code = lang and lang:getCode() or nil
		return {title = "PIE word cat", args = {code, word}}
	end
	return nil
end)
-- ar-root cat: language code "ar" and a label naming a root
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	if lang and lang:getCode() == "ar" and label:find("^terms belonging to the root .+") then
		return {title = "ar-root cat"}
	end
	return nil
end)
-- HE root cat: language code "he" and a label naming a root
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	if not lang or lang:getCode() ~= "he" then
		return nil
	end
	local root = label:match("^terms belonging to the root (.+)$")
	if not root then
		return nil
	end
	-- The pieces of the root, split on the literal separator, become the
	-- template's positional arguments.
	return {title = "HE root cat", args = mw.text.split(root, "־", true)}
end)
-- he-patterncat: language code "he" and a label naming a pattern
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	if lang and lang:getCode() == "he" and label:find("^terms in the pattern .+") then
		return {title = "he-patterncat"}
	end
	return nil
end)
-- root cat: "terms derived from the <source> root <root>"
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	local sourcename, root = label:match("^[Tt]erms derived from the (.+) root (.+)$")
	-- The source name has to resolve to a known language.
	local source = sourcename and getObj(sourcename)
	if not source then
		return nil
	end
	local code = lang and lang:getCode() or nil
	return {title = "root cat", args = {code, source:getCode(), root}}
end)
-- tone cat: "<pos> with tone <tone>"; requires a language
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	if lang then
		-- The tone is the final space-free token of the label.
		local pos, tone = label:match("^(.+) with tone ([^ ]+)$")
		if pos then
			return {title = "tone cat", args = {lang:getCode(), pos, tone}}
		end
	end
	return nil
end)
-- classifier cat: "nouns classified by <classifier>"; requires a language
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	if lang then
		local pos, classifier = label:match("^(nouns) classified by (.+)$")
		if pos then
			return {title = "classifier cat", args = {lang:getCode(), pos, classifier}}
		end
	end
	return nil
end)
-- derived cat: "terms derived from <source>"
-- A source written as "<name> languages" is looked up as a family; any other
-- source is looked up as a language (regular first, then etymology-only).
-- Returns nil when the label does not match or the source is unknown.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	local sourcename = label:match("^[Tt]erms derived from (.+)$")
	if not sourcename then
		return nil
	end

	local source
	-- Strip the suffix with the same case-insensitive "L" that detects it, so
	-- "X Languages" is looked up as family "X" rather than "X Languages".
	local familyname = sourcename:match("^(.*) [Ll]anguages$")
	if familyname then
		source = getObj(familyname, true)
	else
		source = getObj(sourcename)
	end

	if not source then
		return nil
	end
	return {title = "derived cat", args = {lang and lang:getCode() or nil, source:getCode()}}
end)
-- inherited cat: "terms inherited from <source>"
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	local sourcename = label:match("^[Tt]erms inherited from (.+)$")
	-- The source name has to resolve to a known language.
	local source = sourcename and getObj(sourcename)
	if not source then
		return nil
	end
	local code = lang and lang:getCode() or nil
	return {title = "inherited cat", args = {code, source:getCode()}}
end)
-- borrowed cat: "terms borrowed from <source>"
-- A source written as "<name> languages" is looked up as a family; any other
-- source is looked up as a language (regular first, then etymology-only).
-- Returns nil when the label does not match or the source is unknown.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	local sourcename = label:match("^[Tt]erms borrowed from (.+)$")
	if not sourcename then
		return nil
	end

	local source
	-- Strip the suffix with the same case-insensitive "L" that detects it, so
	-- "X Languages" is looked up as family "X" rather than "X Languages".
	local familyname = sourcename:match("^(.*) [Ll]anguages$")
	if familyname then
		source = getObj(familyname, true)
	else
		source = getObj(sourcename)
	end

	if not source then
		return nil
	end
	return {title = "borrowed cat", args = {lang and lang:getCode() or nil, source:getCode()}}
end)
-- unadapted borrowing cat: "unadapted borrowings from <source>"
-- A source written as "<name> languages" is looked up as a family; any other
-- source is looked up as a language (regular first, then etymology-only).
-- Returns nil when the label does not match or the source is unknown.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	local sourcename = label:match("^[Uu]nadapted borrowings from (.+)$")
	if not sourcename then
		return nil
	end

	local source
	-- Strip the suffix with the same case-insensitive "L" that detects it, so
	-- "X Languages" is looked up as family "X" rather than "X Languages".
	local familyname = sourcename:match("^(.*) [Ll]anguages$")
	if familyname then
		source = getObj(familyname, true)
	else
		source = getObj(sourcename)
	end

	if not source then
		return nil
	end
	return {title = "unadapted borrowing cat", args = {lang and lang:getCode() or nil, source:getCode()}}
end)
-- calque cat: "terms calqued from <source>"
-- A source written as "<name> languages" is looked up as a family; any other
-- source is looked up as a language (regular first, then etymology-only).
-- Returns nil when the label does not match or the source is unknown.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	local sourcename = label:match("^[Tt]erms calqued from (.+)$")
	if not sourcename then
		return nil
	end

	local source
	-- Strip the suffix with the same case-insensitive "L" that detects it, so
	-- "X Languages" is looked up as family "X" rather than "X Languages".
	local familyname = sourcename:match("^(.*) [Ll]anguages$")
	if familyname then
		source = getObj(familyname, true)
	else
		source = getObj(sourcename)
	end

	if not source then
		return nil
	end
	return {title = "calque cat", args = {lang and lang:getCode() or nil, source:getCode()}}
end)
-- semantic loan cat: "semantic loans from <source>"
-- A source written as "<name> languages" is looked up as a family; any other
-- source is looked up as a language (regular first, then etymology-only).
-- Returns nil when the label does not match or the source is unknown.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	local sourcename = label:match("^[Ss]emantic loans from (.+)$")
	if not sourcename then
		return nil
	end

	local source
	-- Strip the suffix with the same case-insensitive "L" that detects it, so
	-- "X Languages" is looked up as family "X" rather than "X Languages".
	local familyname = sourcename:match("^(.*) [Ll]anguages$")
	if familyname then
		source = getObj(familyname, true)
	else
		source = getObj(sourcename)
	end

	if not source then
		return nil
	end
	return {title = "semantic loan cat", args = {lang and lang:getCode() or nil, source:getCode()}}
end)
-- translit cat (umbrella form): "terms transliterated from other languages"
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	if label:find("^terms transliterated from other languages") then
		local code = lang and lang:getCode() or nil
		return {title = "translit cat", args = {code}}
	end
	return nil
end)
-- translit cat (per-source form): "transliterations of <source> terms"
-- Returns nil when the label does not match, when no source name can be
-- extracted from it, or when the source name is not a known language.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	if not label:find("^[Tt]ransliterations of") then
		return nil
	end

	-- The prefix can match while the full pattern does not (e.g. the label
	-- lacks the trailing " terms"); in that case there is nothing to look up.
	local sourcename = label:match("[Tt]ransliterations of (.+) terms")
	if not sourcename then
		return nil
	end

	local source = getObj(sourcename)
	if not source then
		return nil
	end

	-- The language is optional: its slot is nil when the title names none.
	return {title = "translit cat", args = {lang and lang:getCode() or nil, source:getCode()}}
end)
-- circumfix cat, infix cat, interfix cat, prefix cat, suffix cat
-- Recognises labels of the form "<pos> <affixtype>ed with <term>", optionally
-- followed by " (<id>)". A pos of "words" is passed on as nil.
-- Returns nil when the title names no language or the label does not match.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	-- Without a language there is no code to hand to the template; decline
	-- instead of raising an error on the nil value, so later handlers can try.
	if not lang then
		return nil
	end

	for _, affixtype in ipairs({"circumfix", "infix", "interfix", "prefix", "suffix"}) do
		local pos, after = label:match("^(.+) " .. affixtype .. "ed with (.+)$")
		if pos then
			if pos == "words" then
				pos = nil
			end
			-- Split off a trailing parenthesised id, if there is one.
			local term, id = after:match("^(.+) %(([^()]+)%)$")
			if not term then
				term = after
			end
			return {title = affixtype .. " cat", args = {lang:getCode(), term, pos = pos, id = id}}
		end
	end
	return nil
end)
-- name cat: "<name type> from <source>"; requires a language
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	if lang then
		for _, nametype in ipairs(personal_name_types) do
			local sourcename = label:match("^" .. nametype .. " from (.+)$")
			if sourcename then
				-- Pass the source's code when it is a known language,
				-- otherwise the raw source text.
				local sourceObj = getObj(sourcename)
				local sourceArg = sourceObj and sourceObj:getCode() or sourcename
				return {title = "name cat", args = {lang:getCode(), sourceArg, nametype}}
			end
		end
	end
	return nil
end)
-- charactercat: "terms spelled with <term>"
-- Returns nil when the label does not match, when it is a "read as" category,
-- or when the title names no language.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	-- Don't fire on [[Category:Japanese terms spelled with 発 read as はつ]]
	-- and similar.
	if not label:find("^terms spelled with .+") or label:find("^terms spelled with .+ read as .+") then
		return nil
	end
	-- Without a language there is no code to hand to the template; decline
	-- instead of raising an error on the nil value, so later handlers can try.
	if not lang then
		return nil
	end
	local term = label:match("^terms spelled with (.+)$")
	return {title = "charactercat", args = {lang:getCode(), term}}
end)
-- phrasebook cat
-- Handles the umbrella category "Phrasebooks by language" (no arguments) and
-- per-language labels "phrasebook" or "phrasebook/<sublabel>".
-- Returns nil for anything else, including titles that name no language.
add_handler(function(titleObject)
	if titleObject.text == "Phrasebooks by language" then
		return {title = "phrasebook cat", args = {}}
	end

	local label, lang = splitLabelLang(titleObject)
	-- Without a language there is no code to hand to the template; decline
	-- instead of raising an error on the nil value, so later handlers can try.
	if not lang then
		return nil
	end

	if label == "phrasebook" then
		return {title = "phrasebook cat", args = {lang:getCode()}}
	end

	local sublabel = label:match("^phrasebook/(.+)$")
	if sublabel then
		return {title = "phrasebook cat", args = {lang:getCode(), sublabel}}
	end
	return nil
end)
-- no entry cat: labels ending in "entries that don't exist"
-- Returns nil when the label does not match or the title names no language.
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)
	if not label:find("entries that don't exist$") then
		return nil
	end
	-- Without a language there is no code to hand to the template; decline
	-- instead of raising an error on the nil value, so later handlers can try.
	if not lang then
		return nil
	end
	return { title = "no entry cat", args = { lang:getCode() } }
end)
-- Azerbaijani compound verbs
add_handler(function(titleObject)
	local with_what = titleObject.text:match("^Azerbaijani compound verbs with (.+)$")
	if with_what then
		return {title = "az-compound cat", args = { with_what }}
	end
	return nil
end)
--[[ family cat
Must go after the "derived", "borrowed", and "transliterated" category handlers,
which sometimes have "languages" at the end, but before poscatboiler,
or families that have the same names as languages will get intercepted.

Recognises "<family name> languages". The family name is looked up as written
and, failing that, in lowercase. Returns nil when the title has no family name
in front of " languages" or the name is not a known family.
]]
add_handler(function(titleObject)
	-- A title that merely ends in "languages" (e.g. the bare word) has no
	-- family name to capture; decline rather than pass nil on to the lookups.
	local familyName = titleObject.text:match("^(.+) languages$")
	if not familyName then
		return nil
	end

	local getFamily = require("Module:families").getByCanonicalName
	local family = getFamily(familyName) or getFamily(mw.ustring.lower(familyName))
	if not family then
		return nil
	end
	return { title = "family cat", args = { family:getCode() } }
end)
-- poscatboiler
add_handler(function(titleObject)
	local label, lang = splitLabelLang(titleObject)

	if not lang then
		-- Umbrella categories: "<label> by language".
		if label:find(". by language$") then
			local umbrella = mw.getContentLanguage():lcfirst(label:match("^(.+) by language$"))
			return {title = "poscatboiler", args = {nil, umbrella}}
		end
		return nil
	end

	-- "<base label> in <script> script": pass the script as sc=, unless the
	-- base label is just "terms" or the script name is unknown.
	local baseLabel, script = label:match("(.+) in (.-) script$")
	if script and baseLabel ~= "terms" then
		local scriptObj = require("Module:scripts").getByCanonicalName(script)
		if scriptObj then
			return {title = "poscatboiler", args = {lang:getCode(), baseLabel, sc = scriptObj:getCode() }}
		end
	end

	return {title = "poscatboiler", args = {lang:getCode(), label}}
end)
-- redundant translit cat: "Terms with redundant transliterations/<code>"
-- Returns nil when the label does not match or has no "/<code>" part.
add_handler(function(titleObject)
	local label = splitLabelLang(titleObject)
	if not label:find("^Terms with redundant transliterations") then
		return nil
	end
	-- Declared local so the value does not leak into the global environment.
	local langCode = label:match("/(.+)")
	if not langCode then
		return nil
	end
	return {title = "redundant translit cat", args = {langCode}}
end)
-- manual translit diff cat; the code is whatever follows the first "/"
add_handler(function(titleObject)
	local label = splitLabelLang(titleObject)
	if label:find("^Terms with manual transliterations different from the automated ones") then
		local langCode = label:match("/(.+)")
		if langCode then
			return {title = "manual translit diff cat", args = {langCode}}
		end
	end
	return nil
end)
-- topic cat: catch-all that treats the whole title as a label with no
-- language; it never returns nil
add_handler(function(titleObject)
	local templateArgs = {nil, titleObject.text}
	return {title = "topic cat", args = templateArgs}
end)

-- Remember the handler that was just registered so it can be used on its own.
local topic_cat_without_lang = handlers[#handlers]
-- Entry point: work out which category template fits the current page name
-- and return its expansion.
-- Raises an error outside the category namespace (a fixed notice is returned
-- in the template namespace instead) and when no handler recognises the name.
function export.show(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		nopos = { type = "boolean" },
	})

	local titleObject = mw.title.getCurrentTitle()
	local namespace = titleObject.nsText
	if namespace == "Sjabloon" then
		return "(This template should be used on pages in the Category: namespace.)"
	end
	if namespace ~= "Kategorie" then
		error("This template/module can only be used on pages in the Category: namespace.")
	end

	-- With nopos set, only the two topic cat handlers are consulted.
	local candidates = handlers
	if args.nopos then
		candidates = { topic_cat_with_lang, topic_cat_without_lang }
	end

	-- Try the handlers one after another. A handler that does not recognize
	-- the category format returns nil and the next one is tried. Otherwise it
	-- returns a template name and arguments, but the template itself may still
	-- produce an error, in which case the next handler must be tried as well.
	-- Example: for "CAT:Mato Grosso, Brazil", "Mato" is a language name, so
	-- the {{poscatboiler}} handler fires and looks for a label
	-- "Grosso, Brazil". That yields an error, which used to block further
	-- handler consideration; now the error is detected and the remaining
	-- handlers are still checked, so that {{topic cat}} eventually fires and
	-- handles the category correctly.
	--
	-- FIXME: Will the topic_cat handlers correctly handle "letter names" categories?
	local first_error_cattext
	for _, handler in ipairs(candidates) do
		local templateObject = handler(titleObject)
		if templateObject then
			require("Module:debug").track("auto cat/" .. templateObject.title)
			local cattext = frame:expandTemplate(templateObject)
			-- FIXME! Errors are detected by looking for specific text found in
			-- most or all error messages generated by category tree templates
			-- (the second piece of text in particular should be in every error
			-- message generated when a module doesn't recognize a category
			-- name). If that text ever changes in the source modules (e.g.
			-- [[Module:category tree]]), it has to be changed here as well.
			local has_error = cattext:find("Category:Categories with invalid label") or
				cattext:find("The automatically%-generated contents of this category has errors")
			if not has_error then
				return cattext
			end
			-- Keep only the first erroneous expansion as a last resort.
			first_error_cattext = first_error_cattext or cattext
		end
	end

	if first_error_cattext then
		return first_error_cattext
	end
	error("{{auto cat}} couldn't recognize format of category name")
end
-- Test helper: run the handlers against an injected title string and return
-- the template title chosen by the first handler that fires (nothing if none
-- does). When given a table instead of a string, the title is read from the
-- first argument of its parent.
function export.test(title)
	if type(title) == "table" then
		title = title:getParent().args[1]
	end

	-- Minimal stand-in for a title object: only `text` is set.
	local titleObject = { text = title }

	for _, handler in ipairs(handlers) do
		local result = handler(titleObject)
		if result then
			return result.title
		end
	end
end
-- Hand the table of exported functions back to whoever loaded this module.
return export
-- For Vim, so we get 4-space tabs
-- vim: set ts=4 sw=4 noet: