Module:links: Verskil tussen weergawes

Content deleted Content added
Pynappel (Besprekings | bydraes)
Nuwe bladsy geskep met 'local export = {} --[=[ Unsupported titles and pages with high memory usage are listed at Module:links/data. Other modules used: Module:scripts [[Mo...'
 
Naudefj (Besprekings | bydraes)
Werk by vanaf en:
Lyn 6:
Other modules used:
[[Module:script utilities]]
[[Module:scripts]]
[[Module:languages]] and its submodules
[[Module:gender and number]]
[[Module:utilities]]
Line 13 ⟶ 15:
]=]
 
-- These are prefixed with u to avoid confusion with the default string methods
local m_scriptutils = require("Module:script utilities")
-- of the same name.
local usub = mw.ustring.sub
 
local table_insert = table.insert
local table_concat = table.concat
 
local ignore_cap = {
Line 20 ⟶ 27:
 
local phonetic_extraction = {
["th"] = "Module:th",
["km"] = "Module:km",
}
 
Line 35 ⟶ 43:
}
 
local unsupported_titles
function export.getLinkPage(target, lang)
ifunsupported_titles = unsupported_titles or mw.loadData("Module:links/data").unsupported_titles[target] then
return if "Unsupported titles/" .. mw.loadData("Module:links/data").unsupported_titles[target] then
return "Unsupported titles/" .. unsupported_titles[target]
end
-- If the link contains unexpanded template parameters, then don't create a link.
if target:find("{{{", nil, true) then
return nil
end
if target:findsub(1, 1) == "^:") or target:findsub(1, 2) == "^w:") or target:findsub(1, 10) == "^wikipedia:") then
return target
end
Line 51 ⟶ 61:
-- Remove diacritics from the page name
target = lang:makeEntryName(target)
if target:sub(1, 1) == "/" then
return ":" .. target
-- Link to appendix for reconstructed terms and terms in appendix-only languages
ifelseif target:findsub(1, 1) == "^*.") and #target > 1 then
if lang:getCode() == "und" then
return nil
end
target = "Reconstruction:" .. lang:getCanonicalName() .. "/" .. mw.ustring.subusub(target, 2)
elseif lang:getType() == "reconstructed" then
error("The specified language " .. lang:getCanonicalName()
.. " is unattested, while the given word is not marked with '*' to indicate that it is reconstructed")
elseif lang:getType() == "appendix-constructed" then
target = "Appendix:" .. lang:getCanonicalName() .. "/" .. target
Line 70 ⟶ 84:
-- Make a language-specific link from given link's parts
local function makeLangLink(link, lang, id, allowSelfLink)
-- Temporary tracking code
local langCode = lang:getCode()
if langCode == "se" or langCode == "sia" or langCode:find("^sm[ajns]$")
or langCode:find("^sj[dektu]$") then
if link.display and link.display:find("'") then
require("Module:debug").track("links/Sami apostrophe display")
elseif link.target and link.target:find("'") then
require("Module:debug").track("links/Sami apostrophe target")
end
end
-- Find fragments (when link didn't come from parseLink).
-- Prevents {{l|en|word#Etymology 2|word}} from linking to [[word#Etymology 2#English]].
if link.fragment == nil then
-- Replace numeric character references with the corresponding character (&#39; → '),
-- as they contain #, which causes the numeric character reference to be
-- misparsed (wa'a → wa&#39;a → pagename wa&, fragment 39;a).
link.target = link.target:gsub("&#(%d+);",
function(number) return mw.ustring.char(tonumber(number)) end)
local first, second = link.target:match("^([^#]+)#(.+)$")
if first then
link.target, link.fragment = first, second
end
end
-- If there is no display form, then create a default one
if not link.display then
Line 76 ⟶ 115:
-- Strip the prefix from the displayed form
-- TODO: other interwiki links?
if link.display:findsub(1, 1) == "^:") and not mw.loadData("Module:links/data").unsupported_titles[link.display] then
link.display = link.display:gsubsub("^:", ""2) -- remove colon from beginning
else
local prefix = link.display:match("^([^:]+):")
Line 86 ⟶ 125:
if prefixes[prefix] then
link.display = link.display:gsubsub("^"#prefix ..+ prefix2) ..-- ":",remove "")prefix plus colon
end
end
Line 98 ⟶ 137:
end
-- If the target is the same as the current page and there is no sense id
-- and linking to the same page hasn't been turned on, then return a "self-link"
if not allowSelfLink and not id and (link.target == mw.title.getCurrentTitle().prefixedText or link.target == ":" .. mw.title.getCurrentTitle().prefixedText) then
-- like the software does.
if not (allowSelfLink or id) and link.target:gsub("^:", "") == mw.title.getCurrentTitle().prefixedText then
return "<strong class=\"selflink\">" .. link.display .. "</strong>"
end
Line 108 ⟶ 149:
TabbedLanguages handles links without a section by linking to the "last visited" section,
but adding "Undetermined" would break that feature.
For localized prefixes that would cause a syntax error, please use the format: ["xyz"] = true,
]]
local prefix = link.target:match("^:?([^:]+):")
Line 116 ⟶ 158:
}
if not (prefix and prefixes[prefix]) then
if link.fragment or mw.ustringlink.target:find(link.target, "#$") then
require("Module:debug").track {
"links/fragment",
"links/fragment/" .. lang:getCode()
Line 127 ⟶ 169:
if id then
link.fragment = require("Module:utilities").make_id(lang, id)
elseif not linkmw.ustring.target:find("^Appendix:") and not link.target:find(, "^ReconstructionAppendix:") then
and not mw.ustring.find(link.target, "^Reconstruction:") then
link.fragment = lang:getCanonicalName()
end
end
-- This allows linking to pages like [[sms:a]] without it being treated weirdly.
link.target = mw.ustringlink.target:gsub(link.target, ":", "&#x3a;")
end
Line 142 ⟶ 185:
-- Split a link into its parts
local function parseLink(linktext)
local link = { target = linktext }
local first, second = link.target:match("^([^|]+)|(.+)$")
local found, _, first, second
if first then
found, _, first, second = mw.ustring.find(link.target, "^([^|]+)|(.+)$")
if found then
link.target = first
link.display = second
Line 154 ⟶ 195:
end
found, _, first, second = mw.ustring.find(link.target, :match("^(.+)#(.+)$")
if foundfirst then
link.target = first
link.fragment = second
else
-- So that makeLangLink does not look for a fragment again
link.fragment = false
end
Line 175 ⟶ 219:
if ignore_cap[data.lang:getCode()] and text then
text = mw.ustring.text:gsub(text, "%^", "")
end
Line 195 ⟶ 239:
if data.alt then
require("Module:debug").track("links/alt-ignored")
mw.log("(from Module:links)", "text with embedded wikilinks:", text,
"ignored alt:", data.alt, "lang:", data.lang:getCode())
end
if data.id then
require("Module:debug").track("links/id-ignored")
mw.log("(from Module:links)", "text with embedded wikilinks:", text,
"ignored id:", data.id, "lang:", data.lang:getCode())
end
-- Begins and ends with a wikilink tag
if mw.ustring.text:find(text, "^%[%[(.+)%]%]$") then
-- There are no [ ] in between.
-- This makes the wikilink tag redundant.
if mw.ustring.text:find(text, "^%[%[[^%[%]]+%]%]$") then
require("Module:debug").track("links/redundant wikilink")
else
local temp = mw.ustring.text:gsub(text, "^%[%[(.+)%]%]$", "%1")
temp = mw.ustring.temp:gsub(temp, "%]%], %[%[", "|")
if not mw.ustring.temp:find(temp, "[%[%]]") then
require("Module:debug").track("links/list")
end
Line 217 ⟶ 265:
end
text = mw.ustring.text:gsub(text, "%[%[([^%]]+)%]%]",
function(linktext)
local link = parseLink(linktext)
Line 226 ⟶ 274:
return makeLangLink(link, data.lang, data.id, allowSelfLink, dontLinkRecons)
end)
)
-- Remove the extra * at the beginning if it's immediately followed
-- by a link whose display begins with * too
if allReconstructed then
text = mw.ustring.text:gsub(text, "^%*%[%[([^|%]]+)|%*", "[[%1|*")
end
else
-- There is no embedded wikilink, make a link using the parameters.
text = makeLangLink({ target = text, display = data.alt }, data.lang, data.id, allowSelfLink, dontLinkRecons)
end
return text
end
 
function export.mark(text, itemType, face, lang)
local tag = { "", "" }
if itemType == "gloss" then
tag = { '<span class="mention-gloss-double-quote">“</span><span class="mention-gloss">',
'</span><span class="mention-gloss-double-quote">”</span>' }
elseif itemType == "tr" then
if face == "term" then
tag = { '<span lang="' .. lang:getCode() .. '" class="tr mention-tr Latn">',
'</span>' }
else
tag = { '<span lang="' .. lang:getCode() .. '" class="tr Latn">', '</span>' }
end
elseif itemType == "ts" then
tag = { '<span class="ts mention-ts Latn">/', '/</span>' }
elseif itemType == "pos" then
tag = { '<span class="ann-pos">', '</span>' }
elseif itemType == "annotations" then
tag = { '<span class="mention-gloss-paren annotation-paren">(</span>',
'<span class="mention-gloss-paren annotation-paren">)</span>' }
end
Line 270 ⟶ 324:
-- Interwiki link
if data.interwiki then
table.inserttable_insert(output, data.interwiki)
end
Line 280 ⟶ 334:
if data.genders and #data.genders > 0 then
local m_gen = require("Module:gender and number")
table.inserttable_insert(output, "&nbsp;" .. m_gen.format_list(data.genders, data.lang))
end
local annotations = {}
-- Transliteration and transcription
if data.tr or data.ts then
local kind
if face == "term" then
Line 294 ⟶ 348:
end
table.insert(annotations,if m_scriptutils.tag_translit(data.tr, and data.lang,ts kind))then
table_insert(annotations,
require("Module:script utilities").tag_translit(data.tr, data.lang, kind)
.. " " .. export.mark(data.ts, "ts"))
elseif data.ts then
table_insert(annotations, export.mark(data.ts, "ts"))
else
table_insert(annotations,
require("Module:script utilities").tag_translit(data.tr, data.lang, kind))
end
end
-- Gloss/translation
if data.gloss then
table.inserttable_insert(annotations, export.mark(data.gloss, "gloss"))
end
-- Part of speech
if data.pos then
-- debug category for pos= containing transcriptions
table.insert(annotations, pos_tags[data.pos] or data.pos)
if data.pos:find("/[^><]*/") then
data.pos = data.pos .. "[[Category:links likely containing transcriptions in pos]]"
end
 
table_insert(annotations, export.mark(pos_tags[data.pos] or data.pos, "pos"))
end
-- Literal/sum-of-parts meaning
if data.lit then
table.inserttable_insert(annotations, "literally " .. export.mark(data.lit, "gloss"))
end
if #annotations > 0 then
table.inserttable_insert(output, " " .. export.mark(table.concattable_concat(annotations, ", "), "annotations"))
end
return table.concattable_concat(output)
end
 
Line 322 ⟶ 390:
function export.full_link(data, face, allowSelfLink, dontLinkRecons)
if type(data) ~= "table" then
error("The first argument to the function full_link must be a table. "
.. "See Module:links/documentation for more information.")
end
Line 338 ⟶ 407:
if not data.sc then
data.sc = require("Module:scripts").findBestScript(data.alt or data.term, data.lang)
else
-- Track uses of sc parameter
local best = require("Module:scripts").findBestScript(data.alt or data.term, data.lang)
require("Module:debug").track("links/sc")
if data.sc:getCode() == best:getCode() then
require("Module:debug").track("links/sc/redundant")
require("Module:debug").track("links/sc/redundant/" .. data.sc:getCode())
else
require("Module:debug").track("links/sc/needed")
require("Module:debug").track("links/sc/needed/" .. data.sc:getCode())
end
end
Line 343 ⟶ 424:
if data.accel then
classlocal = "form-of lang-"= data.accel.form and data.lang:getCode()accel.form .. " -form-of" ..or data.accel""
local gender = data.accel.gender and "gender-" .. data.accel.gender or ""
local translit = data.accel.translit and "transliteration-" .. data.accel.translit or ""
-- This is decoded again by [[WT:ACCEL]].
local lemma = data.accel.lemma and "origin-" .. data.accel.lemma:gsub("%%", "."):gsub(" ", "_") or ""
local lemma_translit = data.accel.lemma_translit and "origin_transliteration-" .. data.accel.lemma_translit or ""
local no_store = data.accel.no_store and "form-of-nostore" or ""
local accel =
form .. " " ..
gender .. " " ..
translit .. " " ..
lemma .. " " ..
lemma_translit .. " " ..
no_store .. " "
class = "form-of lang-" .. data.lang:getCode() .. " " .. accel
end
-- Only make a link if the term has been given, otherwise just show the alt text without a link
link = require("Module:script utilities").tag_text(
link = m_scriptutils.tag_text(data.term and export.language_link(data, allowSelfLink, dontLinkRecons) or data.alt, data.lang, data.sc, face, class)
data.term and export.language_link(data, allowSelfLink, dontLinkRecons)
or data.alt, data.lang, data.sc, face, class)
else
--[[ No term to show.
Is there at least a transliteration we can work from? ]]
link = m_scriptutilsrequire("Module:script utilities").request_script(data.lang, data.sc)
if link == "" or not data.tr or data.tr == "-" then
Line 358 ⟶ 457:
if mw.title.getCurrentTitle().nsText ~= "Template" then
table.inserttable_insert(categories, "[[Category:" .. data.lang:getCanonicalName() .. " term requests]]")
end
Line 365 ⟶ 464:
end
table.inserttable_insert(output, link)
if data.tr == "" or data.tr == "-" then
Line 374 ⟶ 473:
data.tr = data.tr or m_phonetic.getTranslit(export.remove_links(data.term))
elseif (data.term or data.alt) and not data.sc:getCode():find("Lati?n") then
and not ((data.sc:getCode():find("Latn", nil, true)) or data.sc:getCode() == "Latinx") then
-- Try to generate a transliteration, unless transliteration has been
if not mw.loadData("Module:links/data").high_memory_entries[mw.title.getCurrentTitle().text] or not data.tr then
-- supplied and we are in a high-memory entry.
-- Try to generate a transliteration if necessary
if not (data.tr and mw.loadData("Module:links/data").high_memory_entries[mw.title.getCurrentTitle().text]) then
local automated_tr = data.lang:transliterate(export.remove_links(data.alt or data.term), data.sc)
Line 386 ⟶ 485:
if manual_tr then
if manual_tr == automated_tr then
table.inserttable_insert(categories,
categories,
"[[Category:Terms with redundant transliterations]]"
.. "[[Category:Terms with redundant transliterations/" .. data.lang:getCode() .. "]]")
)
else
-- Prevents Arabic root categories from flooding the tracking categories.
if mw.title.getCurrentTitle().nsText ~= "Category" then
table.inserttable_insert(categories,
categories,
"[[Category:Terms with manual transliterations different from the automated ones]]"
.. "[[Category:Terms with manual transliterations different from the automated ones/" .. data.lang:getCode() .. "]]")
)
end
end
end
if (not manual_tr) or data.lang:overrideManualTranslit() then
data.tr = automated_tr
Line 412 ⟶ 507:
-- Link to the transliteration entry for languages that require this
if data.tr and data.lang:link_tr() then
data.tr = export.language_link { lang = data.lang, term = data.tr }
end
annotations =table_insert(output, export.format_link_annotations(data, face))
return table_concat(output) .. table_concat(categories)
table.insert(output, annotations)
return table.concat(output) .. table.concat(categories)
end
 
Line 435 ⟶ 528:
end
text = text:mw.ustring.gsub(text, "%[%[Category:[^|%]]-|?[^|%]]-%]%]", "")
text = text:gsub("%[%[[^|%]]-|", "")
text = text:gsub("%[%[", "")
text = text:gsub("%]%]", "")
 
return text
end
Line 446 ⟶ 539:
local lang = require("Module:languages").getByCode("en")
-- Parentheses around function call to remove second return value, the
text = mw.ustring.gsub(text, "%[%[([^%]]+)%]%]",
-- number of replacements.
return (text:gsub("%[%[([^%]]+)%]%]",
function(linktext)
local link = parseLink(linktext)
return makeLangLink(link, lang, nil, true, false)
end))
)
return text
end
 
Line 485 ⟶ 577:
end
return table_concat {
return '<span class="' .. data.scCode .. '" lang="' .. data.langCode .. '">[[' .. data.term .. "#" .. fragment .. "|" .. ( data.alt or data.term ) .. "]]</span>"
'<span class="', data.scCode, '" lang="', data.langCode,
'">[[', data.term, "#", fragment, "|", (data.alt or data.term), "]]</span>"
}
end
 
--[=[
For example, Norwegian_Bokm.C3.A5l → Norwegian_Bokmål. 0xC3 and 0xA5 are the
hexadecimal-base representation of the two bytes used to encode the character
å in the UTF-8 encoding:
11000011 10100101
Note that the bytes used to represent a character are actually different from
the Unicode codepoint. For å, the codepoint is 0xE5. The bits (digits) that
actually spell the codepoint are found in the brackets: 110[00011] 10[100101].
For further explanation, see [[w:UTF-8#Description]].
]=]
 
-- Pattern for a single uppercase hexadecimal digit. The built-in class %x is
-- deliberately avoided: it also accepts a-f, which never occur in these
-- anchor encodings.
local capitalHex = "[0-9A-F]"

-- Decodes an anchor in which bytes are written as a period followed by two
-- uppercase hexadecimal digits (for example ".C3.A5").
-- @param anchor string: the encoded anchor text
-- @return string: the anchor with every ".XX" sequence replaced by the single
--         byte whose value is XX; only this one value is returned (the
--         replacement count from gsub is discarded).
local function decodeAnchor(anchor)
	local escapedByte = "%.(" .. capitalHex .. capitalHex .. ")"

	-- Turns the two captured hex digits into the byte they denote.
	local function toByte(hexByte)
		local byteValue = tonumber(hexByte, 16)
		return string.char(byteValue)
	end

	-- Assigning to a single local drops gsub's second result.
	local decoded = anchor:gsub(escapedByte, toByte)
	return decoded
end
 
Line 493 ⟶ 611:
end
link = mw.ustring.link:gsub(link, "_", " ")
local numberSigns = require("Module:string").count(link, "#")
Line 501 ⟶ 619:
end
local page, section = mw.ustring.link:match(link, "^([^#]+)#(.+)$")
if page and section then
section = decodeAnchor(section)
return table.concat{ "[[" , page, "#", section, "|", page, " § ", section, "]]" }
-- URI-encode (percent-encode) section to allow square brackets, [],
-- in section name. If not percent-encoded, they prevent the parser from
-- recognizing the link.
return table_concat { "[[", page, "#", mw.uri.encode(section, "WIKI"), "|", page, " § ", section, "]]" }
else
error('The function "' .. section_link .. '" could not find a number sign marking a section name.')