Module:Lang: Difference between revisions
Content deleted Content added
Johnrdorazio (talk | contribs) No edit summary |
sync from sandbox; |
||
Line 186: | Line 186: | ||
region: upper case |
region: upper case |
||
variant: lower case |
variant: lower case |
||
private: lower case prefixed with -x- |
|||
]] |
]] |
||
local function format_ietf_tag (code, script, region, variant) |
local function format_ietf_tag (code, script, region, variant, private) |
||
local out = {}; |
local out = {}; |
||
if is_set (private) then |
|||
return table.concat ({code:lower(), 'x', private:lower()}, '-'); -- if private, all other tags ignored |
|||
end |
|||
table.insert (out, code:lower()); |
table.insert (out, code:lower()); |
||
if is_set (script) then |
if is_set (script) then |
||
Line 432: | Line 437: | ||
if is_set (link) then |
if is_set (link) then |
||
if is_set (display) then |
if is_set (display) then |
||
return table.concat ({'[[ |
return table.concat ({'[[', link, '|', display, ']]'}); |
||
else |
else |
||
return table.concat ({'[[ |
return table.concat ({'[[', link, ']]'}); |
||
end |
end |
||
else |
else |
||
Line 522: | Line 527: | ||
table.insert (html, table.concat ({style_added, ' title=\"', language})); --start the title text |
table.insert (html, table.concat ({style_added, ' title=\"', language})); --start the title text |
||
if language:find ('languages') then |
if language:find ('languages') then |
||
table.insert (html, ' collective text |
table.insert (html, ' collective text'); -- for collective languages |
||
else |
else |
||
table.insert (html, ' language text |
table.insert (html, ' language text'); -- for individual languages |
||
end |
end |
||
table.insert (html, '>'); -- close the opening html tag |
table.insert (html, '\">'); -- close the opening html tag |
||
else |
else |
||
table.insert (html, table.concat ({style_added, '>'})); -- close the style attribute and close opening html tag |
table.insert (html, table.concat ({style_added, '>'})); -- close the style attribute and close opening html tag |
||
Line 549: | Line 554: | ||
[[Category:Articles containing explicitly cited English-language text]] |
[[Category:Articles containing explicitly cited English-language text]] |
||
for artificial languages (code: art) |
|||
[[Category:Articles containing constructed-language text]] |
|||
for ISO 639-2 collective languages (and for 639-1 bh): |
for ISO 639-2 collective languages (and for 639-1 bh): |
||
[[Category:Articles with text from the <language> languages collective]] |
[[Category:Articles with text from the <language> languages collective]] |
||
Line 557: | Line 559: | ||
]=] |
]=] |
||
local function make_category (code, language_name, nocat) |
local function make_category (code, language_name, nocat, name_get) |
||
local cat = {}; |
local cat = {}; |
||
local retval; |
|||
if (0 ~= namespace) or nocat then -- only categorize in article space |
|||
if ((0 ~= namespace) or nocat) and not name_get then -- only categorize in article space |
|||
return ''; -- return empty string for concatenation |
return ''; -- return empty string for concatenation |
||
end |
end |
||
if language_name:find ('languages') then |
if language_name:find ('languages') then |
||
retval = table.concat ({'[[Category:Articles with text from the ', language_name, ' collective]]'}); |
|||
-- |
-- retval = table.concat ({'[[Category:Articles with text from ', language_name, ']]'}); -- proposed version at Wikipedia:Categories_for_discussion/Log/2020_August_18#Category:Articles_with_text_from_the_Afro-Asiatic_languages_collective |
||
retval = name_get and retval:gsub ('[%[%]]', '') or retval; -- when called from category_from_tag() strip wikilink markup to return plain-text category name |
|||
return retval; |
|||
end |
end |
||
table.insert (cat, '[[Category:Articles containing '); |
table.insert (cat, '[[Category:Articles containing '); |
||
if 'en' == code then |
if 'en' == code then |
||
table.insert (cat, 'explicitly cited English'); |
table.insert (cat, 'explicitly cited English'); |
||
elseif 'art' == code then |
|||
table.insert (cat, 'constructed') |
|||
else |
else |
||
table.insert (cat, language_name); |
table.insert (cat, language_name); |
||
Line 581: | Line 584: | ||
table.insert (cat, '-language text]]'); |
table.insert (cat, '-language text]]'); |
||
retval = table.concat (cat); |
|||
retval = name_get and retval:gsub ('[%[%]]', '') or retval; -- when called from category_from_tag() strip wikilink markup to return plain-text category name |
|||
return retval |
|||
end |
end |
||
Line 827: | Line 832: | ||
end |
end |
||
--[[--------------------------< L A N G U A G E _ N A M E _ G E T >--------------------------------------------

Shared lookup that resolves a language name from the module's data tables.

Lookup order:
	1. lang_data.override keyed by the whole IETF tag (lower-cased here)
	2. lang_data.override keyed by the bare language code
	3. when no variant subtag is set: lang_name_table.lang keyed by the language code
	4. when a variant subtag is set: lang_name_table.variant keyed by the variant subtag

<ietf> – complete IETF tag; format_ietf_tag() returns mixed case so it is lower-cased before the override lookup
<code> – language subtag
<variant> – variant subtag; tested with is_set()

returns the first name listed in the matching table entry; returns nothing when no table has a matching entry

]]

local function language_name_get (ietf, code, variant)
	local overrides = lang_data.override;
	local override_entry = overrides[ietf:lower()] or overrides[code];			-- whole IETF tag first, then the basic language code

	if override_entry then
		return override_entry[1];
	end

	if is_set (variant) then													-- TODO: is this the right thing to do: take language display name from variants table?
		local variant_entry = lang_name_table.variant[variant];				-- TODO: there is some discussion at Template talk:Lang about having a label parameter for use when variant name is not desired among other things
		if variant_entry then
			return variant_entry['descriptions'][1];							-- table entries sometimes have multiple names, always take the first one
		end
	else																		-- no variant so shift to main code/name tables
		local name_entry = lang_name_table.lang[code];
		if name_entry then
			return name_entry[1];												-- table entries sometimes have multiple names, always take the first one
		end
	end
end
|||
--[[--------------------------< _ L A N G >-------------------------------------------------------------------- |
--[[--------------------------< _ L A N G >-------------------------------------------------------------------- |
||
Line 842: | Line 871: | ||
local out = {}; |
local out = {}; |
||
local language_name; -- used to make category names |
local language_name; -- used to make category names |
||
local category_name; -- same as language_name except that it retains any parenthetical disambiguators (if any) from the data set |
|||
local subtags = {}; -- IETF subtags script, region, variant, and private |
local subtags = {}; -- IETF subtags script, region, variant, and private |
||
local code; -- the language code |
local code; -- the language code |
||
Line 904: | Line 934: | ||
end |
end |
||
args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant); -- format to recommended subtag styles |
args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant, subtags.private); -- format to recommended subtag styles |
||
language_name = language_name_get (args.code, code, subtags.variant); -- get language name; try ietf tag first, then code w/o variant then code w/ variant |
|||
subtags.private = subtags.private and table.concat ({code, '-x-', subtags.private}) or nil; -- assemble a complete private ietf subtag; args.code does not get private subtag |
|||
if is_set (subtags.private) and lang_data.override[subtags.private] then -- get the language name for categorization |
|||
language_name = lang_data.override[subtags.private][1]; -- first look for private use tag language name |
|||
elseif lang_data.override[code] then |
|||
language_name = lang_data.override[code][1] -- then language names taken from the override table |
|||
elseif lang_name_table.lang[code] then |
|||
language_name = lang_name_table.lang[code][1]; -- table entries sometimes have multiple names, always take the first one |
|||
end |
|||
if 'invert' == args.italic and 'span' == tag then -- invert only supported for in-line content |
if 'invert' == args.italic and 'span' == tag then -- invert only supported for in-line content |
||
Line 1,091: | Line 1,112: | ||
end |
end |
||
args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant); -- format to recommended subtag styles |
args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant, subtags.private); -- format to recommended subtag styles |
||
language_name = language_name_get (args.code, code, subtags.variant); -- get language name; try ietf tag first, then code w/o variant then code w/ variant |
|||
subtags.private = subtags.private and table.concat ({code, '-x-', subtags.private}) or nil; -- assemble a complete private ietf subtag; args.code does not get private subtag |
|||
if is_set (subtags.private) and lang_data.override[subtags.private] then -- get the language name for categorization |
|||
language_name = lang_data.override[subtags.private][1]; -- first look for private use tag language name |
|||
elseif lang_data.override[args.code:lower()] then -- look for whole IETF tag in override table |
|||
language_name = lang_data.override[args.code:lower()][1]; -- args.code:lower() because format_ietf_tag() returns mixed case |
|||
elseif lang_data.override[code] then -- not there so try basic language code |
|||
language_name = lang_data.override[code][1]; |
|||
elseif not is_set (subtags.variant) then |
|||
if lang_name_table.lang[code] then |
|||
language_name = lang_name_table.lang[code][1]; -- table entries sometimes have multiple names, always take the first one |
|||
end |
|||
else -- TODO: is this the right thing to do: take language display name from variants table? |
|||
if lang_name_table.variant[subtags.variant] then -- TODO: there is some discussion at Template talk:Lang about having a label parameter for use when variant name is not desired among other things |
|||
language_name = lang_name_table.variant[subtags.variant]['descriptions'][1]; -- table entries sometimes have multiple names, always take the first one |
|||
end |
|||
end |
|||
category_name = language_name; -- category names retain IANA parenthetical diambiguators (if any) |
category_name = language_name; -- category names retain IANA parenthetical diambiguators (if any) |
||
Line 1,139: | Line 1,143: | ||
args.text = proto_prefix (args.text, language_name, args.proto); -- prefix proto-language text with a splat |
args.text = proto_prefix (args.text, language_name, args.proto); -- prefix proto-language text with a splat |
||
table.insert (out, make_text_html (args.code, args.text, tag, args.rtl, args.italic, args.size)) |
table.insert (out, make_text_html (args.code, args.text, tag, args.rtl, args.italic, args.size, nil)) |
||
if is_set (args.translit) and not unicode.is_Latin (args.text) then -- transliteration (not supported in {{lang}}); not supported when args.text is wholly latn text (this is an imperfect test) |
if is_set (args.translit) and not unicode.is_Latin (args.text) then -- transliteration (not supported in {{lang}}); not supported when args.text is wholly latn text (this is an imperfect test) |
||
Line 1,280: | Line 1,284: | ||
local function _is_ietf_tag (tag) -- entry point when this module is require()d into another |
local function _is_ietf_tag (tag) -- entry point when this module is require()d into another |
||
local c, s, r, v, p, err; -- code, script, region, private, error message |
local c, s, r, v, p, err; -- code, script, region, variant, private, error message |
||
c, s, r, v, p, err = get_ietf_parts (tag); -- disassemble tag into constituent part and validate |
c, s, r, v, p, err = get_ietf_parts (tag); -- disassemble tag into constituent part and validate |
||
Line 1,298: | Line 1,302: | ||
--[[--------------------------< _ N A M E _ F R O M _ |
--[[--------------------------< _ N A M E _ F R O M _ T A G >-------------------------------------------------- |
||
Returns language name associated with IETF language tag if valid; |
Returns language name associated with IETF language tag if valid; error message else. |
||
All code combinations supported by {{lang}} and the {{lang-xx}} templates are supported by this function. |
All code combinations supported by {{lang}} and the {{lang-xx}} templates are supported by this function. |
||
Line 1,314: | Line 1,318: | ||
local raw_code = args[1]; -- save a copy of the input IETF subtag |
local raw_code = args[1]; -- save a copy of the input IETF subtag |
||
local link = 'yes' == args['link']; -- make a boolean |
local link = 'yes' == args['link']; -- make a boolean |
||
local label = args.label; |
|||
local code; -- the language code |
local code; -- the language code |
||
local msg; -- gets an error message if IETF language tag is malformed or invalid |
local msg; -- gets an error message if IETF language tag is malformed or invalid |
||
Line 1,324: | Line 1,329: | ||
end |
end |
||
raw_code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant, subtags.private); -- format to recommended subtag styles; private omitted because private |
|||
if lang_data.override[raw_code:lower()] then -- look for whole IETF tag in override table (force lower case) |
|||
language_name = language_name_get (raw_code, code, subtags.variant); -- get language name; try ietf tag first, then code w/o variant then code w/ variant |
|||
language_name = lang_data.override[raw_code:lower()][1]; |
|||
elseif lang_data.override[code] then -- not there so try basic language code in override table |
|||
language_name = lang_data.override[code][1]; |
|||
elseif not is_set (subtags.variant) then |
|||
if lang_name_table.lang[code] then |
|||
language_name = lang_name_table.lang[code][1]; -- table entries sometimes have multiple names, always take the first one |
|||
end |
|||
else -- TODO: is this the right thing to do: take language display name from variants table? |
|||
if lang_name_table.variant[subtags.variant] then -- TODO: there is some discussion at Template talk:Lang about having a label parameter for use when variant name is not desired among other things |
|||
language_name = lang_name_table.variant[subtags.variant]['descriptions'][1]; -- table entries sometimes have multiple names, always take the first one |
|||
end |
|||
end |
|||
language_name = language_name:gsub ('%s+%b()', ''); -- remove IANA parenthetical disambiguators or qualifiers from names that have them |
language_name = language_name:gsub ('%s+%b()', ''); -- remove IANA parenthetical disambiguators or qualifiers from names that have them |
||
Line 1,342: | Line 1,336: | ||
if link then -- when |link=yes, wikilink the language name |
if link then -- when |link=yes, wikilink the language name |
||
if language_name:find ('languages') then |
if language_name:find ('languages') then |
||
language_name = make_wikilink (language_name); |
language_name = make_wikilink (language_name, label); -- collective language name uses simple wikilink |
||
elseif lang_data.article_name[code] then |
elseif lang_data.article_name[code] then |
||
language_name = make_wikilink (lang_data.article_name[code][1], language_name); -- language name with wikilink from override data |
language_name = make_wikilink (lang_data.article_name[code][1], label or language_name); -- language name with wikilink from override data |
||
else |
else |
||
language_name = make_wikilink (language_name .. ' language', language_name); -- language name with wikilink |
language_name = make_wikilink (language_name .. ' language', label or language_name); -- language name with wikilink |
||
end |
end |
||
end |
end |
||
return language_name; |
return language_name; |
||
end |
end |
||
Line 1,501: | Line 1,494: | ||
local function transl (frame) |
local function transl (frame) |
||
return _transl (getArgs(frame)); |
return _transl (getArgs(frame)); |
||
end |
|||
--[[--------------------------< _ C A T E G O R Y _ F R O M _ T A G >------------------------------------------

Returns category name associated with IETF language tag if valid; error message else

All code combinations supported by {{lang}} and the {{lang-xx}} templates are supported by this function.

Module entry point from another module

<args> – table of arguments:
	args[1] – the IETF language tag to look up
	args.template – optional template name; when present it is prefixed to the error message

returns whatever make_category() returns when called with name_get set (a category name), or an html error
message <span> when get_ietf_parts() reports that the tag is malformed or invalid

]]

local function _category_from_tag (args)
	local subtags = {};															-- IETF subtags script, region, variant, and private
	local raw_code = args[1];													-- save a copy of the input IETF subtag
	local code;																	-- the language code
	local msg;																	-- gets an error message if IETF language tag is malformed or invalid
	local language_name;

	code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (raw_code);

	if msg then
		local template = (args['template'] and table.concat ({'{{', args['template'], '}}: '})) or '';	-- make template name (if provided by the template)
		return table.concat ({'<span style=\"font-size:100%; font-style:normal;\" class=\"error\">error: ', template, msg, '</span>'});
	end

	raw_code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant, subtags.private);	-- format to recommended subtag styles; private subtag (if any) is passed along
	language_name = language_name_get (raw_code, code, subtags.variant);		-- get language name; try ietf tag first, then code w/o variant then code w/ variant

	return make_category (code, language_name, nil, true);						-- nocat not used; name_get = true requests the category name rather than a category wikilink
end
|||
--[[--------------------------< C A T E G O R Y _ F R O M _ T A G >--------------------------------------------

Module entry point from an {{#invoke:}}

Collects the arguments with getArgs() and hands them to _category_from_tag(); the result of that call is
returned unchanged.

]]

local function category_from_tag (frame)										-- entry point from an {{#invoke:Lang|category_from_tag|<ietf tag>|template=<template name>}}
	local args = getArgs (frame);												-- getArgs() so we also get parent frame
	return _category_from_tag (args);											-- pass-on the args table, nothing else
end
||
Line 1,508: | Line 1,544: | ||
return { |
return { |
||
category_from_tag = category_from_tag, |
|||
lang = lang, -- entry point for {{lang}} |
lang = lang, -- entry point for {{lang}} |
||
lang_xx_inherit = lang_xx_inherit, -- entry points for {{lang-??}} |
lang_xx_inherit = lang_xx_inherit, -- entry points for {{lang-??}} |
||
Line 1,516: | Line 1,553: | ||
name_from_tag = name_from_tag, -- used for template documentation; possible use in ISO 639 name from code templates |
name_from_tag = name_from_tag, -- used for template documentation; possible use in ISO 639 name from code templates |
||
transl = transl, -- entry point for {{transl}} |
transl = transl, -- entry point for {{transl}} |
||
_category_from_tag = _category_from_tag, -- entry points when this module is require()d into other modules |
|||
_lang = _lang, |
|||
_lang_xx_inherit = _lang_xx_inherit, |
_lang_xx_inherit = _lang_xx_inherit, |
||
_lang_xx_italic = _lang_xx_italic, |
_lang_xx_italic = _lang_xx_italic, |