Module:Lang: Difference between revisions

m (1 revision imported)
(added the template namespace to the error category)
Line 6: Line 6:


require('Module:No globals');
require('Module:No globals');
local initial_style_state; -- set by lang_xx_normal() and lang_xx_italic()


local getArgs = require ('Module:Arguments').getArgs;
local getArgs = require ('Module:Arguments').getArgs;
Line 13: Line 11:
local yesno = require ('Module:Yesno');
local yesno = require ('Module:Yesno');


local lang_name_table = mw.loadData ('Module:Language/name/data');
local lang_data = mw.loadData ('Module:Lang/data'); -- language name override and transliteration tool-tip tables
local lang_name_table = lang_data.lang_name_table; -- language codes, names, regions, scripts, suppressed scripts
local lang_table = lang_data.lang_name_table.lang;
local lang_dep_table = lang_data.lang_name_table.lang_dep;
local script_table = lang_data.lang_name_table.script;
local region_table = lang_data.lang_name_table.region;
local variant_table = lang_data.lang_name_table.variant;
local suppressed_table = lang_data.lang_name_table.suppressed;
local override_table = lang_data.override;


local synonym_table = mw.loadData ('Module:Lang/ISO 639 synonyms'); -- ISO 639-2/639-2T code translation to 639-1 code
local synonym_table = mw.loadData ('Module:Lang/ISO 639 synonyms'); -- ISO 639-2/639-2T code translation to 639-1 code
local lang_data =  mw.loadData ('Module:Lang/data'); -- language name override and transliteration tool-tip tables


local namespace = mw.title.getCurrentTitle().namespace; -- used for categorization
local namespace = mw.title.getCurrentTitle().namespace; -- used for categorization


local this_wiki_lang = mw.language.getContentLanguage().code; -- get this wiki's language
local this_wiki_lang = mw.language.getContentLanguage().code; -- get this wiki's language
local initial_style_state; -- set by lang_xx_normal() and lang_xx_italic()


local maint_cats = {}; -- maintenance categories go here
local maint_cats = {}; -- maintenance categories go here
Line 221: Line 227:
script subtag - four character IANA script code
script subtag - four character IANA script code
region subtag - two-letter or three digit IANA region code
region subtag - two-letter or three digit IANA region code
variant subtag - four digit or 5-8 alnum variant code
variant subtag - four digit or 5-8 alnum variant code; only one variant subtag supported
private subtag - x- followed by 1-8 alnum private code; only supported with the primary language tag
private subtag - x- followed by 1-8 alnum private code; only supported with the primary language tag


Line 311: Line 317:
code = code:lower(); -- ensure that we use and return lower case version of this
code = code:lower(); -- ensure that we use and return lower case version of this
if not (lang_data.override[code] or lang_name_table.lang[code]) then
if not (override_table[code] or lang_table[code] or synonym_table[code] or lang_dep_table[code]) then
return nil, nil, nil, nil, nil, table.concat ({'unrecognized language code: ', code}); -- invalid language code, don't know about the others (don't care?)
return nil, nil, nil, nil, nil, table.concat ({'unrecognized language code: ', code}); -- invalid language code, don't know about the others (don't care?)
end
end
Line 331: Line 337:
if is_set (script) then
if is_set (script) then
script = script:lower(); -- ensure that we use and return lower case version of this
script = script:lower(); -- ensure that we use and return lower case version of this
if not lang_name_table.script[script] then
if not script_table[script] then
return code, nil, nil, nil, nil, table.concat ({'unrecognized script: ', script, ' for code: ', code}); -- language code ok, invalid script, don't know about the others (don't care?)
return code, nil, nil, nil, nil, table.concat ({'unrecognized script: ', script, ' for code: ', code}); -- language code ok, invalid script, don't know about the others (don't care?)
end
end
end
end
if lang_name_table.suppressed[script] then -- ensure that code-script does not use a suppressed script
if suppressed_table[script] then -- ensure that code-script does not use a suppressed script
if in_array (code, lang_name_table.suppressed[script]) then
if in_array (code, suppressed_table[script]) then
return code, nil, nil, nil, nil, table.concat ({'script: ', script, ' not supported for code: ', code}); -- language code ok, script is suppressed for this code
return code, nil, nil, nil, nil, table.concat ({'script: ', script, ' not supported for code: ', code}); -- language code ok, script is suppressed for this code
end
end
Line 351: Line 357:
if is_set (region) then
if is_set (region) then
region = region:lower(); -- ensure that we use and return lower case version of this
region = region:lower(); -- ensure that we use and return lower case version of this
if not lang_name_table.region[region] then
if not region_table[region] then
return code, script, nil, nil, nil, table.concat ({'unrecognized region: ', region, ' for code: ', code});
return code, script, nil, nil, nil, table.concat ({'unrecognized region: ', region, ' for code: ', code});
end
end
Line 366: Line 372:
if is_set (variant) then
if is_set (variant) then
variant = variant:lower(); -- ensure that we use and return lower case version of this
variant = variant:lower(); -- ensure that we use and return lower case version of this
if not lang_name_table.variant[variant] then -- make sure variant is valid
if not variant_table[variant] then -- make sure variant is valid
return code, script, region, nil, nil, table.concat ({'unrecognized variant: ', variant});
return code, script, region, nil, nil, table.concat ({'unrecognized variant: ', variant});
end -- does this duplicate/replace tests in lang() and lang_xx()?
end -- does this duplicate/replace tests in lang() and lang_xx()?
if is_set (script) then -- if script set it must be part of the 'prefix'
if is_set (script) then -- if script set it must be part of the 'prefix'
if not in_array (table.concat ({code, '-', script}), lang_name_table.variant[variant]['prefixes']) then
if not in_array (table.concat ({code, '-', script}), variant_table[variant]['prefixes']) then
return code, script, region, nil, nil, table.concat ({'unrecognized variant: ', variant, ' for code-script pair: ', code, '-', script});
return code, script, region, nil, nil, table.concat ({'unrecognized variant: ', variant, ' for code-script pair: ', code, '-', script});
end
end
elseif is_set (region) then -- if region set, there are some prefixes that require lang code and region (en-CA-newfound)
elseif is_set (region) then -- if region set, there are some prefixes that require lang code and region (en-CA-newfound)
if not in_array (code, lang_name_table.variant[variant]['prefixes']) then -- first see if lang code is all that's required (en-oxendict though en-GB-oxendict is preferred)
if not in_array (code, variant_table[variant]['prefixes']) then -- first see if lang code is all that's required (en-oxendict though en-GB-oxendict is preferred)
if not in_array (table.concat ({code, '-', region}), lang_name_table.variant[variant]['prefixes']) then -- now try for lang code and region (en-CA-newfound)
if not in_array (table.concat ({code, '-', region}), variant_table[variant]['prefixes']) then -- now try for lang code and region (en-CA-newfound)
return code, script, region, nil, nil, table.concat ({'unrecognized variant: ', variant, ' for code-region pair: ', code, '-', region});
return code, script, region, nil, nil, table.concat ({'unrecognized variant: ', variant, ' for code-region pair: ', code, '-', region});
end
end
end
end
else
else
if not in_array (code, lang_name_table.variant[variant]['prefixes']) then
if not in_array (code, variant_table[variant]['prefixes']) then
return code, script, region, nil, nil, table.concat ({'unrecognized variant: ', variant, ' for code: ', code});
return code, script, region, nil, nil, table.concat ({'unrecognized variant: ', variant, ' for code: ', code});
end
end
Line 388: Line 394:
if is_set (private) then
if is_set (private) then
private = private:lower(); -- ensure that we use and return lower case version of this
private = private:lower(); -- ensure that we use and return lower case version of this
if not lang_data.override[table.concat ({code, '-x-', private})] then -- make sure private tag is valid; note that index  
if not override_table[table.concat ({code, '-x-', private})] then -- make sure private tag is valid; note that index  
return code, script, region, nil, nil, table.concat ({'unrecognized private tag: ', private});
return code, script, region, nil, nil, table.concat ({'unrecognized private tag: ', private});
end
end
Line 418: Line 424:
table.insert (out, '</span>');
table.insert (out, '</span>');
if (0 == namespace) and not args.nocat then -- only categorize in article space
if (0 == namespace or 10 == namespace) and not args.nocat then -- categorize in article space (and template space to take care of broken usages)
table.insert (out, table.concat ({'[[Category:', category, ' template errors]]'}));
table.insert (out, table.concat ({'[[Category:', category, ' template errors]]'}));
end
end
Line 568: Line 574:


if language_name:find ('languages') then
if language_name:find ('languages') then
retval = table.concat ({'[[Category:Articles with text from the ', language_name, ' collective]]'});
return table.concat ({'[[Category:Articles with text from the ', language_name, ' collective]]'});
-- retval = table.concat ({'[[Category:Articles with text from ', language_name, ']]'}); -- proposed version at Wikipedia:Categories_for_discussion/Log/2020_August_18#Category:Articles_with_text_from_the_Afro-Asiatic_languages_collective
-- return table.concat ({'[[Category:Articles with text from ', language_name, ']]'}); -- proposed version at Wikipedia:Categories_for_discussion/Log/2020_August_18#Category:Articles_with_text_from_the_Afro-Asiatic_languages_collective
retval = name_get and retval:gsub ('[%[%]]', '') or retval; -- when called from category_from_tag() strip wikilink markup to return plain-text category name
return retval;
end
end
Line 577: Line 581:


if 'en' == code then
if 'en' == code then
table.insert (cat, 'explicitly cited English');
table.insert (cat, 'explicitly cited ' .. language_name); -- falls back to English if regional name not available
else
else
table.insert (cat, language_name);
table.insert (cat, language_name);
Line 584: Line 588:
table.insert (cat, '-language text]]');
table.insert (cat, '-language text]]');


retval = table.concat (cat);
return table.concat (cat);
retval = name_get and retval:gsub ('[%[%]]', '') or retval; -- when called from category_from_tag() strip wikilink markup to return plain-text category name
return retval
end
end


Line 636: Line 638:
if title_table[std] then -- and if standard is legitimate
if title_table[std] then -- and if standard is legitimate
if title_table[std][tscript] then -- and if script for that standard is legitimate
if title_table[std][tscript] then -- and if script for that standard is legitimate
table.insert (tout, table.concat ({title_table[std][tscript:lower()], ' (', lang_name_table.script[tscript][1], ' script) transliteration'})); -- add the appropriate text to the tool tip
table.insert (tout, table.concat ({title_table[std][tscript:lower()], ' (', script_table[tscript][1], ' script) transliteration'})); -- add the appropriate text to the tool tip
else
else
table.insert (tout, title_table[std]['default']); -- use the default if script not in std table; TODO: maint cat? error message because script not found for this standard?
table.insert (tout, title_table[std]['default']); -- use the default if script not in std table; TODO: maint cat? error message because script not found for this standard?
Line 648: Line 650:
if title_table[std][code] then -- if language code is in the table (transl may not provide a language code)
if title_table[std][code] then -- if language code is in the table (transl may not provide a language code)
table.insert (tout, table.concat ({title_table[std][code:lower()], ' (', lang_name_table.lang[code][1], ' language) transliteration'})); -- add the appropriate text to the tool tip
-- table.insert (tout, table.concat ({title_table[std][code:lower()], ' (', lang_table[code][1], ' language) transliteration'})); -- add the appropriate text to the tool tip
table.insert (tout, table.concat ({title_table[std][code:lower()], ' (', language_name, ' language) transliteration'})); -- add the appropriate text to the tool tip
else -- code doesn't match
else -- code doesn't match
table.insert (tout, title_table[std]['default']); -- so use the standard's default
table.insert (tout, title_table[std]['default']); -- so use the standard's default
Line 837: Line 840:
common function to return language name from the data set according to IETF tag
common function to return language name from the data set according to IETF tag


returns language name
returns language name if found in data tables; nil otherwise


]]
]]


local function language_name_get (ietf, code, variant)
local function language_name_get (ietf, code, cat)
if lang_data.override[ietf:lower()] then -- look for whole IETF tag in override table
ietf = ietf:lower(); -- ietf:lower() because format_ietf_tag() returns mixed case
return lang_data.override[ietf:lower()][1]; -- ietf:lower() because format_ietf_tag() returns mixed case
elseif lang_data.override[code] then -- not there so try basic language code
local name; -- remains nil if not found
return lang_data.override[code][1];
elseif not is_set (variant) then -- shift to main code/name tables
if override_table[ietf] then -- look for whole IETF tag in override table
if lang_name_table.lang[code] then
name = override_table[ietf][1];
return lang_name_table.lang[code][1]; -- table entries sometimes have multiple names, always take the first one
elseif override_table[code] then -- not there so try basic language code
end
name = override_table[code][1];
else -- TODO: is this the right thing to do: take language display name from variants table?
elseif lang_table[code] then -- shift to iana code/name table
if lang_name_table.variant[variant] then -- TODO: there is some discussion at Template talk:Lang about having a label parameter for use when variant name is not desired among other things
name = lang_table[code][1]; -- table entries sometimes have multiple names, always take the first one
return lang_name_table.variant[variant]['descriptions'][1]; -- table entries sometimes have multiple names, always take the first one
elseif lang_dep_table[code] then
end
-- if cat then
-- table.insert (maint_cats, table.concat ({'Lang and lang-xx using deprecated ISO 639 codes|', code}));
-- table.insert (maint_msgs, table.concat ({'code: ', code, ' is deprecated'}));
-- end
name = lang_dep_table[code][1]; -- table entries sometimes have multiple names, always take the first one
end
 
if lang_dep_table[code] and cat then -- because deprecated code may have been overridden to en.wiki preferred name
table.insert (maint_cats, table.concat ({'Lang and lang-xx using deprecated ISO 639 codes|', code}));
table.insert (maint_msgs, table.concat ({'code: ', code, ' is deprecated'}));
end
end
return name; -- name from data tables or nil
end
end


--[[--------------------------< _ L A N G >--------------------------------------------------------------------
--[[--------------------------< _ L A N G >--------------------------------------------------------------------
Line 890: Line 905:
end
end
msg = validate_text (template, args); -- ensure that |text= is set
msg = validate_text (template, args); -- ensure that |text= is set
if is_set (msg) then -- msg is an already-formatted error message
if is_set (msg) then -- msg is an already-formatted error message
return msg;
return msg;
Line 935: Line 950:


args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant, subtags.private); -- format to recommended subtag styles
args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant, subtags.private); -- format to recommended subtag styles
language_name = language_name_get (args.code, code, subtags.variant); -- get language name; try ietf tag first, then code w/o variant then code w/ variant
language_name = language_name_get (args.code, code, true); -- get language name; try ietf tag first, then code w/o variant then code w/ variant


if 'invert' == args.italic and 'span' == tag then -- invert only supported for in-line content
if 'invert' == args.italic and 'span' == tag then -- invert only supported for in-line content
Line 1,113: Line 1,128:


args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant, subtags.private); -- format to recommended subtag styles
args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant, subtags.private); -- format to recommended subtag styles
language_name = language_name_get (args.code, code, subtags.variant); -- get language name; try ietf tag first, then code w/o variant then code w/ variant
language_name = language_name_get (args.code, code, true); -- get language name; try ietf tag first, then code w/o variant then code w/ variant


category_name = language_name; -- category names retain IANA parenthetical disambiguators (if any)
category_name = language_name; -- category names retain IANA parenthetical disambiguators (if any)
Line 1,149: Line 1,164:
if 'none' ~= args.label then
if 'none' ~= args.label then
table.insert (out, '<small>');
table.insert (out, '<small>');
if lang_name_table.script[args['translit-script']] then -- when |translit-script= is set, try to use the script's name
if script_table[args['translit-script']] then -- when |translit-script= is set, try to use the script's name
translit_script_name = lang_name_table.script[args['translit-script'][1]];
translit_script_name = script_table[args['translit-script'][1]];
else
else
translit_script_name = language_name; -- fall back on language name
translit_script_name = language_name; -- fall back on language name
Line 1,330: Line 1,345:


raw_code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant, subtags.private); -- format to recommended subtag styles; private omitted because private
raw_code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant, subtags.private); -- format to recommended subtag styles; private omitted because private
language_name = language_name_get (raw_code, code, subtags.variant); -- get language name; try ietf tag first, then code w/o variant then code w/ variant
language_name = language_name_get (raw_code, code); -- get language name; try ietf tag first, then code w/o variant then code w/ variant


language_name = language_name:gsub ('%s+%b()', ''); -- remove IANA parenthetical disambiguators or qualifiers from names that have them
language_name = language_name:gsub ('%s+%b()', ''); -- remove IANA parenthetical disambiguators or qualifiers from names that have them
Line 1,373: Line 1,388:


if args[1] and '' ~= args[1] then
if args[1] and '' ~= args[1] then
local data = mw.loadData ('Module:Lang/name to tag'); -- get the reversed data tables
local data = mw.loadData ('Module:Lang/tag from name'); -- get the reversed data tables TODO: change when going live
local lang = args[1]:lower(); -- allow any-case for the language name (spelling must still be correct)
local lang = args[1]:lower(); -- allow any-case for the language name (spelling must still be correct)
local tag = data.rev_lang_data[lang] or data.rev_lang_name_table[lang]; -- get the code; look first in the override then in the standard
local tag = data.rev_override_table[lang] or data.rev_lang_table[lang] or data.rev_lang_dep_table[lang]; -- get the code; look first in the override then in the standard


if tag then
if tag then
Line 1,458: Line 1,473:
end
end
else
else
return make_error_msg ('missing language / script code', args, 'transl'); -- missing language / script code so quit
return make_error_msg ('missing language / script code', args, 'transl'); -- missing language / script code so quit
end
end


Line 1,470: Line 1,485:
end
end


if lang_data.override[args.code] then -- is code a language code defined in the override table?
if override_table[args.code] then -- is code a language code defined in the override table?
language_name = lang_data.override[args.code][1];
language_name = override_table[args.code][1];
elseif lang_name_table.lang[args.code] then -- is code a language code defined in the standard language code tables?
elseif lang_table[args.code] then -- is code a language code defined in the standard language code tables?
language_name = lang_name_table.lang[args.code][1];
language_name = lang_table[args.code][1];
elseif lang_name_table.script[args.code] then -- if here, code is not a language code; is it a script code?
elseif lang_dep_table[args.code] then -- is code a language code defined in the deprecated language code tables?
language_name = lang_name_table.script[args.code][1];
language_name = lang_dep_table[args.code][1];
elseif script_table[args.code] then -- if here, code is not a language code; is it a script code?
language_name = script_table[args.code][1];
script = args.code; -- code was an ISO 15924 script so use that instead
script = args.code; -- code was an ISO 15924 script so use that instead
args.code = ''; -- unset because not a language code
args.code = ''; -- unset because not a language code
Line 1,514: Line 1,531:
local code; -- the language code
local code; -- the language code
local msg; -- gets an error message if IETF language tag is malformed or invalid
local msg; -- gets an error message if IETF language tag is malformed or invalid
local language_name = '';
local category_name = '';
code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (raw_code);
code, subtags.script, subtags.region, subtags.variant, subtags.private, msg = get_ietf_parts (raw_code);
Line 1,523: Line 1,540:


raw_code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant, subtags.private); -- format to recommended subtag styles; private omitted because private
raw_code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant, subtags.private); -- format to recommended subtag styles; private omitted because private
language_name = language_name_get (raw_code, code, subtags.variant); -- get language name; try ietf tag first, then code w/o variant then code w/ variant
category_name = language_name_get (raw_code, code); -- get language name; try ietf tag first, then code w/o variant then code w/ variant
category_name = make_category (code, category_name, nil, true):gsub ('[%[%]]', '');


return make_category (code, language_name, nil, true)
return category_name;
end
end