Module:Lang/data: Difference between revisions
Content deleted Content added
+art; |
Johnrdorazio (talk | contribs) m 1 revision imported |
||
(8 intermediate revisions by 2 users not shown) | |||
Line 1:
local lang_obj = mw.language.getContentLanguage();
local this_wiki_lang_tag = lang_obj.code; -- get this wiki's language tag
--[[--------------------------< L A N G _ N A M E _ T A B L E >------------------------------------------------
Line 10 ⟶ 14:
all of these data come from separate modules that are derived from the IANA language-subtag-registry file
key_to_lower() avoids the metatable trap and sets all keys in the subtables to lowercase. Some language tags
have multiple associated names; Module:lang is only concerned with the first name so key_to_lower() only fetches
the first name.
]]
Line 24 ⟶ 22:
local function key_to_lower (module, src_type)
local out = {};
local source = (('
if '
for k, v in pairs (source) do
out[k:lower()] = v; -- for variant
end
elseif 'lang' == src_type and source.active then -- for ~/iana_languages (active)
for k, v in pairs (source.active) do
out[k:lower()] =
end
elseif 'lang_dep' == src_type and source.deprecated then -- for ~/iana_languages (deprecated)
for k, v in pairs (source.deprecated) do
out[k:lower()] =
end
else -- here for all other sources
for k, v in pairs (source) do
out[k:lower()] =
end
end
Line 48 ⟶ 46:
end
local
lang = key_to_lower ('Module:Language/data/iana languages', 'lang'),
lang_dep = key_to_lower ('Module:Language/data/iana languages', 'lang_dep'),
script = key_to_lower ('Module:Language/data/iana scripts'), -- script keys are capitalized; set to lower
region = key_to_lower ('Module:Language/data/iana regions'), -- region keys are uppercase; set to lower
variant = key_to_lower ('Module:Language/data/iana variants', '
suppressed = key_to_lower ('Module:Language/data/iana suppressed scripts', 'var_sup'), -- script keys are capitalized; set to lower
}
--[[--------------------------< I 1 8 N M E D I A W I K I O V E R R I D E >--------------------------------
For internationalization; not used at en.wiki
The language names taken from the IANA language-subtag-registry file are given in English. That may not be ideal.
Translating ~8,000 language names is also not ideal. MediaWiki maintains (much) shorter lists of language names
in most languages for which there is a Wikipedia edition. When desired, Module:Lang can use the MediaWiki
language list for the local language.
Caveat lector: the list of MediaWiki language names for your language may not be complete or may not exist at all.
When incomplete, MediaWiki's list will 'fall back' to another language (typically English). When that happens
add an appropriate entry to the override table below.
Caveat lector: the list of MediaWiki language names for your language may not be correct. At en.wiki, the
MediaWiki language names do not agree with the IANA language names for these ISO 639-1 tags. Often it is simply
spelling differences:
bh: IANA: Bihari languages MW: Bhojpuri – the ISO 639-3 tag for Bhojpuri is bho
bn: IANA: Bengali MW: Bangla – Bengali is the exonym, Bangla is the endonym
dv: IANA: Dhivehi MW: Divehi
el: IANA: Modern Greek MW: Greek
ht: IANA: Haitian MW: Haitian Creole
ky: IANA: Kirghiz MW: Kyrgyz
li: IANA: Limburgan MW: Limburgish
or: IANA: Oriya MW: Odia
os: IANA: Ossetian MW: Ossetic
pa: IANA: Panjabi MW: Punjabi
ps: IANA: Pushto MW: Pashto
to: IANA: Tonga MW: Tongan
ug: IANA: Uighur MW: Uyghur
use the override table to override language names that are incorrect for your project
To see the list of names that MediaWiki has for your language, enter this in the Debug console:
=mw.dumpObject (mw.language.fetchLanguageNames ('<tag>', 'all'))
(replacing <tag> with the language tag for your language)
Use of the MediaWiki language names lists is enabled when media_wiki_override_enable is set to boolean true.
]]
-- Switch that controls whether MediaWiki's language names replace the IANA names.
-- Always false at en.wiki; set to boolean true to enable the override.
-- caveat lector: MediaWiki's list of language names for your language may be incomplete or may not exist at all
local media_wiki_override_enable = false;

if media_wiki_override_enable == true then
	-- table of language tag/name pairs known to MediaWiki, requested for this wiki's language tag
	local mw_names_by_tag = mw.language.fetchLanguageNames (this_wiki_lang_tag, 'all');

	for code, mw_name in pairs (mw_names_by_tag) do				-- visit every tag/name pair in the MediaWiki list
		local known = lang_name_table_t.lang[code];				-- only tags already present in the main list are touched
		if known then
			lang_name_table_t.lang[code] = mw_name;				-- overwrite existing name with the name from MediaWiki
		end
	end
end
Line 67 ⟶ 116:
local override = {
------------------------------< I S O
["ab"] = "Abkhaz", -- to match en.wiki article name
["ca-valencia"] = "Valencian",
["
["de-at"] = "Austrian German", -- these code-region and code-variant tags to match en.wiki article names
["
["en-
["en-
["en-
["en-
["en-
["en-
["en-
["en-
["en-za"] = "South African English",
["
["mo"] = "Moldovan", -- Moldavian (deprecated code); to match en.wiki article title
["
["
["ps"] = "Pashto", -- Pushto
["pt-br"] = "Brazilian Portuguese", -- match MediaWiki
["tw-asante"] = "Asante Twi",
["ug"] = "Uyghur", -- 2nd IANA name; to match en.wiki article name
-- these ISO 639-1 language-name overrides imported from Module:Language/data/wp_languages
--<begin do-not-edit except to comment out>--
["av"] =
["bo"] =
["el"] =
-- ["en-SA"] =
["ff"] =
["ht"] =
["hz"] =
["ii"] =
["ki"] =
["kl"] =
["ky"] =
["lg"] =
["li"] =
["mi"] =
["na"] =
["nb"] =
["nd"] =
["nn"] =
["nr"] =
["ny"] =
["oj"] =
["or"] =
["pa"] =
["rn"] =
["sl"] =
["ss"] =
["st"] =
["to"] =
--<end do-not-edit except to comment out>--
------------------------------< I S O
["
["
["
["
["
["
["
["
["
["
["
["
["
["
["
["
["
[
["
["
["
["
["
["
['mis'] = "uncoded", -- Uncoded languages; capitalization; special scope, not collective scope;
["
["
[
[
["
["new"] = "Newar", -- Newari, Nepal Bhasa; to match en,wiki article title
["ngf"] = "Trans–New Guinea languages", -- to match en.wiki article title (endash)
["nic"] = "Niger–Congo languages", -- Niger-Kordofanian languages; to match en,wiki article title
["nrf"] = "Norman", -- not quite a collective - IANA name: Jèrriais + Guernésiais; categorizes to Norman-language text
["nrf-gg"] = "Guernésiais", -- match MediaWiki
["nrf-je"] = "Jèrriais", -- match MediaWiki
["nzi"] = "Nzema", -- Nzima; to match en.wiki article title
["oma"] = "Omaha–Ponca", -- to match en.wiki article title (endash)
["orv"] = "Old East Slavic", -- Old Russian
["pfl"] = "Palatine German", -- Pfaelzisch; to match en.wiki article
["pie"] = "Piro Pueblo", -- Piro; to match en.wiki article
["pms"] = "Piedmontese", -- Piemontese; to match en.wiki article title
["pnb"] = "Punjabi (Western)", -- Western Panjabi; dab added to override import from ~/wp languages and distinguish pnb from pa in reverse look up tag_from_name()
["rop"] = "Australian Kriol", -- Kriol; en.wiki article is a dab; point to correct en.wiki article
["sdo"] = "Bukar–Sadong", -- Bukar-Sadung Bidayuh; to match en.wiki article title
["stq"] = "Saterland Frisian", -- Saterfriesisch
["und"] = "undetermined", -- capitalization to match existing category
["wrg"] = "Warrongo", -- Warungu
["xal-ru"] = "Kalmyk", -- to match en.wiki article title
["xgf"] = "Tongva", -- ISO 639-3 is Gabrielino-Fernandeño
["yuf"] = "Havasupai–Hualapai", -- Havasupai-Walapai-Yavapai; to match en.wiki article title
["zxx"] = "no linguistic content", -- capitalization
-- these ISO 639-2, -3 language-name overrides imported from Module:Language/data/wp_languages
--<begin do-not-edit except to comment out>--
["ace"] =
["aec"] =
["akl"] =
["alt"] =
["apm"] =
["bal"] =
-- ["bcl"] =
["bin"] =
["bpy"] =
["chg"] =
["ckb"] =
["cnu"] =
["coc"] =
["diq"] =
["fit"] =
["fkv"] =
["frk"] =
["gez"] =
["gju"] =
["gsw"] =
["gul"] =
["hak"] =
["hbo"] =
["hnd"] =
-- ["ikt"] =
["kaa"] =
["khb"] =
["kmr"] =
["kpo"] =
["krj"] =
["ktz"] =
["lez"] =
["liv"] =
["lng"] =
["mia"] =
["miq"] =
["mix"] =
["mni"] =
["mrj"] =
["mww"] =
["nds-nl"] =
-- ["new"] =
["nso"] =
-- ["nwc"] =
["ood"] =
["otk"] =
["pal"] =
["pam"] =
["phr"] =
["pka"] =
-- ["pnb"] =
["psu"] =
["rap"] =
["rar"] =
["rmu"] =
["rom"] =
["rup"] =
["ryu"] =
["sdc"] =
["sdn"] =
["shp"] =
["src"] =
["sro"] =
["tkl"] =
["tvl"] =
["tyv"] =
["vls"] =
["wep"] =
["xal"] =
["xcl"] =
["yua"] =
--<end do-not-edit except to comment out>--
------------------------------< P R I V A T E
["akk-x-latbabyl"] = "Late Babylonian",
["akk-x-midassyr"] = "Middle Assyrian Akkadian",
["akk-x-midbabyl"] = "Middle Babylonian Akkadian",
["
["
["
["
["
["alg-x-proto"] = "Proto-Algonquian", -- alg in IANA is Algonquian languages
["
["cel-x-proto"] = "Proto-Celtic", -- cel in IANA is Celtic languages
["
["
["egy-x-middle"] = "Middle Egyptian",
["egy-x-old"] = "Old Egyptian",
["
["gmw-x-ecg"] = "East Central German",
["
["grc-x-attic"] = "Attic Greek",
["grc-x-biblical"] = "Biblical Greek",
["grc-x-byzant"] = "Byzantine Greek",
["
["
["grc-x-hellen"] = "Hellenistic Greek",
["grc-x-ionic"] = "Ionic Greek",
["grc-x-koine"] = "Koinē Greek",
["grc-x-medieval"] = "Medieval Greek",
["grc-x-patris"] = "Patristic Greek",
["grk-x-proto"] = "Proto-Greek", -- grk in IANA is Greek languages
["iir-x-proto"] = "Proto-Indo-Iranian", -- iir in IANA is Indo-Iranian Languages
["ine-x-proto"] = "Proto-Indo-European",
["ira-x-proto"] = "Proto-Iranian", -- ira in IANA is Iranian languages
["itc-x-proto"] = "Proto-Italic", -- itc in IANA is Italic languages
["ksh-x-colog"] = "Colognian", -- en.wiki article is Colognian; ksh (Kölsch) redirects there
["la-x-medieval"] = "Medieval Latin",
["la-x-new"] = "New Latin",
["lmo-x-milanese"] = "Milanese", -- lmo in IANA is Lombard; Milanese is a dialect
["mis-x-ripuar"] = "Ripuarian", -- replaces improper use of ksh in wp_languages
["prg-x-old"] = "Old Prussian",
["sem-x-ammonite"] = "Ammonite",
["sem-x-aramaic"] = "Aramaic",
["sem-x-canaan"] = "Canaanite languages",
["sem-x-dumaitic"] = "Dumaitic",
["sem-x-egurage"] = "Eastern Gurage",
["sem-x-hatran"] = "Hatran Aramaic",
["sem-x-oldsoara"] = "Old South Arabian",
["sem-x-palmyren"] = "Palmyrene Aramaic",
["sem-x-proto"] = "Proto-Semitic",
["sem-x-taymanit"] = "Taymanitic",
["sla-x-proto"] = "Proto-Slavic", -- sla in IANA is Slavic languages
["yuf-x-hav"] = "Havasupai", -- IANA name for these three is Havasupai-Walapai-Yavapai
["yuf-x-wal"] = "Walapai",
["yuf-x-yav"] = "Yavapai",
}
Line 261 ⟶ 366:
for those rare occasions when article titles don't fit with the normal '<language name>-language', this table
maps language code to article title.
that is the long-standing method of handling article names that don't fit with the normal pattern
Line 267 ⟶ 372:
local article_name = {
[
[
['
['
[
['mru'] = "Mono language (Cameroon)", -- Mono (Cameroon)
['qwm'] = "Kuman (Russia)", -- Kuman (Russia); to avoid Kuman dab page
["snq"] = "Sangu language (Gabon)", -- Sangu (Gabon)
["xlg"] = "Ligurian (ancient language)", -- see Template_talk:Lang#Ligurian_dab
["zmw"] = "Mbo language (Congo)", -- Mbo (Democratic Republic of Congo)
}
Line 277 ⟶ 387:
--[=[-------------------------< R T L _ S C R I P T S >--------------------------------------------------------
ISO 15924 scripts that are written right-to-left.
last update to this list: 2017-12-24
Line 291 ⟶ 401:
--[[--------------------------< T R A N S L I T
This is a table of tables of transliteration standards and the language codes or language scripts that apply to
those standards.
by some of the {{lang-??}} templates.
These tables are more-or-less copied directly from {{transl}}.
the cases when no |std= parameter value is provided.
Line 418 ⟶ 528:
['gu'] = 'ISO 15919 Indic',
['hi'] = 'ISO 15919 Indic',
['hno'] = 'ISO 15919 Indic',
['inc'] = 'ISO 15919 Indic',
['kn'] = 'ISO 15919 Indic',
Line 430 ⟶ 541:
['or'] = 'ISO 15919 Indic',
['pa'] = 'ISO 15919 Indic',
['pnb'] = 'ISO 15919 Indic',
['raj'] = 'ISO 15919 Indic',
['sa'] = 'ISO 15919 Indic',
Line 435 ⟶ 547:
['sd'] = 'ISO 15919 Indic',
['si'] = 'ISO 15919 Indic',
['skr'] = 'ISO 15919 Indic',
['ta'] = 'ISO 15919 Indic',
['tcy'] = 'ISO 15919 Indic',
Line 455 ⟶ 568:
['jyutping'] = {
['default'] = 'Jyutping transliteration',
},
['mlcts'] = {
['default'] = 'Myanmar Language Commission Transcription System',
},
Line 486 ⟶ 603:
['satts'] = {
['default'] = 'Standard Arabic Technical Transliteration System transliteration',
},
['scientific'] = {
['default'] = 'scientific transliteration',
},
['ukrainian'] = {
['default'] = 'Ukrainian National system of romanization',
},
Line 504 ⟶ 629:
return
{
this_wiki_lang_tag = this_wiki_lang_tag,
this_wiki_lang_dir = lang_obj:getDir(), -- wiki's language direction
article_name = article_name,
lang_name_table =
override = override,
rtl_scripts = rtl_scripts,
special_tags_table = special_tags_table,
translit_title_table = translit_title_table,
};
|