Revision as of 20:02, September 23, 2020 view source Johnrdorazio (talk \| contribs) Bureaucrats, Interface administrators, Administrators 2,414 edits m 1 revision imported ← Older edit		Revision as of 10:57, October 4, 2020 view source en>Trappist the monk +art; Newer edit →
Line 1: --[[--------------------------< L A N G _ N A M E _ T A B L E >------------------------------------------------ ~~local lang_data = {};~~ primary table of tables that decode: lang -> language tags and names script -> ISO 15924 script tags region -> ISO 3166 region tags variant -> iana registered variant tags suppressed -> map of scripts tags and their associated language tags all of these data come from separate modules that are derived from the IANA language-subtag-registry file key_to_lower() avoids the metatable trap and sets all keys in the subtables to lowercase. Many language codes have multiple associated names; Module:lang is only concerned with the first name so key_to_lower() only fetches the first name. TODO: instead of returning: ["key"] = {"name"} where each table has only one name, return ["key"] = "name" requires changes in Module:Lang. ]] local function key_to_lower (module, src_type) local out = {}; local source = (('variants' == src_type) and require (module)) or mw.loadData (module); -- fetch data from this module; require() avoids metatable trap for variant data if 'variants' == src_type then for k, v in pairs (source) do out[k:lower()] = v; -- for variant, everything is needed end elseif 'lang' == src_type and source.active then -- for ~/iana_languages (active) for k, v in pairs (source.active) do out[k:lower()] = {v[1]}; -- ignore multiple names; take first name only end elseif 'lang_dep' == src_type and source.deprecated then -- for ~/iana_languages (deprecated) for k, v in pairs (source.deprecated) do out[k:lower()] = {v[1]}; -- ignore multiple names; take first name only end else -- here for all other sources for k, v in pairs (source) do out[k:lower()] = {v[1]}; -- ignore multiple names; take first name only end end return out; end local lang_name_table = { lang = key_to_lower ('Module:Language/data/iana languages', 'lang'), lang_dep = key_to_lower ('Module:Language/data/iana languages', 'lang_dep'), script = key_to_lower ('Module:Language/data/iana scripts'), -- script keys are capitalized; set to lower region = key_to_lower ('Module:Language/data/iana regions'), -- region keys are uppercase; set to lower variant = key_to_lower ('Module:Language/data/iana variants', 'variants'), suppressed = key_to_lower ('Module:Language/data/iana suppressed scripts'), -- script keys are capitalized; set to lower } --[[--------------------------< O V E R R I D E >-------------------------------------------------------------- Language codes and names in this table override the BCP47 names in ~~Module:Language/name/data~~lang_name_table. indexes in this table shall always be lower case ~~code indexes in this table shall always be lower case~~ ]] local override = { ------------------------------< I S O 6 3 9 - 1 >------------------------------------------------------------ ~~-- ISO 639-1 codes~~ ~~["ab"] = {"Abkhazian"}, -- IANA name is Abkhazian; override wp_languages {"Abkhaz"}; to achieve this, use \|label=~~ ~~["bh"] = {"Bihari languages"}, -- only ISO 639-1 collective; defined here to override improper redefinition (Bihari) in wp_languages~~ ["ca-valencia"] = {"Valencian"}, ["cu"] = {"Church Slavonic"}, -- 2nd IANA name; ["de-at"] = {"Austrian German"}, -- these code-region and code-variant tags to match en.wiki article names ["de-ch"] = {"Swiss Standard German"}, ["en-au"] = {"Australian English"}, Line 26 ⟶ 82: ["en-us"] = {"American English"}, ["en-za"] = {"South African English"}, ["fy"] = {"West Frisian"}, -- ~~IANA name is~~ Western Frisian ["psmo"] = {"~~Pashto~~Moldovan"}, -- ~~IANA~~Moldavian ~~name~~(deprecated iscode); ~~Pushto~~to match en.wiki article title ["oc-provenc"] = {"Provençal"}, ~~["si"] = {"Sinhala"}, -- IANA name is Sinhala, Sinhalese; override wp_languages {"Sinhalese"}; see Module talk:Language/data/wp languages#Request to undo an edit~~ ["ps"] = {"Pashto"}, -- Pushto ["tw-asante"] = {"Asante Twi"}, ~~["sr-cyrl"] = {"Serbian"}, -- override wp_languages Serbian Cyrillic; to achieve this, use \|label=~~ -- these ISO 639-1 language-name overrides imported from Module:Language/data/wp_languages ~~-- ISO 639-2, -3 codes~~ --<begin do-not-edit except to comment out>-- ~~["arc"] = {"Aramaic"}, -- IANA names are: Official Aramaic (700-300 BCE), Imperial Aramaic (700-300 BCE);~~ ["~~ber~~av"] = {"~~Berber languages~~Avar"}, -- ~~ISO 639-2 collective; defined here to override redefinition in wp_languages~~Avaric ["~~bhd~~bo"] = {"~~Bhadarwahi~~Standard Tibetan"}, -- ~~IANA, ISO 639-3 name is Bhadrawahi; to match en.wiki article title~~Tibetan ["~~bla~~el"] = {"~~Blackfoot~~Greek"}, -- ~~IANA, ISO 639-2, -3 name is Siksika; to match en.wiki article~~Modern ~~title~~Greek -- ["~~bua~~en-SA"] = {"~~Buryat~~South African English"}, -- ~~IANA name~~English; ~~Buriat~~no; ~~this~~SA is anot ~~macro~~South ~~language;~~Africa ~~these~~it ~~four~~Saudi ~~use~~Arabia; wp ~~preferred~~ZA ~~transliteration;~~is South Africa ["~~bxm~~ff"] = {"~~Mongolian Buryat~~Fula"}, -- ~~IANA name Mongolia Buriat; these three all redirect to Buryat~~Fulah ["~~bxr~~ht"] = {"~~Russian~~Haitian ~~Buryat~~Creole"}, -- ~~IANA name Russia Buriat;~~Haitian ["~~bxu~~hz"] = {"~~Chinese Buryat~~Otjiherero"}, -- ~~IANA name China Buriat;~~Herero ["~~byr~~ii"] = {"~~Yipma~~Yi"}, -- ~~IANA names are Baruya and~~Sichuan ~~Yipma~~Yi ["ki"] = {"Gikuyu"}, -- Kikuyu ~~["cel"] = {"Celtic languages"}, -- ISO 639-2 collective; defined here to override improper redefinition ('Proto-Celtic') in wp_languages; use cel-x-proto instead~~ ["~~egy~~kl"] = {"~~Ancient Egyptian~~Greenlandic"}, -- ~~IANA name is Egyptian (Ancient); distinguish from contemporary arz: Egyptian Arabic~~ Kalaallisut ["~~frr~~ky"] = {"~~North Frisian~~Kyrgyz"}, -- ~~IANA name is Northern Frisian~~Kirghiz ["~~frs~~lg"] = {"~~East Frisian Low Saxon~~Luganda"}, -- ~~IANA name is Eastern Frisian~~Ganda ["li"] = {"Limburgish"}, -- Limburgan ~~["gem"] = {"Germanic languages"}, -- ISO 639-2 collective; defined here to override improper redefinition ('Proto-Germanic') in wp_languages; use gem-x-proto instead~~ ["~~ilo~~mi"] = {"~~Ilocano~~Māori"}, -- ~~IANA, ISO 639-2, -3 name is Iloko; to match en.wiki article title~~Maori ["~~jam~~na"] = {"~~Jamaican Patois~~Nauruan"}, -- ~~IANA name is Jamaican Creole English~~Nauru ["~~mhr~~nb"] = {"~~Meadow Mari~~Bokmål"}, -- ~~IANA name is Eastern~~Norwegian ~~Mari~~Bokmål ["~~mid~~nd"] = {"~~Modern~~Northern ~~Mandaic~~Ndebele"}, -- ~~IANA name is~~North ~~Mandaic~~Ndebele ["~~mla~~nn"] = {"~~Tamambo~~Nynorsk"}, -- ~~wp_languages name is Medieval Latin; IANA and ISO 639-1 name is~~Norwegian ~~Malo~~Nynorsk ["~~myn~~nr"] = {"~~Mayan~~Southern ~~languages~~Ndebele"}, -- ~~ISO 639-2 collective; defined here to override redefinition in~~South ~~wp_languages~~Ndebele ["~~nah~~ny"] = {"~~Nahuatl languages~~Chichewa"}, -- ~~ISO 639-2 collective; defined here to override redefinition in wp_languages~~Nyanja ["oj"] = {"Ojibwe"}, -- Ojibwa ["or"] = {"Odia"}, -- Oriya ["pa"] = {"Punjabi"}, -- Panjabi ["rn"] = {"Kirundi"}, -- Rundi ["sl"] = {"Slovene"}, -- Slovenian ["ss"] = {"Swazi"}, -- Swati ["st"] = {"Sotho"}, -- Southern Sotho ["to"] = {"Tongan"}, -- Tonga --<end do-not-edit except to comment out>-- ------------------------------< I S O 6 3 9 - 2, - 3, - 5 >---------------------------------------------- ["arc"] = {"Aramaic"}, -- Official Aramaic (700-300 BCE), Imperial Aramaic (700-300 BCE); ["art"] = {"constructed"}, -- to match en.wiki article; lowercase for category name ["bhd"] = {"Bhadarwahi"}, -- Bhadrawahi; to match en.wiki article title ["bla"] = {"Blackfoot"}, -- Siksika; to match en.wiki article title ["bua"] = {"Buryat"}, -- Buriat; this is a macro language; these four use wp preferred transliteration; ["bxm"] = {"Mongolian Buryat"}, -- Mongolia Buriat; these three all redirect to Buryat ["bxr"] = {"Russian Buryat"}, -- Russia Buriat; ["bxu"] = {"Chinese Buryat"}, -- China Buriat; ["byr"] = {"Yipma"}, -- Baruya, Yipma ["egy"] = {"Ancient Egyptian"}, -- Egyptian (Ancient); distinguish from contemporary arz: Egyptian Arabic ["frr"] = {"North Frisian"}, -- Northern Frisian ["frs"] = {"East Frisian Low Saxon"}, -- Eastern Frisian ["ilo"] = {"Ilocano"}, -- Iloko; to match en.wiki article title ["jam"] = {"Jamaican Patois"}, -- Jamaican Creole English ["mhr"] = {"Meadow Mari"}, -- Eastern Mari ["mid"] = {"Modern Mandaic"}, -- Mandaic ["mla"] = {"Tamambo"}, -- Malo ['mte'] = {"Mono-Alu"}, -- Mono (Solomon Islands) ["nan-tw"] = {"Taiwanese Hokkien"}, -- make room for IANA / 639-3 nan Min Nan Chinese; match en.wiki article title ["nrf"] = {"Norman"}, -- not quite a collective - IANA name: Jèrriais; categorizes to Norman-language text ["nzi"] = {"Nzema"}, -- ~~IANA, ISO 639-2, -3 name is~~ Nzima; to match en.wiki article title ["orv"] = {"Old East Slavic"}, -- ~~IANA name is~~ Old Russian ["pfl"] = {"Palatine German"}, -- ~~IANA name is~~ Pfaelzisch; to match en.wiki article ["pms"] = {"Piedmontese"}, -- ~~IANA / 639-3 name is~~ Piemontese; to match en.wiki article title ["~~pra~~pnb"] = {"~~Prakrit~~Punjabi ~~languages~~(Western)"}, -- ~~ISO~~Western ~~639-2 collective~~Panjabi; ~~defined~~dab ~~here~~added to override ~~redefinition~~import from ~/wp languages and distinguish pnb from pa in ~~wp_languages~~reverse look up tag_from_name() ["stq"] = {"Saterland Frisian"}, -- Saterfriesisch ~~["roa"] = {"Romance languages"}, -- ISO 639-2 collective; defined here to override improper redefinition (Jèrriais) in wp_language; IANA name is "Romance languages"~~ ~~["sal"] = {"Salishan languages"}, -- ISO 639-2 collective; defined here to override redefinition in wp_languages~~ ~~["sla"] = {"Slavic languages"}, -- ISO 639-2 collective; defined here to override redefinition in wp_languages~~ ~~["son"] = {"Songhai languages"}, -- ISO 639-2 collective; defined here to override redefinition in wp_languages~~ ~~["stq"] = {"Saterland Frisian"}, -- IANA name is Saterfriesisch~~ ["und"] = {"undetermined"}, -- capitalization to match existing category ["~~wen~~wrg"] = {"~~Sorbian languages~~Warrongo"}, -- ~~ISO 639-2 collective; defined here to override redefinition in wp_languages~~Warungu ~~["wrg"] = {"Warrongo"}, -- IANA name is Warungu~~ ["xal-ru"] = {"Kalmyk"}, -- to match en.wiki article title ["xgf"] = {"Tongva"}, -- ISO 639-3 is Gabrielino-Fernandeño -- these ISO 639-2, -3 language-name overrides imported from Module:Language/data/wp_languages ~~-- private use codes~~ --<begin do-not-edit except to comment out>-- ["ace"] = {"Acehnese"}, -- Achinese ["aec"] = {"Sa'idi Arabic"}, -- Saidi Arabic ["akl"] = {"Aklan"}, -- Aklanon ["alt"] = {"Altay"}, -- Southern Altai ["apm"] = {"Mescalero-Chiricahua"}, -- Mescalero-Chiricahua Apache ["bal"] = {"Balochi"}, -- Baluchi ["bcl"] = {"Central Bicolano"}, -- Central Bikol ["bin"] = {"Edo"}, -- Bini ["bpy"] = {"Bishnupriya Manipuri"}, -- Bishnupriya ["chg"] = {"Chagatay"}, -- Chagatai ["ckb"] = {"Sorani Kurdish"}, -- Central Kurdish ["cnu"] = {"Shenwa"}, -- Chenoua ["coc"] = {"Cocopah"}, -- Cocopa ["diq"] = {"Zazaki"}, -- Dimli ["fit"] = {"Meänkieli"}, -- Tornedalen Finnish ["fkv"] = {"Kven"}, -- Kven Finnish ["frk"] = {"Old Frankish"}, -- Frankish ["gez"] = {"Ge'ez"}, -- Geez ["gju"] = {"Gujari"}, -- Gujari ["gsw"] = {"Alemannic German"}, -- Swiss German ["gul"] = {"Gullah"}, -- Sea Island Creole English ["hak"] = {"Hakka"}, -- Hakka Chinese ["hbo"] = {"Biblical Hebrew"}, -- Ancient Hebrew ["hnd"] = {"Hindko"}, -- Southern Hindko -- ["ikt"] = {"Inuvialuk"}, -- Inuinnaqtun ["kaa"] = {"Karakalpak"}, -- Kara-Kalpak ["khb"] = {"Tai Lü"}, -- Lü ["kmr"] = {"Kurmanji Kurdish"}, -- Northern Kurdish ["kpo"] = {"Kposo"}, -- Ikposo ["krj"] = {"Kinaray-a"}, -- Kinaray-A ["ktz"] = {"Juǀ'hoan"}, -- Juǀʼhoan ["lez"] = {"Lezgian"}, -- Lezghian ["liv"] = {"Livonian"}, -- Liv ["lng"] = {"Lombardic"}, -- Langobardic ["mia"] = {"Miami-Illinois"}, -- Miami ["miq"] = {"Miskito"}, -- Mískito ["mix"] = {"Mixtec"}, -- Mixtepec Mixtec ["mni"] = {"Meitei"}, -- Manipuri ["mrj"] = {"Hill Mari"}, -- Western Mari ["mww"] = {"White Hmong"}, -- Hmong Daw ["nds-nl"] = {"Dutch Low Saxon"}, -- Low German ["new"] = {"Nepal Bhasa"}, -- Newari ["nso"] = {"Northern Sotho"}, -- Pedi ["nwc"] = {"Classical Nepal Bhasa"}, -- Classical Newari ["ood"] = {"O'odham"}, -- Tohono O'odham ["otk"] = {"Old Turkic"}, -- Old Turkish ["pal"] = {"Middle Persian"}, -- Pahlavi ["pam"] = {"Kapampangan"}, -- Pampanga ["phr"] = {"Potwari"}, -- Pahari-Potwari ["pka"] = {"Jain Prakrit"}, -- Ardhamāgadhī Prākrit -- ["pnb"] = {"Punjabi"}, -- Western Panjabi ["psu"] = {"Shauraseni"}, -- Sauraseni Prākrit ["rap"] = {"Rapa Nui"}, -- Rapanui ["rar"] = {"Cook Islands Māori"}, -- Rarotongan ["rmu"] = {"Scandoromani"}, -- Tavringer Romani ["rom"] = {"Romani"}, -- Romany ["rup"] = {"Aromanian"}, -- Macedo-Romanian ["ryu"] = {"Okinawan"}, -- Central Okinawan ["sdc"] = {"Sassarese"}, -- Sassarese Sardinian ["sdn"] = {"Gallurese"}, -- Gallurese Sardinian ["shp"] = {"Shipibo"}, -- Shipibo-Conibo ["src"] = {"Logudorese"}, -- Logudorese Sardinian ["sro"] = {"Campidanese"}, -- Campidanese Sardinian ["tkl"] = {"Tokelauan"}, -- Tokelau ["tvl"] = {"Tuvaluan"}, -- Tuvalu ["tyv"] = {"Tuvan"}, -- Tuvinian ["vls"] = {"West Flemish"}, -- Vlaams ["wep"] = {"Westphalian"}, -- Westphalien ["xal"] = {"Oirat"}, -- Kalmyk ["xcl"] = {"Old Armenian"}, -- Classical Armenian ["yua"] = {"Yucatec Maya"}, -- Yucateco --<end do-not-edit except to comment out>-- ------------------------------< P R I V A T E - U S E T A G S >---------------------------------------------- ["cel-x-proto"] = {"Proto-Celtic"}, -- cel in IANA is Celtic languages ["gem-x-proto"] = {"Proto-Germanic"}, -- gem in IANA is Germanic languages Line 108 ⟶ 267: local article_name = { ["lij"] = {"Ligurian (Romance language)"}, -- Ligurian; see Template_talk:Lang#Ligurian_dab ['mnh'] = {"Mono language (Congo)"}, -- Mono (Democratic Republic of Congo); see Template_talk:Lang#Mono_languages ['mnr'] = {"Mono language (California)"}, -- Mono (USA) ['mru'] = {"Mono language (Cameroon)"}, -- Mono (Cameroon) ~~['mte'] = {"Mono-Alu language"},~~ ["xlg"] = {"Ligurian (ancient language)"}, -- see Template_talk:Lang#Ligurian_dab } Line 346 ⟶ 504: return { ~~override = override,~~ article_name = article_name, lang_name_table = lang_name_table, override = override, rtl_scripts = rtl_scripts, translit_title_table = translit_title_table,

Module:Lang/data: Difference between revisions