Module:Language/name/data: Difference between revisions

From Zoophilia Wiki
Jump to navigationJump to search
meta>Amalthea
(Removed redundancies and nil-entries)
m (88 revisions imported)
 
(85 intermediate revisions by 7 users not shown)
Line 1: Line 1:
-- put tables and their tables together
local function __coalesce(...)
    local coalesced = {}
    for _, langslist in ipairs{...} do
        for langcode, langnames in pairs(langslist) do
            for _, langname in pairs(langnames) do
                if not coalesced[langcode] then
                    coalesced[langcode] = {}
                end
                table.insert(coalesced[langcode], langname)
            end
        end
    end
    return coalesced
end
-- make the keys lowercase
local function __preprocess(t, first_of_array_in_array)
    local preprocessed = {}
    if first_of_array_in_array then
    -- keep only the 1st language name for each code, excluding synonyms
        for k, v in pairs(t) do
            preprocessed[k:lower()] = {v[1]}
        end
    else
        for k, v in pairs(t) do
            preprocessed[k:lower()] = v
        end
    end
    return preprocessed
end
-- all valid primary language subtags for BCP47 from IANA (most of them from ISO 639-1, -2 and -3 with some exclusions)
local __iana_languages = __preprocess(require("Module:Language/data/iana languages"));
-- ISO 639-3 contains additional 3-letter codes not inserted in the IANA database as they are aliased to 2-letter codes,
-- but excludes some deleted codes still valid in BCP47 (some of them are aliased)
local __iso_639_3      = __preprocess(require("Module:Language/data/ISO 639-3"));
-- Wikimedia wikis uses some non-standard codes and a subset of IANA codes, plus composite codes
local __wp_languages  = __preprocess(require("Module:Language/data/wp languages"), true);
-- all valid script subtags for BCP47 from IANA (excluding special ISO 15924 codes)
local iana_scripts    = __preprocess(require("Module:Language/data/iana scripts"));
-- all valid region subtags for BCP47 from IANA (derived from ISO 3166-1 excluding special codes, and from 3-digit UN M.49 codes for groups of countries)
local iana_regions    = __preprocess(require("Module:Language/data/iana regions"));
-- variant subtags from IANA; table format differs from the other IANA data tables
local iana_variants = __preprocess(require("Module:Language/data/iana variants"));
-- suppressed script subtags from IANA;
local iana_suppressed_scripts = __preprocess (require("Module:Language/data/iana suppressed scripts"));
return {
return {
   codes = {
    lang   = __coalesce(__wp_languages, __iana_languages, __iso_639_3),
  ["ab"]  = "Abkhaz"
    lang_iana = __iana_languages,
,["abk"] = "Abkhaz"
    script = iana_scripts,
,["ace"] = "Acehnese"
    region = iana_regions,
,["ady"] = "Adyghe"
    variant = iana_variants,
,["aa"]  = "Afar"
    suppressed = iana_suppressed_scripts,
,["aar"] = "Afar"
,["af"]  = "Afrikaans"
,["afr"] = "Afrikaans"
,["ain"] = "Ainu"
,["ak"]  = "Akan"
,["aka"] = "Akan"
,["akk"] = "Akkadian"
,["akl"] = "Aklan"
,["alb"] = "Albanian"
,["sq"]  = "Albanian"
,["sqi"] = "Albanian"
,["als"] = "Albanian (Tosk)"
,["gsw"] = "Alemannic"
,["arq"] = "Algerian Arabic"
,["am"]  = "Amharic"
,["amh"] = "Amharic"
,["grc"] = "Ancient Greek"
,["oj"]  = "Anishinaabe"
,["oji"] = "Anishinaabe"
,["ar"]  = "Arabic"
,["ara"] = "Arabic"
,["an"]  = "Aragonese"
,["arg"] = "Aragonese"
,["arc"] = "Aramaic"
,["arm"] = "Armenian"
,["hy"]  = "Armenian"
,["hye"] = "Armenian"
,["rup"] = "Aromanian"
,["frp"] = "Arpitan"
,["as"]  = "Assamese"
,["asm"] = "Assamese"
,["ast"] = "Asturian"
,["av"]  = "Avaric"
,["ava"] = "Avaric"
,["ae"]  = "Avestan"
,["ave"] = "Avestan"
,["ay"]  = "Aymara"
,["aym"] = "Aymara"
,["az"]  = "Azerbaijani"
,["aze"] = "Azerbaijani"
,["bal"] = "Balochi"
,["bam"] = "Bambara"
,["bm"]  = "Bambara"
,["bjn"] = "Banjar"
,["ba"]  = "Bashkir"
,["bak"] = "Bashkir"
,["baq"] = "Basque"
,["eu"]  = "Basque"
,["eus"] = "Basque"
,["bar"] = "Bavarian"
,["be"]  = "Belarusian"
,["bel"] = "Belarusian"
,["ben"] = "Bengali"
,["bn"]  = "Bengali"
,["ber"] = "Berber"
,["bho"] = "Bhojpuri"
,["bik"] = "Bicol"
,["bh"]  = "Bihari"
,["bih"] = "Bihari"
,["bpy"] = "Bishnupriya Manipuri"
,["bi"]  = "Bislama"
,["bis"] = "Bislama"
,["bos"] = "Bosnian"
,["bs"]  = "Bosnian"
,["por-BR"] = "Brazilian Portuguese"
,["pt-BR"] = "Brazilian Portuguese"
,["br"]  = "Breton"
,["bre"] = "Breton"
,["bug"] = "Buginese"
,["bg"]  = "Bulgarian"
,["bul"] = "Bulgarian"
,["bur"] = "Burmese"
,["my"]  = "Burmese"
,["mya"] = "Burmese"
,["bua"] = "Buryat"
,["bxr"] = "Buryat (Russia)"
,["cbv"] = "Cacua"
,["kex"] = "Canara Konkani"
,["yue"] = "Cantonese"
,["kea"] = "Cape Verdean Creole"
,["car"] = "Carib"
,["ca"]  = "Catalan"
,["cat"] = "Catalan"
,["ceb"] = "Cebuano"
,["esu"] = "Central Alaskan Yup'ik"
,["tzm"] = "Central Atlas Tamazight"
,["bcl"] = "Central Bicalono"
,["ckb"] = "Central Kurdish"
,["ch"]  = "Chamorro"
,["cha"] = "Chamorro"
,["cbk"] = "Chavacano"
,["ce"]  = "Chechen"
,["che"] = "Chechen"
,["chr"] = "Cherokee"
,["chy"] = "Cheyenne"
,["ny"]  = "Chichewa"
,["nya"] = "Chichewa"
,["chi"] = "Chinese"
,["zh"]  = "Chinese"
,["zho"] = "Chinese"
,["cho"] = "Choctaw"
,["ckt"] = "Chukchi"
,["chv"] = "Chuvash"
,["cv"]  = "Chuvash"
,["nci"] = "Classical Nahuatl"
,["ksh"] = "Colognian"
,["cop"] = "Coptic"
,["cor"] = "Cornish"
,["kw"]  = "Cornish"
,["co"]  = "Corsican"
,["cos"] = "Corsican"
,["cr"]  = "Cree"
,["cre"] = "Cree"
,["crh"] = "Crimean Tatar"
,["hr"]  = "Croatian"
,["hrv"] = "Croatian"
,["cro"] = "Crow"
,["ces"] = "Czech"
,["cs"]  = "Czech"
,["cze"] = "Czech"
,["dlm"] = "Dalmatian"
,["da"]  = "Danish"
,["dan"] = "Danish"
,["del"] = "Delaware"
,["div"] = "Dhivehi"
,["dv"]  = "Dhivehi"
,["dut"] = "Dutch"
,["nl"]  = "Dutch"
,["nld"] = "Dutch"
,["dz"]  = "Dzongkha"
,["dzo"] = "Dzongkha"
,["egy"] = "Egyptian"
,["arz"] = "Egyptian Spoken Arabic"
,["elx"] = "Elamite"
,["en"]  = "English"
,["eng"] = "English"
,["myv"] = "Erzya"
,["ags"] = "Esimbi"
,["eo"]  = "Esperanto"
,["epo"] = "Esperanto"
,["est"] = "Estonian"
,["et"]  = "Estonian"
,["evn"] = "Evenki"
,["tut"] = "Evenki"
,["ee"]  = "Ewe"
,["ewe"] = "Ewe"
,["ext"] = "Extremaduran"
,["fax"] = "Fala"
,["fan"] = "Fang"
,["fao"] = "Faroese"
,["fo"]  = "Faroese"
,["hif"] = "Fiji Hindi"
,["fij"] = "Fijian"
,["fj"]  = "Fijian"
,["fi"]  = "Finnish"
,["fin"] = "Finnish"
,["fr"]  = "French"
,["fra"] = "French"
,["fre"] = "French"
,["fur"] = "Friulian"
,["ff"]  = "Fula"
,["ful"] = "Fula"
,["gag"] = "Gagauz"
,["gl"]  = "Galician"
,["glg"] = "Galician"
,["sdn"] = "Gallurese"
,["gan"] = "Gan"
,["geo"] = "Georgian"
,["ka"]  = "Georgian"
,["kat"] = "Georgian"
,["de"]  = "German"
,["deu"] = "German"
,["ger"] = "German"
,["gem"] = "Germanic"
,["ki"]  = "Gikuyu"
,["kik"] = "Gikuyu"
,["glk"] = "Gilaki"
,["ank"] = "Goemai"
,["got"] = "Gothic"
,["el"]  = "Greek"
,["ell"] = "Greek"
,["gre"] = "Greek"
,["gn"]  = "Guaraní"
,["grn"] = "Guaraní"
,["gu"]  = "Gujarati"
,["guj"] = "Gujarati"
,["hat"] = "Haitian Creole"
,["ht"]  = "Haitian Creole"
,["hak"] = "Hakka"
,["hni"] = "Hani"
,["ha"]  = "Hausa"
,["hau"] = "Hausa"
,["yuf"] = "Havasupai-Hualapai-Yavapai"
,["haw"] = "Hawaiian"
,["haz"] = "Hazaragi"
,["he"]  = "Hebrew"
,["heb"] = "Hebrew"
,["her"] = "Herero"
,["hz"]  = "Herero"
,["hil"] = "Hiligaynon"
,["mrj"] = "Hill Mari"
,["hi"]  = "Hindi"
,["hin"] = "Hindi"
,["hmo"] = "Hiri Motu"
,["ho"]  = "Hiri Motu"
,["hit"] = "Hittite"
,["hop"] = "Hopi"
,["hu"]  = "Hungarian"
,["hun"] = "Hungarian"
,["ice"] = "Icelandic"
,["is"]  = "Icelandic"
,["isl"] = "Icelandic"
,["ido"] = "Ido"
,["io"]  = "Ido"
,["ibo"] = "Igbo"
,["ig"]  = "Igbo"
,["ilo"] = "Ilokano"
,["ine"] = "Indo-European"
,["iir"] = "Indo-Iranian"
,["id"]  = "Indonesian"
,["ind"] = "Indonesian"
,["inh"] = "Ingush"
,["ia"]  = "Interlingua"
,["ina"] = "Interlingua"
,["ie"]  = "Interlingue"
,["ile"] = "Interlingue"
,["iku"] = "Inuktitut"
,["iu"]  = "Inuktitut"
,["ik"]  = "Inupiaq"
,["ipk"] = "Inupiaq"
,["irk"] = "Iraqw"
,["ga"]  = "Irish"
,["gle"] = "Irish"
,["ruo"] = "Istro-Romanian"
,["it"]  = "Italian"
,["ita"] = "Italian"
,["jac"] = "Jakaltek"
,["ja"]  = "Japanese"
,["jpn"] = "Japanese"
,["jav"] = "Javanese"
,["jv"]  = "Javanese"
,["apj"] = "Jicarilla Apache"
,["lad"] = "Judaeo-Spanish"
,["kbd"] = "Kabardian"
,["kab"] = "Kabyle"
,["kal"] = "Kalaallisut"
,["kl"]  = "Kalaallisut"
,["kan"] = "Kannada"
,["kn"]  = "Kannada"
,["kau"] = "Kanuri"
,["kr"]  = "Kanuri"
,["pam"] = "Kapampangan"
,["krc"] = "Karachay-Balkar"
,["kaa"] = "Karakalpak"
,["kar"] = "Karen"
,["kas"] = "Kashmiri"
,["ks"]  = "Kashmiri"
,["csb"] = "Kashubian"
,["kaz"] = "Kazakh"
,["kk"]  = "Kazakh"
,["khm"] = "Khmer"
,["km"]  = "Khmer"
,["quc"] = "K'iche'"
,["sjd"] = "Kildin Sami"
,["kin"] = "Kinyarwanda"
,["rw"]  = "Kinyarwanda"
,["rn"]  = "Kirundi"
,["run"] = "Kirundi"
,["tlh"] = "Klingon"
,["kom"] = "Komi"
,["kv"]  = "Komi"
,["koi"] = "Komi-Permyak"
,["kg"]  = "Kongo"
,["kon"] = "Kongo"
,["knn"] = "Konkani"
,["kok"] = "Konkani"
,["ko"]  = "Korean"
,["kor"] = "Korean"
,["eko"] = "Koti"
,["kum"] = "Kumyk"
,["ku"]  = "Kurdish"
,["kur"] = "Kurdish"
,["kmr"] = "Kurmanji"
,["kj"]  = "Kwanyama"
,["kua"] = "Kwanyama"
,["kir"] = "Kyrgyz"
,["ky"]  = "Kyrgyz"
,["lbe"] = "Lak"
,["lkt"] = "Lakota"
,["lao"] = "Lao"
,["lo"]  = "Lao"
,["ltg"] = "Latgalian"
,["la"]  = "Latin"
,["lat"] = "Latin"
,["lav"] = "Latvian"
,["lv"]  = "Latvian"
,["lij"] = "Ligurian"
,["li"]  = "Limburgish"
,["lim"] = "Limburgish"
,["lin"] = "Lingala"
,["ln"]  = "Lingala"
,["lit"] = "Lithuanian"
,["lt"]  = "Lithuanian"
,["liv"] = "Livonian"
,["jbo"] = "Lojban"
,["lmo"] = "Lombard"
,["lou"] = "Louisiana Creole French"
,["nds"] = "Low Saxon"
,["dsb"] = "Lower Sorbian"
,["lg"]  = "Luganda"
,["lug"] = "Luganda"
,["luy"] = "Luhya"
,["lut"] = "Lushootseed"
,["lb"]  = "Luxembourgish"
,["ltz"] = "Luxembourgish"
,["mac"] = "Macedonian"
,["mk"]  = "Macedonian"
,["mkd"] = "Macedonian"
,["mg"]  = "Malagasy"
,["mlg"] = "Malagasy"
,["may"] = "Malay"
,["ms"]  = "Malay"
,["msa"] = "Malay"
,["mal"] = "Malayalam"
,["ml"]  = "Malayalam"
,["mlt"] = "Maltese"
,["mt"]  = "Maltese"
,["mam"] = "Mam"
,["mnc"] = "Manchu"
,["cmn"] = "Mandarin Chinese"
,["glv"] = "Manx"
,["gv"]  = "Manx"
,["mao"] = "Māori"
,["mi"]  = "Māori"
,["mri"] = "Māori"
,["arn"] = "Mapudungun"
,["mar"] = "Marathi"
,["mr"]  = "Marathi"
,["chm"] = "Mari"
,["mrc"] = "Maricopa"
,["mah"] = "Marshallese"
,["mh"]  = "Marshallese"
,["mwr"] = "Marwari"
,["mfe"] = "Mauritian creole"
,["myn"] = "Mayan"
,["mzn"] = "Mazandarani"
,["mhr"] = "Meadow Mari"
,["dum"] = "Middle Dutch"
,["enm"] = "Middle English"
,["gmh"] = "Middle High German"
,["mga"] = "Middle Irish"
,["gml"] = "Middle Low German"
,["pal"] = "Middle Persian"
,["wlm"] = "Middle Welsh"
,["cdo"] = "Min-dong"
,["xmf"] = "Mingrelian"
,["nan"] = "Min-nan"
,["mwl"] = "Mirandese"
,["mov"] = "Mohave"
,["moh"] = "Mohawk"
,["mdf"] = "Moksha"
,["mn"]  = "Mongolian"
,["mon"] = "Mongolian"
,["mus"] = "Muscogee"
,["gmy"] = "Mycenaean Greek"
,["nah"] = "Nahuatl"
,["na"]  = "Nauruan"
,["nau"] = "Nauruan"
,["nav"] = "Navajo"
,["nv"]  = "Navajo"
,["ndo"] = "Ndonga"
,["ng"]  = "Ndonga"
,["nap"] = "Neapolitan"
,["new"] = "Nepal Bhasa"
,["ne"]  = "Nepali"
,["nep"] = "Nepali"
,["pih"] = "Norfolk"
,["nrm"] = "Norman"
,["frr"] = "North Frisian"
,["apc"] = "North Levantine Arabic"
,["nd"]  = "Northern Ndebele"
,["nde"] = "Northern Ndebele"
,["nso"] = "Northern Sotho"
,["nod"] = "Northern Thai"
,["no"]  = "Norwegian"
,["nor"] = "Norwegian"
,["nb"]  = "Norwegian Bokmål"
,["nob"] = "Norwegian Bokmål"
,["nn"]  = "Norwegian Nynorsk"
,["nno"] = "Norwegian Nynorsk"
,["nov"] = "Novial"
,["oc"]  = "Occitan"
,["oci"] = "Occitan"
,["xal"] = "Oirat"
,["ryu"] = "Okinawan"
,["chu"] = "Old Church Slavonic"
,["cu"]  = "Old Church Slavonic"
,["sla"] = "Old East Slavic"
,["ang"] = "Old English"
,["fro"] = "Old French"
,["ofs"] = "Old Frisian"
,["goh"] = "Old High German"
,["sga"] = "Old Irish"
,["non"] = "Old Norse"
,["peo"] = "Old Persian"
,["osx"] = "Old Saxon"
,["owl"] = "Old Welsh"
,["one"] = "Oneida"
,["or"]  = "Oriya"
,["ori"] = "Oriya"
,["om"]  = "Oromo"
,["orm"] = "Oromo"
,["os"]  = "Ossetic"
,["oss"] = "Ossetic"
,["roa"] = "Other Romance"
,["oto"] = "Otomi"
,["ota"] = "Ottoman Turkish"
,["pfl"] = "Palatinate German"
,["pi"]  = "Pāli"
,["pli"] = "Pāli"
,["pag"] = "Pangasinan"
,["pap"] = "Papiamento"
,["ps"]  = "Pashto"
,["pus"] = "Pashto"
,["uun"] = "Pazeh"
,["pdc"] = "Pennsylvania German"
,["fa"]  = "Persian"
,["fas"] = "Persian"
,["per"] = "Persian"
,["pcd"] = "Picard"
,["cel"] = "Pictish"
,["pms"] = "Piemontese"
,["pny"] = "Pinyin"
,["crk"] = "Plains Cree"
,["pl"]  = "Polish"
,["pol"] = "Polish"
,["pnt"] = "Pontic Greek"
,["por"] = "Portuguese"
,["pt"]  = "Portuguese"
,["phr"] = "Potwari"
,["pa"]  = "Punjabi"
,["pan"] = "Punjabi"
,["qu"]  = "Quechua"
,["que"] = "Quechua"
,["qya"] = "Quenya"
,["rar"] = "Rarotongan"
,["rom"] = "Romani"
,["ro"]  = "Romanian"
,["ron"] = "Romanian"
,["rum"] = "Romanian"
,["rm"]  = "Romansh"
,["roh"] = "Romansh"
,["ru"]  = "Russian"
,["rus"] = "Russian"
,["rue"] = "Rusyn"
,["se"]  = "Sami"
,["sme"] = "Sami"
,["sm"]  = "Samoan"
,["smo"] = "Samoan"
,["sag"] = "Sango"
,["sg"]  = "Sango"
,["sa"]  = "Sanskrit"
,["san"] = "Sanskrit"
,["skr"] = "Saraiki"
,["sc"]  = "Sardinian"
,["srd"] = "Sardinian"
,["sdc"] = "Sassarese"
,["stq"] = "Saterland Frisian"
,["sco"] = "Scots"
,["gd"]  = "Scottish Gaelic"
,["gla"] = "Scottish Gaelic"
,["see"] = "Seneca"
,["sr"]  = "Serbian"
,["srp"] = "Serbian"
,["hbs"] = "Serbo-Croatian"
,["scl"] = "Shina"
,["sn"]  = "Shona"
,["sna"] = "Shona"
,["shs"] = "Shuswap"
,["scn"] = "Sicilian"
,["sgn"] = "Sign language"
,["bla"] = "Siksika"
,["szl"] = "Silesian"
,["zh-Hans"] = "Simplified Chinese"
,["sjn"] = "Sindarin"
,["sd"]  = "Sindhi"
,["snd"] = "Sindhi"
,["si"]  = "Sinhala"
,["sin"] = "Sinhala"
,["sk"]  = "Slovak"
,["slk"] = "Slovak"
,["slo"] = "Slovak"
,["sl"]  = "Slovene"
,["slv"] = "Slovene"
,["so"]  = "Somali"
,["som"] = "Somali"
,["wen"] = "Sorbian"
,["sot"] = "Sotho"
,["st"]  = "Sotho"
,["nbl"] = "Southern Ndebele"
,["nr"]  = "Southern Ndebele"
,["es"]  = "Spanish"
,["spa"] = "Spanish"
,["srn"] = "Sranan"
,["sux"] = "Sumerian"
,["su"]  = "Sundanese"
,["sun"] = "Sundanese"
,["sw"]  = "Swahili"
,["swa"] = "Swahili"
,["ss"]  = "Swati"
,["ssw"] = "Swati"
,["sv"]  = "Swedish"
,["swe"] = "Swedish"
,["syc"] = "Syriac"
,["syr"] = "Syriac"
,["fil"] = "Tagalog"
,["tgl"] = "Tagalog"
,["tl"]  = "Tagalog"
,["tah"] = "Tahitian"
,["ty"]  = "Tahitian"
,["tg"]  = "Tajik"
,["tgk"] = "Tajik"
,["ta"]  = "Tamil"
,["tam"] = "Tamil"
,["tat"] = "Tatar"
,["tt"]  = "Tatar"
,["te"]  = "Telugu"
,["tel"] = "Telugu"
,["tet"] = "Tetum"
,["th"]  = "Thai"
,["tha"] = "Thai"
,["bo"]  = "Tibetan"
,["bod"] = "Tibetan"
,["tib"] = "Tibetan"
,["ti"]  = "Tigrinya"
,["tir"] = "Tigrinya"
,["tpi"] = "Tok Pisin"
,["to"]  = "Tongan"
,["ton"] = "Tongan"
,["zh-Hant"] = "Traditional Chinese"
,["lu"]  = "Tshiluba"
,["lub"] = "Tshiluba"
,["ts"]  = "Tsonga"
,["tso"] = "Tsonga"
,["tn"]  = "Tswana"
,["tsn"] = "Tswana"
,["tcy"] = "Tulu"
,["tum"] = "Tumbuka"
,["aeb"] = "Tunisian Arabic"
,["tr"]  = "Turkish"
,["tur"] = "Turkish"
,["tk"]  = "Turkmen"
,["tuk"] = "Turkmen"
,["tus"] = "Tuscarora"
,["tyv"] = "Tuvan"
,["tw"]  = "Twi"
,["twi"] = "Twi"
,["udm"] = "Udmurt"
,["uk"]  = "Ukrainian"
,["ukr"] = "Ukrainian"
,["und"] = "undetermined"
,["hsb"] = "Upper Sorbian"
,["ur"]  = "Urdu"
,["urd"] = "Urdu"
,["ug"]  = "Uyghur"
,["uig"] = "Uyghur"
,["uz"]  = "Uzbek"
,["uzb"] = "Uzbek"
,["ve"]  = "Venda"
,["ven"] = "Venda"
,["vec"] = "Venetian"
,["vep"] = "Veps"
,["vi"]  = "Vietnamese"
,["vie"] = "Vietnamese"
,["rmy"] = "Vlax Romani"
,["vo"]  = "Volapük"
,["vol"] = "Volapük"
,["vro"] = "Võro"
,["wa"]  = "Walloon"
,["wln"] = "Walloon"
,["war"] = "Waray-Waray"
,["cy"]  = "Welsh"
,["cym"] = "Welsh"
,["wel"] = "Welsh"
,["vls"] = "West Flemish"
,["fry"] = "West Frisian"
,["fy"]  = "West Frisian"
,["gmw"] = "West Germanic"
,["pnb"] = "Western Panjabi"
,["wo"]  = "Wolof"
,["wol"] = "Wolof"
,["wuu"] = "Wuu"
,["xh"]  = "Xhosa"
,["xho"] = "Xhosa"
,["sah"] = "Yakut"
,["kdd"] = "Yankunytjatjara"
,["ii"]  = "Yi"
,["iii"] = "Yi"
,["yi"]  = "Yiddish"
,["yid"] = "Yiddish"
,["yo"]  = "Yoruba"
,["yor"] = "Yoruba"
,["yua"] = "Yukatek Maya"
,["diq"] = "Zazaki"
,["zza"] = "Zazaki"
,["zea"] = "Zeelandic"
,["zen"] = "Zenaga"
,["za"]  = "Zhuang"
,["zha"] = "Zhuang"
,["zu"]  = "Zulu"
,["zul"] = "Zulu"
,["zun"] = "Zuni"
}
}
}

Latest revision as of 04:42, 3 September 2020

This module pulls data from the data modules and combines it into a single table. Language names are gathered from the following sources, listed from lowest to highest precedence:

  1. Module:Language/data/ISO 639-3 – as defined in ISO 639-3
  2. Module:Language/data/iana languages – as defined by the IETF-sanctioned IANA registry; the names are almost always the same as in ISO 639-3, but the registry does not include 3-letter codes for languages that have 2-letter codes, hence the ISO 639-3 fallback
  3. Module:Language/data/wp languages – specific to Wikipedia; languages for which we've decided another name is better

In other words, a name set in wp languages takes precedence over one set in iana languages, which in turn takes precedence over one from ISO 639-3. The wp languages names can be bypassed by passing dataset="iana" to the parent module.

See also


-- Merge any number of code -> name-list tables into one table.
--
-- Each argument maps a language code to a sequence of names. For every
-- code, the names from all arguments are concatenated in argument order, so
-- names from earlier arguments come first in the combined list.
--
-- A code only appears in the result if at least one name was found for it;
-- empty name lists never create an (empty) entry. The input tables are not
-- modified and the result never shares list tables with the inputs.
--
-- @param ... tables of the form { [code] = { name1, name2, ... } }
-- @return a new table { [code] = { all names, in precedence order } }
local function __coalesce(...)
    local coalesced = {}
    for _, langslist in ipairs{...} do
        for langcode, langnames in pairs(langslist) do
            local names = coalesced[langcode]
            -- ipairs, not pairs: the position of a name inside its list is
            -- significant (first = preferred), and pairs() gives no
            -- guarantee about traversal order.
            for _, langname in ipairs(langnames) do
                if not names then
                    -- created lazily so that codes without names stay absent
                    names = {}
                    coalesced[langcode] = names
                end
                names[#names + 1] = langname
            end
        end
    end

    return coalesced
end

-- Return a new table holding the entries of t under lowercased keys.
--
-- When first_of_array_in_array is truthy, every value is replaced by a fresh
-- one-element list containing only its first item (i.e. only the first name
-- is kept and any synonyms are dropped). Otherwise the original value is
-- stored as-is, without being copied.
local function __preprocess(t, first_of_array_in_array)
    local preprocessed = {}

    for code, names in pairs(t) do
        local entry = names
        if first_of_array_in_array then
            entry = {names[1]}
        end
        preprocessed[code:lower()] = entry
    end

    return preprocessed
end

-- Primary language subtags valid in BCP 47, from the IANA registry (mostly
-- ISO 639-1, -2 and -3 codes, with some exclusions).
local iana_language_names = __preprocess(require("Module:Language/data/iana languages"))

-- ISO 639-3 adds 3-letter codes that IANA omits because they are aliased to
-- 2-letter codes, but lacks some deleted codes that are still valid in BCP 47
-- (some of which are aliased).
local iso_639_3_names = __preprocess(require("Module:Language/data/ISO 639-3"))

-- Wikimedia wikis use some non-standard codes and a subset of the IANA codes,
-- plus composite codes. Only the first name of each entry is kept here.
local wikimedia_language_names = __preprocess(require("Module:Language/data/wp languages"), true)

-- Script subtags valid in BCP 47, from IANA (special ISO 15924 codes excluded).
local script_names = __preprocess(require("Module:Language/data/iana scripts"))

-- Region subtags valid in BCP 47, from IANA (derived from ISO 3166-1 without
-- its special codes, and from 3-digit UN M.49 codes for groups of countries).
local region_names = __preprocess(require("Module:Language/data/iana regions"))

-- Variant subtags from IANA; this table's format differs from the other IANA
-- data tables.
local variant_data = __preprocess(require("Module:Language/data/iana variants"))

-- Suppressed script subtags from IANA.
local suppressed_script_data = __preprocess(require("Module:Language/data/iana suppressed scripts"))

local data = {}

-- Combined language names: Wikimedia names first, then IANA, then ISO 639-3.
data.lang = __coalesce(wikimedia_language_names, iana_language_names, iso_639_3_names)
-- IANA language names on their own, without the Wikimedia or ISO 639-3 data.
data.lang_iana = iana_language_names
data.script = script_names
data.region = region_names
data.variant = variant_data
data.suppressed = suppressed_script_data

return data