local function __preprocess(t, first_of_array_in_array)
["abk"] = "Abkhaz",
local preprocessed = {}
["ace"] = "Acehnese",
if first_of_array_in_array then
["ady"] = "Adyghe",
-- keep only the 1st language name for each code, excluding synonyms
["afr"] = "Afrikaans",
for k, v in pairs(t) do
["ain"] = "Ainu",
preprocessed[k:lower()] = {v[1]}
["aka"] = "Akan",
end
["alb"] = "Albanian",
else
["amh"] = "Amharic",
for k, v in pairs(t) do
["ang"] = "Old English",
preprocessed[k:lower()] = v
["ara"] = "Arabic",
end
["arc"] = "Aramaic",
end
["arg"] = "Aragonese",
["arm"] = "Armenian",
["arn"] = "Mapudungun",
["asm"] = "Assamese",
["ast"] = "Asturian",
["ava"] = "Avaric",
["ave"] = "Avestan",
["aym"] = "Aymara",
["aze"] = "Azerbaijani",
["bak"] = "Bashkir",
["bal"] = "Balochi",
["bam"] = "Bambara",
["baq"] = "Basque",
["bel"] = "Belarusian",
["ben"] = "Bengali",
["ber"] = "Berber",
["bho"] = "Bhojpuri",
["bih"] = "Bihari",
["bik"] = "Bicol",
["bis"] = "Bislama",
["bla"] = "Siksika",
["bod"] = "Tibetan",
["bos"] = "Bosnian",
["bre"] = "Breton",
["bua"] = "Buryat",
["bug"] = "Buginese",
["bul"] = "Bulgarian",
["bur"] = "Burmese",
["car"] = "Carib",
["cat"] = "Catalan",
["ceb"] = "Cebuano",
["cel"] = "Pictish",
["ces"] = "Czech",
["cha"] = "Chamorro",
["che"] = "Chechen",
["chi"] = "Chinese",
["chm"] = "Mari",
["cho"] = "Choctaw",
["chr"] = "Cherokee",
["chu"] = "Old Church Slavonic",
["chv"] = "Chuvash",
["chy"] = "Cheyenne",
["cop"] = "Coptic",
["cor"] = "Cornish",
["cos"] = "Corsican",
["cre"] = "Cree",
["crh"] = "Crimean Tatar",
["csb"] = "Kashubian",
["cym"] = "Welsh",
["cze"] = "Czech",
["dan"] = "Danish",
["del"] = "Delaware",
["deu"] = "German",
["div"] = "Dhivehi",
["dsb"] = "Lower Sorbian",
["dum"] = "Middle Dutch",
["dut"] = "Dutch",
["dzo"] = "Dzongkha",
["egy"] = "Egyptian",
["ell"] = "Greek",
["eng"] = "English",
["enm"] = "Middle English",
["epo"] = "Esperanto",
["est"] = "Estonian",
["eus"] = "Basque",
["ewe"] = "Ewe",
["fan"] = "Fang",
["fao"] = "Faroese",
["fas"] = "Persian",
["fij"] = "Fijian",
["fil"] = "Tagalog",
["fin"] = "Finnish",
["fra"] = "French",
["fre"] = "French",
["fro"] = "Old French",
["frr"] = "North Frisian",
["fry"] = "West Frisian",
["ful"] = "Fula",
["fur"] = "Friulian",
["gem"] = "Germanic",
["geo"] = "Georgian",
["ger"] = "German",
["gla"] = "Scottish Gaelic",
["gle"] = "Irish",
["glg"] = "Galician",
["glv"] = "Manx",
["gmh"] = "Middle High German",
["goh"] = "Old High German",
["got"] = "Gothic",
["grc"] = "Ancient Greek",
["gre"] = "Greek",
["grn"] = "Guaraní",
["gsw"] = "Alemannic",
["guj"] = "Gujarati",
["hat"] = "Haitian Creole",
["hau"] = "Hausa",
["haw"] = "Hawaiian",
["heb"] = "Hebrew",
["her"] = "Herero",
["hil"] = "Hiligaynon",
["hin"] = "Hindi",
["hit"] = "Hittite",
["hmo"] = "Hiri Motu",
["hrv"] = "Croatian",
["hsb"] = "Upper Sorbian",
["hun"] = "Hungarian",
["hye"] = "Armenian",
["ibo"] = "Igbo",
["ice"] = "Icelandic",
["ido"] = "Ido",
["iii"] = "Yi",
["iku"] = "Inuktitut",
["ile"] = "Interlingue",
["ilo"] = "Ilokano",
["ina"] = "Interlingua",
["ind"] = "Indonesian",
["ine"] = "Indo-European",
["inh"] = "Ingush",
["ipk"] = "Inupiaq",
["isl"] = "Icelandic",
["ita"] = "Italian",
["jav"] = "Javanese",
["jbo"] = "Lojban",
["jpn"] = "Japanese",
["kaa"] = "Karakalpak",
["kab"] = "Kabyle",
["kal"] = "Kalaallisut",
["kan"] = "Kannada",
["kar"] = "Karen",
["kas"] = "Kashmiri",
["kat"] = "Georgian",
["kau"] = "Kanuri",
["kaz"] = "Kazakh",
["kbd"] = "Kabardian",
["khm"] = "Khmer",
["kik"] = "Gikuyu",
["kin"] = "Kinyarwanda",
["kir"] = "Kyrgyz",
["kok"] = "Konkani",
["kom"] = "Komi",
["kon"] = "Kongo",
["kor"] = "Korean",
["krc"] = "Karachay-Balkar",
["kua"] = "Kwanyama",
["kum"] = "Kumyk",
["kur"] = "Kurdish",
["lad"] = "Judaeo-Spanish",
["lao"] = "Lao",
["lat"] = "Latin",
["lav"] = "Latvian",
["lim"] = "Limburgish",
["lin"] = "Lingala",
["lit"] = "Lithuanian",
["ltz"] = "Luxembourgish",
["lub"] = "Tshiluba",
["lug"] = "Luganda",
["mac"] = "Macedonian",
["mah"] = "Marshallese",
["mal"] = "Malayalam",
["mao"] = "Māori",
["mar"] = "Marathi",
["may"] = "Malay",
["mdf"] = "Moksha",
["mga"] = "Middle Irish",
["mkd"] = "Macedonian",
["mlg"] = "Malagasy",
["mlt"] = "Maltese",
["mnc"] = "Manchu",
["moh"] = "Mohawk",
["mon"] = "Mongolian",
["mri"] = "Māori",
["msa"] = "Malay",
["mus"] = "Muscogee",
["mwl"] = "Mirandese",
["mwr"] = "Marwari",
["mya"] = "Burmese",
["myn"] = "Mayan",
["myv"] = "Erzya",
["nah"] = "Nahuatl",
["nap"] = "Neapolitan",
["nau"] = "Nauruan",
["nav"] = "Navajo",
["nbl"] = "Southern Ndebele",
["nde"] = "Northern Ndebele",
["ndo"] = "Ndonga",
["nds"] = "Low Saxon",
["nep"] = "Nepali",
["new"] = "Nepal Bhasa",
["nld"] = "Dutch",
["nno"] = "Norwegian Nynorsk",
["nob"] = "Norwegian Bokmål",
["non"] = "Old Norse",
["nor"] = "Norwegian",
["nso"] = "Northern Sotho",
["nya"] = "Chichewa",
["oci"] = "Occitan",
["oji"] = "Anishinaabe",
["ori"] = "Oriya",
["orm"] = "Oromo",
["oss"] = "Ossetic",
["ota"] = "Ottoman Turkish",
["oto"] = "Otomi",
["pag"] = "Pangasinan",
["pal"] = "Middle Persian",
["pam"] = "Kapampangan",
["pan"] = "Punjabi",
["pap"] = "Papiamento",
["peo"] = "Old Persian",
["per"] = "Persian",
["pli"] = "Pāli",
["pol"] = "Polish",
["por"] = "Portuguese",
["pus"] = "Pashto",
["que"] = "Quechua",
["rar"] = "Rarotongan",
["roa"] = "Other Romance",
["roh"] = "Romansh",
["rom"] = "Romani",
["ron"] = "Romanian",
["rum"] = "Romanian",
["run"] = "Kirundi",
["rup"] = "Aromanian",
["rus"] = "Russian",
["sag"] = "Sango",
["sah"] = "Yakut",
["san"] = "Sanskrit",
["scn"] = "Sicilian",
["sco"] = "Scots",
["sga"] = "Old Irish",
["sgn"] = "Sign language",
["sin"] = "Sinhala",
["sla"] = "Old East Slavic",
["slk"] = "Slovak",
["slo"] = "Slovak",
["slv"] = "Slovene",
["sme"] = "Sami",
["smo"] = "Samoan",
["sna"] = "Shona",
["snd"] = "Sindhi",
["som"] = "Somali",
["sot"] = "Sotho",
["spa"] = "Spanish",
["sqi"] = "Albanian",
["srd"] = "Sardinian",
["srn"] = "Sranan",
["srp"] = "Serbian",
["ssw"] = "Swati",
["sun"] = "Sundanese",
["sux"] = "Sumerian",
["swa"] = "Swahili",
["swe"] = "Swedish",
["syc"] = "Syriac",
["syr"] = "Syriac",
["tah"] = "Tahitian",
["tam"] = "Tamil",
["tat"] = "Tatar",
["tel"] = "Telugu",
["tet"] = "Tetum",
["tgk"] = "Tajik",
["tgl"] = "Tagalog",
["tha"] = "Thai",
["tib"] = "Tibetan",
["tir"] = "Tigrinya",
["tlh"] = "Klingon",
["ton"] = "Tongan",
["tpi"] = "Tok Pisin",
["tsn"] = "Tswana",
["tso"] = "Tsonga",
["tuk"] = "Turkmen",
["tum"] = "Tumbuka",
["tur"] = "Turkish",
["tut"] = "Evenki",
["twi"] = "Twi",
["tyv"] = "Tuvan",
["udm"] = "Udmurt",
["uig"] = "Uyghur",
["ukr"] = "Ukrainian",
["und"] = "undetermined",
["urd"] = "Urdu",
["uzb"] = "Uzbek",
["ven"] = "Venda",
["vie"] = "Vietnamese",
["vol"] = "Volapük",
["war"] = "Waray-Waray",
["wel"] = "Welsh",
["wen"] = "Sorbian",
["wln"] = "Walloon",
["wol"] = "Wolof",
["xal"] = "Oirat",
["xho"] = "Xhosa",
["yid"] = "Yiddish",
["yor"] = "Yoruba",
["zen"] = "Zenaga",
["zha"] = "Zhuang",
["zho"] = "Chinese",
["zul"] = "Zulu",
["zun"] = "Zuni"
}
-- Language names keyed by three-letter code.
-- NOTE(review): the name suggests these are ISO 639-3 codes — confirm against the data source.
-- The keys are lowercased and merged into the final code lookup together with the other tables.
local part_3 = {
	["aar"] = "Afar",
	["abk"] = "Abkhaz",
	["ace"] = "Acehnese",
	["ady"] = "Adyghe",
	["aeb"] = "Tunisian Arabic",
	["afr"] = "Afrikaans",
	["ags"] = "Esimbi",
	["ain"] = "Ainu",
	["aka"] = "Akan",
	["akk"] = "Akkadian",
	["akl"] = "Aklan",
	["als"] = "Albanian (Tosk)",
	["amh"] = "Amharic",
	["ang"] = "Old English",
	["ank"] = "Goemai",
	["apc"] = "North Levantine Arabic",
	["apj"] = "Jicarilla Apache",
	["ara"] = "Arabic",
	["arc"] = "Aramaic",
	["arg"] = "Aragonese",
	["arn"] = "Mapudungun",
	["arq"] = "Algerian Arabic",
	["arz"] = "Egyptian Spoken Arabic",
	["asm"] = "Assamese",
	["ast"] = "Asturian",
	["ava"] = "Avaric",
	["ave"] = "Avestan",
	["aym"] = "Aymara",
	["aze"] = "Azerbaijani",
	["bak"] = "Bashkir",
	["bal"] = "Balochi",
	["bam"] = "Bambara",
	["bar"] = "Bavarian",
	["bcl"] = "Central Bicalono",
	["bel"] = "Belarusian",
	["ben"] = "Bengali",
	["bho"] = "Bhojpuri",
	["bik"] = "Bicol",
	["bis"] = "Bislama",
	["bjn"] = "Banjar",
	["bla"] = "Siksika",
	["bod"] = "Tibetan",
	["bos"] = "Bosnian",
	["bpy"] = "Bishnupriya Manipuri",
	["bre"] = "Breton",
	["bua"] = "Buryat",
	["bug"] = "Buginese",
	["bul"] = "Bulgarian",
	["bxr"] = "Buryat (Russia)",
	["byq"] = "Basay",
	["car"] = "Carib",
	["cat"] = "Catalan",
	["cbk"] = "Chavacano",
	["cbv"] = "Cacua",
	["cdo"] = "Min-dong",
	["ceb"] = "Cebuano",
	["ces"] = "Czech",
	["cha"] = "Chamorro",
	["che"] = "Chechen",
	["chm"] = "Mari",
	["cho"] = "Choctaw",
	["chr"] = "Cherokee",
	["chu"] = "Old Church Slavonic",
	["chv"] = "Chuvash",
	["chy"] = "Cheyenne",
	["ckb"] = "Central Kurdish",
	["ckt"] = "Chukchi",
	["ckv"] = "Kavalan",
	["clw"] = "Chulym",
	["cmn"] = "Mandarin Chinese",
	["cop"] = "Coptic",
	["cor"] = "Cornish",
	["cos"] = "Corsican",
	["cre"] = "Cree",
	["crh"] = "Crimean Tatar",
	["crk"] = "Plains Cree",
	["cro"] = "Crow",
	["csb"] = "Kashubian",
	["cym"] = "Welsh",
	["dan"] = "Danish",
	["del"] = "Delaware",
	["deu"] = "German",
	["diq"] = "Zazaki",
	["div"] = "Dhivehi",
	["dlm"] = "Dalmatian",
	["dsb"] = "Lower Sorbian",
	["dum"] = "Middle Dutch",
	["dzo"] = "Dzongkha",
	["egy"] = "Egyptian",
	["eko"] = "Koti",
	["ell"] = "Greek",
	["elx"] = "Elamite",
	["eng"] = "English",
	["enm"] = "Middle English",
	["epo"] = "Esperanto",
	["est"] = "Estonian",
	["esu"] = "Central Alaskan Yup'ik",
	["eus"] = "Basque",
	["evn"] = "Evenki",
	["ewe"] = "Ewe",
	["ext"] = "Extremaduran",
	["fan"] = "Fang",
	["fao"] = "Faroese",
	["fas"] = "Persian",
	["fax"] = "Fala",
	["fij"] = "Fijian",
	["fil"] = "Tagalog",
	["fin"] = "Finnish",
	["fra"] = "French",
	["fro"] = "Old French",
	["frp"] = "Arpitan",
	["frr"] = "North Frisian",
	["fry"] = "West Frisian",
	["ful"] = "Fula",
	["fur"] = "Friulian",
	["gag"] = "Gagauz",
	["gan"] = "Gan",
	["gla"] = "Scottish Gaelic",
	["gle"] = "Irish",
	["glg"] = "Galician",
	["glk"] = "Gilaki",
	["glv"] = "Manx",
	["gmh"] = "Middle High German",
	["gml"] = "Middle Low German",
	["gmy"] = "Mycenaean Greek",
	["goh"] = "Old High German",
	["got"] = "Gothic",
	["grc"] = "Ancient Greek",
	["grn"] = "Guaraní",
	["gsw"] = "Alemannic",
	["guj"] = "Gujarati",
	["hak"] = "Hakka",
	["hat"] = "Haitian Creole",
	["hau"] = "Hausa",
	["haw"] = "Hawaiian",
	["haz"] = "Hazaragi",
	["hbs"] = "Serbo-Croatian",
	["heb"] = "Hebrew",
	["her"] = "Herero",
	["hif"] = "Fiji Hindi",
	["hil"] = "Hiligaynon",
	["hin"] = "Hindi",
	["hit"] = "Hittite",
	["hmo"] = "Hiri Motu",
	["hni"] = "Hani",
	["hop"] = "Hopi",
	["hrv"] = "Croatian",
	["hsb"] = "Upper Sorbian",
	["hun"] = "Hungarian",
	["hye"] = "Armenian",
	["ibo"] = "Igbo",
	["ido"] = "Ido",
	["iii"] = "Yi",
	["iku"] = "Inuktitut",
	["ile"] = "Interlingue",
	["ilo"] = "Ilokano",
	["ina"] = "Interlingua",
	["ind"] = "Indonesian",
	["inh"] = "Ingush",
	["ipk"] = "Inupiaq",
	["irk"] = "Iraqw",
	["isl"] = "Icelandic",
	["ita"] = "Italian",
	["jac"] = "Jakaltek",
	["jav"] = "Javanese",
	["jbo"] = "Lojban",
	["jpn"] = "Japanese",
	["kaa"] = "Karakalpak",
	["kab"] = "Kabyle",
	["kal"] = "Kalaallisut",
	["kan"] = "Kannada",
	["kas"] = "Kashmiri",
	["kat"] = "Georgian",
	["kau"] = "Kanuri",
	["kaz"] = "Kazakh",
	["kbd"] = "Kabardian",
	["kdd"] = "Yankunytjatjara",
	["kea"] = "Cape Verdean Creole",
	["kex"] = "Canara Konkani",
	["khm"] = "Khmer",
	["kik"] = "Gikuyu",
	["kin"] = "Kinyarwanda",
	["kir"] = "Kyrgyz",
	["kmr"] = "Kurmanji",
	["knn"] = "Konkani",
	["koi"] = "Komi-Permyak",
	["kok"] = "Konkani",
	["kom"] = "Komi",
	["kon"] = "Kongo",
	["kor"] = "Korean",
	["krc"] = "Karachay-Balkar",
	["ksh"] = "Colognian",
	["kua"] = "Kwanyama",
	["kum"] = "Kumyk",
	["kur"] = "Kurdish",
	["lad"] = "Judaeo-Spanish",
	["lao"] = "Lao",
	["lat"] = "Latin",
	["lav"] = "Latvian",
	["lbe"] = "Lak",
	["lij"] = "Ligurian",
	["lim"] = "Limburgish",
	["lin"] = "Lingala",
	["lit"] = "Lithuanian",
	["liv"] = "Livonian",
	["lkt"] = "Lakota",
	["lmo"] = "Lombard",
	["lou"] = "Louisiana Creole French",
	["ltg"] = "Latgalian",
	["ltz"] = "Luxembourgish",
	["lub"] = "Tshiluba",
	["lug"] = "Luganda",
	["lut"] = "Lushootseed",
	["luy"] = "Luhya",
	["lzh"] = "Classical Chinese",
	["mah"] = "Marshallese",
	["mal"] = "Malayalam",
	["mam"] = "Mam",
	["mar"] = "Marathi",
	["mdf"] = "Moksha",
	["mfe"] = "Mauritian creole",
	["mga"] = "Middle Irish",
	["mhr"] = "Meadow Mari",
	["min"] = "Minangkabau",
	["mkd"] = "Macedonian",
	["mlg"] = "Malagasy",
	["mlt"] = "Maltese",
	["mnc"] = "Manchu",
	["moh"] = "Mohawk",
	["mon"] = "Mongolian",
	["mov"] = "Mohave",
	["mrc"] = "Maricopa",
	["mri"] = "Māori",
	["mrj"] = "Hill Mari",
	["msa"] = "Malay",
	["mus"] = "Muscogee",
	["mwl"] = "Mirandese",
	["mwr"] = "Marwari",
	["mya"] = "Burmese",
	["myv"] = "Erzya",
	["mzn"] = "Mazandarani",
	["nan"] = "Min-nan",
	["nap"] = "Neapolitan",
	["nau"] = "Nauruan",
	["nav"] = "Navajo",
	["nbl"] = "Southern Ndebele",
	["nci"] = "Classical Nahuatl",
	["nde"] = "Northern Ndebele",
	["ndo"] = "Ndonga",
	["nds"] = "Low Saxon",
	["nep"] = "Nepali",
	["new"] = "Nepal Bhasa",
	["nld"] = "Dutch",
	["nno"] = "Norwegian Nynorsk",
	["nob"] = "Norwegian Bokmål",
	["nod"] = "Northern Thai",
	["non"] = "Old Norse",
	["nor"] = "Norwegian",
	["nov"] = "Novial",
	["nrm"] = "Norman",
	["nso"] = "Northern Sotho",
	["nya"] = "Chichewa",
	["oci"] = "Occitan",
	["ofs"] = "Old Frisian",
	["oji"] = "Anishinaabe",
	["one"] = "Oneida",
	["ori"] = "Oriya",
	["orm"] = "Oromo",
	["oss"] = "Ossetic",
	["osx"] = "Old Saxon",
	["ota"] = "Ottoman Turkish",
	["owl"] = "Old Welsh",
	["pag"] = "Pangasinan",
	["pal"] = "Middle Persian",
	["pam"] = "Kapampangan",
	["pan"] = "Punjabi",
	["pap"] = "Papiamento",
	["pcd"] = "Picard",
	["pdc"] = "Pennsylvania German",
	["peo"] = "Old Persian",
	["pfl"] = "Palatinate German",
	["phr"] = "Potwari",
	["pih"] = "Norfolk",
	["pli"] = "Pāli",
	["pms"] = "Piemontese",
	["pnb"] = "Western Panjabi",
	["pnt"] = "Pontic Greek",
	["pny"] = "Pinyin",
	["pol"] = "Polish",
	["por"] = "Portuguese",
	["pus"] = "Pashto",
	["quc"] = "K'iche'",
	["que"] = "Quechua",
	["qya"] = "Quenya",
	["rar"] = "Rarotongan",
	["rmy"] = "Vlax Romani",
	["roh"] = "Romansh",
	["rom"] = "Romani",
	["ron"] = "Romanian",
	["rue"] = "Rusyn",
	["run"] = "Kirundi",
	["ruo"] = "Istro-Romanian",
	["rup"] = "Aromanian",
	["rus"] = "Russian",
	["ryu"] = "Okinawan",
	["sag"] = "Sango",
	["sah"] = "Yakut",
	["san"] = "Sanskrit",
	["scl"] = "Shina",
	["scn"] = "Sicilian",
	["sco"] = "Scots",
	["sdc"] = "Sassarese",
	["sdn"] = "Gallurese",
	["see"] = "Seneca",
	["sga"] = "Old Irish",
	["shs"] = "Shuswap",
	["sin"] = "Sinhala",
	["sjd"] = "Kildin Sami",
	["sjn"] = "Sindarin",
	["skr"] = "Saraiki",
	["slk"] = "Slovak",
	["slv"] = "Slovene",
	["sme"] = "Sami",
	["smo"] = "Samoan",
	["sna"] = "Shona",
	["snd"] = "Sindhi",
	["som"] = "Somali",
	["sot"] = "Sotho",
	["spa"] = "Spanish",
	["sqi"] = "Albanian",
	["srd"] = "Sardinian",
	["srn"] = "Sranan",
	["srp"] = "Serbian",
	["ssf"] = "Thao",
	["ssw"] = "Swati",
	["stq"] = "Saterland Frisian",
	["sun"] = "Sundanese",
	["sux"] = "Sumerian",
	["swa"] = "Swahili",
	["swe"] = "Swedish",
	["syc"] = "Syriac",
	["syr"] = "Syriac",
	["szl"] = "Silesian",
	["tah"] = "Tahitian",
	["tam"] = "Tamil",
	["tat"] = "Tatar",
	["tcy"] = "Tulu",
	["tel"] = "Telugu",
	["tet"] = "Tetum",
	["tgk"] = "Tajik",
	["tgl"] = "Tagalog",
	["tha"] = "Thai",
	["tir"] = "Tigrinya",
	["tlh"] = "Klingon",
	["ton"] = "Tongan",
	["tpi"] = "Tok Pisin",
	["tsn"] = "Tswana",
	["tso"] = "Tsonga",
	["tuk"] = "Turkmen",
	["tum"] = "Tumbuka",
	["tur"] = "Turkish",
	["tus"] = "Tuscarora",
	["twi"] = "Twi",
	["tyv"] = "Tuvan",
	["tzm"] = "Central Atlas Tamazight",
	["udm"] = "Udmurt",
	["uig"] = "Uyghur",
	["ukr"] = "Ukrainian",
	["und"] = "undetermined",
	["urd"] = "Urdu",
	["uun"] = "Pazeh",
	["uzb"] = "Uzbek",
	["vec"] = "Venetian",
	["ven"] = "Venda",
	["vep"] = "Veps",
	["vie"] = "Vietnamese",
	["vls"] = "West Flemish",
	["vol"] = "Volapük",
	["vro"] = "Võro",
	["war"] = "Waray-Waray",
	["wlm"] = "Middle Welsh",
	["wln"] = "Walloon",
	["wol"] = "Wolof",
	["wuu"] = "Wuu",
	["xal"] = "Oirat",
	["xho"] = "Xhosa",
	["xmf"] = "Mingrelian",
	["yid"] = "Yiddish",
	["yor"] = "Yoruba",
	["yua"] = "Yukatek Maya",
	["yue"] = "Cantonese",
	["yuf"] = "Havasupai-Hualapai-Yavapai",
	["zea"] = "Zeelandic",
	["zen"] = "Zenaga",
	["zha"] = "Zhuang",
	["zho"] = "Chinese",
	["zul"] = "Zulu",
	["zun"] = "Zuni",
	["zza"] = "Zazaki"
}
-- Names for codes that mostly denote language groups or families rather than
-- single languages.
-- NOTE(review): the name suggests these are ISO 639-5 codes — confirm against the data source.
-- The keys are lowercased and merged into the final code lookup after part_3.
local part_5 = {
	["ber"] = "Berber",
	["cel"] = "Pictish",
	["gem"] = "Germanic",
	["gmw"] = "West Germanic",
	["iir"] = "Indo-Iranian",
	["ine"] = "Indo-European",
	["kar"] = "Karen",
	["myn"] = "Mayan",
	["nah"] = "Nahuatl",
	["oto"] = "Otomi",
	["roa"] = "Other Romance",
	["sgn"] = "Sign language",
	["sla"] = "Old East Slavic",
	["tut"] = "Evenki",
	["wen"] = "Sorbian"
}
-- Names for compound tags: a language code followed by a region, script or
-- private-use suffix. The keys are lowercased when merged into the final code
-- lookup, so "en-GB" is looked up as "en-gb".
local compounded = {
	["be-x-old"] = "Belarusian (Taraškievica)",
	["de-AT"] = "Austrian German",
	["en-GB"] = "British English",
	["en-US"] = "American English",
	["pt-BR"] = "Brazilian Portuguese",
	["por-BR"] = "Brazilian Portuguese",
	["nds-NL"] = "Dutch Low Saxon",
	["zh-Hans"] = "simplified Chinese",
	["zh-Hant"] = "traditional Chinese",
	["zh-CN"] = "simplified Chinese",
	["zh-HK"] = "traditional Chinese (Hong Kong)",
	["zh-MO"] = "Chinese (Macau)",
	["zh-SG"] = "Chinese (Singapore)",
	["zh-TW"] = "Chinese (Taiwan)"
}
-- Flatten every code table into one lookup keyed by lowercase code.
-- The tables are merged in the order below, so on a key collision the entry
-- from the table merged last wins (compounded > part_5 > part_3 > part_2 > part_1).
local codes_to_return = {}
for k, v in pairs(part_1) do codes_to_return[k:lower()] = v end
for k, v in pairs(part_2) do codes_to_return[k:lower()] = v end
for k, v in pairs(part_3) do codes_to_return[k:lower()] = v end
for k, v in pairs(part_5) do codes_to_return[k:lower()] = v end
for k, v in pairs(compounded) do codes_to_return[k:lower()] = v end

-- module export: the merged code -> name lookup
return {
	codes = codes_to_return
}
Latest revision as of 04:42, 3 September 2020
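This module depends on the following other modules: Module:Language/data/iana languages, Module:Language/data/ISO 639-3, Module:Language/data/wp languages, Module:Language/data/iana scripts, Module:Language/data/iana regions, Module:Language/data/iana variants and Module:Language/data/iana suppressed scripts.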
This Lua module is used on many pages and changes may be widely noticed. Test changes in the module's /sandbox or /testcases subpages, or in your own module sandbox. Consider discussing changes on the talk page before implementing them.
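This module pulls data from the data modules and bundles it together. Language names are gathered in this order:
1. Module:Language/data/wp languages – names used by Wikimedia wikis, which include some non-standard codes and composite codes; only the first name listed for each code is kept
2. Module:Language/data/iana languages – as defined by the IETF-sanctioned IANA registry; names are almost always the same as ISO, but the registry does not include 3-letter codes for languages that have 2-letter codes, hence the fallback
3. Module:Language/data/ISO 639-3 – the fallback, supplying the 3-letter codes that the IANA registry leaves out
In other words, a name set in wp languages takes precedence over one set in iana languages, which in turn takes precedence over one from ISO 639-3. The wp languages names can be discarded by passing dataset="iana" to the parent module.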
-- Merge any number of { code = { name, ... } } tables into a single table.
-- For each code, the names from every argument are appended to one list, in
-- argument order, so names from earlier arguments come first. Duplicate names
-- are kept. A code whose name list is empty in every argument gets no entry.
local function __coalesce(...)
	local merged = {}
	for _, source in ipairs({...}) do
		for code, names in pairs(source) do
			local bucket = merged[code]
			for _, name in pairs(names) do
				if bucket == nil then
					-- created lazily, only once there is a name to store
					bucket = {}
					merged[code] = bucket
				end
				bucket[#bucket + 1] = name
			end
		end
	end
	return merged
end
-- Return a copy of t whose keys are lowercased.
-- When first_of_array_in_array is truthy, each value is replaced by a new
-- one-element list holding only its first item (the first language name,
-- dropping synonyms); otherwise the original value is reused as-is.
local function __preprocess(t, first_of_array_in_array)
	local lowered = {}
	for code, names in pairs(t) do
		local entry = names
		if first_of_array_in_array then
			entry = {names[1]}
		end
		lowered[code:lower()] = entry
	end
	return lowered
end
-- Load each data module and lowercase its keys.

-- Primary language subtags valid in BCP47, from the IANA registry
-- (mostly ISO 639-1, -2 and -3 codes, with some exclusions).
local languages_iana = __preprocess(require("Module:Language/data/iana languages"))
-- ISO 639-3 adds 3-letter codes that are absent from the IANA database because
-- they are aliased to 2-letter codes, but it lacks some deleted codes that are
-- still valid in BCP47 (some of them aliased).
local languages_iso = __preprocess(require("Module:Language/data/ISO 639-3"))
-- Wikimedia wikis use some non-standard codes and a subset of the IANA codes,
-- plus composite codes; only the first name of each code is kept here.
local languages_wp = __preprocess(require("Module:Language/data/wp languages"), true)
-- Script subtags valid in BCP47, from IANA (excluding special ISO 15924 codes).
local scripts = __preprocess(require("Module:Language/data/iana scripts"))
-- Region subtags valid in BCP47, from IANA (derived from ISO 3166-1 excluding
-- special codes, and from 3-digit UN M.49 codes for groups of countries).
local regions = __preprocess(require("Module:Language/data/iana regions"))
-- Variant subtags from IANA; the table format differs from the other IANA data tables.
local variants = __preprocess(require("Module:Language/data/iana variants"))
-- Suppressed script subtags from IANA.
local suppressed_scripts = __preprocess(require("Module:Language/data/iana suppressed scripts"))

local data = {}
-- names for a code are listed wp languages first, then IANA, then ISO 639-3
data.lang = __coalesce(languages_wp, languages_iana, languages_iso)
data.lang_iana = languages_iana
data.script = scripts
data.region = regions
data.variant = variants
data.suppressed = suppressed_scripts
return data