-- Return a new table holding the entries of `t` under lowercased keys.
-- When `first_of_array_in_array` is truthy, each value is replaced by a fresh
-- one-element list containing only value[1] (the first name, synonyms dropped);
-- otherwise the original value is stored as-is (by reference, not copied).
local function __preprocess(t, first_of_array_in_array)
    local result = {}
    for key, value in pairs(t) do
        local lowered = key:lower()
        if first_of_array_in_array then
            result[lowered] = { value[1] }
        else
            result[lowered] = value
        end
    end
    return result
end
-- Data tables, loaded when this module is loaded; __preprocess lowercases every key.
-- Each require() call is wrapped in parentheses so that exactly one value reaches
-- __preprocess: a bare call in last-argument position forwards ALL of its results, and
-- any extra result would be taken as the `first_of_array_in_array` flag.

-- all valid primary language subtags for BCP47 from IANA (most of them from ISO 639-1, -2 and -3 with some exclusions)
local __iana_languages = __preprocess((require("Module:Language/data/iana languages")))
-- ISO 639-3 contains additional 3-letter codes not inserted in the IANA database as they are aliased to 2-letter codes,
-- but excludes some deleted codes still valid in BCP47 (some of them are aliased)
local __iso_639_3 = __preprocess((require("Module:Language/data/ISO 639-3")))
-- Wikimedia wikis use some non-standard codes and a subset of IANA codes, plus composite codes;
-- the `true` flag keeps only the first name listed for each code
local __wp_languages = __preprocess((require("Module:Language/data/wp languages")), true)
-- all valid script subtags for BCP47 from IANA (excluding special ISO 15924 codes)
local iana_scripts = __preprocess((require("Module:Language/data/iana scripts")))
-- all valid region subtags for BCP47 from IANA (derived from ISO 3166-1 excluding special codes, and from 3-digit UN M.49 codes for groups of countries)
local iana_regions = __preprocess((require("Module:Language/data/iana regions")))
-- variant subtags from IANA; table format differs from the other IANA data tables
local iana_variants = __preprocess((require("Module:Language/data/iana variants")))
-- suppressed script subtags from IANA
local iana_suppressed_scripts = __preprocess((require("Module:Language/data/iana suppressed scripts")))
return {
return {
codes = {
lang = __coalesce(__wp_languages, __iana_languages, __iso_639_3),
["ab"] = "Abkhaz"
lang_iana = __iana_languages,
,["abk"] = "Abkhaz"
script = iana_scripts,
,["ace"] = "Acehnese"
region = iana_regions,
,["ady"] = "Adyghe"
variant = iana_variants,
,["aa"] = "Afar"
suppressed = iana_suppressed_scripts,
,["aar"] = "Afar"
,["af"] = "Afrikaans"
,["afr"] = "Afrikaans"
,["ain"] = "Ainu"
,["ak"] = "Akan"
,["aka"] = "Akan"
,["akk"] = "Akkadian"
,["akl"] = "Aklan"
,["alb"] = "Albanian"
,["sq"] = "Albanian"
,["sqi"] = "Albanian"
,["als"] = "Albanian (Tosk)"
,["gsw"] = "Alemannic"
,["arq"] = "Algerian Arabic"
,["am"] = "Amharic"
,["amh"] = "Amharic"
,["grc"] = "Ancient Greek"
,["oj"] = "Anishinaabe"
,["oji"] = "Anishinaabe"
,["ar"] = "Arabic"
,["ara"] = "Arabic"
,["an"] = "Aragonese"
,["arg"] = "Aragonese"
,["arc"] = "Aramaic"
,["arm"] = "Armenian"
,["hy"] = "Armenian"
,["hye"] = "Armenian"
,["rup"] = "Aromanian"
,["frp"] = "Arpitan"
,["as"] = "Assamese"
,["asm"] = "Assamese"
,["ast"] = "Asturian"
,["av"] = "Avaric"
,["ava"] = "Avaric"
,["ae"] = "Avestan"
,["ave"] = "Avestan"
,["ay"] = "Aymara"
,["aym"] = "Aymara"
,["az"] = "Azerbaijani"
,["aze"] = "Azerbaijani"
,["bal"] = "Balochi"
,["bam"] = "Bambara"
,["bm"] = "Bambara"
,["bjn"] = "Banjar"
,["ba"] = "Bashkir"
,["bak"] = "Bashkir"
,["baq"] = "Basque"
,["eu"] = "Basque"
,["eus"] = "Basque"
,["bar"] = "Bavarian"
,["be"] = "Belarusian"
,["bel"] = "Belarusian"
,["ben"] = "Bengali"
,["bn"] = "Bengali"
,["ber"] = "Berber"
,["bho"] = "Bhojpuri"
,["bik"] = "Bicol"
,["bh"] = "Bihari"
,["bih"] = "Bihari"
,["bpy"] = "Bishnupriya Manipuri"
,["bi"] = "Bislama"
,["bis"] = "Bislama"
,["bos"] = "Bosnian"
,["bs"] = "Bosnian"
,["por-BR"] = "Brazilian Portuguese"
,["pt-BR"] = "Brazilian Portuguese"
,["br"] = "Breton"
,["bre"] = "Breton"
,["bug"] = "Buginese"
,["bg"] = "Bulgarian"
,["bul"] = "Bulgarian"
,["bur"] = "Burmese"
,["my"] = "Burmese"
,["mya"] = "Burmese"
,["bua"] = "Buryat"
,["bxr"] = "Buryat (Russia)"
,["cbv"] = "Cacua"
,["kex"] = "Canara Konkani"
,["yue"] = "Cantonese"
,["kea"] = "Cape Verdean Creole"
,["car"] = "Carib"
,["ca"] = "Catalan"
,["cat"] = "Catalan"
,["ceb"] = "Cebuano"
,["esu"] = "Central Alaskan Yup'ik"
,["tzm"] = "Central Atlas Tamazight"
,["bcl"] = "Central Bicalono"
,["ckb"] = "Central Kurdish"
,["ch"] = "Chamorro"
,["cha"] = "Chamorro"
,["cbk"] = "Chavacano"
,["ce"] = "Chechen"
,["che"] = "Chechen"
,["chr"] = "Cherokee"
,["chy"] = "Cheyenne"
,["ny"] = "Chichewa"
,["nya"] = "Chichewa"
,["chi"] = "Chinese"
,["zh"] = "Chinese"
,["zho"] = "Chinese"
,["cho"] = "Choctaw"
,["ckt"] = "Chukchi"
,["chv"] = "Chuvash"
,["cv"] = "Chuvash"
,["nci"] = "Classical Nahuatl"
,["ksh"] = "Colognian"
,["cop"] = "Coptic"
,["cor"] = "Cornish"
,["kw"] = "Cornish"
,["co"] = "Corsican"
,["cos"] = "Corsican"
,["cr"] = "Cree"
,["cre"] = "Cree"
,["crh"] = "Crimean Tatar"
,["hr"] = "Croatian"
,["hrv"] = "Croatian"
,["cro"] = "Crow"
,["ces"] = "Czech"
,["cs"] = "Czech"
,["cze"] = "Czech"
,["dlm"] = "Dalmatian"
,["da"] = "Danish"
,["dan"] = "Danish"
,["del"] = "Delaware"
,["div"] = "Dhivehi"
,["dv"] = "Dhivehi"
,["dut"] = "Dutch"
,["nl"] = "Dutch"
,["nld"] = "Dutch"
,["dz"] = "Dzongkha"
,["dzo"] = "Dzongkha"
,["egy"] = "Egyptian"
,["arz"] = "Egyptian Spoken Arabic"
,["elx"] = "Elamite"
,["en"] = "English"
,["eng"] = "English"
,["myv"] = "Erzya"
,["ags"] = "Esimbi"
,["eo"] = "Esperanto"
,["epo"] = "Esperanto"
,["est"] = "Estonian"
,["et"] = "Estonian"
,["evn"] = "Evenki"
,["tut"] = "Evenki"
,["ee"] = "Ewe"
,["ewe"] = "Ewe"
,["ext"] = "Extremaduran"
,["fax"] = "Fala"
,["fan"] = "Fang"
,["fao"] = "Faroese"
,["fo"] = "Faroese"
,["hif"] = "Fiji Hindi"
,["fij"] = "Fijian"
,["fj"] = "Fijian"
,["fi"] = "Finnish"
,["fin"] = "Finnish"
,["fr"] = "French"
,["fra"] = "French"
,["fre"] = "French"
,["fur"] = "Friulian"
,["ff"] = "Fula"
,["ful"] = "Fula"
,["gag"] = "Gagauz"
,["gl"] = "Galician"
,["glg"] = "Galician"
,["sdn"] = "Gallurese"
,["gan"] = "Gan"
,["geo"] = "Georgian"
,["ka"] = "Georgian"
,["kat"] = "Georgian"
,["de"] = "German"
,["deu"] = "German"
,["ger"] = "German"
,["gem"] = "Germanic"
,["ki"] = "Gikuyu"
,["kik"] = "Gikuyu"
,["glk"] = "Gilaki"
,["ank"] = "Goemai"
,["got"] = "Gothic"
,["el"] = "Greek"
,["ell"] = "Greek"
,["gre"] = "Greek"
,["gn"] = "Guaraní"
,["grn"] = "Guaraní"
,["gu"] = "Gujarati"
,["guj"] = "Gujarati"
,["hat"] = "Haitian Creole"
,["ht"] = "Haitian Creole"
,["hak"] = "Hakka"
,["hni"] = "Hani"
,["ha"] = "Hausa"
,["hau"] = "Hausa"
,["yuf"] = "Havasupai-Hualapai-Yavapai"
,["haw"] = "Hawaiian"
,["haz"] = "Hazaragi"
,["he"] = "Hebrew"
,["heb"] = "Hebrew"
,["her"] = "Herero"
,["hz"] = "Herero"
,["hil"] = "Hiligaynon"
,["mrj"] = "Hill Mari"
,["hi"] = "Hindi"
,["hin"] = "Hindi"
,["hmo"] = "Hiri Motu"
,["ho"] = "Hiri Motu"
,["hit"] = "Hittite"
,["hop"] = "Hopi"
,["hu"] = "Hungarian"
,["hun"] = "Hungarian"
,["ice"] = "Icelandic"
,["is"] = "Icelandic"
,["isl"] = "Icelandic"
,["ido"] = "Ido"
,["io"] = "Ido"
,["ibo"] = "Igbo"
,["ig"] = "Igbo"
,["ilo"] = "Ilokano"
,["ine"] = "Indo-European"
,["iir"] = "Indo-Iranian"
,["id"] = "Indonesian"
,["ind"] = "Indonesian"
,["inh"] = "Ingush"
,["ia"] = "Interlingua"
,["ina"] = "Interlingua"
,["ie"] = "Interlingue"
,["ile"] = "Interlingue"
,["iku"] = "Inuktitut"
,["iu"] = "Inuktitut"
,["ik"] = "Inupiaq"
,["ipk"] = "Inupiaq"
,["irk"] = "Iraqw"
,["ga"] = "Irish"
,["gle"] = "Irish"
,["ruo"] = "Istro-Romanian"
,["it"] = "Italian"
,["ita"] = "Italian"
,["jac"] = "Jakaltek"
,["ja"] = "Japanese"
,["jpn"] = "Japanese"
,["jav"] = "Javanese"
,["jv"] = "Javanese"
,["apj"] = "Jicarilla Apache"
,["lad"] = "Judaeo-Spanish"
,["kbd"] = "Kabardian"
,["kab"] = "Kabyle"
,["kal"] = "Kalaallisut"
,["kl"] = "Kalaallisut"
,["kan"] = "Kannada"
,["kn"] = "Kannada"
,["kau"] = "Kanuri"
,["kr"] = "Kanuri"
,["pam"] = "Kapampangan"
,["krc"] = "Karachay-Balkar"
,["kaa"] = "Karakalpak"
,["kar"] = "Karen"
,["kas"] = "Kashmiri"
,["ks"] = "Kashmiri"
,["csb"] = "Kashubian"
,["kaz"] = "Kazakh"
,["kk"] = "Kazakh"
,["khm"] = "Khmer"
,["km"] = "Khmer"
,["quc"] = "K'iche'"
,["sjd"] = "Kildin Sami"
,["kin"] = "Kinyarwanda"
,["rw"] = "Kinyarwanda"
,["rn"] = "Kirundi"
,["run"] = "Kirundi"
,["tlh"] = "Klingon"
,["kom"] = "Komi"
,["kv"] = "Komi"
,["koi"] = "Komi-Permyak"
,["kg"] = "Kongo"
,["kon"] = "Kongo"
,["knn"] = "Konkani"
,["kok"] = "Konkani"
,["ko"] = "Korean"
,["kor"] = "Korean"
,["eko"] = "Koti"
,["kum"] = "Kumyk"
,["ku"] = "Kurdish"
,["kur"] = "Kurdish"
,["kmr"] = "Kurmanji"
,["kj"] = "Kwanyama"
,["kua"] = "Kwanyama"
,["kir"] = "Kyrgyz"
,["ky"] = "Kyrgyz"
,["lbe"] = "Lak"
,["lkt"] = "Lakota"
,["lao"] = "Lao"
,["lo"] = "Lao"
,["ltg"] = "Latgalian"
,["la"] = "Latin"
,["lat"] = "Latin"
,["lav"] = "Latvian"
,["lv"] = "Latvian"
,["lij"] = "Ligurian"
,["li"] = "Limburgish"
,["lim"] = "Limburgish"
,["lin"] = "Lingala"
,["ln"] = "Lingala"
,["lit"] = "Lithuanian"
,["lt"] = "Lithuanian"
,["liv"] = "Livonian"
,["jbo"] = "Lojban"
,["lmo"] = "Lombard"
,["lou"] = "Louisiana Creole French"
,["nds"] = "Low Saxon"
,["dsb"] = "Lower Sorbian"
,["lg"] = "Luganda"
,["lug"] = "Luganda"
,["luy"] = "Luhya"
,["lut"] = "Lushootseed"
,["lb"] = "Luxembourgish"
,["ltz"] = "Luxembourgish"
,["mac"] = "Macedonian"
,["mk"] = "Macedonian"
,["mkd"] = "Macedonian"
,["mg"] = "Malagasy"
,["mlg"] = "Malagasy"
,["may"] = "Malay"
,["ms"] = "Malay"
,["msa"] = "Malay"
,["mal"] = "Malayalam"
,["ml"] = "Malayalam"
,["mlt"] = "Maltese"
,["mt"] = "Maltese"
,["mam"] = "Mam"
,["mnc"] = "Manchu"
,["cmn"] = "Mandarin Chinese"
,["glv"] = "Manx"
,["gv"] = "Manx"
,["mao"] = "Māori"
,["mi"] = "Māori"
,["mri"] = "Māori"
,["arn"] = "Mapudungun"
,["mar"] = "Marathi"
,["mr"] = "Marathi"
,["chm"] = "Mari"
,["mrc"] = "Maricopa"
,["mah"] = "Marshallese"
,["mh"] = "Marshallese"
,["mwr"] = "Marwari"
,["mfe"] = "Mauritian creole"
,["myn"] = "Mayan"
,["mzn"] = "Mazandarani"
,["mhr"] = "Meadow Mari"
,["dum"] = "Middle Dutch"
,["enm"] = "Middle English"
,["gmh"] = "Middle High German"
,["mga"] = "Middle Irish"
,["gml"] = "Middle Low German"
,["pal"] = "Middle Persian"
,["wlm"] = "Middle Welsh"
,["cdo"] = "Min-dong"
,["xmf"] = "Mingrelian"
,["nan"] = "Min-nan"
,["mwl"] = "Mirandese"
,["mov"] = "Mohave"
,["moh"] = "Mohawk"
,["mdf"] = "Moksha"
,["mn"] = "Mongolian"
,["mon"] = "Mongolian"
,["mus"] = "Muscogee"
,["gmy"] = "Mycenaean Greek"
,["nah"] = "Nahuatl"
,["na"] = "Nauruan"
,["nau"] = "Nauruan"
,["nav"] = "Navajo"
,["nv"] = "Navajo"
,["ndo"] = "Ndonga"
,["ng"] = "Ndonga"
,["nap"] = "Neapolitan"
,["new"] = "Nepal Bhasa"
,["ne"] = "Nepali"
,["nep"] = "Nepali"
,["pih"] = "Norfolk"
,["nrm"] = "Norman"
,["frr"] = "North Frisian"
,["apc"] = "North Levantine Arabic"
,["nd"] = "Northern Ndebele"
,["nde"] = "Northern Ndebele"
,["nso"] = "Northern Sotho"
,["nod"] = "Northern Thai"
,["no"] = "Norwegian"
,["nor"] = "Norwegian"
,["nb"] = "Norwegian Bokmål"
,["nob"] = "Norwegian Bokmål"
,["nn"] = "Norwegian Nynorsk"
,["nno"] = "Norwegian Nynorsk"
,["nov"] = "Novial"
,["oc"] = "Occitan"
,["oci"] = "Occitan"
,["xal"] = "Oirat"
,["ryu"] = "Okinawan"
,["chu"] = "Old Church Slavonic"
,["cu"] = "Old Church Slavonic"
,["sla"] = "Old East Slavic"
,["ang"] = "Old English"
,["fro"] = "Old French"
,["ofs"] = "Old Frisian"
,["goh"] = "Old High German"
,["sga"] = "Old Irish"
,["non"] = "Old Norse"
,["peo"] = "Old Persian"
,["osx"] = "Old Saxon"
,["owl"] = "Old Welsh"
,["one"] = "Oneida"
,["or"] = "Oriya"
,["ori"] = "Oriya"
,["om"] = "Oromo"
,["orm"] = "Oromo"
,["os"] = "Ossetic"
,["oss"] = "Ossetic"
,["roa"] = "Other Romance"
,["oto"] = "Otomi"
,["ota"] = "Ottoman Turkish"
,["pfl"] = "Palatinate German"
,["pi"] = "Pāli"
,["pli"] = "Pāli"
,["pag"] = "Pangasinan"
,["pap"] = "Papiamento"
,["ps"] = "Pashto"
,["pus"] = "Pashto"
,["uun"] = "Pazeh"
,["pdc"] = "Pennsylvania German"
,["fa"] = "Persian"
,["fas"] = "Persian"
,["per"] = "Persian"
,["pcd"] = "Picard"
,["cel"] = "Pictish"
,["pms"] = "Piemontese"
,["pny"] = "Pinyin"
,["crk"] = "Plains Cree"
,["pl"] = "Polish"
,["pol"] = "Polish"
,["pnt"] = "Pontic Greek"
,["por"] = "Portuguese"
,["pt"] = "Portuguese"
,["phr"] = "Potwari"
,["pa"] = "Punjabi"
,["pan"] = "Punjabi"
,["qu"] = "Quechua"
,["que"] = "Quechua"
,["qya"] = "Quenya"
,["rar"] = "Rarotongan"
,["rom"] = "Romani"
,["ro"] = "Romanian"
,["ron"] = "Romanian"
,["rum"] = "Romanian"
,["rm"] = "Romansh"
,["roh"] = "Romansh"
,["ru"] = "Russian"
,["rus"] = "Russian"
,["rue"] = "Rusyn"
,["se"] = "Sami"
,["sme"] = "Sami"
,["sm"] = "Samoan"
,["smo"] = "Samoan"
,["sag"] = "Sango"
,["sg"] = "Sango"
,["sa"] = "Sanskrit"
,["san"] = "Sanskrit"
,["skr"] = "Saraiki"
,["sc"] = "Sardinian"
,["srd"] = "Sardinian"
,["sdc"] = "Sassarese"
,["stq"] = "Saterland Frisian"
,["sco"] = "Scots"
,["gd"] = "Scottish Gaelic"
,["gla"] = "Scottish Gaelic"
,["see"] = "Seneca"
,["sr"] = "Serbian"
,["srp"] = "Serbian"
,["hbs"] = "Serbo-Croatian"
,["scl"] = "Shina"
,["sn"] = "Shona"
,["sna"] = "Shona"
,["shs"] = "Shuswap"
,["scn"] = "Sicilian"
,["sgn"] = "Sign language"
,["bla"] = "Siksika"
,["szl"] = "Silesian"
,["zh-Hans"] = "Simplified Chinese"
,["sjn"] = "Sindarin"
,["sd"] = "Sindhi"
,["snd"] = "Sindhi"
,["si"] = "Sinhala"
,["sin"] = "Sinhala"
,["sk"] = "Slovak"
,["slk"] = "Slovak"
,["slo"] = "Slovak"
,["sl"] = "Slovene"
,["slv"] = "Slovene"
,["so"] = "Somali"
,["som"] = "Somali"
,["wen"] = "Sorbian"
,["sot"] = "Sotho"
,["st"] = "Sotho"
,["nbl"] = "Southern Ndebele"
,["nr"] = "Southern Ndebele"
,["es"] = "Spanish"
,["spa"] = "Spanish"
,["srn"] = "Sranan"
,["sux"] = "Sumerian"
,["su"] = "Sundanese"
,["sun"] = "Sundanese"
,["sw"] = "Swahili"
,["swa"] = "Swahili"
,["ss"] = "Swati"
,["ssw"] = "Swati"
,["sv"] = "Swedish"
,["swe"] = "Swedish"
,["syc"] = "Syriac"
,["syr"] = "Syriac"
,["fil"] = "Tagalog"
,["tgl"] = "Tagalog"
,["tl"] = "Tagalog"
,["tah"] = "Tahitian"
,["ty"] = "Tahitian"
,["tg"] = "Tajik"
,["tgk"] = "Tajik"
,["ta"] = "Tamil"
,["tam"] = "Tamil"
,["tat"] = "Tatar"
,["tt"] = "Tatar"
,["te"] = "Telugu"
,["tel"] = "Telugu"
,["tet"] = "Tetum"
,["th"] = "Thai"
,["tha"] = "Thai"
,["bo"] = "Tibetan"
,["bod"] = "Tibetan"
,["tib"] = "Tibetan"
,["ti"] = "Tigrinya"
,["tir"] = "Tigrinya"
,["tpi"] = "Tok Pisin"
,["to"] = "Tongan"
,["ton"] = "Tongan"
,["zh-Hant"] = "Traditional Chinese"
,["lu"] = "Tshiluba"
,["lub"] = "Tshiluba"
,["ts"] = "Tsonga"
,["tso"] = "Tsonga"
,["tn"] = "Tswana"
,["tsn"] = "Tswana"
,["tcy"] = "Tulu"
,["tum"] = "Tumbuka"
,["aeb"] = "Tunisian Arabic"
,["tr"] = "Turkish"
,["tur"] = "Turkish"
,["tk"] = "Turkmen"
,["tuk"] = "Turkmen"
,["tus"] = "Tuscarora"
,["tyv"] = "Tuvan"
,["tw"] = "Twi"
,["twi"] = "Twi"
,["udm"] = "Udmurt"
,["uk"] = "Ukrainian"
,["ukr"] = "Ukrainian"
,["und"] = "undetermined"
,["hsb"] = "Upper Sorbian"
,["ur"] = "Urdu"
,["urd"] = "Urdu"
,["ug"] = "Uyghur"
,["uig"] = "Uyghur"
,["uz"] = "Uzbek"
,["uzb"] = "Uzbek"
,["ve"] = "Venda"
,["ven"] = "Venda"
,["vec"] = "Venetian"
,["vep"] = "Veps"
,["vi"] = "Vietnamese"
,["vie"] = "Vietnamese"
,["rmy"] = "Vlax Romani"
,["vo"] = "Volapük"
,["vol"] = "Volapük"
,["vro"] = "Võro"
,["wa"] = "Walloon"
,["wln"] = "Walloon"
,["war"] = "Waray-Waray"
,["cy"] = "Welsh"
,["cym"] = "Welsh"
,["wel"] = "Welsh"
,["vls"] = "West Flemish"
,["fry"] = "West Frisian"
,["fy"] = "West Frisian"
,["gmw"] = "West Germanic"
,["pnb"] = "Western Panjabi"
,["wo"] = "Wolof"
,["wol"] = "Wolof"
,["wuu"] = "Wuu"
,["xh"] = "Xhosa"
,["xho"] = "Xhosa"
,["sah"] = "Yakut"
,["kdd"] = "Yankunytjatjara"
,["ii"] = "Yi"
,["iii"] = "Yi"
,["yi"] = "Yiddish"
,["yid"] = "Yiddish"
,["yo"] = "Yoruba"
,["yor"] = "Yoruba"
,["yua"] = "Yukatek Maya"
,["diq"] = "Zazaki"
,["zza"] = "Zazaki"
,["zea"] = "Zeelandic"
,["zen"] = "Zenaga"
,["za"] = "Zhuang"
,["zha"] = "Zhuang"
,["zu"] = "Zulu"
,["zul"] = "Zulu"
,["zun"] = "Zuni"
}
}
}
Latest revision as of 04:42, 3 September 2020
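This module depends on the following other modules: Module:Language/data/iana languages, Module:Language/data/ISO 639-3, Module:Language/data/wp languages, Module:Language/data/iana scripts, Module:Language/data/iana regions, Module:Language/data/iana variants, and Module:Language/data/iana suppressed scripts.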
This Lua module is used on many pages and changes may be widely noticed. Test changes in the module's /sandbox or /testcases subpages, or in your own module sandbox. Consider discussing changes on the talk page before implementing them.
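This pulls data from data modules and bunches them together. Language names are gathered in this order:
1. Module:Language/data/wp languages – codes used on Wikimedia wikis (some non-standard codes, a subset of IANA codes, plus composite codes); only the first name listed for each code is kept
2. Module:Language/data/iana languages – as defined by the IETF-sanctioned IANA registry; names are almost always the same as ISO, but it does not include 3-letter codes for languages with 2-letter codes, hence the fallback
3. Module:Language/data/ISO 639-3 – supplies the additional 3-letter codes that the IANA registry omits because they are aliased to 2-letter codes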
In other words, a name set in wp languages takes precedence over one set in iana languages, which in turn takes precedence over one from ISO 639-3. The wp languages data can be discarded by passing dataset="iana" to the parent module.
-- Merge any number of { code = { name, ... } } tables into a single table.
-- Arguments are visited in the order given, so for any code the names that
-- come from earlier arguments are appended before names from later ones.
-- A code only gets an entry once at least one name has been seen for it.
local function __coalesce(...)
    local merged = {}
    for _, source in ipairs({ ... }) do
        for code, names in pairs(source) do
            for _, name in pairs(names) do
                local bucket = merged[code]
                if bucket == nil then
                    -- first name seen for this code: create its list lazily
                    bucket = {}
                    merged[code] = bucket
                end
                bucket[#bucket + 1] = name
            end
        end
    end
    return merged
end
-- Make the keys lowercase: returns a new table holding the entries of `t`
-- under lowercased keys. When `first_of_array_in_array` is truthy, each value
-- is replaced by a fresh one-element list containing only value[1] (the first
-- language name, synonyms dropped); otherwise the original value is stored
-- as-is (by reference, not copied).
local function __preprocess(t, first_of_array_in_array)
    local result = {}
    for key, value in pairs(t) do
        local lowered = key:lower()
        if first_of_array_in_array then
            result[lowered] = { value[1] }
        else
            result[lowered] = value
        end
    end
    return result
end
-- Data tables, loaded when this module is loaded; __preprocess lowercases every key.
-- Each require() call is wrapped in parentheses so that exactly one value reaches
-- __preprocess: a bare call in last-argument position forwards ALL of its results, and
-- any extra result would be taken as the `first_of_array_in_array` flag.

-- all valid primary language subtags for BCP47 from IANA (most of them from ISO 639-1, -2 and -3 with some exclusions)
local __iana_languages = __preprocess((require("Module:Language/data/iana languages")))
-- ISO 639-3 contains additional 3-letter codes not inserted in the IANA database as they are aliased to 2-letter codes,
-- but excludes some deleted codes still valid in BCP47 (some of them are aliased)
local __iso_639_3 = __preprocess((require("Module:Language/data/ISO 639-3")))
-- Wikimedia wikis use some non-standard codes and a subset of IANA codes, plus composite codes;
-- the `true` flag keeps only the first name listed for each code
local __wp_languages = __preprocess((require("Module:Language/data/wp languages")), true)
-- all valid script subtags for BCP47 from IANA (excluding special ISO 15924 codes)
local iana_scripts = __preprocess((require("Module:Language/data/iana scripts")))
-- all valid region subtags for BCP47 from IANA (derived from ISO 3166-1 excluding special codes, and from 3-digit UN M.49 codes for groups of countries)
local iana_regions = __preprocess((require("Module:Language/data/iana regions")))
-- variant subtags from IANA; table format differs from the other IANA data tables
local iana_variants = __preprocess((require("Module:Language/data/iana variants")))
-- suppressed script subtags from IANA
local iana_suppressed_scripts = __preprocess((require("Module:Language/data/iana suppressed scripts")))

-- Module exports.
return {
    -- merged language names; for each code, wp names come first, then IANA, then ISO 639-3
    lang = __coalesce(__wp_languages, __iana_languages, __iso_639_3),
    -- IANA language names only
    lang_iana = __iana_languages,
    script = iana_scripts,
    region = iana_regions,
    variant = iana_variants,
    suppressed = iana_suppressed_scripts,
}