Module:Language/name/data: Difference between revisions

From Zoophilia Wiki
Jump to navigationJump to search
meta>Lfdder
No edit summary
Ookami (talk | contribs)
m 88 revisions imported
 
(18 intermediate revisions by 5 users not shown)
Line 1: Line 1:
-- put tables and their tables together
local function __coalesce(...)
local function __coalesce(...)
     local coalesced = {}
     local coalesced = {}
     for r, t in ipairs(arg) do
     for _, langslist in ipairs{...} do
        for k, v in pairs(t) do
        for langcode, langnames in pairs(langslist) do
            coalesced[k:lower()] = v
            for _, langname in pairs(langnames) do
                if not coalesced[langcode] then
                    coalesced[langcode] = {}
                end
                table.insert(coalesced[langcode], langname)
            end
         end
         end
     end
     end
Line 10: Line 16:
end
end


local __iso_639_3      = require("Module:Language/data/iso 639-3")
-- make the keys lowercase
local __iana_languages = require("Module:Language/data/iana languages")
local function __preprocess(t, first_of_array_in_array)
local __wp_languages  = require("Module:Language/data/wp languages")
    local preprocessed = {}
local iana_scripts    = __coalesce(require("Module:Language/data/iana scripts"))
    if first_of_array_in_array then
local iana_regions    = __coalesce(require("Module:Language/data/iana regions"))
    -- keep only the 1st language name for each code, excluding synonyms
        for k, v in pairs(t) do
            preprocessed[k:lower()] = {v[1]}
        end
    else
        for k, v in pairs(t) do
            preprocessed[k:lower()] = v
        end
    end
 
    return preprocessed
end
 
-- all valid primary language subtags for BCP47 from IANA (most of them from ISO 639-1, -2 and -3 with some exclusions)
local __iana_languages = __preprocess(require("Module:Language/data/iana languages"));
-- ISO 639-3 contains additional 3-letter codes not inserted in the IANA database as they are aliased to 2-letter codes,
-- but excludes some deleted codes still valid in BCP47 (some of them are aliased)
local __iso_639_3      = __preprocess(require("Module:Language/data/ISO 639-3"));
-- Wikimedia wikis uses some non-standard codes and a subset of IANA codes, plus composite codes
local __wp_languages  = __preprocess(require("Module:Language/data/wp languages"), true);
-- all valid script subtags for BCP47 from IANA (excluding special ISO 15924 codes)
local iana_scripts    = __preprocess(require("Module:Language/data/iana scripts"));
-- all valid region subtags for BCP47 from IANA (derived from ISO 3166-1 excluding special codes, and from 3-digit UN M.49 codes for groups of countries)
local iana_regions    = __preprocess(require("Module:Language/data/iana regions"));
 
-- variant subtags from IANA; table format differs from the other IANA data tables
local iana_variants = __preprocess(require("Module:Language/data/iana variants"));
-- suppressed script subtags from IANA;
local iana_suppressed_scripts = __preprocess (require("Module:Language/data/iana suppressed scripts"));
 
return {
return {
     iana = {
     lang  = __coalesce(__wp_languages, __iana_languages, __iso_639_3),
        lang  = __coalesce(__iso_639_3, __iana_languages),
    lang_iana = __iana_languages,
        script = iana_scripts,
    script = iana_scripts,
        region = iana_regions
    region = iana_regions,
    },
     variant = iana_variants,
     wikipedia = {
    suppressed = iana_suppressed_scripts,
        lang  = __coalesce(__iso_639_3, __wp_languages),
        script = iana_scripts,
        region = iana_regions
    }
}
}

Latest revision as of 04:42, 3 September 2020

This pulls data from data modules and bunches them together. Language names are gathered in this order:

  1. Module:Language/data/ISO 639-3 – as defined in ISO 639-3
  2. Module:Language/data/iana languages – as defined by the IETF-sanctioned IANA registry; names are almost always the same as in ISO 639-3, but the registry does not include 3-letter codes for languages that have 2-letter codes, hence the fallback
  3. Module:Language/data/wp languages – specific to Wikipedia; languages for which we've decided another name is better

In other words, a name set in wp languages takes precedence over one set in iana languages, which in turn takes precedence over one from ISO 639-3. wp languages can be discarded by passing dataset="iana" to the parent module.

See also


-- Merge any number of { code = { name, ... } } tables into one fresh table.
-- For every code, the names from each argument are appended in argument
-- order, so names from earlier arguments come first in the merged list.
-- A code whose name list is empty in every argument gets no entry at all.
local function __coalesce(...)
    local merged = {}
    for _, source in ipairs{...} do
        for code, names in pairs(source) do
            local bucket = merged[code]
            for _, name in pairs(names) do
                -- create the list lazily, only once there is a name to store
                if bucket == nil then
                    bucket = {}
                    merged[code] = bucket
                end
                bucket[#bucket + 1] = name
            end
        end
    end

    return merged
end

-- Return a copy of `t` whose keys are all lowercased.
-- When `first_of_array_in_array` is truthy, each value is replaced by a new
-- one-element list holding only its first item (i.e. the first name for a
-- code, dropping synonyms); otherwise the original value is reused as-is.
local function __preprocess(t, first_of_array_in_array)
    local result = {}
    for key, value in pairs(t) do
        local lowered = key:lower()
        if first_of_array_in_array then
            result[lowered] = { value[1] }
        else
            result[lowered] = value
        end
    end

    return result
end

-- Primary language subtags valid for BCP47, from IANA (mostly ISO 639-1, -2
-- and -3, with some exclusions).
local languages_iana = __preprocess(require("Module:Language/data/iana languages"))
-- ISO 639-3 adds 3-letter codes that IANA omits because they are aliased to
-- 2-letter codes, but lacks some deleted codes that remain valid in BCP47.
local languages_iso = __preprocess(require("Module:Language/data/ISO 639-3"))
-- Wikimedia-specific codes (non-standard and composite codes plus a subset of
-- IANA codes); only the first name of each entry is kept.
local languages_wp = __preprocess(require("Module:Language/data/wp languages"), true)
-- Script subtags valid for BCP47, from IANA (special ISO 15924 codes excluded).
local scripts = __preprocess(require("Module:Language/data/iana scripts"))
-- Region subtags valid for BCP47, from IANA (ISO 3166-1 minus special codes,
-- plus 3-digit UN M.49 codes for groups of countries).
local regions = __preprocess(require("Module:Language/data/iana regions"))

-- Variant subtags from IANA; this table's format differs from the other IANA
-- data tables.
local variants = __preprocess(require("Module:Language/data/iana variants"))
-- Suppressed script subtags from IANA.
local suppressed_scripts = __preprocess(require("Module:Language/data/iana suppressed scripts"))

-- Exported data. `lang` lists names in the order Wikimedia, IANA, ISO 639-3
-- for each code; `lang_iana` exposes the IANA table on its own.
local exported = {
    lang = __coalesce(languages_wp, languages_iana, languages_iso),
    lang_iana = languages_iana,
    script = scripts,
    region = regions,
    variant = variants,
    suppressed = suppressed_scripts,
}

return exported