Module:Citation/CS1/COinS

From Zoophilia Wiki
Revision as of 09:54, 25 December 2025 by SockyPaws (talk | contribs) (Update submodule)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigationJump to search

This page contains various functions that render a CS1/CS2 template's metadata.

These files comprise the module support for CS1/CS2 citation templates:

CS1 | CS2 modules
Live Sandbox Diff Description
Gold padlock Module:Citation/CS1 Module:Citation/CS1/sandbox [edit] diff Rendering and support functions
Module:Citation/CS1/Configuration Module:Citation/CS1/Configuration/sandbox [edit] diff Translation tables; error and identifier handlers
Module:Citation/CS1/Whitelist Module:Citation/CS1/Whitelist/sandbox [edit] diff List of active and deprecated CS1/CS2 parameters
Module:Citation/CS1/Date validation Module:Citation/CS1/Date validation/sandbox [edit] diff Date format validation functions
Module:Citation/CS1/Identifiers Module:Citation/CS1/Identifiers/sandbox [edit] diff Functions that support the named identifiers (ISBN, DOI, PMID, etc.)
Module:Citation/CS1/Utilities Module:Citation/CS1/Utilities/sandbox [edit] diff Common functions and tables
Module:Citation/CS1/COinS Module:Citation/CS1/COinS/sandbox [edit] diff Functions that render a CS1/CS2 template's metadata
Module:Citation/CS1/styles.css Module:Citation/CS1/sandbox/styles.css [edit] diff CSS styles applied to the CS1/CS2 templates
Silver padlock Module:Citation/CS1/Suggestions Module:Citation/CS1/Suggestions/sandbox [edit] diff List that maps common erroneous parameter names to valid parameter names

Other documentation:


--[[-----------------< F O R W A R D   D E C L A R A T I O N S >--------------]]
-- Module-level upvalues. All of them start out nil and are assigned by
-- set_selected_modules() before any other function in this module is used.

-- Helper functions that live in Module:Citation/CS1/Utilities.
local has_accept_as_written, in_array, is_set
local remove_wiki_link, strip_apostrophe_markup

-- The configuration supertable defined in Module:Citation/CS1/Configuration.
local cfg

--[[-------------------< M A K E _ C O I N S _ T I T L E >----------------------
  Builds the title string used in the COinS metadata from a title and its
  companion script title (Title / ScriptTitle or any other name-script pair).

  Both parts have apostrophe markup (bold, italics) stripped so the metadata is
  not polluted with runs of %27%27… once it is URL-encoded. A leading
  two-lowercase-letter language prefix ("xx:") is dropped from the script part.

  Returns the two parts joined by a single space when both are set, the one
  that is set when only one is, or an empty string when neither is.
]]
local function make_coins_title(title, script)
  -- Only the first value returned by has_accept_as_written() is used here.
  local main_part = has_accept_as_written(title)
  local script_part = ""  -- Stays empty unless a script value was supplied.

  if not is_set(main_part) then
    main_part = ""  -- Normalise an unset title to an empty string.
  else
    main_part = strip_apostrophe_markup(main_part)
  end

  if is_set(script) then
    -- Drop the language prefix first (this may leave an empty string), then
    -- strip apostrophe markup from whatever remains.
    script_part = strip_apostrophe_markup((script:gsub("^%l%l%s*:%s*", "")))
  end

  if is_set(main_part) and is_set(script_part) then
    return main_part .. " " .. script_part  -- Both present: space-separated.
  end
  return main_part .. script_part  -- At most one of the two is non-empty.
end

--[[---------------< E S C A P E _ L U A _ M A G I C _ C H A R S >--------------
  Returns a copy of argument in which every Lua pattern magic character
  (^ $ ( ) % . [ ] * + - ?) is preceded by a percent sign, so that the result
  can be handed to string.match(), string.gsub() and friends as a pattern that
  matches the original text literally.
]]
local function escape_lua_magic_chars(argument)
  -- A single pass: prefix each magic character (including % itself) with %.
  -- The parentheses discard gsub's second return value (the match count).
  return (argument:gsub("[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0"))
end

--[[--------------------< G E T _ C O I N S _ P A G E S >-----------------------
  Reduces a |page=, |pages= or |at= value to plain page text for COinS: the
  URL part of any external wikilink is removed, as are square brackets and
  HTML-like tags, while en dashes and named HTML entities become hyphens.

  An unset value is returned unchanged.
]]
local function get_coins_pages(pages)
  if not is_set(pages) then
    return pages  -- Nothing to extract.
  end

  -- Captures the URL and the whitespace after it in "[url label]"; the opening
  -- bracket itself is not part of the capture and is removed further down.
  local link_pattern = "%[(%w*:?//[^ ]+%s+)[%w%d].*%]"

  local url_part = pages:match(link_pattern)
  while url_part do
    -- The captured text is literal, so its magic characters must be escaped
    -- before it is used as a pattern; every occurrence is then removed.
    pages = pages:gsub(escape_lua_magic_chars(url_part), "")
    url_part = pages:match(link_pattern)  -- Look for the next URL, if any.
  end

  -- In order: drop the brackets; turn en dashes into hyphens; turn named HTML
  -- entities (&ndash; etc.) into hyphens (numeric entities such as &#32; are
  -- not matched by this pattern); drop HTML-like tags, e.g. the spans that
  -- utilities.hyphen_to_dash() adds to <Pages>.
  local cleaned = pages
    :gsub("[%[%]]", "")
    :gsub("–", "-")
    :gsub("&%w+;", "-")
    :gsub("%b<>", "")
  return cleaned
end

--[=[-----< C O I N S _ R E P L A C E _ M A T H _ S T R I P M A R K E R >-------
  There are three options for math markup rendering that depend on the editor's
  math preference settings. These settings are at
  [[Special:Preferences#mw-prefsection-rendering]] and are: PNG images, TeX
  source and MathML (with SVG or PNG fallback). All three are heavy with HTML
  and CSS which doesn't belong in the metadata.

  Without this function, the metadata saved in the raw wikitext contained the
  rendering determined by the settings of the last editor to save the page.

  This function gets the rendered form of an equation according to the editor's
  preference before the page is saved. It then searches the rendering for the
  text equivalent of the rendered equation and replaces the rendering with that
  so that the page is saved without extraneous HTML/CSS markup and with a
  reasonably readable text form of the equation.

  Returns exactly two values: true and the value with the first math
  stripmarker replaced when a replacement is made, otherwise false and the
  initial value. Only one stripmarker is handled per call; to replace multiple
  equations, call this function from within a loop.

  The equation text is inserted literally: a '%' in it (common in TeX, e.g.
  "50\%") is not interpreted as a string.gsub() capture reference.
]=]
local function coins_replace_math_stripmarker(value)
  local stripmarker = cfg.stripmarkers["math"]
  local rendering = value:match(stripmarker)  -- Is there a math stripmarker?

  -- When value doesn't have a math stripmarker, abandon this test.
  if not rendering then
    return false, value
  end

  -- Convert the stripmarker into its rendered value.
  rendering = mw.text.unstripNoWiki(rendering)
  -- NOTE(review): it is unclear whether a math render error can yield a
  -- non-string here; if it does, report "not replaced" instead of raising.
  if "string" ~= type(rendering) then
    return false, value
  end

  -- Try the three known renderings in the same order as before:
  local equation =
    rendering:match('alt="([^"]+)"')  -- PNG option: text is in the alt attribute.
    -- TeX option: text sits between "$ " and " $"; $ is a legitimate character
    -- that is escaped as \$.
    or rendering:match("%$%s+(.+)%s+%$")
    -- MathML option: text is the content of the <annotation> element.
    or rendering:match("<annotation[^>]+>(.+)</annotation>")

  if not equation then
    return false, value  -- Had math stripmarker but not of the defined forms.
  end

  -- A function replacement makes gsub insert the text verbatim; a string
  -- replacement would treat every '%' in the equation as an escape. The outer
  -- parentheses drop gsub's match count so only two values are returned.
  return true, (value:gsub(stripmarker, function() return equation end, 1))
end

--[[---------------------< C O I N S _ C L E A N U P >--------------------------
  Prepares one parameter value for the metadata: math and nowiki stripmarkers
  are resolved, and invisible characters and certain HTML entities are removed
  or replaced. Returns the cleaned string.

  2015-12-10: There is a bug in mw.text.unstripNoWiki(). It replaces math
  stripmarkers with the appropriate content when it shouldn't. See phab:121085
  and Wikipedia_talk:Lua#stripmarkers_and_mw.text.unstripNoWiki.28.29.

  TODO: Move the replacement patterns and replacement values into a table in
  /Configuration, similar to the invisible characters table?
]]
local function coins_cleanup(value)
  -- Swap math stripmarkers for a text form of the equation, one per pass,
  -- until a pass reports that nothing was replaced.
  local swapped
  repeat
    swapped, value = coins_replace_math_stripmarker(value)
  until not swapped

  -- Any math stripmarker still present could not be converted; flag it.
  value = value:gsub(cfg.stripmarkers["math"], "MATH RENDER ERROR")
  -- Expand nowiki stripmarkers to their content.
  value = mw.text.unstripNoWiki(value)
  -- The markup emitted by {{'}} and {{'s}} becomes a plain ' or 's.
  value = value:gsub('<span class="nowrap" style="padding%-left: 0%.1em;">&#39;(s?)</span>', "'%1")
  -- &nbsp; entities and hair spaces (U+200A) become ordinary spaces.
  value = value:gsub("&nbsp;", " ")
  value = value:gsub("\226\128\138", " ")

  -- Zero-width joiners are left in place when the value matches
  -- cfg.indic_script (they must not be removed from Indic scripts).
  local contains_indic = mw.ustring.find(value, cfg.indic_script)
  if not contains_indic then
    value = value:gsub("&zwj;", "")  -- The entity form first…
    -- …then the zero-width joiner, zero-width space and soft hyphen characters.
    value = mw.ustring.gsub(value, "[\226\128\141\226\128\139\194\173]", "")
  end

  -- Collapse each run of tabs, line feeds, carriage returns and spaces into a
  -- single space; parentheses keep only the string result of gsub.
  return (value:gsub("[\009\010\013 ]+", " "))
end

--[[--------------------------< C O I N S >-------------------------------------
  COinS metadata (see <https://ocoins.info/>) allows automated tools to parse
  the citation information.

  Parameters:
    data  - table of citation values. Fields read here: Periodical,
            Encyclopedia, Title, Season, Quarter, Chron, Volume, Issue,
            ArticleNumber, Pages, Chapter, PublicationPlace, Series, Edition,
            PublisherName, Degree, Date, URL, RawPage, plus the optional
            tables ID_list (identifier name -> value) and Authors (sequence of
            tables with last / first fields).
    class - citation class string (e.g. "journal", "book", "thesis"); selects
            the metadata format and the genre.

  Returns "" when data is not a table or is empty; otherwise the key=value
  pairs joined with "&", starting with ctx_ver. Values are passed through
  remove_wiki_link() and mw.uri.encode(); unset values are omitted.

  Side effect: every entry of data other than ID_list and Authors is replaced
  in place by its coins_cleanup() result.

  A missing ID_list or Authors table, an author entry without a last name and
  an identifier without an entry in cfg.id_handlers are skipped rather than
  raising an error.
]]
local function COinS(data, class)
  if "table" ~= type(data) or nil == next(data) then
    return ""
  end

  for k, v in pairs(data) do  -- Loop through all the metadata parameter values.
    -- Except the ID_list and Author tables (author nowiki stripmarker done when
    -- Author table processed).
    if "ID_list" ~= k and "Authors" ~= k then
      data[k] = coins_cleanup(v)
    end
  end

  local ctx_ver = "Z39.88-2004"

  -- Treat table strictly as an array with only set values: assigning
  -- OCinSoutput[key] = value appends "key=<encoded value>" when value is set
  -- and does nothing otherwise. Because the named key itself is never stored,
  -- the same key (rft.au, rft_id) can be assigned repeatedly.
  local OCinSoutput =
    setmetatable(
      {},
      {
        __newindex = function(self, key, value)
          if is_set(value) then
            rawset(self, #self + 1, table.concat {key, "=", mw.uri.encode(remove_wiki_link(value))})
          end
        end
      }
    )

  if
    in_array(class, {"arxiv", "biorxiv", "citeseerx", "medrxiv", "ssrn", "journal", "news", "magazine"}) or
    (in_array(class, {"conference", "interview", "map", "press release", "web"}) and is_set(data.Periodical)) or
    ("citation" == class and is_set(data.Periodical) and not is_set(data.Encyclopedia))
  then
    -- Journal metadata identifier.
    OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"
    -- Set genre according to the type of citation template we are rendering.
    if in_array(class, {"arxiv", "biorxiv", "citeseerx", "medrxiv", "ssrn"}) then
      -- Cite_arxiv, Cite_biorxiv, Cite_citeseerx, Cite_medrxiv, Cite_SSRN.
      OCinSoutput["rft.genre"] = "preprint"
    elseif "conference" == class then
      -- Cite_conference (when Periodical set).
      OCinSoutput["rft.genre"] = "conference"
    elseif "web" == class then
      OCinSoutput["rft.genre"] = "unknown"  -- Cite_web, when Periodical set.
    else
      -- Journal and other 'periodical' articles.
      OCinSoutput["rft.genre"] = "article"
    end
    OCinSoutput["rft.jtitle"] = data.Periodical  -- Journal only.
    OCinSoutput["rft.atitle"] = data.Title  -- 'periodical' article titles.

    -- These used only for periodicals.
    -- Keywords: winter, spring, summer, fall.
    OCinSoutput["rft.ssn"] = data.Season
    -- Single digits 1->first quarter, etc.
    OCinSoutput["rft.quarter"] = data.Quarter
    OCinSoutput["rft.chron"] = data.Chron  -- Free-form date components.
    OCinSoutput["rft.volume"] = data.Volume  -- Does not apply to books.
    OCinSoutput["rft.issue"] = data.Issue
    OCinSoutput["rft.artnum"] = data.ArticleNumber  -- {{Cite journal}} only.
    OCinSoutput["rft.pages"] = data.Pages  -- Also used in book metadata.
  elseif "thesis" ~= class then
    -- All others except Cite_thesis are treated as 'book' metadata; genre
    -- distinguishes.
    -- Book metadata identifier.
    OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:book"
    if "report" == class or "techreport" == class then
      -- Cite_report and Cite_techreport.
      OCinSoutput["rft.genre"] = "report"
    elseif "conference" == class then
      -- Cite_conference when Periodical not set.
      OCinSoutput["rft.genre"] = "conference"
      -- Conference paper as chapter in proceedings (book).
      OCinSoutput["rft.atitle"] = data.Chapter
    elseif in_array(class, {"book", "citation", "encyclopaedia", "interview", "map"}) then
      if is_set(data.Chapter) then
        OCinSoutput["rft.genre"] = "bookitem"
        -- Book chapter, encyclopedia article, book interview or map title.
        OCinSoutput["rft.atitle"] = data.Chapter
      elseif "map" == class or "interview" == class then
        OCinSoutput["rft.genre"] = "unknown"  -- Standalone map/interview.
      else
        OCinSoutput["rft.genre"] = "book"  -- Book and encyclopedia.
      end
    else
      -- {'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview',
      -- 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release',
      -- 'serial', 'sign', 'speech', 'web'}
      OCinSoutput["rft.genre"] = "unknown"
    end
    OCinSoutput["rft.btitle"] = data.Title  -- Book only.
    OCinSoutput["rft.place"] = data.PublicationPlace  -- Book only.
    OCinSoutput["rft.series"] = data.Series  -- Book only.
    OCinSoutput["rft.pages"] = data.Pages  -- Book, journal.
    OCinSoutput["rft.edition"] = data.Edition  -- Book only.
    OCinSoutput["rft.pub"] = data.PublisherName  -- Book and dissertation.
  else  -- Cite thesis.
    -- Dissertation metadata identifier.
    OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:dissertation"
    -- Dissertation (also patent, but that is not yet supported).
    OCinSoutput["rft.title"] = data.Title
    OCinSoutput["rft.degree"] = data.Degree  -- Dissertation only.
    OCinSoutput["rft.inst"] = data.PublisherName  -- Book and dissertation.
  end
  -- NB. Not currently supported are "info:ofi/fmt:kev:mtx:patent",
  -- "info:ofi/fmt:kev:mtx:dc", "info:ofi/fmt:kev:mtx:sch_svc",
  -- "info:ofi/fmt:kev:mtx:ctx" and now common parameters (as much as can be).
  OCinSoutput["rft.date"] = data.Date  -- Book, journal, dissertation.

  -- What to do about these? For now, assume that they are common to all?
  -- `or {}` lets a citation without any identifiers through.
  for k, v in pairs(data.ID_list or {}) do
    if k == "ISBN" then
      v = v:gsub("[^-0-9X]", "")  -- Keep only digits, hyphens and X.
    end
    -- An identifier without a handler yields id == nil and is skipped.
    local handler = cfg.id_handlers[k]
    local id = handler and handler.COinS
    if string.sub(id or "", 1, 4) == "info" then
      -- For IDs that are in the info:registry.
      OCinSoutput["rft_id"] = table.concat {id, "/", v}
    elseif string.sub(id or "", 1, 3) == "rft" then
      -- For ISBN, ISSN, eISSN, etc., that have defined COinS keywords.
      OCinSoutput[id] = v
    elseif "url" == id then
      -- For URLs that are assembled in ~/Identifiers; |asin= and |ol=.
      OCinSoutput["rft_id"] = table.concat({data.ID_list[k], "#id-name=", handler.label})
    elseif id then
      -- Others, when the handler's COinS field is not nil: build a URL here
      -- and indicate the identifier name as #fragment (human-readable, but
      -- transparent to browsers).
      OCinSoutput["rft_id"] =
        table.concat {
          handler.prefix,
          v,
          handler.suffix or "",
          "#id-name=",
          handler.label
        }
    end
  end

  -- `or {}` lets a citation without authors through.
  for k, v in ipairs(data.Authors or {}) do
    -- Replace any nowiki stripmarkers, non-printing or invisible characters.
    -- A missing name part is treated as an empty string.
    local last = coins_cleanup(v.last or "")
    local first = coins_cleanup(v.first or "")
    if k == 1 then  -- For the first author name only.
      -- Set these COinS values if |first= and |last= specify first author name.
      if is_set(last) and is_set(first) then
        OCinSoutput["rft.aulast"] = last  -- Book, journal, dissertation.
        OCinSoutput["rft.aufirst"] = first  -- Book, journal, dissertation.
      elseif is_set(last) then
        -- Book, journal, dissertation; otherwise use this for the first name.
        OCinSoutput["rft.au"] = last
      end
    else  -- For all other authors.
      -- TODO: At present we do not report "et al.". Add anything special if
      -- this condition applies?
      if is_set(last) and is_set(first) then
        -- Book, journal, dissertation.
        OCinSoutput["rft.au"] = table.concat {last, ", ", first}
      elseif is_set(last) then
        OCinSoutput["rft.au"] = last  -- Book, journal, dissertation.
      end
    end
  end

  OCinSoutput.rft_id = data.URL
  -- Referrer: the part of mw.site.server after its last slash, then the page.
  OCinSoutput.rfr_id = table.concat {"info:sid/", mw.site.server:match("[^/]*$"), ":", data.RawPage}

  -- TODO: Add optional extra info:
  -- rfr_dat=#REVISION<version> (referrer private data)
  -- ctx_id=<data.RawPage>#<ref> (identifier for the context object)
  -- ctx_tim=<ts> (timestamp in format yyyy-mm-ddThh:mm:ssTZD or yyyy-mm-dd)
  -- ctx_enc=info:ofi/enc:UTF-8 (character encoding)

  -- Detach the metatable so the table behaves as a plain array from here on.
  setmetatable(OCinSoutput, nil)

  -- Version string always first, then combine (entries are not sorted).
  table.insert(OCinSoutput, 1, "ctx_ver=" .. ctx_ver)  -- Such as "Z39.88-2004".
  return table.concat(OCinSoutput, "&")
end

--[[--------------< S E T _ S E L E C T E D _ M O D U L E S >-------------------
  Points this module at the same (live or sandbox) configuration table and
  Utilities module that the other CS1 modules are using.

    cfg_table_ptr      - the configuration table to store in cfg
    utilities_page_ptr - the Utilities module table to take helpers from
]]
local function set_selected_modules(cfg_table_ptr, utilities_page_ptr)
  local utilities = utilities_page_ptr

  cfg = cfg_table_ptr

  -- Bind the helper functions this module uses from the selected
  -- Module:Citation/CS1/Utilities module.
  has_accept_as_written = utilities.has_accept_as_written
  in_array = utilities.in_array
  is_set = utilities.is_set
  remove_wiki_link = utilities.remove_wiki_link
  strip_apostrophe_markup = utilities.strip_apostrophe_markup
end

--[[-----------------< E X P O R T E D   F U N C T I O N S >------------------]]
-- The module's public interface. Everything else in this file stays local.
return {
  COinS = COinS,  -- Renders the metadata string.
  get_coins_pages = get_coins_pages,  -- Page text cleaned for the metadata.
  make_coins_title = make_coins_title,  -- Title text built for the metadata.
  set_selected_modules = set_selected_modules,  -- Wires in cfg and Utilities.
}