Module:Citation/CS1/COinS
From Zoophilia Wiki
< Module:Citation | CS1
| This Lua module is used on approximately 4,560,000 pages. To avoid major disruption and server load, any changes should be tested in the module's /sandbox or /testcases subpages, or in your own module sandbox. The tested changes can be added to this page in a single edit. Consider discussing changes on the talk page before implementing them. |
| This module is subject to page protection. It is a highly visible module in use by a very large number of pages, or is substituted very frequently. Because vandalism or mistakes would affect many pages, and even trivial editing might cause substantial load on the servers, it is protected from editing. |
This page contains various functions that render a CS1/CS2 template's metadata.
These files comprise the module support for CS1/CS2 citation templates:
Other documentation:
--[[-----------------< F O R W A R D   D E C L A R A T I O N S >--------------]]
-- The names below are declared here so that every function in this module can
-- refer to them as upvalues. They hold nil until set_selected_modules() assigns
-- them, so set_selected_modules() must run before any other function is called.
-- Functions in Module:Citation/CS1/Utilities.
local has_accept_as_written
local in_array
local is_set
local remove_wiki_link
local strip_apostrophe_markup
-- Configuration supertable that's defined in Module:Citation/CS1/Configuration.
-- This module reads cfg.stripmarkers, cfg.indic_script and cfg.id_handlers.
local cfg
--[[-------------------< M A K E _ C O I N S _ T I T L E >----------------------
Builds the title string used in the COinS metadata from a title and / or its
script-title companion (or any other name-script pair).

<title> is first passed through has_accept_as_written(). Apostrophe markup
(bold, italics) is stripped from both parts so that the metadata is not
polluted with runs of %27%27…. A two-lowercase-letter language prefix
("xx:") is removed from the start of <script>.

Returns the cleaned title and script joined by a single space when both are
set; otherwise whichever one is set, or the empty string.
]]
local function make_coins_title(title, script)
    local title_part = "" -- Stays empty when no usable title was supplied.
    local accepted = has_accept_as_written(title)
    if is_set(accepted) then
        title_part = strip_apostrophe_markup(accepted)
    end

    local script_part = "" -- Stays empty when no script value was supplied.
    if is_set(script) then
        -- Drop the language prefix, if any; this may leave an empty string.
        local without_prefix = script:gsub("^%l%l%s*:%s*", "")
        script_part = strip_apostrophe_markup(without_prefix)
    end

    -- A separating space is only wanted when both halves survived cleaning.
    local separator = ""
    if is_set(title_part) and is_set(script_part) then
        separator = " "
    end
    return title_part .. separator .. script_part
end
--[[---------------< E S C A P E _ L U A _ M A G I C _ C H A R S >--------------
Returns a copy of <argument> in which each of Lua's magic pattern characters
( ^ $ ( ) % . [ ] * + - ? ) is preceded by a percent sign.

This matters because string.gsub() and friends interpret their pattern
argument as a pattern rather than as literal text; the escaped copy can be
used to match <argument> literally.
]]
local function escape_lua_magic_chars(argument)
    -- One pass over the full magic set, the percent sign included: "%%" in the
    -- replacement emits a literal percent and "%0" re-emits the matched char.
    local escaped = argument:gsub("[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0")
    return escaped -- Only the string; gsub's match count is discarded.
end
--[[--------------------< G E T _ C O I N S _ P A G E S >-----------------------
Reduces the value of |page=, |pages= or |at= to plain page numbers for COinS.

External wikilinks ("[url label]") lose their URL and brackets so that only
the label remains, en dashes and named HTML entities become hyphens, and
HTML-like tags are dropped. An unset <pages> is returned unchanged.
]]
local function get_coins_pages(pages)
    if not is_set(pages) then
        return pages -- No page numbers, nothing to do.
    end

    -- Captures the URL plus the whitespace after it from "[url label]".
    local link_pattern = "%[(%w*:?//[^ ]+%s+)[%w%d].*%]"
    local link_head = pages:match(link_pattern)
    while link_head do
        -- The captured text is literal, so its magic characters are escaped
        -- before it is used as a pattern; every occurrence is then removed.
        pages = pages:gsub(escape_lua_magic_chars(link_head), "")
        link_head = pages:match(link_pattern) -- Look for another URL.
    end

    local cleaned = pages
        :gsub("[%[%]]", "") -- Remove the brackets.
        :gsub("–", "-") -- Replace en dashes with hyphens…
        -- …and named HTML entities (&ndash; etc.) as well. Numeric entities
        -- such as &#32; are not matched by this pattern.
        :gsub("&%w+;", "-")
        -- Remove HTML-like tags; spans are added to <Pages> by
        -- utilities.hyphen_to_dash() and should not appear in the metadata.
        :gsub("%b<>", "")
    return cleaned
end
--[=[-----< C O I N S _ R E P L A C E _ M A T H _ S T R I P M A R K E R >-------
There are three options for math markup rendering that depend on the editor's
math preference settings. These settings are at
[[Special:Preferences#mw-prefsection-rendering]] and are: PNG images, TeX
source and MathML (with SVG or PNG fallback). All three are heavy with HTML
and CSS which doesn't belong in the metadata.

Without this function, the metadata saved in the raw wikitext contained the
rendering determined by the settings of the last editor to save the page.

This function gets the rendered form of an equation according to the editor's
preference before the page is saved. It then searches the rendering for the
text equivalent of the rendered equation and replaces the rendering with that
so that the page is saved without extraneous HTML/CSS markup and with a
reasonably readable text form of the equation.

Parameters:
  value - string that may contain math stripmarkers.

Returns:
  true and <value> with its first math stripmarker replaced when a replacement
  was made (gsub's match count follows as a third result); otherwise false and
  the unmodified <value>. To replace multiple equations, it is necessary to
  call this function from within a loop.
]=]
local function coins_replace_math_stripmarker(value)
    local stripmarker = cfg.stripmarkers["math"]
    local marker = value:match(stripmarker) -- Is there a math stripmarker?
    if not marker then
        return false, value -- No math stripmarker; abandon this test.
    end

    -- Convert the stripmarker into its rendered form.
    local rendering = mw.text.unstripNoWiki(marker)
    if type(rendering) ~= "string" then
        -- NOTE(review): a non-string result is presumed possible on a math
        -- render error; report "not replaced" instead of failing on :match().
        return false, value
    end

    -- Try the three known renderings in turn: the alt text of the PNG image,
    -- TeX source delimited by "$" (a literal $ inside is escaped as \$), and
    -- the <annotation> element of MathML.
    local text = rendering:match('alt="([^"]+)"')
        or rendering:match("$%s+(.+)%s+%$")
        or rendering:match("<annotation[^>]+>(.+)</annotation>")
    if not text then
        return false, value -- Had math stripmarker but not of the defined forms.
    end

    -- The replacement is supplied through a function so that it is inserted
    -- verbatim. Given as a replacement *string*, any "%" in the equation text
    -- would be treated as an escape by gsub and the text would be corrupted
    -- (or an "invalid capture index" error raised).
    return true, value:gsub(stripmarker, function()
        return text
    end, 1)
end
--[[---------------------< C O I N S _ C L E A N U P >--------------------------
Cleans up a parameter value for the metadata by removing or replacing
stripmarkers, invisible characters and certain HTML entities.

Parameters:
  value - string to clean.

Returns:
  the cleaned string.

2015-12-10: There is a bug in mw.text.unstripNoWiki(). It replaces math
stripmarkers with the appropriate content when it shouldn't. See phab:121085
and Wikipedia_talk:Lua#stripmarkers_and_mw.text.unstripNoWiki.28.29.

TODO: Move the replacement patterns and replacement values into a table in
/Configuration, similar to the invisible characters table?
]]
local function coins_cleanup(value)
    local replaced = true -- Default state to get the loop running.
    while replaced do -- Loop until all math stripmarkers are replaced.
        -- Replace math stripmarker with text representation of the equation.
        replaced, value = coins_replace_math_stripmarker(value)
    end
    -- One or more couldn't be replaced; insert vague error message.
    value = value:gsub(cfg.stripmarkers["math"], "MATH RENDER ERROR")
    -- Replace nowiki stripmarkers with their content.
    value = mw.text.unstripNoWiki(value)
    -- Replace {{'}} or {{'s}} with simple apostrophe or apostrophe-s. The
    -- apostrophe in the pattern must be spelled as the entity &#39;: a bare
    -- apostrophe would terminate this single-quoted Lua string.
    value = value:gsub('<span class="nowrap" style="padding%-left: 0%.1em;">&#39;(s?)</span>', "'%1")
    value = value:gsub("&nbsp;", " ") -- Replace &nbsp; entity with plain space.
    value = value:gsub("\226\128\138", " ") -- Replace hair space with space.
    -- Don't remove zero-width joiner characters from Indic scripts.
    if not mw.ustring.find(value, cfg.indic_script) then
        value = value:gsub("&zwj;", "") -- Remove &zwj; entities.
        -- Remove zero-width joiner, zero-width space and soft hyphen.
        value = mw.ustring.gsub(value, "[\226\128\141\226\128\139\194\173]", "")
    end
    -- Collapse runs of horizontal tab, line feed, carriage return and space
    -- into one plain space.
    value = value:gsub("[\009\010\013 ]+", " ")
    return value
end
--[[--------------------------< C O I N S >-------------------------------------
COinS metadata (see <https://ocoins.info/>) allows automated tools to parse
the citation information.

Parameters:
  data  - table of citation values; its scalar entries are cleaned in place.
          data.ID_list and data.Authors are read as tables.
  class - string naming the kind of citation template being rendered.

Returns:
  the "&"-joined key=value metadata string beginning with ctx_ver, or the
  empty string when <data> is not a table or is an empty table.
]]
local function COinS(data, class)
    if type(data) ~= "table" or next(data) == nil then
        return ""
    end

    -- Clean every metadata value except the ID_list and Authors tables
    -- (author values are cleaned when the Authors table is processed below).
    for name, raw in pairs(data) do
        if not (name == "ID_list" or name == "Authors") then
            data[name] = coins_cleanup(raw)
        end
    end

    local ctx_ver = "Z39.88-2004"

    -- Every assignment to this table appends one URI-encoded "key=value"
    -- string, so the table is strictly an array that only holds set values
    -- and the output order is the order of the assignments below.
    local kev = setmetatable({}, {
        __newindex = function(list, key, value)
            if is_set(value) then
                rawset(list, #list + 1, table.concat({key, "=", mw.uri.encode(remove_wiki_link(value))}))
            end
        end
    })

    local is_periodical =
        in_array(class, {"arxiv", "biorxiv", "citeseerx", "medrxiv", "ssrn", "journal", "news", "magazine"}) or
        (in_array(class, {"conference", "interview", "map", "press release", "web"}) and is_set(data.Periodical)) or
        (class == "citation" and is_set(data.Periodical) and not is_set(data.Encyclopedia))

    if is_periodical then
        -- Journal metadata identifier.
        kev.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"

        -- Genre follows the type of citation template being rendered.
        if in_array(class, {"arxiv", "biorxiv", "citeseerx", "medrxiv", "ssrn"}) then
            -- Cite_arxiv, Cite_biorxiv, Cite_citeseerx, Cite_medrxiv, Cite_SSRN.
            kev["rft.genre"] = "preprint"
        elseif class == "conference" then
            kev["rft.genre"] = "conference" -- Cite_conference, Periodical set.
        elseif class == "web" then
            kev["rft.genre"] = "unknown" -- Cite_web, Periodical set.
        else
            kev["rft.genre"] = "article" -- Journal and other periodicals.
        end

        kev["rft.jtitle"] = data.Periodical -- Journal only.
        kev["rft.atitle"] = data.Title -- 'Periodical' article titles.

        -- The following are used only for periodicals.
        kev["rft.ssn"] = data.Season -- Keywords: winter, spring, summer, fall.
        kev["rft.quarter"] = data.Quarter -- Single digits 1->first quarter, etc.
        kev["rft.chron"] = data.Chron -- Free-form date components.
        kev["rft.volume"] = data.Volume -- Does not apply to books.
        kev["rft.issue"] = data.Issue
        kev["rft.artnum"] = data.ArticleNumber -- {{Cite journal}} only.
        kev["rft.pages"] = data.Pages -- Also used in book metadata.
    elseif class ~= "thesis" then
        -- Everything else except Cite_thesis is treated as 'book' metadata;
        -- the genre distinguishes.
        kev.rft_val_fmt = "info:ofi/fmt:kev:mtx:book"

        if class == "report" or class == "techreport" then
            kev["rft.genre"] = "report" -- Cite_report and Cite_techreport.
        elseif class == "conference" then
            -- Cite_conference when Periodical is not set: the conference
            -- paper is a chapter in the proceedings (book).
            kev["rft.genre"] = "conference"
            kev["rft.atitle"] = data.Chapter
        elseif in_array(class, {"book", "citation", "encyclopaedia", "interview", "map"}) then
            if is_set(data.Chapter) then
                -- Book chapter, encyclopedia article, book interview or map
                -- title.
                kev["rft.genre"] = "bookitem"
                kev["rft.atitle"] = data.Chapter
            elseif class == "map" or class == "interview" then
                kev["rft.genre"] = "unknown" -- Standalone map/interview.
            else
                kev["rft.genre"] = "book" -- Book and encyclopedia.
            end
        else
            -- {'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode',
            -- 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast',
            -- 'press release', 'serial', 'sign', 'speech', 'web'}
            kev["rft.genre"] = "unknown"
        end

        kev["rft.btitle"] = data.Title -- Book only.
        kev["rft.place"] = data.PublicationPlace -- Book only.
        kev["rft.series"] = data.Series -- Book only.
        kev["rft.pages"] = data.Pages -- Book, journal.
        kev["rft.edition"] = data.Edition -- Book only.
        kev["rft.pub"] = data.PublisherName -- Book and dissertation.
    else
        -- Cite thesis: dissertation metadata identifier (also meant for
        -- patents, but those are not yet supported).
        kev.rft_val_fmt = "info:ofi/fmt:kev:mtx:dissertation"
        kev["rft.title"] = data.Title
        kev["rft.degree"] = data.Degree -- Dissertation only.
        kev["rft.inst"] = data.PublisherName -- Book and dissertation.
    end
    -- NB. Not currently supported are "info:ofi/fmt:kev:mtx:patent",
    -- "info:ofi/fmt:kev:mtx:dc", "info:ofi/fmt:kev:mtx:sch_svc" and
    -- "info:ofi/fmt:kev:mtx:ctx".

    -- From here on: parameters common to all formats (as much as can be).
    kev["rft.date"] = data.Date -- Book, journal, dissertation.

    -- Identifiers. What to do about these? For now, assume that they are
    -- common to all.
    for id_name, id_value in pairs(data.ID_list) do
        if id_name == "ISBN" then
            id_value = id_value:gsub("[^-0-9X]", "")
        end
        local handler = cfg.id_handlers[id_name]
        local coins_key = handler.COinS

        if string.sub(coins_key or "", 1, 4) == "info" then
            -- IDs that are in the info: registry.
            kev["rft_id"] = table.concat({coins_key, "/", id_value})
        elseif string.sub(coins_key or "", 1, 3) == "rft" then
            -- ISBN, ISSN, eISSN, etc., that have defined COinS keywords.
            kev[coins_key] = id_value
        elseif coins_key == "url" then
            -- URLs that are assembled in ~/Identifiers; |asin= and |ol=.
            kev["rft_id"] = table.concat({data.ID_list[id_name], "#id-name=", handler.label})
        elseif coins_key then
            -- Any other non-nil COinS setting: build a URL here and give the
            -- identifier name as a #fragment (human-readable, but transparent
            -- to browsers).
            kev["rft_id"] =
                table.concat({
                    handler.prefix,
                    id_value,
                    handler.suffix or "",
                    "#id-name=",
                    handler.label
                })
        end
    end

    -- Authors (book, journal, dissertation). TODO: At present we do not
    -- report "et al.". Add anything special if this condition applies?
    for index, author in ipairs(data.Authors) do
        -- Replace any nowiki stripmarkers, non-printing or invisible
        -- characters.
        local last, first = coins_cleanup(author.last), coins_cleanup(author.first or "")
        if is_set(last) then
            if is_set(first) then
                if index == 1 then
                    -- Only the first author gets separate last/first keys.
                    kev["rft.aulast"] = last
                    kev["rft.aufirst"] = first
                else
                    kev["rft.au"] = table.concat({last, ", ", first})
                end
            else
                -- No first name: the last name alone is reported, for the
                -- first author as well as for all others.
                kev["rft.au"] = last
            end
        end
    end

    kev.rft_id = data.URL
    kev.rfr_id = table.concat({"info:sid/", mw.site.server:match("[^/]*$"), ":", data.RawPage})
    -- TODO: Add optional extra info:
    -- rfr_dat=#REVISION<version> (referrer private data)
    -- ctx_id=<data.RawPage>#<ref> (identifier for the context object)
    -- ctx_tim=<ts> (timestamp in format yyyy-mm-ddThh:mm:ssTZD or yyyy-mm-dd)
    -- ctx_enc=info:ofi/enc:UTF-8 (character encoding)

    -- Back to a plain table so that the version string can be put in front
    -- without going through __newindex. (Sorting is deliberately disabled:
    -- table.sort(kev).)
    setmetatable(kev, nil)
    table.insert(kev, 1, "ctx_ver=" .. ctx_ver) -- Such as "Z39.88-2004".
    return table.concat(kev, "&")
end
--[[--------------< S E T _ S E L E C T E D _ M O D U L E S >-------------------
Points this module's cfg table and its imported utility functions at the same
(live or sandbox) modules that the other CS1 modules use.

Parameters:
  cfg_table_ptr      - the configuration table to store in cfg.
  utilities_page_ptr - the loaded Module:Citation/CS1/Utilities table from
                       which the utility functions are taken.
]]
local function set_selected_modules(cfg_table_ptr, utilities_page_ptr)
    local utilities = utilities_page_ptr

    cfg = cfg_table_ptr

    -- Functions taken from the selected Utilities module.
    has_accept_as_written, in_array, is_set =
        utilities.has_accept_as_written, utilities.in_array, utilities.is_set
    remove_wiki_link, strip_apostrophe_markup =
        utilities.remove_wiki_link, utilities.strip_apostrophe_markup
end
--[[-----------------< E X P O R T E D   F U N C T I O N S >------------------]]
-- The module's public interface; every other function in this file is private.
local exports = {
    COinS = COinS,
    get_coins_pages = get_coins_pages,
    make_coins_title = make_coins_title,
    set_selected_modules = set_selected_modules
}
return exports