Module:Citation/CS1/Configuration: Difference between revisions

m
1 revision imported
m (1 revision imported)
m (1 revision imported)
 
(3 intermediate revisions by 2 users not shown)
Line 1: Line 1:
local lang_obj = mw.language.getContentLanguage(); -- make a language object for the local language; used here for languages and dates


--[[--------------------------< U N C A T E G O R I Z E D _ N A M E S P A C E S >------------------------------
--[[--------------------------< U N C A T E G O R I Z E D _ N A M E S P A C E S >------------------------------


List of namespaces that should not be included in citation error categories.
List of namespaces identifiers for namespaces that will not be included in citation error categories.
Same as setting notracking = true by default.
Same as setting notracking = true by default.


Note: Namespace names should use underscores instead of spaces.
For wikis that have a current version of Module:cs1 documentation support, this #invoke will return an unordered
list of namespace names and their associated identifiers:
{{#invoke:cs1 documentation support|uncategorized_namespace_lister|all=<anything>}}


]]
]]


local uncategorized_namespaces = { 'User', 'Talk', 'User_talk', 'Wikipedia_talk',
uncategorized_namespaces_t = {[2]=true}; -- init with user namespace id
'File_talk', 'Template_talk', 'Help_talk', 'Category_talk', 'Portal_talk',
for k, _ in pairs (mw.site.talkNamespaces) do -- add all talk namespace ids
'Book_talk', 'Draft_talk', 'Module_talk', 'MediaWiki_talk' };
uncategorized_namespaces_t[k] = true;
end
 
local uncategorized_subpages = {'/[Ss]andbox', '/[Tt]estcases', '/[^/]*[Ll]og', '/[Aa]rchive'}; -- list of Lua patterns found in page names of pages we should not categorize
local uncategorized_subpages = {'/[Ss]andbox', '/[Tt]estcases', '/[^/]*[Ll]og', '/[Aa]rchive'}; -- list of Lua patterns found in page names of pages we should not categorize


Line 29: Line 34:
['archived-dead'] = 'Archived from $1 on $2',
['archived-dead'] = 'Archived from $1 on $2',
['archived-live'] = '$1 from the original on $2',
['archived-live'] = '$1 from the original on $2',
['archived-missing'] = 'Archived from the original$1 on $2',
['archived-missing'] = 'Archived from the original $1 on $2',
['archived-unfit'] = 'Archived from the original on ',
['archived-unfit'] = 'Archived from the original on ',
['archived'] = 'Archived',
['archived'] = 'Archived',
Line 61: Line 66:


['vol'] = '$1 Vol.&nbsp;$2', -- $1 is sepc; bold journal style volume is in presentation{}
['vol'] = '$1 Vol.&nbsp;$2', -- $1 is sepc; bold journal style volume is in presentation{}
['vol-no'] = '$1 Vol.&nbsp;$2 no.&nbsp;$3', -- sepc, volume, issue
['vol-no'] = '$1 Vol.&nbsp;$2, no.&nbsp;$3', -- sepc, volume, issue (alternatively insert $1 after $2, but then we'd also have to change capitalization)
['issue'] = '$1 No.&nbsp;$2', -- $1 is sepc
['issue'] = '$1 No.&nbsp;$2', -- $1 is sepc
['art'] = '$1 Art.&nbsp;$2', -- $1 is sepc; for {{cite conference}} only
['vol-art'] = '$1 Vol.&nbsp;$2, art.&nbsp;$3', -- sepc, volume, article-number; for {{cite conference}} only


['j-vol'] = '$1 $2', -- sepc, volume; bold journal volume is in presentation{}
['j-vol'] = '$1 $2', -- sepc, volume; bold journal volume is in presentation{}
['j-issue'] = ' ($1)',
['j-issue'] = ' ($1)',
['j-article-num'] = ' $1', -- TODO: any punctuation here? static text?


['nopp'] = '$1 $2'; -- page(s) without prefix; $1 is sepc
['nopp'] = '$1 $2'; -- page(s) without prefix; $1 is sepc
Line 93: Line 102:
-- Internal errors (should only occur if configuration is bad)
-- Internal errors (should only occur if configuration is bad)
['undefined_error'] = 'Called with an undefined error condition',
['undefined_error'] = 'Called with an undefined error condition',
['unknown_ID_key'] = 'Unrecognized ID key', -- an ID key in id_handlers not found in ~/Identifiers func_map{}
['unknown_ID_key'] = 'Unrecognized ID key: ', -- an ID key in id_handlers not found in ~/Identifiers func_map{}
['unknown_ID_access'] = 'Unrecognized ID access keyword: ', -- an ID access keyword in id_handlers not found in keywords_lists['id-access']{}
['unknown_argument_map'] = 'Argument map not defined for this variable',
['unknown_argument_map'] = 'Argument map not defined for this variable',
['bare_url_no_origin'] = 'Bare URL found but origin indicator is nil or empty',
['bare_url_no_origin'] = 'Bare URL found but origin indicator is nil or empty',
['warning_msg_e'] = '<span style="color:#d33">One or more <code style="color: inherit; background: inherit; border: none; padding: inherit;">&#123;{$1}}</code> templates have errors</span>; messages may be hidden ([[Help:CS1_errors#Controlling_error_message_display|help]]).'; -- $1 is template link
['warning_msg_m'] = '<span style="color:#3a3">One or more <code style="color: inherit; background: inherit; border: none; padding: inherit;">&#123;{$1}}</code> templates have maintenance messages</span>; messages may be hidden ([[Help:CS1_errors#Controlling_error_message_display|help]]).'; -- $1 is template link
}
}
--[[--------------------------< C I T A T I O N _ C L A S S _ M A P >------------------------------------------

This table maps the value assigned to |CitationClass= in the cs1|2 templates to the canonical template name when
the value assigned to |CitationClass= is different from the canonical template name.  |CitationClass= values are
used as class attributes in the <cite> tag that encloses the citation so these names may not contain spaces while
the canonical template name may.  These names are used in warning_msg_e and warning_msg_m to create links to the
template's documentation when an article is displayed in preview mode.

Most cs1|2 template |CitationClass= values at en.wiki match their canonical template names so are not listed here.

]]

-- key: the |CitationClass= value (no spaces); value: the canonical template name (may contain spaces)
local citation_class_map_t = { -- TODO: if kept, these and all other config.CitationClass 'names' require some sort of i18n
['audio-visual'] = 'AV media',
['AV-media-notes'] = 'AV media notes',
['encyclopaedia'] = 'encyclopedia',
['mailinglist'] = 'mailing list',
['pressrelease'] = 'press release'
}




Line 107: Line 141:


local et_al_patterns = {
local et_al_patterns = {
"[;,]? *[\"']*%f[%a][Ee][Tt]%.? *[Aa][Ll][%.\"']*$", -- variations on the 'et al' theme
"[;,]? *[\"']*%f[%a][Ee][Tt]%.? *[Aa][Ll][%.;,\"']*$", -- variations on the 'et al' theme
"[;,]? *[\"']*%f[%a][Ee][Tt]%.? *[Aa][Ll][Ii][AaIi][Ee]?[%.\"']*$", -- variations on the 'et alia', 'et alii' and 'et aliae' themes (false positive 'et aliie' unlikely to match)
"[;,]? *[\"']*%f[%a][Ee][Tt]%.? *[Aa][Ll][Ii][AaIi][Ee]?[%.;,\"']*$", -- variations on the 'et alia', 'et alii' and 'et aliae' themes (false positive 'et aliie' unlikely to match)
"[;,]? *%f[%a]and [Oo]thers", -- an alternative to et al.
"[;,]? *%f[%a]and [Oo]thers", -- an alternative to et al.
"%[%[ *[Ee][Tt]%.? *[Aa][Ll]%.? *%]%]", -- a wikilinked form
"%[%[ *[Ee][Tt]%.? *[Aa][Ll]%.? *%]%]", -- a wikilinked form
"%(%( *[Ee][Tt]%.? *[Aa][Ll]%.? *%)%)", -- a double-bracketed form (to counter partial removal of ((...)) syntax)
"%(%( *[Ee][Tt]%.? *[Aa][Ll]%.? *%)%)", -- a double-bracketed form (to counter partial removal of ((...)) syntax)
"[%(%[] *[Ee][Tt]%.? *[Aa][Ll]%.? *[%)%]]", -- a bracketed form
"[%(%[] *[Ee][Tt]%.? *[Aa][Ll]%.? *[%)%]]", -- a bracketed form
}
--[[--------------------------< E D I T O R _ M A R K U P _ P A T T E R N S >----------------------------------

This table provides Lua patterns for the phrase "ed" and variants in name text
(author, editor, etc.). The main module uses these to identify and emit the
'extra_text_names' message. (It is not the only series of patterns for this message.)

Every pattern is anchored with '^' and/or '$' so that it matches only an annotation at the beginning of the
name, at the end of the name, or a 'name' that is nothing but the annotation.

]]

local editor_markup_patterns = {
	-- these patterns match annotations at end of name
	'%f[%(%[][%(%[]%s*[Ee][Dd][Ss]?%.?%s*[%)%]]?$',						-- (ed) or (eds): leading '(' or '[', case insensitive 'ed', optional 's', '.' and/or closing ')' or ']'
	'[,%.%s]%f[e]eds?%.?$',												-- ed or eds: without '(' or ')'; case sensitive (ED could be initials, Ed could be name)
	'%f[%(%[][%(%[]%s*[Ee][Dd][Ii][Tt][Oo][Rr][Ss]?%.?%s*[%)%]]?$',		-- (editor) or (editors): leading '(' or '[', case insensitive, optional '.' and/or closing ')' or ']'
	-- the frontier %f[Ee] consumes nothing, so the set that follows it must itself match the 'E'/'e';
	-- the earlier form '%f[Ee][Dd]...' required the same character to be both 'e' and 'd' and so could never match
	'[,%.%s]%f[Ee][Ee][Dd][Ii][Tt][Oo][Rr][Ss]?%.?$',					-- editor or editors: without '(' or ')'; case insensitive

	-- these patterns match annotations at beginning of name
	'^eds?[%.,;]',														-- ed. or eds.: lower case only, optional 's', requires trailing '.', ',' or ';'
	'^[%(%[]%s*[Ee][Dd][Ss]?%.?%s*[%)%]]',								-- (ed) or (eds): also square brackets, case insensitive, optional 's', '.'
	'^[%(%[]?%s*[Ee][Dd][Ii][Tt][Oo][Rr][Ss]?%A',						-- (editor or (editors: also square brackets, case insensitive, optional brackets, 's'
	'^[%(%[]?%s*[Ee][Dd][Ii][Tt][Ee][Dd]%A',							-- (edited: also square brackets, case insensitive, optional brackets

	-- these patterns match annotations that are the 'name'
	'^[Ee][Dd][Ii][Tt][Oo][Rr][Ss]?%.?$',								-- editor or editors; case insensitive
}
}


Line 149: Line 159:
local presentation =  
local presentation =  
{
{
-- Error output
-- .error class is specified at https://git.wikimedia.org/blob/mediawiki%2Fcore.git/9553bd02a5595da05c184f7521721fb1b79b3935/skins%2Fcommon%2Fshared.css#L538
-- .citation-comment class is specified at Help:CS1_errors#Controlling_error_message_display
-- .citation-comment class is specified at Help:CS1_errors#Controlling_error_message_display
['hidden-error'] = '<span class="cs1-hidden-error error citation-comment">$1</span>',
['hidden-error'] = '<span class="cs1-hidden-error citation-comment">$1</span>',
['visible-error'] = '<span class="cs1-visible-error error citation-comment">$1</span>',
['visible-error'] = '<span class="cs1-visible-error citation-comment">$1</span>',
['hidden-maint'] = '<span class="cs1-maint citation-comment">$1</span>',
['hidden-maint'] = '<span class="cs1-maint citation-comment">$1</span>',
Line 160: Line 168:
['bdi'] = '<bdi$1>$2</bdi>', -- bidirectional isolation used with |script-title= and the like
['bdi'] = '<bdi$1>$2</bdi>', -- bidirectional isolation used with |script-title= and the like


['cite'] = '<cite class="$1">$2</cite>'; -- |ref= not set so no id="..." attribute
['cite'] = '<cite class="$1">$2</cite>'; -- for use when citation does not have a namelist and |ref= not set so no id="..." attribute
['cite-id'] = '<cite id="$1" class="$2">$3</cite>'; -- for use when |ref= is set
['cite-id'] = '<cite id="$1" class="$2">$3</cite>'; -- for use when when |ref= is set or when citation has a namelist


['format'] = ' <span class="cs1-format">($1)</span>', -- for |format=, |chapter-format=, etc.
['format'] = ' <span class="cs1-format">($1)</span>', -- for |format=, |chapter-format=, etc.
['interwiki'] = ' <span class="cs1-format">[in $1]</span>', -- for interwiki-language-linked author, editor, etc
['interproj'] = ' <span class="cs1-format">[at $1]</span>', -- for interwiki-project-linked author, editor, etc (:d: and :s: supported; :w: ignored)


-- various access levels, for |access=, |doi-access=, |arxiv=, ...
-- various access levels, for |access=, |doi-access=, |arxiv=, ...
Line 179: Line 189:
['italic-title'] = "''$1''",
['italic-title'] = "''$1''",


['kern-left'] = '<span class="cs1-kern-left">$1</span>$2', -- spacing to use when title contains leading single or double quote mark
['kern-left'] = '<span class="cs1-kern-left"></span>$1', -- spacing to use when title contains leading single or double quote mark
['kern-right'] = '$1<span class="cs1-kern-right">$2</span>', -- spacing to use when title contains trailing single or double quote mark
['kern-right'] = '$1<span class="cs1-kern-right"></span>', -- spacing to use when title contains trailing single or double quote mark
 
-- these for simple wikilinked titles [["text]], [[text"]] and [["text"]]
-- span wraps entire wikilink; the class name(s) on the span identify which side(s) of the title carry the quote mark
['kern-wl-left'] = '<span class="cs1-kern-wl-left">$1</span>', -- when title contains leading single or double quote mark
['kern-wl-right'] = '<span class="cs1-kern-wl-right">$1</span>', -- when title contains trailing single or double quote mark; previously emitted the -left class, inconsistent with 'kern-wl-both'
['kern-wl-both'] = '<span class="cs1-kern-wl-left cs1-kern-wl-right">$1</span>', -- when title contains leading and trailing single or double quote marks


['nowrap1'] = '<span class="nowrap">$1</span>', -- for nowrapping an item: <span ...>yyyy-mm-dd</span>
['nowrap1'] = '<span class="nowrap">$1</span>', -- for nowrapping an item: <span ...>yyyy-mm-dd</span>
Line 194: Line 198:
['parameter'] = '<code class="cs1-code">&#124;$1=</code>',
['parameter'] = '<code class="cs1-code">&#124;$1=</code>',
['ps_cs1'] = '.'; -- CS1 style postscript (terminal) character
['ps_cs1'] = '.'; -- CS1 style postscript (terminal) character
['ps_cs2'] = ''; -- CS2 style postscript (terminal) character (empty string)
['ps_cs2'] = ''; -- CS2 style postscript (terminal) character (empty string)
Line 235: Line 239:
['ArchiveFormat'] = 'archive-format',
['ArchiveFormat'] = 'archive-format',
['ArchiveURL'] = {'archive-url', 'archiveurl'}, -- Used by InternetArchiveBot
['ArchiveURL'] = {'archive-url', 'archiveurl'}, -- Used by InternetArchiveBot
['ArticleNumber'] = 'article-number',
['ASINTLD'] = 'asin-tld',
['ASINTLD'] = 'asin-tld',
['At'] = 'at', -- Used by InternetArchiveBot
['At'] = 'at', -- Used by InternetArchiveBot
Line 243: Line 248:
['ChapterFormat'] = {'chapter-format', 'contribution-format', 'entry-format',
['ChapterFormat'] = {'chapter-format', 'contribution-format', 'entry-format',
'article-format', 'section-format'};
'article-format', 'section-format'};
['ChapterURL'] = {'chapter-url', 'contribution-url', 'entry-url', 'article-url',
['ChapterURL'] = {'chapter-url', 'contribution-url', 'entry-url', 'article-url', 'section-url', 'chapterurl'}, -- Used by InternetArchiveBot
'section-url', 'chapterurl'}, -- Used by InternetArchiveBot
['ChapterUrlAccess'] = {'chapter-url-access', 'contribution-url-access',
['ChapterUrlAccess'] = {'chapter-url-access', 'contribution-url-access',
'entry-url-access', 'article-url-access', 'section-url-access'}, -- Used by InternetArchiveBot
'entry-url-access', 'article-url-access', 'section-url-access'}, -- Used by InternetArchiveBot
['Class'] = 'class', -- cite arxiv and arxiv identifiers
['Class'] = 'class', -- cite arxiv and arxiv identifier
['Collaboration'] = 'collaboration',
['Collaboration'] = 'collaboration',
['Conference'] = {'conference', 'event'},
['Conference'] = {'conference', 'event'},
Line 386: Line 390:
]]
]]


local punct_skip = {};
local punct_meta_params = { -- table of aliases[] keys (meta parameters); each key has a table of parameter names for a value
 
local meta_params = { -- table of aliases[] keys (meta parameters); each key has a table of parameter names for a value
'BookTitle', 'Chapter', 'ScriptChapter', 'ScriptTitle', 'Title', 'TransChapter', 'Transcript', 'TransMap', 'TransTitle', -- title-holding parameters
'BookTitle', 'Chapter', 'ScriptChapter', 'ScriptTitle', 'Title', 'TransChapter', 'Transcript', 'TransMap', 'TransTitle', -- title-holding parameters
'AuthorList-Mask', 'ContributorList-Mask', 'EditorList-Mask', 'InterviewerList-Mask', 'TranslatorList-Mask', -- name-list mask may have name separators
'AuthorList-Mask', 'ContributorList-Mask', 'EditorList-Mask', 'InterviewerList-Mask', 'TranslatorList-Mask', -- name-list mask may have name separators
'PostScript', 'Quote', 'ScriptQuote', 'TransQuote', 'Ref', -- miscellaneous
'PostScript', 'Quote', 'ScriptQuote', 'TransQuote', 'Ref', -- miscellaneous
'ArchiveURL', 'ChapterURL', 'ConferenceURL', 'LayURL', 'MapURL', 'TranscriptURL', 'URL', -- URL-holding parameters
'ArchiveURL', 'ChapterURL', 'ConferenceURL', 'LayURL', 'MapURL', 'TranscriptURL', 'URL', -- URL-holding parameters
}
}


-- Meta-parameter names (keys into aliases[]) for the parameters whose values are allowed to hold URLs.
-- NOTE(review): presumably expanded into url_skip{} by build_skip_table(); that call is not visible here – confirm.
local url_meta_params = { -- table of aliases[] keys (meta parameters); each key has a table of parameter names for a value
'ArchiveURL', 'ChapterURL', 'ConferenceURL', 'ID', 'LayURL', 'MapURL', 'TranscriptURL', 'URL', -- parameters allowed to hold urls
'Page', 'Pages', 'At', 'QuotePage', 'QuotePages', -- insource locators allowed to hold urls
}
local function build_skip_table (skip_t, meta_params)
for _, meta_param in ipairs (meta_params) do -- for each meta parameter key
for _, meta_param in ipairs (meta_params) do -- for each meta parameter key
local params = aliases[meta_param]; -- get the parameter or the table of parameters associated with the meta parameter name
local params = aliases[meta_param]; -- get the parameter or the table of parameters associated with the meta parameter name
if 'string' == type (params) then
if 'string' == type (params) then
punct_skip[params] = 1; -- just a single parameter
skip_t[params] = 1; -- just a single parameter
else
else
for _, param in ipairs (params) do -- get the parameter name
for _, param in ipairs (params) do -- get the parameter name
punct_skip[param] = 1; -- add the parameter name to the skip table
skip_t[param] = 1; -- add the parameter name to the skip table
local count;
local count;
param, count = param:gsub ('#', ''); -- remove enumerator marker from enumerated parameters
param, count = param:gsub ('#', ''); -- remove enumerator marker from enumerated parameters
if 0 ~= count then -- if removed
if 0 ~= count then -- if removed
punct_skip[param] = 1; -- add param name without enumerator marker
skip_t[param] = 1; -- add param name without enumerator marker
end
end
end
end
end
end
end
end
return skip_t;
end


local punct_skip = {};
local url_skip = {};


--[[-----------< S P E C I A L  C A S E  T R A N S L A T I O N S >------------
 
--[[--------------------------< S I N G L E - L E T T E R  S E C O N D - L E V E L  D O M A I N S >----------
 
This is a list of TLDs that are known to have single-letter second-level domain names.  This list does not include
ccTLDs, which are accepted in is_domain_name().
 
]]
 
local single_letter_2nd_lvl_domains_t = {'cash', 'company', 'foundation', 'org', 'today'};
 
 
--[[-----------< S P E C I A L  C A S E  T R A N S L A T I O N S >------------


This table is primarily here to support internationalization.  Translations in
This table is primarily here to support internationalization.  Translations in
Line 420: Line 443:


]]
]]
 
local is_Latn = 'A-Za-z\195\128-\195\150\195\152-\195\182\195\184-\198\191\199\132-\201\143';
local special_case_translation = {
local special_case_translation = {
['AuthorList'] = 'authors list', -- used to assemble maintenance category names
['AuthorList'] = 'authors list', -- used to assemble maintenance category names
['ContributorList'] = 'contributors list', -- translation of these names plus translation of the base mainenance category names in maint_cats{} table below
['ContributorList'] = 'contributors list', -- translation of these names plus translation of the base maintenance category names in maint_cats{} table below
['EditorList'] = 'editors list', -- must match the names of the actual categories
['EditorList'] = 'editors list', -- must match the names of the actual categories
['InterviewerList'] = 'interviewers list', -- this group or translations used by name_has_ed_markup() and name_has_mult_names()
['InterviewerList'] = 'interviewers list', -- this group or translations used by name_has_ed_markup() and name_has_mult_names()
Line 436: Line 459:
-- Lua patterns to match generic titles; usually created by bots or reference filling tools
-- Lua patterns to match generic titles; usually created by bots or reference filling tools
-- translators: replace ['local'] = nil with lowercase translation only when bots or tools create generic titles in your language
-- translators: replace ['local'] = nil with lowercase translation only when bots or tools create generic titles in your language
['generic_titles'] = {
-- generic titles and patterns in this table should be lowercase only
-- patterns in this table should be lowercase only
-- leave ['local'] nil except when there is a matching generic title in your language
-- leave ['local'] nil except when there is a matching generic title in your language
-- generic titles must be lowercase
-- boolean 'true' for plain-text searches; 'false' for pattern searches
-- boolean 'true' for plain-text searches; 'false' for pattern searches
{['en'] = {'^wayback%s+machine$', false}, ['local'] = nil},
 
{['en'] = {'are you a robot', true}, ['local'] = nil},
['generic_titles'] = {
{['en'] = {'hugedomains.com', true}, ['local'] = nil},
['accept'] = {
{['en'] = {'^[%(%[{<]?no +title[>}%]%)]?$', false}, ['local'] = nil},
},
{['en'] = {'page not found', true}, ['local'] = nil},
['reject'] = {
{['en'] = {'subscribe to read', true}, ['local'] = nil},
{['en'] = {'^wayback%s+machine$', false}, ['local'] = nil},
{['en'] = {'^[%(%[{<]?unknown[>}%]%)]?$', false}, ['local'] = nil},
{['en'] = {'are you a robot', true}, ['local'] = nil},
{['en'] = {'website is for sale', true}, ['local'] = nil},
{['en'] = {'hugedomains.com', true}, ['local'] = nil},
{['en'] = {'^404', true}, ['local'] = nil},
{['en'] = {'^[%(%[{<]?no +title[>}%]%)]?$', false}, ['local'] = nil},
{['en'] = {'internet archive wayback machine', true}, ['local'] = nil},
{['en'] = {'page not found', true}, ['local'] = nil},
{['en'] = {'log into facebook', true}, ['local'] = nil},
{['en'] = {'subscribe to read', true}, ['local'] = nil},
{['en'] = {'redirecting...', true}, ['local'] = nil},
{['en'] = {'^[%(%[{<]?unknown[>}%]%)]?$', false}, ['local'] = nil},
{['en'] = {'webcite query result', true}, ['local'] = nil},
{['en'] = {'website is for sale', true}, ['local'] = nil},
{['en'] = {'wikiwix\'s cache', true}, ['local'] = nil},
{['en'] = {'^404', false}, ['local'] = nil},
}
{['en'] = {'internet archive wayback machine', true}, ['local'] = nil},
{['en'] = {'log into facebook', true}, ['local'] = nil},
{['en'] = {'login • instagram', true}, ['local'] = nil},
{['en'] = {'redirecting...', true}, ['local'] = nil},
{['en'] = {'usurped title', true}, ['local'] = nil}, -- added by a GreenC bot
{['en'] = {'webcite query result', true}, ['local'] = nil},
{['en'] = {'wikiwix\'s cache', true}, ['local'] = nil},
}
},
 
-- boolean 'true' for plain-text searches, search string must be lowercase only
-- boolean 'false' for pattern searches
-- leave ['local'] nil except when there is a matching generic name in your language
 
-- Search strings used to recognise generic (non-personal) names.  Each entry has the form
--		{['en'] = {<search string>, <plain>}, ['local'] = nil}
-- where <plain> is boolean:
--		true  – plain-text search; the search string must be lowercase only
--		false – the search string is a Lua pattern
-- leave ['local'] nil except when there is a matching generic name in your language
-- NOTE(review): presumably a name that matches an ['accept'] entry is exempt from the ['reject'] tests – confirm in the main module
['generic_names'] = {
	['accept'] = {
		{['en'] = {'%[%[[^|]*%(author%) *|[^%]]*%]%]', false}, ['local'] = nil},
	},
	['reject'] = {
		{['en'] = {'about us', true}, ['local'] = nil},
		{['en'] = {'%f[%a][Aa]dvisor%f[%A]', false}, ['local'] = nil},
		{['en'] = {'allmusic', true}, ['local'] = nil},
		{['en'] = {'%f[%a][Aa]uthor%f[%A]', false}, ['local'] = nil},
		{['en'] = {'business', true}, ['local'] = nil},
		{['en'] = {'cnn', true}, ['local'] = nil},
		{['en'] = {'collaborator', true}, ['local'] = nil},
		{['en'] = {'contributor', true}, ['local'] = nil},
		{['en'] = {'contact us', true}, ['local'] = nil},
		{['en'] = {'directory', true}, ['local'] = nil},
		{['en'] = {'%f[%(%[][%(%[]%s*eds?%.?%s*[%)%]]?$', false}, ['local'] = nil},
		{['en'] = {'[,%.%s]%f[e]eds?%.?$', false}, ['local'] = nil},
		{['en'] = {'^eds?[%.,;]', false}, ['local'] = nil},
		{['en'] = {'^[%(%[]%s*[Ee][Dd][Ss]?%.?%s*[%)%]]', false}, ['local'] = nil},
		{['en'] = {'%f[%a][Ee]dited%f[%A]', false}, ['local'] = nil},
		{['en'] = {'%f[%a][Ee]ditors?%f[%A]', false}, ['local'] = nil},
		{['en'] = {'%f[%a][Ee]mail%f[%A]', false}, ['local'] = nil},			-- was '%f[%a]]Ee]mail%f[%A]': the misplaced ']' made the pattern look for the literal text ']Ee]mail' so 'email' never matched
		{['en'] = {'facebook', true}, ['local'] = nil},
		{['en'] = {'google', true}, ['local'] = nil},
		{['en'] = {'home page', true}, ['local'] = nil},
		{['en'] = {'^[Ii]nc%.?$', false}, ['local'] = nil},
		{['en'] = {'instagram', true}, ['local'] = nil},
		{['en'] = {'interviewer', true}, ['local'] = nil},
		{['en'] = {'linkedin', true}, ['local'] = nil},							-- was 'linkedIn'; plain-text search strings must be lowercase only
		{['en'] = {'^[Nn]ews$', false}, ['local'] = nil},
		{['en'] = {'pinterest', true}, ['local'] = nil},
		{['en'] = {'policy', true}, ['local'] = nil},
		{['en'] = {'privacy', true}, ['local'] = nil},
		{['en'] = {'reuters', true}, ['local'] = nil},
		{['en'] = {'translator', true}, ['local'] = nil},
		{['en'] = {'tumblr', true}, ['local'] = nil},
		{['en'] = {'twitter', true}, ['local'] = nil},
		{['en'] = {'site name', true}, ['local'] = nil},
		{['en'] = {'statement', true}, ['local'] = nil},
		{['en'] = {'submitted', true}, ['local'] = nil},
		{['en'] = {'super.?user', false}, ['local'] = nil},
		{['en'] = {'%f['..is_Latn..'][Uu]ser%f[^'..is_Latn..']', false}, ['local'] = nil},	-- frontier sets are built from is_Latn so that 'user' is matched only as a whole word
		{['en'] = {'verfasser', true}, ['local'] = nil},
	}
}
}
}


Line 480: Line 559:
Easter and Christmas are defined here as 98 and 99, which should be out of the
Easter and Christmas are defined here as 98 and 99, which should be out of the
ISO 8601 (EDTF) range of uses for a while.
ISO 8601 (EDTF) range of uses for a while.
local_date_names_from_mediawiki is a boolean.  When set to:
true – module will fetch local month names from MediaWiki for both date_names['local']['long'] and date_names['local']['short']
false – module will *not* fetch local month names from MediaWiki
Caveat lector:  There is no guarantee that MediaWiki will provide short month names.  At your wiki you can test
the results of the MediaWiki fetch in the debug console with this command (the result is alpha sorted):
=mw.dumpObject (p.date_names['local'])
While the module can fetch month names from MediaWiki, it cannot fetch the quarter, season, and named date names
from MediaWiki.  Those must be translated manually.


]]
]]


local local_date_names_from_mediawiki = true; -- when false, manual translation required for date_names['local']['long'] and date_names['local']['short']
-- when true, module fetches long and short month names from MediaWiki
local date_names = {
local date_names = {
['en'] = { -- English
['en'] = { -- English
Line 491: Line 583:
['named'] = {['Easter'] = 98, ['Christmas'] = 99},
['named'] = {['Easter'] = 98, ['Christmas'] = 99},
},
},
-- when local_date_names_from_mediawiki = false
['local'] = { -- replace these English date names with the local language equivalents
['local'] = { -- replace these English date names with the local language equivalents
['long'] = {['January'] = 1, ['February'] = 2, ['March'] = 3, ['April'] = 4, ['May'] = 5, ['June'] = 6, ['July'] = 7, ['August'] = 8, ['September'] = 9, ['October'] = 10, ['November'] = 11, ['December'] = 12},
['long'] = {['January'] = 1, ['February'] = 2, ['March'] = 3, ['April'] = 4, ['May'] = 5, ['June'] = 6, ['July'] = 7, ['August'] = 8, ['September'] = 9, ['October'] = 10, ['November'] = 11, ['December'] = 12},
Line 498: Line 591:
['named'] = {['Easter'] = 98, ['Christmas'] = 99},
['named'] = {['Easter'] = 98, ['Christmas'] = 99},
},
},
['inv_local_l'] = {}, -- used in date reformatting; copy of date_names['local'].long where k/v are inverted: [1]='<local name>' etc.
['inv_local_long'] = {}, -- used in date reformatting & translation; copy of date_names['local'].long where k/v are inverted: [1]='<local name>' etc.
['inv_local_s'] = {}, -- used in date reformatting; copy of date_names['local'].short where k/v are inverted: [1]='<local name>' etc.
['inv_local_short'] = {}, -- used in date reformatting & translation; copy of date_names['local'].short where k/v are inverted: [1]='<local name>' etc.
['inv_local_quarter'] = {}, -- used in date translation; copy of date_names['local'].quarter where k/v are inverted: [1]='<local name>' etc.
['inv_local_season'] = {}, -- used in date translation; copy of date_names['local'].season where k/v are inverted: [1]='<local name>' etc.
['inv_local_named'] = {}, -- used in date translation; copy of date_names['local'].named where k/v are inverted: [1]='<local name>' etc.
['local_digits'] = {['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7', ['8'] = '8', ['9'] = '9'}, -- used to convert local language digits to Western 0-9
['local_digits'] = {['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7', ['8'] = '8', ['9'] = '9'}, -- used to convert local language digits to Western 0-9
['xlate_digits'] = {},
['xlate_digits'] = {},
}
}


for name, i in pairs (date_names['local'].long) do -- this table is ['name'] = i
if local_date_names_from_mediawiki then -- if fetching local month names from MediaWiki is enabled
date_names['inv_local_l'][i] = name; -- invert to get [i] = 'name' for conversions from ymd
local long_t = {};
local short_t = {};
for i=1, 12 do -- loop 12x and
local name = lang_obj:formatDate('F', '2022-' .. i .. '-1'); -- get long month name for each i
long_t[name] = i; -- save it
name = lang_obj:formatDate('M', '2022-' .. i .. '-1'); -- get short month name for each i
short_t[name] = i; -- save it
end
date_names['local']['long'] = long_t; -- write the long table – overwrites manual translation
date_names['local']['short'] = short_t; -- write the short table – overwrites manual translation
end
end
 
-- create inverted date-name tables for reformatting and/or translation
for name, i in pairs (date_names['local'].short) do -- this table is ['name'] = i
for _, invert_t in pairs {{'long', 'inv_local_long'}, {'short', 'inv_local_short'}, {'quarter', 'inv_local_quarter'}, {'season', 'inv_local_season'}, {'named', 'inv_local_named'}} do
date_names['inv_local_s'][i] = name; -- invert to get [i] = 'name' for conversions from ymd
for name, i in pairs (date_names['local'][invert_t[1]]) do -- this table is ['name'] = i
date_names[invert_t[2]][i] = name; -- invert to get [i] = 'name' for conversions from ymd
end
end
end


Line 533: Line 640:


local function get_date_format ()
local function get_date_format ()
local content = mw.title.getCurrentTitle():getContent() or ''; -- get the content of the article or ''; new pages edited w/ve do not have 'content' until saved; ve does not preview; phab:T221625
local title_object = mw.title.getCurrentTitle();
if title_object.namespace == 10 then -- not in template space so that unused templates appear in unused-template-reports;
return nil; -- auto-formatting does not work in Template space so don't set global_df
end
local content = title_object:getContent() or ''; -- get the content of the article or ''; new pages edited w/ve do not have 'content' until saved; ve does not preview; phab:T221625
for _, pattern in ipairs (df_template_patterns) do -- loop through the patterns looking for {{Use dmy dates}} or {{Use mdy dates}} or any of their redirects
for _, pattern in ipairs (df_template_patterns) do -- loop through the patterns looking for {{Use dmy dates}} or {{Use mdy dates}} or any of their redirects
local start, _, match = content:find(pattern); -- match is the three letters indicating desired date format
local start, _, match = content:find(pattern); -- match is the three letters indicating desired date format
Line 547: Line 658:
end
end


local global_df = get_date_format ();
local global_df;




Line 561: Line 672:
local templates_using_issue = {'citation', 'conference', 'episode', 'interview', 'journal', 'magazine', 'map', 'news', 'podcast'}
local templates_using_issue = {'citation', 'conference', 'episode', 'interview', 'journal', 'magazine', 'map', 'news', 'podcast'}
local templates_not_using_page = {'audio-visual', 'episode', 'mailinglist', 'newsgroup', 'podcast', 'serial', 'sign', 'speech'}
local templates_not_using_page = {'audio-visual', 'episode', 'mailinglist', 'newsgroup', 'podcast', 'serial', 'sign', 'speech'}


--[[
--[[


Patterns for finding extra text in |volume=, |issue=, |page=, |pages=
These tables control when it is appropriate for {{citation}} to render |volume= and/or |issue=.  The parameter
names in the tables constrain {{citation}} so that its renderings match the renderings of the equivalent cs1
templates.  For example, {{cite web}} does not support |volume= so the equivalent {{citation |website=...}} must
not support |volume=.


]]
]]


local vol_iss_pg_patterns = {
-- parameter names that, when used in {{citation}}, stop that template from rendering |volume=
local citation_no_volume_t = {
	'website',
	'mailinglist',
	'script-website',
}
-- parameter names that, when used in {{citation}}, allow that template to render |issue=
local citation_issue_t = {
	-- periodical parameters
	'journal',
	'magazine',
	'newspaper',
	'periodical',
	'work',
	-- script-<periodical> counterparts of the above
	'script-journal',
	'script-magazine',
	'script-newspaper',
	'script-periodical',
	'script-work',
}
 
--[[
 
Patterns for finding extra text in |volume=, |issue=, |page=, |pages=
 
]]
 
local vol_iss_pg_patterns = {
good_ppattern = '^P[^%.PpGg]', -- OK to begin with uppercase P: P7 (page 7 of section P), but not p123 (page 123); TODO: this allows 'Pages' which it should not
good_ppattern = '^P[^%.PpGg]', -- OK to begin with uppercase P: P7 (page 7 of section P), but not p123 (page 123); TODO: this allows 'Pages' which it should not
bad_ppatterns = { -- patterns for |page= and |pages=
bad_ppatterns = { -- patterns for |page= and |pages=
'^[Pp][PpGg]?%.?[ %d]',
'^[Pp][PpGg]?%.?[ %d]',
'^[Pp][Pp]?%.&nbsp;', -- from {{p.}} and {{pp.}} templates
'^[Pp]ages?',
'^[Pp]ages?',
'^[Pp]gs.?',
'^[Pp]gs.?',
Line 618: Line 746:
['cs1'] = {'cs1'}, -- |mode=
['cs1'] = {'cs1'}, -- |mode=
['cs2'] = {'cs2'}, -- |mode=
['cs2'] = {'cs2'}, -- |mode=
['dead'] = {'dead'}, -- |url-status= -- Used by InternetArchiveBot
['dead'] = {'dead', 'deviated'}, -- |url-status= -- Used by InternetArchiveBot
['dmy'] = {'dmy'}, -- |df=
['dmy'] = {'dmy'}, -- |df=
['dmy-all'] = {'dmy-all'}, -- |df=
['dmy-all'] = {'dmy-all'}, -- |df=
['foreword'] = {'foreword'}, -- |contribution=
['foreword'] = {'foreword'}, -- |contribution=
['free'] = {'free'}, -- |url-access= -- Used by InternetArchiveBot
['free'] = {'free'}, -- |<id>-access= -- Used by InternetArchiveBot
['harv'] = {'harv'}, -- |ref=; this no longer supported; is_valid_parameter_value() called with <invert> = true
['introduction'] = {'introduction'}, -- |contribution=
['introduction'] = {'introduction'}, -- |contribution=
['limited'] = {'limited'}, -- |url-access= -- Used by InternetArchiveBot
['limited'] = {'limited'}, -- |url-access= -- Used by InternetArchiveBot
Line 714: Line 843:
['mode'] = make_keywords_list ({keywords.cs1, keywords.cs2}),
['mode'] = make_keywords_list ({keywords.cs1, keywords.cs2}),
['name-list-style'] = make_keywords_list ({keywords.amp, keywords['and'], keywords.vanc}),
['name-list-style'] = make_keywords_list ({keywords.amp, keywords['and'], keywords.vanc}),
['ref'] = make_keywords_list ({keywords.harv}), -- inverted check; |ref=harv no longer supported
['url-access'] = make_keywords_list ({keywords.subscription, keywords.limited, keywords.registration}),
['url-access'] = make_keywords_list ({keywords.subscription, keywords.limited, keywords.registration}),
['url-status'] = make_keywords_list ({keywords.dead, keywords.live, keywords.unfit, keywords.usurped, keywords['bot: unknown']}),
['url-status'] = make_keywords_list ({keywords.dead, keywords.live, keywords.unfit, keywords.usurped, keywords['bot: unknown']}),
Line 775: Line 905:


--[[
--[[
Indic script makes use of zero width joiner as a character modifier so zwj
Indic script makes use of zero width joiner as a character modifier so zwj
characters must be left in.  This pattern covers all of the unicode characters
characters must be left in.  This pattern covers all of the unicode characters
Line 799: Line 930:
local indic_script = '[\224\164\128-\224\181\191\224\163\160-\224\183\191\225\128\128-\225\130\159\234\167\160-\234\167\191\234\169\160-\234\169\191]';
local indic_script = '[\224\164\128-\224\181\191\224\163\160-\224\183\191\225\128\128-\225\130\159\234\167\160-\234\167\191\234\169\160-\234\169\191]';


-- list of emoji that use zwj character (U+200D) to combine with another emoji
-- list of emoji that use a zwj character (U+200D) to combine with another emoji
local emoji = { -- indexes are decimal forms of the hex values in U+xxxx
-- from: https://unicode.org/Public/emoji/15.0/emoji-zwj-sequences.txt; version: 15.0; 2022-05-06
-- table created by: [[:en:Module:Make emoji zwj table]]
local emoji_t = { -- indexes are decimal forms of the hex values in U+xxxx
[9760] = true, -- U+2620 ☠ skull and crossbones
[9792] = true, -- U+2640 ♀ female sign
[9794] = true, -- U+2642 ♂ male sign
[9877] = true, -- U+2695 ⚕ staff of aesculapius
[9878] = true, -- U+2696 ⚖ scales
[9895] = true, -- U+26A7 ⚧ male with stroke and male and female sign
[9992] = true, -- U+2708 ✈ airplane
[10052] = true, -- U+2744 ❄ snowflake
[10084] = true, -- U+2764 ❤ heavy black heart
[11035] = true, -- U+2B1B ⬛ black large square
[127752] = true, -- U+1F308 🌈 rainbow
[127752] = true, -- U+1F308 🌈 rainbow
[127787] = true, -- U+1F32B 🌫 fog
[127806] = true, -- U+1F33E 🌾 ear of rice
[127806] = true, -- U+1F33E 🌾 ear of rice
[127859] = true, -- U+1F373 🍳 cooking
[127859] = true, -- U+1F373 🍳 cooking
[127868] = true, -- U+1F37C 🍼 baby bottle
[127876] = true, -- U+1F384 🎄 christmas tree
[127891] = true, -- U+1F393 🎓 graduation cap
[127891] = true, -- U+1F393 🎓 graduation cap
[127908] = true, -- U+1F3A4 🎤 microphone
[127908] = true, -- U+1F3A4 🎤 microphone
Line 814: Line 960:
[128105] = true, -- U+1F469 👩 woman
[128105] = true, -- U+1F469 👩 woman
[128139] = true, -- U+1F48B 💋 kiss mark
[128139] = true, -- U+1F48B 💋 kiss mark
[128168] = true, -- U+1F4A8 💨 dash symbol
[128171] = true, -- U+1F4AB 💫 dizzy symbol
[128187] = true, -- U+1F4BB 💻 personal computer
[128187] = true, -- U+1F4BB 💻 personal computer
[128188] = true, -- U+1F4BC 💼 brief case
[128188] = true, -- U+1F4BC 💼 brief case
[128293] = true, -- U+1F525 🔥 fire
[128295] = true, -- U+1F527 🔧 wrench
[128295] = true, -- U+1F527 🔧 wrench
[128300] = true, -- U+1F52C 🔬 microscope
[128300] = true, -- U+1F52C 🔬 microscope
Line 831: Line 980:
[129469] = true, -- U+1F9BD 🦽 manual wheelchair
[129469] = true, -- U+1F9BD 🦽 manual wheelchair
[129489] = true, -- U+1F9D1 🧑 adult
[129489] = true, -- U+1F9D1 🧑 adult
[9760] = true, -- U+2620 ☠ skull and crossbones
[129657] = true, -- U+1FA79 🩹 adhesive bandage
[9792] = true, -- U+2640 ♀ female sign
[129778] = true, -- U+1FAF2 🫲 leftwards hand
[9794] = true, -- U+2642 ♂ male sign
[9877] = true, -- U+2695 ⚕ staff of aesculapius
[9878] = true, -- U+2696 ⚖ scales
[9992] = true, -- U+2708 ✈ airplane
[10084] = true, -- U+2764 ❤ heavy black heart
}
}


Line 847: Line 991:
]]
]]


local this_wiki_code = mw.getContentLanguage():getCode(); -- get this wiki's language code
--local this_wiki_code = mw.getContentLanguage():getCode(); -- get this wiki's language code
if string.match (mw.site.server, 'wikidata') then
local this_wiki_code = lang_obj:getCode(); -- get this wiki's language code
if string.match (mw.site.server, 'wikidata') then
this_wiki_code = mw.getCurrentFrame():preprocess('{{int:lang}}'); -- on Wikidata so use interface language setting instead
this_wiki_code = mw.getCurrentFrame():preprocess('{{int:lang}}'); -- on Wikidata so use interface language setting instead
end
end


local languages = mw.language.fetchLanguageNames (this_wiki_code, 'all'); -- get a list of language names known to Wikimedia; used for |language= and interwiki tests
local mw_languages_by_tag_t = mw.language.fetchLanguageNames (this_wiki_code, 'all'); -- get a table of language tag/name pairs known to Wikimedia; used for interwiki tests
-- Build the 'reversed' table of name/tag language pairs known to MediaWiki; used for |language=.
-- Keys are lowercased language names; to get a name's proper case, index mw_languages_by_tag_t with the tag
-- stored here.
--
-- More than one tag may carry the same name.  pairs() visits the tags in an unspecified order, so the tag kept
-- for a name must not depend on which tag happens to be seen first or last.  The winner is therefore chosen by
-- comparing the tags themselves: the shortest tag is preferred, and tags of equal length are ordered by plain
-- string comparison so that the outcome is the same every time this module is loaded.
local mw_languages_by_name_t = {};
for tag, name in pairs (mw_languages_by_tag_t) do
	local name_lc = mw.ustring.lower (name);						-- lowercase for tag fetch
	local known_tag = mw_languages_by_name_t[name_lc];				-- nil when this name has not been seen yet

	if not known_tag												-- first tag seen for this name
		or #tag < #known_tag										-- prefer the shortest tag for this name
		or (#tag == #known_tag and tag < known_tag) then			-- same length: pick deterministically
			mw_languages_by_name_t[name_lc] = tag;
	end
end


local inter_wiki_map = {}; -- map of interwiki prefixes that are language-code prefixes
local inter_wiki_map = {}; -- map of interwiki prefixes that are language-code prefixes
for k, v in pairs (mw.site.interwikiMap ('local')) do -- spin through the base interwiki map (limited to local)
for k, v in pairs (mw.site.interwikiMap ('local')) do -- spin through the base interwiki map (limited to local)
if languages[v["prefix"]] then -- if the prefix matches a known language code
if mw_languages_by_tag_t[v["prefix"]] then -- if the prefix matches a known language tag
inter_wiki_map[v["prefix"]] = true; -- add it to our local map
inter_wiki_map[v["prefix"]] = true; -- add it to our local map
end
end
end
end
local local_lang_cat_enable = false; -- set to true to categorize pages where |language=<local wiki's language>




Line 872: Line 1,026:


local script_lang_codes = {
local script_lang_codes = {
'am', 'ar', 'be', 'bg', 'bn', 'bo', 'bs', 'dv', 'dz', 'el', 'fa', 'gu',  
'ab', 'am', 'ar', 'be', 'bg', 'bn', 'bo', 'bs', 'dv', 'dz', 'el', 'fa', 'gu',  
'he', 'hi', 'hy', 'ja', 'ka', 'kk', 'km', 'kn', 'ko', 'ku', 'ky', 'lo',
'he', 'hi', 'hy', 'ja', 'ka', 'kk', 'km', 'kn', 'ko', 'ku', 'ky', 'lo', 'mk',
'mk', 'ml', 'mn', 'mr', 'my', 'ne', 'or', 'ota', 'ps', 'ru', 'sd', 'si',
'ml', 'mn', 'mr', 'my', 'ne', 'or', 'ota', 'pa', 'ps', 'ru', 'sd', 'si', 'sr',
'sr', 'ta', 'te', 'tg', 'th', 'ti', 'ug', 'uk', 'ur', 'uz', 'yi', 'zh'
'syc', 'ta', 'te', 'tg', 'th', 'ti', 'tt', 'ug', 'uk', 'ur', 'uz', 'yi', 'yue', 'zh'
};
};


Line 905: Line 1,059:
['bn'] = 'Bengali', -- MediaWiki returns Bangla
['bn'] = 'Bengali', -- MediaWiki returns Bangla
['ca-valencia'] = 'Valencian', -- IETF variant of Catalan
['ca-valencia'] = 'Valencian', -- IETF variant of Catalan
['crh'] = 'Crimean Tatar', -- synonymous with Crimean Turkish (return value from {{#language:crh|en}})
['ilo'] = 'Ilocano', -- MediaWiki/IANA/ISO 639: Iloko; use en.wiki preferred name
['ilo'] = 'Ilocano', -- MediaWiki/IANA/ISO 639: Iloko; use en.wiki preferred name
['ksh'] = 'Kölsch', -- MediaWiki: Colognian; use IANA/ISO 639 preferred name
['ksh'] = 'Kölsch', -- MediaWiki: Colognian; use IANA/ISO 639 preferred name
Line 913: Line 1,066:
}
}


local lang_name_remap = { -- used for |language=
local lang_name_remap = { -- used for |language=; names require proper capitalization; tags must be lowercase
['alemannisch'] = {'Swiss German', 'gsw'}, -- not an ISO or IANA language name; MediaWiki uses 'als' as a subdomain name for Alemannic Wikipedia: als.wikipedia.org
['alemannisch'] = {'Swiss German', 'gsw'}, -- not an ISO or IANA language name; MediaWiki uses 'als' as a subdomain name for Alemannic Wikipedia: als.wikipedia.org
['bangla'] = {'Bengali', 'bn'}, -- MediaWiki returns Bangla (the endonym) but we want Bengali (the exonym); here we remap
['bangla'] = {'Bengali', 'bn'}, -- MediaWiki returns Bangla (the endonym) but we want Bengali (the exonym); here we remap
Line 921: Line 1,074:
['blackfoot'] = {'Blackfoot', 'bla'}, -- MediaWiki/IANA/ISO 639: Siksika; use en.wiki preferred name
['blackfoot'] = {'Blackfoot', 'bla'}, -- MediaWiki/IANA/ISO 639: Siksika; use en.wiki preferred name
['colognian'] = {'Colognian', 'ksh-x-colog'}, -- MediaWiki preferred name for ksh
['colognian'] = {'Colognian', 'ksh-x-colog'}, -- MediaWiki preferred name for ksh
['crimean tatar'] = {'Crimean Tatar', 'crh'}, -- MediaWiki uses 'crh' as a subdomain name for Crimean Tatar Wikipedia: crh.wikipedia.org
['ilocano'] = {'Ilocano', 'ilo'}, -- MediaWiki/IANA/ISO 639: Iloko; use en.wiki preferred name
['ilocano'] = {'Ilocano', 'ilo'}, -- MediaWiki/IANA/ISO 639: Iloko; use en.wiki preferred name
['kolsch'] = {'Kölsch', 'ksh'}, -- use IANA/ISO 639 preferred name (use non-diacritical o instead of umlaut ö)
['kolsch'] = {'Kölsch', 'ksh'}, -- use IANA/ISO 639 preferred name (use non-diacritical o instead of umlaut ö)
['kölsch'] = {'Kölsch', 'ksh'}, -- use IANA/ISO 639 preferred name
['kölsch'] = {'Kölsch', 'ksh'}, -- use IANA/ISO 639 preferred name
['ripuarian'] = {'Ripuarian', 'mis-x-ripuar'}, -- group of dialects; no code in MediaWiki or in IANA/ISO 639
['ripuarian'] = {'Ripuarian', 'mis-x-ripuar'}, -- group of dialects; no code in MediaWiki or in IANA/ISO 639
['taiwanese hokkien'] = {'Taiwanese Hokkien', 'nan-TW'}, -- make room for MediaWiki/IANA/ISO 639 nan: Min Nan Chinese  
['taiwanese hokkien'] = {'Taiwanese Hokkien', 'nan-tw'}, -- make room for MediaWiki/IANA/ISO 639 nan: Min Nan Chinese  
['tosk albanian'] = {'Tosk Albanian', 'als'}, -- MediaWiki replaces 'Tosk Albanian' with 'Alemannisch' so 'Tosk Albanian' cannot be found
['tosk albanian'] = {'Tosk Albanian', 'als'}, -- MediaWiki replaces 'Tosk Albanian' with 'Alemannisch' so 'Tosk Albanian' cannot be found
['valencian'] = {'Valencian', 'ca'}, -- variant of Catalan; categorizes as Catalan
['valencian'] = {'Valencian', 'ca-valencia'}, -- variant of Catalan; categorizes as Valencian
}
}


Line 939: Line 1,091:


local prop_cats = {
local prop_cats = {
['foreign_lang_source'] = 'CS1 $1-language sources ($2)', -- |language= categories; $1 is foreign-language name, $2 is ISO639-1 code
['foreign-lang-source'] = 'CS1 $1-language sources ($2)', -- |language= categories; $1 is foreign-language name, $2 is ISO639-1 code
['foreign_lang_source_2'] = 'CS1 foreign language sources (ISO 639-2)|$1', -- |language= category; a cat for ISO639-2 languages; $1 is the ISO 639-2 code used as a sort key
['foreign-lang-source-2'] = 'CS1 foreign language sources (ISO 639-2)|$1', -- |language= category; a cat for ISO639-2 languages; $1 is the ISO 639-2 code used as a sort key
['local_lang_source'] = 'CS1 $1-language sources ($2)', -- |language= categories; $1 is local-language name, $2 is ISO639-1 code; not emitted when local_lang_cat_enable is false
['jul-greg-uncertainty'] = 'CS1: Julian–Gregorian uncertainty', -- probably temporary cat to identify scope of template with dates 1 October 1582 – 1 January 1926
['location test'] = 'CS1 location test',
['local-lang-source'] = 'CS1 $1-language sources ($2)', -- |language= categories; $1 is local-language name, $2 is ISO639-1 code; not emitted when local_lang_cat_enable is false
['script'] = 'CS1 uses foreign language script', -- when language specified by |script-title=xx: doesn't have its own category
['location-test'] = 'CS1 location test',
['script_with_name'] = 'CS1 uses $1-language script ($2)', -- |script-title=xx: has matching category; $1 is language name, $2 is ISO639-1 code
['long-vol'] = 'CS1: long volume value', -- probably temporary cat to identify scope of |volume= values longer than 4 characters
['jul_greg_uncertainty'] = 'CS1: Julian–Gregorian uncertainty', -- probably temporary cat to identify scope of template with dates 1 October 1582 – 1 January 1926
['script'] = 'CS1 uses $1-language script ($2)', -- |script-title=xx: has matching category; $1 is language name, $2 is ISO639-1 code
['long_vol'] = 'CS1: long volume value', -- probably temporary cat to identify scope of |volume= values longer than 4 charachters
['tracked-param'] = 'CS1 tracked parameter: $1', -- $1 is base (enumerators removed) parameter name
['year_range_abbreviated'] = 'CS1: abbreviated year range', -- probably temporary cat to identify scope of |date=, |year= values using YYYY–YY form
['year-range-abbreviated'] = 'CS1: abbreviated year range', -- probably temporary cat to identify scope of |date=, |year= values using YYYY–YY form
}
}


Line 965: Line 1,117:
['pressrelease'] = 'Press release',
['pressrelease'] = 'Press release',
['report'] = 'Report',
['report'] = 'Report',
['speech'] = 'Speech',
['techreport'] = 'Technical report',
['techreport'] = 'Technical report',
['thesis'] = 'Thesis',
['thesis'] = 'Thesis',
Line 988: Line 1,141:
['group'] = 'invalid group id', -- |isbn=
['group'] = 'invalid group id', -- |isbn=
['initials'] = 'initials', -- Vancouver
['initials'] = 'initials', -- Vancouver
['invalid language code'] = 'invalid language code', -- |script-<param>=
['journal'] = 'journal', -- |bibcode=
['journal'] = 'journal', -- |bibcode=
['length'] = 'length', -- |isbn=, |bibcode=, |sbn=
['length'] = 'length', -- |isbn=, |bibcode=, |sbn=
['liveweb'] = 'liveweb', -- |archive-url=
['liveweb'] = 'liveweb', -- |archive-url=
['missing comma'] = 'missing comma', -- Vancouver
['missing comma'] = 'missing comma', -- Vancouver
['missing prefix'] = 'missing prefix', -- |script-<param>=
['missing title part'] = 'missing title part', -- |script-<param>=
['name'] = 'name', -- Vancouver
['name'] = 'name', -- Vancouver
['non-Latin char'] = 'non-Latin character', -- Vancouver
['non-Latin char'] = 'non-Latin character', -- Vancouver
Line 1,000: Line 1,156:
['suffix'] = 'suffix', -- Vancouver
['suffix'] = 'suffix', -- Vancouver
['timestamp'] = 'timestamp', -- |archive-url=
['timestamp'] = 'timestamp', -- |archive-url=
['unknown language code'] = 'unknown language code', -- |script-<param>=
['value'] = 'value', -- |bibcode=
['value'] = 'value', -- |bibcode=
['year'] = 'year', -- |bibcode=
['year'] = 'year', -- |bibcode=
Line 1,355: Line 1,512:
message = '<code class="cs1-code">&#124;$1=</code> missing <code class="cs1-code">&#124;$2=</code>', -- $1 is first alias, $2 is matching last alias
message = '<code class="cs1-code">&#124;$1=</code> missing <code class="cs1-code">&#124;$2=</code>', -- $1 is first alias, $2 is matching last alias
anchor = 'first_missing_last',
anchor = 'first_missing_last',
category = 'CS1 errors: missing name', -- author, contributor, editor, interviewer, translator
category = 'CS1 errors: missing name', -- author, contributor, editor, interviewer, translator
hidden = false
hidden = false
},
},
Line 1,363: Line 1,520:
category = 'CS1 errors: format without URL',
category = 'CS1 errors: format without URL',
hidden = false
hidden = false
},
err_generic_name = {
message = '<code class="cs1-code">&#124;$1=</code> has generic name', -- $1 is parameter name
anchor = 'generic_name',
category = 'CS1 errors: generic name',
hidden = false,
},
},
err_generic_title = {
err_generic_title = {
Line 1,455: Line 1,618:
},
},
err_param_unknown_empty = {
err_param_unknown_empty = {
message = 'Cite has empty unknown parameter$1: $2', -- $1 is 's' or empty space; $2 is emty unknown param list
message = 'Cite has empty unknown parameter$1: $2', -- $1 is 's' or empty space; $2 is empty unknown param list
anchor = 'param_unknown_empty',
anchor = 'param_unknown_empty',
category = 'CS1 errors: empty unknown parameters',
category = 'CS1 errors: empty unknown parameters',
Line 1,480: Line 1,643:


]]
]]
maint_archived_copy = {
maint_archived_copy = {
message = nil,
message = nil,
Line 1,496: Line 1,660:
anchor = 'bot:_unknown',
anchor = 'bot:_unknown',
category = 'CS1 maint: bot: original URL status unknown',
category = 'CS1 maint: bot: original URL status unknown',
hidden = true,
},
maint_date_auto_xlated = { -- date auto-translation not supported by en.wiki
message = nil,
anchor = 'date_auto_xlated',
category = 'CS1 maint: date auto-translated',
hidden = true,
hidden = true,
},
},
Line 1,508: Line 1,678:
anchor = 'date_year',
anchor = 'date_year',
category = 'CS1 maint: date and year',
category = 'CS1 maint: date and year',
hidden = true,
},
maint_discouraged = {
message = nil,
anchor = 'discouraged',
category = 'CS1 maint: discouraged parameter',
hidden = true,
hidden = true,
},
},
Line 1,538: Line 1,702:
anchor = 'extra_punct',
anchor = 'extra_punct',
category = 'CS1 maint: extra punctuation',
category = 'CS1 maint: extra punctuation',
hidden = true,
},
maint_extra_text_names = {
message = nil,
anchor = 'extra_text_names',
category = 'CS1 maint: extra text: $1', -- $1 is '<name>s list'; gets value from special_case_translation table
hidden = true,
hidden = true,
},
},
Line 1,569: Line 1,727:
category = 'CS1 maint: location',
category = 'CS1 maint: location',
hidden = true,
hidden = true,
},
},
maint_mr_format = {
maint_mr_format = {
message = nil,
message = nil,
Line 1,575: Line 1,733:
category = 'CS1 maint: MR format',
category = 'CS1 maint: MR format',
hidden = true,
hidden = true,
},
},
maint_mult_names = {
maint_mult_names = {
message = nil,
message = nil,
Line 1,599: Line 1,757:
category = 'CS1 maint: others in cite AV media (notes)',
category = 'CS1 maint: others in cite AV media (notes)',
hidden = true,
hidden = true,
},
},
maint_pmc_embargo = {
maint_pmc_embargo = {
message = nil,
message = nil,
Line 1,624: Line 1,782:
hidden = true,
hidden = true,
},
},
maint_ref_harv = {
message = nil,
anchor = 'ref_harv',
category = 'CS1 maint: ref=harv',
hidden = true,
},
maint_unfit = {
maint_unfit = {
message = nil,
message = nil,
Line 1,646: Line 1,798:
anchor = 'untitled',
anchor = 'untitled',
category = 'CS1 maint: untitled periodical',
category = 'CS1 maint: untitled periodical',
hidden = true,
},
maint_url_status = {
message = nil,
anchor = 'url_status',
category = 'CS1 maint: url-status',
hidden = true,
hidden = true,
},
},
Line 1,667: Line 1,825:
redirect: a local redirect to a local Wikipedia article name;  at en.wiki, 'ISBN (identifier)' is a redirect to 'International Standard Book Number'
redirect: a local redirect to a local Wikipedia article name;  at en.wiki, 'ISBN (identifier)' is a redirect to 'International Standard Book Number'
q: Wikidata q number for the identifier
q: Wikidata q number for the identifier
label: the label preceeding the identifier; label is linked to a Wikipedia article (in this order):
label: the label preceding the identifier; label is linked to a Wikipedia article (in this order):
redirect from id_handlers['<id>'].redirect when use_identifier_redirects is true
redirect from id_handlers['<id>'].redirect when use_identifier_redirects is true
Wikidata-supplied article name for the local wiki from id_handlers['<id>'].q
Wikidata-supplied article name for the local wiki from id_handlers['<id>'].q
Line 1,686: Line 1,844:
custom_access: to enable custom access level for an identifier, set this parameter
custom_access: to enable custom access level for an identifier, set this parameter
to the parameter that should control it (normally 'id-access')
to the parameter that should control it (normally 'id-access')
]]
]]


Line 1,695: Line 1,854:
q = 'Q118398',
q = 'Q118398',
label = 'arXiv',
label = 'arXiv',
prefix = '//arxiv.org/abs/', -- protocol-relative tested 2013-09-04
prefix = 'https://arxiv.org/abs/', -- protocol-relative tested 2013-09-04
encode = false,
encode = false,
COinS = 'info:arxiv',
COinS = 'info:arxiv',
Line 1,707: Line 1,866:
q = 'Q1753278',
q = 'Q1753278',
label = 'ASIN',
label = 'ASIN',
prefix = '//www.amazon.',
prefix = 'https://www.amazon.',
COinS = 'url',
COinS = 'url',
separator = '&nbsp;',
separator = '&nbsp;',
Line 1,730: Line 1,889:
q = 'Q19835482',
q = 'Q19835482',
label = 'bioRxiv',
label = 'bioRxiv',
prefix = '//doi.org/',
prefix = 'https://doi.org/',
COinS = 'pre', -- use prefix value
COinS = 'pre', -- use prefix value
access = 'free', -- free to read
access = 'free', -- free to read
Line 1,742: Line 1,901:
q = 'Q2715061',
q = 'Q2715061',
label = 'CiteSeerX',
label = 'CiteSeerX',
prefix = '//citeseerx.ist.psu.edu/viewdoc/summary?doi=',
prefix = 'https://citeseerx.ist.psu.edu/viewdoc/summary?doi=',
COinS =  'pre', -- use prefix value
COinS =  'pre', -- use prefix value
access = 'free', -- free to read
access = 'free', -- free to read
Line 1,754: Line 1,913:
q = 'Q25670',
q = 'Q25670',
label = 'doi',
label = 'doi',
prefix = '//doi.org/',
prefix = 'https://doi.org/',
COinS = 'info:doi',
COinS = 'info:doi',
separator = ':',
separator = ':',
Line 1,766: Line 1,925:
q = 'Q46339674',
q = 'Q46339674',
label = 'eISSN',
label = 'eISSN',
prefix = '//www.worldcat.org/issn/',
prefix = 'https://www.worldcat.org/issn/',
COinS = 'rft.eissn',
COinS = 'rft.eissn',
encode = false,
encode = false,
Line 1,777: Line 1,936:
q = 'Q3126718',
q = 'Q3126718',
label = 'hdl',
label = 'hdl',
prefix = '//hdl.handle.net/',
prefix = 'https://hdl.handle.net/',
COinS = 'info:hdl',
COinS = 'info:hdl',
separator = ':',
separator = ':',
Line 1,784: Line 1,943:
},
},
['ISBN'] = { -- Used by InternetArchiveBot
['ISBN'] = { -- Used by InternetArchiveBot
parameters = {'isbn', 'ISBN', 'isbn13', 'ISBN13'},
parameters = {'isbn', 'ISBN'},
link = 'International Standard Book Number',
link = 'International Standard Book Number',
redirect = 'ISBN (identifier)',
redirect = 'ISBN (identifier)',
Line 1,809: Line 1,968:
q = 'Q131276',
q = 'Q131276',
label = 'ISSN',
label = 'ISSN',
prefix = '//www.worldcat.org/issn/',
prefix = 'https://www.worldcat.org/issn/',
COinS = 'rft.issn',
COinS = 'rft.issn',
encode = false,
encode = false,
Line 1,820: Line 1,979:
q = '',
q = '',
label = 'JFM',
label = 'JFM',
prefix = '//zbmath.org/?format=complete&q=an:',
prefix = 'https://zbmath.org/?format=complete&q=an:',
COinS = 'pre', -- use prefix value
COinS = 'pre', -- use prefix value
encode = true,
encode = true,
Line 1,831: Line 1,990:
q = 'Q1420342',
q = 'Q1420342',
label = 'JSTOR',
label = 'JSTOR',
prefix = '//www.jstor.org/stable/', -- protocol-relative tested 2013-09-04
prefix = 'https://www.jstor.org/stable/', -- protocol-relative tested 2013-09-04
COinS = 'pre', -- use prefix value
COinS = 'pre', -- use prefix value
encode = false,
encode = false,
Line 1,843: Line 2,002:
q = 'Q620946',
q = 'Q620946',
label = 'LCCN',
label = 'LCCN',
prefix = '//lccn.loc.gov/', -- protocol-relative tested 2015-12-28
prefix = 'https://lccn.loc.gov/', -- protocol-relative tested 2015-12-28
COinS = 'info:lccn',
COinS = 'info:lccn',
encode = false,
encode = false,
Line 1,854: Line 2,013:
q = 'Q211172',
q = 'Q211172',
label = 'MR',
label = 'MR',
prefix = '//www.ams.org/mathscinet-getitem?mr=', -- protocol-relative tested 2013-09-04
prefix = 'https://mathscinet.ams.org/mathscinet-getitem?mr=',
COinS = 'pre', -- use prefix value
COinS = 'pre', -- use prefix value
encode = true,
encode = true,
Line 1,865: Line 2,024:
q = 'Q190593',
q = 'Q190593',
label = 'OCLC',
label = 'OCLC',
prefix = '//www.worldcat.org/oclc/',
prefix = 'https://www.worldcat.org/oclc/',
COinS = 'info:oclcnum',
COinS = 'info:oclcnum',
encode = true,
encode = true,
Line 1,877: Line 2,036:
q = 'Q1201876',
q = 'Q1201876',
label = 'OL',
label = 'OL',
prefix = '//openlibrary.org/',
prefix = 'https://openlibrary.org/',
COinS = 'url',
COinS = 'url',
separator = '&nbsp;',
separator = '&nbsp;',
Line 1,889: Line 2,048:
q = 'Q2015776',
q = 'Q2015776',
label = 'OSTI',
label = 'OSTI',
prefix = '//www.osti.gov/biblio/', -- protocol-relative tested 2018-09-12
prefix = 'https://www.osti.gov/biblio/', -- protocol-relative tested 2018-09-12
COinS = 'pre', -- use prefix value
COinS = 'pre', -- use prefix value
encode = true,
encode = true,
separator = '&nbsp;',
separator = '&nbsp;',
id_limit = 23000000,
id_limit = 23010000,
custom_access = 'osti-access',
custom_access = 'osti-access',
},
},
Line 1,902: Line 2,061:
q = 'Q229883',
q = 'Q229883',
label = 'PMC',
label = 'PMC',
prefix = '//www.ncbi.nlm.nih.gov/pmc/articles/PMC',
prefix = 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC',
suffix = '',
suffix = '',
COinS = 'pre', -- use prefix value
COinS = 'pre', -- use prefix value
encode = true,
encode = true,
separator = '&nbsp;',
separator = '&nbsp;',
id_limit = 8500000,
id_limit = 10300000,
access = 'free', -- free to read
access = 'free', -- free to read
},
},
Line 1,916: Line 2,075:
q = 'Q2082879',
q = 'Q2082879',
label = 'PMID',
label = 'PMID',
prefix = '//pubmed.ncbi.nlm.nih.gov/',
prefix = 'https://pubmed.ncbi.nlm.nih.gov/',
COinS = 'info:pmid',
COinS = 'info:pmid',
encode = false,
encode = false,
separator = '&nbsp;',
separator = '&nbsp;',
id_limit = 34900000,
id_limit = 37400000,
},
},
['RFC'] = {
['RFC'] = {
Line 1,928: Line 2,087:
q = 'Q212971',
q = 'Q212971',
label = 'RFC',
label = 'RFC',
prefix = '//tools.ietf.org/html/rfc',
prefix = 'https://tools.ietf.org/html/rfc',
COinS = 'pre', -- use prefix value
COinS = 'pre', -- use prefix value
encode = false,
encode = false,
separator = '&nbsp;',
separator = '&nbsp;',
id_limit = 9000,
id_limit = 9300,
access = 'free', -- free to read
access = 'free', -- free to read
},
},
Line 1,950: Line 2,109:
q = 'Q7550801',
q = 'Q7550801',
label = 'SSRN',
label = 'SSRN',
prefix = '//ssrn.com/abstract=', -- protocol-relative tested 2013-09-04
prefix = 'https://papers.ssrn.com/sol3/papers.cfm?abstract_id=',
COinS = 'pre', -- use prefix value
COinS = 'pre', -- use prefix value
encode = true,
encode = true,
separator = '&nbsp;',
separator = '&nbsp;',
id_limit = 4000000,
id_limit = 4500000,
access = 'free', -- always free to read
custom_access = 'ssrn-access',
},
},
['S2CID'] = {
['S2CID'] = {
Line 1,967: Line 2,126:
encode = false,
encode = false,
separator = '&nbsp;',
separator = '&nbsp;',
id_limit = 240000000,
id_limit = 260000000,
custom_access = 's2cid-access',
custom_access = 's2cid-access',
},
},
Line 1,987: Line 2,146:
q = 'Q190269',
q = 'Q190269',
label = 'Zbl',
label = 'Zbl',
prefix = '//zbmath.org/?format=complete&q=an:',
prefix = 'https://zbmath.org/?format=complete&q=an:',
COinS = 'pre', -- use prefix value
COinS = 'pre', -- use prefix value
encode = true,
encode = true,
Line 1,999: Line 2,158:


return {
return {
use_identifier_redirects = true, -- when true use redirect name for identifier label links
use_identifier_redirects = true, -- when true use redirect name for identifier label links; always true at en.wiki
local_lang_cat_enable = false; -- when true categorizes pages where |language=<local wiki's language>; always false at en.wiki
date_name_auto_xlate_enable = false; -- when true translates English month-names to the local-wiki's language month names; always false at en.wiki
date_digit_auto_xlate_enable = false; -- when true translates Western date digit to the local-wiki's language digits (date_names['local_digits']); always false at en.wiki
-- tables and variables created when this module is loaded
global_df = get_date_format (), -- this line can be replaced with "global_df = 'dmy-all'," to have all dates auto translated to dmy format.
punct_skip = build_skip_table (punct_skip, punct_meta_params),
url_skip = build_skip_table (url_skip, url_meta_params),


aliases = aliases,
aliases = aliases,
Line 2,008: Line 2,175:
editor_markup_patterns = editor_markup_patterns,
editor_markup_patterns = editor_markup_patterns,
et_al_patterns = et_al_patterns,
et_al_patterns = et_al_patterns,
global_df = global_df,
id_handlers = id_handlers,
id_handlers = id_handlers,
keywords_lists = keywords_lists,
keywords_lists = keywords_lists,
keywords_xlate = keywords_xlate,
keywords_xlate = keywords_xlate,
stripmarkers=stripmarkers,
stripmarkers = stripmarkers,
invisible_chars = invisible_chars,
invisible_chars = invisible_chars,
invisible_defs = invisible_defs,
invisible_defs = invisible_defs,
indic_script = indic_script,
indic_script = indic_script,
emoji = emoji,
emoji_t = emoji_t,
local_lang_cat_enable = local_lang_cat_enable,
maint_cats = maint_cats,
maint_cats = maint_cats,
messages = messages,
messages = messages,
presentation = presentation,
presentation = presentation,
prop_cats = prop_cats,
prop_cats = prop_cats,
punct_skip = punct_skip,
script_lang_codes = script_lang_codes,
script_lang_codes = script_lang_codes,
lang_code_remap = lang_code_remap,
lang_code_remap = lang_code_remap,
Line 2,028: Line 2,192:
this_wiki_code = this_wiki_code,
this_wiki_code = this_wiki_code,
title_types = title_types,
title_types = title_types,
uncategorized_namespaces = uncategorized_namespaces,
uncategorized_namespaces = uncategorized_namespaces_t,
uncategorized_subpages = uncategorized_subpages,
uncategorized_subpages = uncategorized_subpages,
templates_using_volume = templates_using_volume,
templates_using_volume = templates_using_volume,
Line 2,034: Line 2,198:
templates_not_using_page = templates_not_using_page,
templates_not_using_page = templates_not_using_page,
vol_iss_pg_patterns = vol_iss_pg_patterns,
vol_iss_pg_patterns = vol_iss_pg_patterns,
single_letter_2nd_lvl_domains_t = single_letter_2nd_lvl_domains_t,
inter_wiki_map = inter_wiki_map,
inter_wiki_map = inter_wiki_map,
languages = languages,
mw_languages_by_tag_t = mw_languages_by_tag_t,
mw_languages_by_name_t = mw_languages_by_name_t,
citation_class_map_t = citation_class_map_t,
 
citation_issue_t = citation_issue_t,
citation_no_volume_t = citation_no_volume_t,
}
}