Module:String2: Difference between revisions
m
1 revision imported
(sync from sandbox - handling UTF-8 in ucfirst) |
Johnrdorazio (talk | contribs) m (1 revision imported) |
||
(4 intermediate revisions by 3 users not shown) | |||
Line 1: | Line 1: | ||
local p = {} | local p = {} | ||
p.trim = function(frame) | |||
p. | return mw.text.trim(frame.args[1] or "") | ||
return | |||
end | end | ||
p.sentence = function (frame ) | p.sentence = function (frame) | ||
frame.args[1] = | -- {{lc:}} is strip-marker safe, string.lower is not. | ||
frame.args[1] = frame:callParserFunction('lc', frame.args[1]) | |||
return p.ucfirst(frame) | return p.ucfirst(frame) | ||
end | end | ||
Line 59: | Line 53: | ||
local words = mw.text.split( s, " ") | local words = mw.text.split( s, " ") | ||
for i, s in ipairs(words) do | for i, s in ipairs(words) do | ||
-- {{lc:}} is strip-marker safe, string.lower is not. | |||
if | s = frame:callParserFunction('lc', s) | ||
if i == 1 or alwayslower[s] ~= 1 then | |||
s = mw.getContentLanguage():ucfirst(s) | s = mw.getContentLanguage():ucfirst(s) | ||
end | end | ||
Line 68: | Line 61: | ||
end | end | ||
return table.concat(words, " ") | return table.concat(words, " ") | ||
end | |||
-- findlast finds the last item in a list | |||
-- the first unnamed parameter is the list | |||
-- the second, optional unnamed parameter is the list separator (default = comma space) | |||
-- returns the whole list if separator not found | |||
-- Returns the last item of a separated list.
-- frame.args[1]: the list; surrounding whitespace is trimmed.
-- frame.args[2]: optional separator; an absent or empty value means ", ".
-- The separator is matched literally: every punctuation character in it is
-- escaped, so separators such as "." or " - " are not read as Lua pattern
-- syntax.
-- Returns the text after the last separator, or the whole trimmed list when
-- the separator does not occur.
p.findlast = function(frame)
	local s = mw.text.trim( frame.args[1] or "" )
	local sep = frame.args[2] or ""
	if sep == "" then sep = ", " end
	-- In a Lua pattern, '%' followed by a punctuation character stands for that
	-- character itself, so this turns the separator into a literal match.
	local literal_sep = sep:gsub("%p", "%%%0")
	-- The greedy leading ".*" makes the capture start after the LAST separator.
	local last = s:match(".*" .. literal_sep .. "(.*)")
	-- An empty capture (list ends with the separator) is still a match.
	if last then
		return last
	end
	return s
end
Line 93: | Line 103: | ||
-- any double quotes " are stripped out. | -- any double quotes " are stripped out. | ||
p.posnq = function(frame) | p.posnq = function(frame) | ||
local str = mw.text.trim( | local args = frame.args | ||
local match = mw.text.trim( | local pargs = frame:getParent().args | ||
if | for k, v in pairs(pargs) do | ||
args[k] = v | |||
end | |||
local str = mw.text.trim(args[1] or args.source or "") | |||
local match = mw.text.trim(args[2] or args.target or ""):gsub('"', '') | |||
if str == "" or match == "" then return nil end | |||
local plain = mw.text.trim(args[3] or args.plain or "") | |||
if plain == "false" then plain = false else plain = true end | |||
local nomatch = mw.text.trim(args[4] or args.nomatch or "") | |||
-- just take the start position | -- just take the start position | ||
local pos = | local pos = mw.ustring.find(str, match, 1, plain) or nomatch | ||
return pos | return pos | ||
end | end | ||
Line 147: | Line 165: | ||
end | end | ||
-- findpagetext returns the position of a piece of text in a page | |||
-- First positional parameter or |text is the search text | |||
-- Optional parameter |title is the page title, defaults to current page | |||
-- Optional parameter |plain is either true for plain search (default) or false for Lua pattern search | |||
-- Optional parameter |nomatch is the return value when no match is found; default is nil | |||
-- Core of findpagetext; takes a plain table of arguments.
-- args[1] or args.text: the text to search for (trimmed); nil is returned when empty.
-- args.title: page to search; absent or empty means the current page.
-- args.plain: a value starting with "f" selects Lua pattern search, anything else plain search.
-- args.nomatch: value returned when nothing is found; absent or empty means nil.
-- Returns the start position of the first match, otherwise nomatch. nomatch is
-- also returned when the title is invalid or the page has no content, instead
-- of raising a script error.
p._findpagetext = function(args)
	-- process parameters
	local nomatch = args.nomatch or ""
	if nomatch == "" then nomatch = nil end
	--
	local text = mw.text.trim(args[1] or args.text or "")
	if text == "" then return nil end
	--
	local title = args.title or ""
	local titleobj
	if title == "" then
		titleobj = mw.title.getCurrentTitle()
	else
		titleobj = mw.title.new(title)
	end
	-- mw.title.new yields nil when the text is not a valid title
	if not titleobj then return nomatch end
	--
	local plain = args.plain or ""
	plain = plain:sub(1, 1) ~= "f"
	-- get the page content; it is nil when the page does not exist
	local content = titleobj:getContent()
	if not content then return nomatch end
	-- 'or' keeps only the first value of find, i.e. the start position
	return mw.ustring.find(content, text, 1, plain) or nomatch
end
-- Entry point for #invoke: copies the parent frame's arguments over the
-- frame's own arguments, then hands the merged table to p._findpagetext.
-- Returns nil when neither the first positional parameter nor |text= is present.
p.findpagetext = function(frame)
	local merged = frame.args
	-- parent (template call) values overwrite same-named #invoke values
	for key, value in pairs(frame:getParent().args) do
		merged[key] = value
	end
	if not merged[1] and not merged.text then
		return nil
	end
	-- keep only the first value returned by the helper
	local position = p._findpagetext(merged)
	return position
end
-- returns the decoded url. Inverse of parser function {{urlencode:val|TYPE}} | |||
-- Type is: | |||
-- QUERY decodes + to space (default) | |||
-- PATH does no extra decoding | |||
-- WIKI decodes _ to space | |||
-- Decodes a percent-encoded string with mw.uri.decode.
-- url: the text to decode; nil is treated as the empty string.
-- type: "PATH" or "WIKI" is passed through; any other value is replaced by
-- false before the call to mw.uri.decode.
p._urldecode = function(url, type)
	local text = url or ""
	local enctype = false
	if type == "PATH" or type == "WIKI" then
		enctype = type
	end
	return mw.uri.decode( text, enctype )
end
-- {{#invoke:String2|urldecode|url=url|type=type}} | |||
-- Template entry point for URL decoding.
-- frame.args.url: text to decode; a missing value decodes to the empty string.
-- frame.args.type: PATH or WIKI in any letter case; anything else, including a
-- missing or empty value, falls back to the QUERY default.
-- Delegates to p._urldecode so both entry points validate their arguments the
-- same way, rather than passing raw values straight to mw.uri.decode.
p.urldecode = function(frame)
	local enctype = frame.args.type or ""
	-- upper-case first so that e.g. |type=path keeps working
	return p._urldecode( frame.args.url, enctype:upper() )
end
-- what follows was merged from Module:StringFunc | |||
-- helper functions | |||
p._GetParameters = require('Module:GetParameters') | |||
-- Argument list helper function, as per Module:String | |||
p._getParameters = p._GetParameters.getParameters | |||
-- Escape Pattern helper function so that all characters are treated as plain text, as per Module:String | |||
-- Prefixes each of the characters ( ) . % + - * ? [ ^ $ ] in pattern_str with
-- '%'. Returns whatever mw.ustring.gsub returns for that substitution.
function p._escapePattern( pattern_str )
	local magic_chars = "([%(%)%.%%%+%-%*%?%[%^%$%]])"
	local escaped_form = "%%%1"
	return mw.ustring.gsub( pattern_str, magic_chars, escaped_form )
end
-- Helper Function to interpret boolean strings, as per Module:String | |||
p._getBoolean = p._GetParameters.getBoolean | |||
--[[ | |||
Strip | |||
This function Strips characters from string | |||
Usage: | |||
{{#invoke:String2|strip|source_string|characters_to_strip|plain_flag}} | |||
Parameters | |||
source: The string to strip | |||
chars: The pattern or list of characters to strip from string, replaced with '' | |||
plain: A flag indicating that the chars should be understood as plain text. defaults to true. | |||
Leading and trailing whitespace is also automatically stripped from the string. | |||
]] | |||
-- Removes every character listed in 'chars' from 'source'.
-- Arguments are read through p._getParameters under the names source, chars
-- and plain. The source is trimmed first; when the trimmed source or chars is
-- empty, the trimmed source is returned unchanged.
-- When plain is true (the default when the parameter is absent), chars is
-- escaped so that each character is taken literally; otherwise chars is used
-- as the contents of a Lua character set.
function p.strip( frame )
	local new_args = p._getParameters( frame.args, {'source', 'chars', 'plain'} )
	local source_str = mw.text.trim( new_args['source'] or '' )
	-- The former "or 'characters'" operand could never be reached, because the
	-- preceding '' is truthy in Lua; it has been removed.
	local chars = new_args['chars'] or ''
	if source_str == '' or chars == '' then
		return source_str
	end
	local l_plain = p._getBoolean( new_args['plain'] or true )
	if l_plain then
		chars = p._escapePattern( chars )
	end
	-- assign to a local so that only the string, not the match count, is returned
	local result = mw.ustring.gsub( source_str, "[" .. chars .. "]", '' )
	return result
end
--[[ | |||
Match any | |||
Returns the index of the first given pattern to match the input. Patterns must be consecutively numbered. | |||
Returns the empty string if nothing matches for use in {{#if:}} | |||
Usage: | |||
{{#invoke:String2|matchAny|source=123 abc|456|abc}} returns '2'. | |||
Parameters: | |||
source: the string to search | |||
plain: A flag indicating that the patterns should be understood as plain text. defaults to true. | |||
1, 2, 3, ...: the patterns to search for | |||
]] | |||
-- Tries the numbered arguments 1, 2, 3, ... in order as search patterns
-- against |source= and returns the index of the first one found, as a string.
-- Returns '' once a numbered argument is missing without any match.
-- Raises an error when |source= is absent.
function p.matchAny(frame)
	local args = frame.args
	local source_str = args['source'] or error('The source parameter is mandatory.')
	local l_plain = p._getBoolean( args['plain'] or true )
	local index = 1
	local pattern = args[index]
	while pattern do
		if mw.ustring.find(source_str, pattern, 1, l_plain) then
			return tostring(index)
		end
		index = index + 1
		pattern = args[index]
	end
	return ''
end
--[[--------------------------< H Y P H E N _ T O _ D A S H >-------------------------------------------------- | |||
Converts a hyphen to a dash under certain conditions. The hyphen must separate | |||
like items; unlike items are returned unmodified. These forms are modified: | |||
letter - letter (A - B) | |||
digit - digit (4-5) | |||
digit separator digit - digit separator digit (4.1-4.5 or 4-1-4-5) | |||
letterdigit - letterdigit (A1-A5) (an optional separator between letter and | |||
digit is supported – a.1-a.5 or a-1-a-5) | |||
digitletter - digitletter (5a - 5d) (an optional separator between letter and | |||
digit is supported – 5.a-5.d or 5-a-5-d) | |||
any other forms are returned unmodified. | |||
str may be a comma- or semicolon-separated list | |||
]] | |||
-- Converts hyphens to en dashes in each item of a comma- or semicolon-separated
-- list, for the item shapes described in the header comment above.
-- str: the text to convert; nil and '' are returned as they are.
-- Items wrapped in (( )) have that markup removed and are otherwise left alone.
-- Returns the items joined with ', '.
function p.hyphen_to_dash( str )
	if str == nil or str == '' then
		return str
	end

	local accept
	-- replace html entities with their characters; their semicolons would confuse the split below
	str = mw.text.decode( str, true )

	local converted = {}
	-- split at comma or semicolon separators, if there are any
	for _, item in ipairs( mw.text.split( str, '%s*[,;]%s*' ) ) do
		item = mw.text.trim( item )
		-- strip accept-this-as-written markup; accept counts the replacements made
		item, accept = item:gsub( '^%(%((.+)%)%)$', '%1' )
		-- only unmarked items that look like a range with hyphen, en dash or em dash are touched
		if accept == 0 and mw.ustring.match( item, '^%w*[%.%-]?%w+%s*[%-–—]%s*%w*[%.%-]?%w+$' ) then
			local like_items =
				item:match( '^%a+[%.%-]?%d+%s*%-%s*%a+[%.%-]?%d+$' )		-- letterdigit hyphen letterdigit
				or item:match( '^%d+[%.%-]?%a+%s*%-%s*%d+[%.%-]?%a+$' )	-- digitletter hyphen digitletter
				or item:match( '^%d+[%.%-]%d+%s*%-%s*%d+[%.%-]%d+$' )		-- digit separator digit hyphen digit separator digit
				or item:match( '^%d+%s*%-%s*%d+$' )						-- digit hyphen digit
				or item:match( '^%a+%s*%-%s*%a+$' )						-- letter hyphen letter
			if like_items then
				-- replace the hyphen and drop the whitespace around it
				item = item:gsub( '(%w*[%.%-]?%w+)%s*%-%s*(%w*[%.%-]?%w+)', '%1–%2' )
			else
				-- en dash or em dash separated range: normalise to en dash, drop whitespace
				item = mw.ustring.gsub( item, '%s*[–—]%s*', '–' )
			end
		end
		converted[#converted + 1] = item
	end

	local temp_str = table.concat( converted, ', ' )
	-- check whether accept-this-as-written markup wraps the whole reassembled string
	temp_str, accept = temp_str:gsub( '^%(%((.+)%)%)$', '%1' )
	if accept ~= 0 then
		-- markup wrapped everything: return the decoded input with the markup removed;
		-- assigning to a single variable discards gsub's second return value
		temp_str = str:gsub( '^%(%((.+)%)%)$', '%1' )
	end
	return temp_str
end
-- Template entry point: applies p.hyphen_to_dash to the first unnamed
-- parameter, using the empty string when that parameter is absent.
function p.hyphen2dash( frame )
	return p.hyphen_to_dash( frame.args[1] or '' )
end
return p | return p |