Jump to content

Module:String2: Difference between revisions

m
1 revision imported
(sync from sandbox - handling UTF-8 in ucfirst)
 
m (1 revision imported)
(4 intermediate revisions by 3 users not shown)
Line 1: Line 1:
local p = {}
local p = {}


 
p.trim = function(frame)
p.upper = function(frame)
return mw.text.trim(frame.args[1] or "")
local s = mw.text.trim(frame.args[1] or "")
return string.upper(s)
end
 
p.lower = function(frame)
local s = mw.text.trim(frame.args[1] or "")
return string.lower(s)
end
end


p.sentence = function (frame )
p.sentence = function (frame)
frame.args[1] = string.lower(frame.args[1])
-- {{lc:}} is strip-marker safe, string.lower is not.
frame.args[1] = frame:callParserFunction('lc', frame.args[1])
return p.ucfirst(frame)
return p.ucfirst(frame)
end
end
Line 59: Line 53:
local words = mw.text.split( s, " ")
local words = mw.text.split( s, " ")
for i, s in ipairs(words) do
for i, s in ipairs(words) do
s = string.lower( s )
-- {{lc:}} is strip-marker safe, string.lower is not.
if( i > 1 and alwayslower[s] == 1) then
s = frame:callParserFunction('lc', s)
-- leave in lowercase
if i == 1 or alwayslower[s] ~= 1 then
else
s = mw.getContentLanguage():ucfirst(s)
s = mw.getContentLanguage():ucfirst(s)
end
end
Line 68: Line 61:
end
end
return table.concat(words, " ")
return table.concat(words, " ")
end
-- findlast returns the final item of a delimited list
-- frame.args[1] is the list; surrounding whitespace is trimmed before searching
-- frame.args[2] is the optional list separator; when absent or empty it is ", "
-- the separator is spliced into a Lua pattern unescaped, so magic characters in it act as pattern syntax
-- when the separator does not occur, the whole (trimmed) list is returned
p.findlast = function(frame)
	local list = mw.text.trim(frame.args[1] or "")
	local delimiter = frame.args[2] or ""
	if delimiter == "" then
		delimiter = ", "
	end
	-- the greedy ".*" swallows everything up to the LAST occurrence of the
	-- delimiter, leaving only the trailing item for the capture
	local tail = list:match(".*" .. delimiter .. "(.*)")
	if tail == nil then
		return list
	end
	return tail
end
end


Line 93: Line 103:
-- any double quotes " are stripped out.
-- any double quotes " are stripped out.
p.posnq = function(frame)
p.posnq = function(frame)
local str = mw.text.trim(frame.args[1] or "")
local args = frame.args
local match = mw.text.trim(frame.args[2] or ""):gsub('"', '')
local pargs = frame:getParent().args
if str == "" or match == "" then return nil end
for k, v in pairs(pargs) do
args[k] = v
end
local str = mw.text.trim(args[1] or args.source or "")
local match = mw.text.trim(args[2] or args.target or ""):gsub('"', '')
if str == "" or match == "" then return nil end
local plain = mw.text.trim(args[3] or args.plain or "")
if plain == "false" then plain = false else plain = true end
local nomatch = mw.text.trim(args[4] or args.nomatch or "")
-- just take the start position
-- just take the start position
local pos = str:find(match, 1, true)
local pos = mw.ustring.find(str, match, 1, plain) or nomatch
return pos
return pos
end
end
Line 147: Line 165:
end
end


-- findpagetext returns the position of a piece of text in a page
-- First positional parameter or |text is the search text
-- Optional parameter |title is the page title, defaults to current page
-- Optional parameter |plain is either true for plain search (default) or false for Lua pattern search
--   (any value whose string form starts with "f" selects pattern search; booleans are accepted from Lua callers)
-- Optional parameter |nomatch is the return value when no match is found; default is nil
-- nomatch is also returned when the title is invalid or the page content cannot be read
-- Returns nil when the search text is empty
p._findpagetext = function(args)
	-- process parameters
	local nomatch = args.nomatch or ""
	if nomatch == "" then nomatch = nil end
	--
	local text = mw.text.trim(args[1] or args.text or "")
	if text == "" then return nil end
	--
	local title = args.title or ""
	local titleobj
	if title == "" then
		titleobj = mw.title.getCurrentTitle()
	else
		titleobj = mw.title.new(title)
	end
	-- mw.title.new yields nil for an invalid title; treat that as "not found"
	-- rather than raising a script error when indexing nil
	if not titleobj then return nomatch end
	--
	-- tostring() lets Lua callers pass a real boolean without crashing on :sub
	local plain = args.plain
	if plain == nil then plain = "" end
	plain = tostring(plain):sub(1, 1) ~= "f"
	-- get the page content and look for 'text' - return position or nomatch
	local content = titleobj:getContent()
	-- getContent yields nil when the page does not exist
	if not content then return nomatch end
	-- only the start position is kept: 'or' truncates find's results to one value
	local start = mw.ustring.find(content, text, 1, plain)
	return start or nomatch
end
-- Template-facing wrapper around p._findpagetext.
-- Copies every argument of the parent frame into frame.args (parent values
-- overwrite same-named #invoke values), then delegates the search.
-- Returns nil when neither a first positional argument nor |text is supplied;
-- otherwise returns only the first value produced by p._findpagetext.
p.findpagetext = function(frame)
	local merged = frame.args
	for key, value in pairs(frame:getParent().args) do
		merged[key] = value
	end
	if merged[1] or merged.text then
		-- parentheses keep just the first value
		return (p._findpagetext(merged))
	end
	return nil
end
-- returns the decoded url. Inverse of parser function {{urlencode:val|TYPE}}
-- Type is:
-- QUERY decodes + to space (default)
-- PATH does no extra decoding
-- WIKI decodes _ to space
-- Any type other than "PATH" or "WIKI" (including nil) is treated as "QUERY".
-- A nil url is treated as the empty string.
p._urldecode = function(url, type)
	url = url or ""
	-- mw.uri.decode only accepts a string (or nil) as its second argument;
	-- the former "(cond) and type" idiom produced boolean false for the
	-- default case, which made the call raise a type error.
	if type ~= "PATH" and type ~= "WIKI" then
		type = "QUERY"
	end
	return mw.uri.decode( url, type )
end
-- {{#invoke:String2|urldecode|url=url|type=type}}
-- Template entry point for url decoding.
-- |url= is the text to decode; when omitted the empty string is decoded.
-- |type= is QUERY (default), PATH or WIKI, matched case-insensitively;
-- an omitted, empty or unrecognised type falls back to QUERY.
p.urldecode = function(frame)
	-- guard against a missing |url=, which mw.uri.decode would reject
	local url = frame.args.url or ""
	-- normalise the type here so that "|type=" (empty) no longer raises an error
	local dectype = string.upper(frame.args.type or "")
	if dectype ~= "PATH" and dectype ~= "WIKI" then
		dectype = "QUERY"
	end
	return mw.uri.decode( url, dectype )
end
-- what follows was merged from Module:StringFunc
-- helper functions
-- Module:GetParameters is loaded once here and kept on the export table so the
-- aliases below can reach its functions
p._GetParameters = require('Module:GetParameters')
-- Argument list helper function, as per Module:String
-- (alias of getParameters from Module:GetParameters; called below with the
-- frame arguments and a list of parameter names)
p._getParameters = p._GetParameters.getParameters
-- Escape Pattern helper function, as per Module:String: prefixes every Lua
-- pattern magic character in pattern_str with "%" so the text matches literally.
-- The results of mw.ustring.gsub are passed straight through, so callers that
-- want only the escaped string should keep just the first returned value.
function p._escapePattern( pattern_str)
	-- character set listing the magic characters ( ) . % + - * ? [ ^ $ ]
	local magic_chars = "([%(%)%.%%%+%-%*%?%[%^%$%]])"
	return mw.ustring.gsub( pattern_str, magic_chars, "%%%1" )
end
-- Helper Function to interpret boolean strings, as per Module:String
-- (alias of getBoolean from Module:GetParameters; used below to turn the
-- |plain= argument into a Lua boolean)
p._getBoolean = p._GetParameters.getBoolean
--[[
Strip
This function Strips characters from string
Usage:
{{#invoke:String2|strip|source_string|characters_to_strip|plain_flag}}
Parameters
source: The string to strip
chars:  The pattern or list of characters to strip from string, replaced with ''
plain:  A flag indicating that the chars should be understood as plain text. defaults to true.
Leading and trailing whitespace is also automatically stripped from the string.
Returns the trimmed source unchanged when either source or chars is empty.
]]
function p.strip( frame )
	local new_args = p._getParameters( frame.args, {'source', 'chars', 'plain'} )
	local source_str = mw.text.trim( new_args['source'] or '' )
	-- the former trailing "or 'characters'" could never be reached ('' is truthy in Lua)
	local chars = new_args['chars'] or ''
	if source_str == '' or chars == '' then
		return source_str
	end
	-- apply the default only when the flag is absent: "x or true" would also
	-- override an explicit boolean false supplied by a Lua caller
	local plain_arg = new_args['plain']
	if plain_arg == nil then
		plain_arg = true
	end
	if p._getBoolean( plain_arg ) then
		-- keep only the escaped string, not the substitution count
		chars = p._escapePattern( chars )
	end
	-- chars is used as the body of a character set, so every listed character
	-- is removed wherever it occurs
	local result = mw.ustring.gsub( source_str, "[" .. chars .. "]", '' )
	return result
end
--[[
Match any
Returns the index of the first given pattern to match the input. Patterns must be consecutively numbered.
Returns the empty string if nothing matches for use in {{#if:}}
Usage:
{{#invoke:String2|matchAny|source=123 abc|456|abc}} returns '2'.
Parameters:
source: the string to search (mandatory; an error is raised when it is missing)
plain:  A flag indicating that the patterns should be understood as plain text. defaults to true.
1, 2, 3, ...: the patterns to search for
]]
function p.matchAny(frame)
	local source_str = frame.args['source'] or error('The source parameter is mandatory.')
	-- apply the default only when the flag is absent: "x or true" would also
	-- override an explicit boolean false supplied by a Lua caller
	local plain_arg = frame.args['plain']
	if plain_arg == nil then
		plain_arg = true
	end
	local l_plain = p._getBoolean( plain_arg )
	-- walk the positional arguments in order; the first gap ends the search
	for i = 1, math.huge do
		local pattern = frame.args[i]
		if not pattern then return '' end
		if mw.ustring.find(source_str, pattern, 1, l_plain) then
			return tostring(i)
		end
	end
end
--[[--------------------------< H Y P H E N _ T O _ D A S H >--------------------------------------------------
Converts a hyphen to a dash under certain conditions.  The hyphen must separate
like items; unlike items are returned unmodified.  These forms are modified:
letter - letter (A - B)
digit - digit (4-5)
digit separator digit - digit separator digit (4.1-4.5 or 4-1-4-5)
letterdigit - letterdigit (A1-A5) (an optional separator between letter and
digit is supported – a.1-a.5 or a-1-a-5)
digitletter - digitletter (5a - 5d) (an optional separator between letter and
digit is supported – 5.a-5.d or 5-a-5-d)
any other forms are returned unmodified.
str may be a comma- or semicolon-separated list

Accept-this-as-written markup: an item, or the whole of str, wrapped in ((...))
has the wrapping removed and is otherwise left as written.

Parameter:
str: the text to convert; nil and the empty string are returned as-is
Returns:
the converted items joined with ', ' (html entities in str are decoded first)
]]
function p.hyphen_to_dash( str )
if (str == nil or str == '') then
return str;
end
-- accept receives the substitution count of the ((...)) unwrapping gsub calls
local accept;
str = mw.text.decode(str, true ) -- replace html entities with their characters; semicolon mucks up the text.split
local out = {};
local list = mw.text.split (str, '%s*[,;]%s*'); -- split str at comma or semicolon separators if there are any
for _, item in ipairs (list) do -- for each item in the list
item = mw.text.trim(item) -- trim whitespace
-- unwrap an item written as ((...)); a non-zero accept means the item was wrapped and must not be converted
item, accept = item:gsub ('^%(%((.+)%)%)$', '%1');
if accept == 0 and mw.ustring.match (item, '^%w*[%.%-]?%w+%s*[%-–—]%s*%w*[%.%-]?%w+$') then -- if a hyphenated range or has endash or emdash separators
if item:match ('^%a+[%.%-]?%d+%s*%-%s*%a+[%.%-]?%d+$') or -- letterdigit hyphen letterdigit (optional separator between letter and digit)
item:match ('^%d+[%.%-]?%a+%s*%-%s*%d+[%.%-]?%a+$') or -- digitletter hyphen digitletter (optional separator between digit and letter)
item:match ('^%d+[%.%-]%d+%s*%-%s*%d+[%.%-]%d+$') or -- digit separator digit hyphen digit separator digit
item:match ('^%d+%s*%-%s*%d+$') or -- digit hyphen digit
item:match ('^%a+%s*%-%s*%a+$') then -- letter hyphen letter
-- assigning to a single variable discards gsub's substitution count
item = item:gsub ('(%w*[%.%-]?%w+)%s*%-%s*(%w*[%.%-]?%w+)', '%1–%2'); -- replace hyphen, remove extraneous space characters
else
item = mw.ustring.gsub (item, '%s*[–—]%s*', '–'); -- for endash or emdash separated ranges, replace em with en, remove extraneous whitespace
end
end
table.insert (out, item); -- add the (possibly modified) item to the output table
end
local temp_str = table.concat (out, ', '); -- concatenate the output table into a comma separated string
temp_str, accept = temp_str:gsub ('^%(%((.+)%)%)$', '%1'); -- remove accept-this-as-written markup when it wraps all of concatenated out
if accept ~= 0 then
-- str here is the entity-decoded input, so the caller's own separators and spacing are kept
temp_str = str:gsub ('^%(%((.+)%)%)$', '%1'); -- when global markup removed, return original str; do it this way to suppress boolean second return value
return temp_str;
else
return temp_str; -- else, return assembled temp_str
end
end
-- Template entry point for p.hyphen_to_dash: converts the first unnamed
-- argument, using the empty string when that argument is absent.
function p.hyphen2dash( frame )
	return p.hyphen_to_dash( frame.args[1] or '' )
end


return p
return p