Module:Citation/CS1/Date validation: Difference between revisions

better;
m (1 revision imported)
(better;)
Line 3: Line 3:
]]
]]


local add_prop_cat, is_set, in_array, wrap_style; -- imported functions from selected Module:Citation/CS1/Utilities
local add_prop_cat, is_set, in_array, set_message, substitute, wrap_style; -- imported functions from selected Module:Citation/CS1/Utilities
local cfg; -- table of tables imported from selected Module:Citation/CS1/Configuration
local cfg; -- table of tables imported from selected Module:Citation/CS1/Configuration


Line 44: Line 44:
if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which which tonumber() may not understand
if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which which tonumber() may not understand
access_ts = tonumber (access_ts) or lang_object:parseFormattedNumber (access_ts); -- convert to numbers for the comparison;
access_ts = tonumber (access_ts) or lang_object:parseFormattedNumber (access_ts); -- convert to numbers for the comparison;
tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts);
tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts);
else
else
Line 85: Line 85:


local function get_month_number (month)
local function get_month_number (month)
return cfg.date_names['local'].long[month] or cfg.date_names['local'].short[month] or -- look for local names first
return cfg.date_names['local'].long[month] or cfg.date_names['local'].short[month] or -- look for local names first
cfg.date_names['en'].long[month] or cfg.date_names['en'].short[month] or -- failing that, look for English names
cfg.date_names['en'].long[month] or cfg.date_names['en'].short[month] or -- failing that, look for English names
0; -- not a recognized month name
0; -- not a recognized month name
end
end


Line 102: Line 102:
which became part of ISO 8601 in 2019.  See '§Sub-year groupings'.  The standard defines various divisions using
which became part of ISO 8601 in 2019.  See '§Sub-year groupings'.  The standard defines various divisions using
numbers 21-41.  cs1|2 only supports generic seasons.  EDTF does support the distinction between north and south
numbers 21-41.  cs1|2 only supports generic seasons.  EDTF does support the distinction between north and south
hemispere seasons but cs1|2 has no way to make that distinction.
hemisphere seasons but cs1|2 has no way to make that distinction.


These additional divisions not currently supported:
These additional divisions not currently supported:
Line 228: Line 228:
month = tonumber(month); -- required for YYYY-MM-DD dates
month = tonumber(month); -- required for YYYY-MM-DD dates
if (2 == month) then -- if February
if (2 == month) then -- if February
month_length = 28; -- then 28 days unless
month_length = 28; -- then 28 days unless
if 1582 > tonumber(year) then -- Julian calendar
if 1582 > tonumber(year) then -- Julian calendar
if 0 == (year%4) then -- is a leap year?
if 0 == (year%4) then -- is a leap year?
month_length = 29; -- if leap year then 29 days in February
month_length = 29; -- if leap year then 29 days in February
end
end
else -- Gregorian calendar
else -- Gregorian calendar
if (0 == (year%4) and (0 ~= (year%100) or 0 == (year%400))) then -- is a leap year?
if (0 == (year%4) and (0 ~= (year%100) or 0 == (year%400))) then -- is a leap year?
month_length = 29; -- if leap year then 29 days in February
month_length = 29; -- if leap year then 29 days in February
end
end
Line 299: Line 299:
-- here when range_start is a month
-- here when range_start is a month
range_end_number = get_month_number (range_end); -- get end month number
range_end_number = get_month_number (range_end); -- get end month number
if range_start_number < range_end_number then -- range_start is a month; does range_start precede range_end?
if range_start_number < range_end_number and -- range_start is a month; does range_start precede range_end?
if is_valid_month_range_style (range_start, range_end) then -- do months have the same style?
is_valid_month_range_style (range_start, range_end) then -- do months have the same style?
return true; -- proper order and same style
return true; -- proper order and same style
end
end
end
return false; -- range_start month number is greater than or equal to range end number; or range end isn't a month
return false; -- range_start month number is greater than or equal to range end number; or range end isn't a month
Line 429: Line 428:
['dMy'] = {'^([1-9]%d?) +(%D-) +((%d%d%d%d?)%a?)$', 'd', 'm', 'a', 'y'},
['dMy'] = {'^([1-9]%d?) +(%D-) +((%d%d%d%d?)%a?)$', 'd', 'm', 'a', 'y'},
-- year-initial: year month day; day: 1 or 2 two digits, leading zero allowed; not supported at en.wiki
-- year-initial: year month day; day: 1 or 2 two digits, leading zero allowed; not supported at en.wiki
-- ['yMd'] = {'^((%d%d%d%d?)%a?) +(%D-) +(%d%d?)$', 'a', 'y', 'm', 'd'},
-- ['yMd'] = {'^((%d%d%d%d?)%a?) +(%D-) +(%d%d?)$', 'a', 'y', 'm', 'd'},
-- day-range-initial: day–day month year; days are separated by endash
-- day-range-initial: day–day month year; days are separated by endash
['d-dMy'] = {'^([1-9]%d?)[%-–]([1-9]%d?) +(%D-) +((%d%d%d%d)%a?)$', 'd', 'd2', 'm', 'a', 'y'},
['d-dMy'] = {'^([1-9]%d?)[%-–]([1-9]%d?) +(%D-) +((%d%d%d%d)%a?)$', 'd', 'd2', 'm', 'a', 'y'},
Line 454: Line 453:
['y-y'] = {'^(%d%d%d%d?)[%-–]((%d%d%d%d?)%a?)$'}, -- year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999
['y-y'] = {'^(%d%d%d%d?)[%-–]((%d%d%d%d?)%a?)$'}, -- year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999
['y4-y2'] = {'^((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- year range: YYYY–YY; separated by unspaced endash
['y4-y2'] = {'^((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- year range: YYYY–YY; separated by unspaced endash
['ymx'] = {'^(%d%d%d%d)%-(%d%d)%-XX$', 'y', 'm'}, -- edtf year-initial numerical year-month-XX
['y'] = {'^((%d%d%d%d?)%a?)$'}, -- year; here accept either YYY or YYYY
['y'] = {'^((%d%d%d%d?)%a?)$'}, -- year; here accept either YYY or YYYY
}
}
Line 482: Line 482:
local function check_date (date_string, param, tCOinS_date)
local function check_date (date_string, param, tCOinS_date)
local year; -- assume that year2, months, and days are not used;
local year; -- assume that year2, months, and days are not used;
local year2 = 0; -- second year in a year range
local year2 = 0; -- second year in a year range
local month = 0;
local month = 0;
local month2 = 0; -- second month in a month range
local month2 = 0; -- second month in a month range
local day = 0;
local day = 0;
local day2 = 0; -- second day in a day range
local day2 = 0; -- second day in a day range
Line 495: Line 495:
anchor_year = year;
anchor_year = year;
elseif date_string:match (patterns['ymx'][1]) then -- year-initial numerical year month edtf format
year, month = date_string:match (patterns['ymx'][1]);
if 12 < tonumber(month) or 1 > tonumber(month) or 1582 > tonumber(year) or not is_valid_year(year) then return false; end -- month number not valid or not Gregorian calendar or future year
anchor_year = year;
elseif mw.ustring.match(date_string, patterns['Mdy'][1]) then -- month-initial: month day, year
elseif mw.ustring.match(date_string, patterns['Mdy'][1]) then -- month-initial: month day, year
month, day, anchor_year, year = mw.ustring.match(date_string, patterns['Mdy'][1]);
month, day, anchor_year, year = mw.ustring.match(date_string, patterns['Mdy'][1]);
Line 562: Line 567:
month, year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns['Sy4-y2'][1]);
month, year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns['Sy4-y2'][1]);
if 'Winter' ~= month and 'Summer' ~= month then return false end; -- 'month' can only be Winter or Summer
if 'Winter' ~= month and 'Summer' ~= month then return false end; -- 'month' can only be Winter or Summer
anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years
anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years
year2 = century..year2; -- add the century to year2 for comparisons
year2 = century..year2; -- add the century to year2 for comparisons
if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later
if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later
Line 578: Line 583:
elseif mw.ustring.match(date_string, patterns['My-My'][1]) then -- month/season year - month/season year; separated by spaced endash
elseif mw.ustring.match(date_string, patterns['My-My'][1]) then -- month/season year - month/season year; separated by spaced endash
month, year, month2, anchor_year, year2 = mw.ustring.match(date_string, patterns['My-My'][1]);
month, year, month2, anchor_year, year2 = mw.ustring.match(date_string, patterns['My-My'][1]);
anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years
anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
Line 612: Line 617:
elseif mw.ustring.match(date_string, patterns['y-y'][1]) then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999
elseif mw.ustring.match(date_string, patterns['y-y'][1]) then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999
year, anchor_year, year2 = mw.ustring.match(date_string, patterns['y-y'][1]);
year, anchor_year, year2 = mw.ustring.match(date_string, patterns['y-y'][1]);
anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years
anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
Line 619: Line 624:
local century;
local century;
year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns['y4-y2'][1]);
year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns['y4-y2'][1]);
anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years
anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years


if in_array (param, {'date', 'publication-date', 'year'}) then
if in_array (param, {'date', 'publication-date', 'year'}) then
Line 630: Line 635:
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year


elseif mw.ustring.match(date_string, patterns['y'][1]) then -- year; here accept either YYY or YYYY
elseif mw.ustring.match(date_string, patterns['y'][1]) then -- year; here accept either YYY or YYYY
anchor_year, year = mw.ustring.match(date_string, patterns['y'][1]);
anchor_year, year = mw.ustring.match(date_string, patterns['y'][1]);
if false == is_valid_year(year) then
if false == is_valid_year(year) then
Line 737: Line 742:
--[[--------------------------< Y E A R _ D A T E _ C H E C K >------------------------------------------------
--[[--------------------------< Y E A R _ D A T E _ C H E C K >------------------------------------------------


Compare the value provided in |year= with the year value(s) provided in |date=.  This function returns a numeric value:
Compare the value provided in |year= with the year value(s) provided in |date=.  This function sets a local numeric value:
0 - year value does not match the year value in date
0 - year value does not match the year value in date
1 - (default) year value matches the year value in date or one of the year values when date contains two years
1 - (default) year value matches the year value in date or one of the year values when date contains two years
2 - year value matches the year value in date when date is in the form YYYY-MM-DD and year is disambiguated (|year=YYYYx)
2 - year value matches the year value in date when date is in the form YYYY-MM-DD and year is disambiguated (|year=YYYYx)
the numeric value in <result> determines the 'output' if any from this function:
0 – adds error message to error_list sequence table
1 – adds maint cat
2 – does nothing


]]
]]


local function year_date_check (year_string, date_string)
local function year_date_check (year_string, year_origin, date_string, date_origin, error_list)
local year;
local year;
local date1;
local date1;
local date2;
local date2;
local result = 1; -- result of the test; assume that the test passes
local result = 1; -- result of the test; assume that the test passes
 
year = year_string:match ('(%d%d%d%d?)');
year = year_string:match ('(%d%d%d%d?)');


Line 780: Line 790:
result = 0;
result = 0;
end
end
else
else -- should never get here; this function called only when no other date errors
result = 0; -- no recognizable year in date
result = 0; -- no recognizable year in date
end
end
return result;
 
if 0 == result then -- year / date mismatch
table.insert (error_list, substitute (cfg.messages['mismatch'], {year_origin, date_origin})); -- add error message to error_list sequence table
elseif 1 == result then -- redundant year / date
set_message ('maint_date_year'); -- add a maint cat
end
end
end


Line 819: Line 834:
['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- |df=mdy
['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- |df=mdy
['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- |df=dmy
['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- |df=dmy
-- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, -- |df=yMd; not supported at en.wiki
-- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, -- |df=yMd; not supported at en.wiki
},
},
['Mdy'] = { -- date format is Mdy; reformat to:
['Mdy'] = { -- date format is Mdy; reformat to:
Line 825: Line 840:
['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- |df=dmy
['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- |df=dmy
['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, -- |df=ymd
['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, -- |df=ymd
-- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, -- |df=yMd; not supported at en.wiki
-- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, -- |df=yMd; not supported at en.wiki
},
},
['dMy'] = { -- date format is dMy; reformat to:
['dMy'] = { -- date format is dMy; reformat to:
Line 831: Line 846:
['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- |df=mdy
['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- |df=mdy
['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, -- |df=ymd
['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, -- |df=ymd
-- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, -- |df=yMd; not supported at en.wiki
-- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, -- |df=yMd; not supported at en.wiki
},
},
['Md-dy'] = { -- date format is Md-dy; reformat to:
['Md-dy'] = { -- date format is Md-dy; reformat to:
Line 866: Line 881:
['any'] = {'%s %s', 'm', 'y'}, -- dmy/mdy agnostic
['any'] = {'%s %s', 'm', 'y'}, -- dmy/mdy agnostic
},
},
-- ['yMd'] = { -- not supported at en.wiki
-- ['yMd'] = { -- not supported at en.wiki
-- ['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- |df=mdy
-- ['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- |df=mdy
-- ['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- |df=dmy
-- ['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- |df=dmy
-- ['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, -- |df=ymd
-- ['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, -- |df=ymd
-- },
-- },
}
}


Line 887: Line 902:
end
end


-- yMd is not supported at en.wiki
-- yMd is not supported at en.wiki; if yMd is supported at your wiki, uncomment the next line
-- if yMd is supported at your wiki, uncomment the next line
-- if 'yMd' == format_param and in_array (pattern_idx, {'yMd', 'Md-dy', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy'}) then -- these formats not convertable; yMd not supported at en.wiki
-- if 'yMd' == format_param and in_array (pattern_idx, {'yMd', 'Md-dy', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy'}) then -- these formats not convertable; yMd not supported at en.wiki
-- if yMd is supported at your wiki, remove or comment-out the next line
-- if yMd is supported at your wiki, remove or comment-out the next line
if 'yMd' == format_param then -- yMd not supported at en.wiki
if 'yMd' == format_param then -- yMd not supported at en.wiki
Line 920: Line 934:
t.d = t.d:gsub ('0(%d)', '%1'); -- strip leading '0' from day if present
t.d = t.d:gsub ('0(%d)', '%1'); -- strip leading '0' from day if present
elseif 'ymd' == format_param then -- when converting to ymd
elseif 'ymd' == format_param then -- when converting to ymd
if 1582 > tonumber(t.y) then -- ymd format dates not allowed before 1582
t.y = t.y:gsub ('%a', ''); -- strip CITREF disambiguator if present; anchor year already known so process can proceed
if 1582 > tonumber (t.y) then -- ymd format dates not allowed before 1582
return;
return;
end
end
Line 1,034: Line 1,049:
local n;
local n;
for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list
for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list
if is_set (param_val.val) then
if is_set (param_val.val) and
if not mw.ustring.match (param_val.val, '%d%d%d%d%-%d%d%-%d%d') then -- for those that are not ymd dates (ustring because here digits may not be Western)
not mw.ustring.match (param_val.val, patterns.ymd[1]) then -- for those that are not ymd dates (ustring because here digits may not be Western)
param_val.val, n = param_val.val:gsub ('%-', '–'); -- replace any hyphen with ndash
param_val.val, n = param_val.val:gsub ('%-', '–'); -- replace any hyphen with ndash
if 0 ~= n then
if 0 ~= n then
Line 1,041: Line 1,056:
result = true;
result = true;
end
end
end
end
end
end
end
return result; -- so we know if any hyphens were replaced
return result; -- so we know if any hyphens were replaced
end
--[[--------------------------< E D T F _ T R A N S F O R M >--------------------------------------------------

Loops through the list of date-holding parameters and converts any EDTF formatted dates to MOS compliant dates
('<month name> <year>').  Only YYYY-MM-XX is supported at this time (see patterns.ymx).

Not called if the cs1|2 template has any date errors, so the year and month captured here are taken to have
been validated already.  Must be done before reformat_dates() and before date_hyphen_to_dash().

Parameter:
	date_parameters_list – table keyed by parameter name; each value is a table whose .val field holds the
		date string.  Matching .val fields are rewritten in place.

Returns true if at least one transformation was performed, else false.

]]

local function edtf_transform (date_parameters_list)
	local result = false;

	for _, param_val in pairs (date_parameters_list) do						-- for each date-holding parameter in the list
		if is_set (param_val.val) then
			local year, month = param_val.val:match (patterns.ymx[1]);			-- nil, nil when the value is not an EDTF YYYY-MM-XX date
			if year then
				-- day is required by os.time(); explicit tonumber() avoids relying on string coercion
				local timestamp = os.time ({year = tonumber (year), month = tonumber (month), day = 1});
				-- In Lua 5.1, os.time() yields nil when the platform cannot represent the date and
				-- os.date() would then format the *current* time; leave the value untouched in that
				-- case rather than silently replacing it with today's month and year.
				if timestamp then
					param_val.val = mw.text.trim (os.date ('%B %Y', timestamp));
					result = true;
				end
			end
		end
	end

	return result;															-- so we know if a transform was done
end
end


Line 1,093: Line 1,134:
return modified;
return modified;
end
end




Line 1,106: Line 1,147:
is_set = utilities_page_ptr.is_set;
is_set = utilities_page_ptr.is_set;
in_array = utilities_page_ptr.in_array;
in_array = utilities_page_ptr.in_array;
-- set_message = utilities_page_ptr.set_message;
set_message = utilities_page_ptr.set_message;
substitute = utilities_page_ptr.substitute;
wrap_style = utilities_page_ptr.wrap_style;
wrap_style = utilities_page_ptr.wrap_style;


Line 1,122: Line 1,164:
date_hyphen_to_dash = date_hyphen_to_dash,
date_hyphen_to_dash = date_hyphen_to_dash,
date_name_xlate = date_name_xlate,
date_name_xlate = date_name_xlate,
edtf_transform = edtf_transform,
set_selected_modules = set_selected_modules
set_selected_modules = set_selected_modules
}
}
Anonymous user