Module:D'ni Time/surface

From Guild of Archivists

Documentation for this module may be created at Module:D'ni Time/surface/doc

local p = {}

local args = {}
local origArgs
local root


local function displaySurfaceTime(surface)
-- Return surfaceTime (table) as string of table

   return '{'..'year = '..surface['year']..', month = '..surface['month']..', day = '..surface['day']..', hour = '..surface['hour']..', min = '..surface['min']..', sec = '..surface['sec']..', isdst = '..tostring(surface['isdst'])..'}'
end


-- The dateparse module
-- Copyright (c) 2011-2015 iNTERFACEWARE Inc. ALL RIGHTS RESERVED
-- iNTERFACEWARE permits you to use, modify, and distribute this file in accordance
-- with the terms of the iNTERFACEWARE license agreement accompanying the software
-- in which it is used.
-- http://help.interfaceware.com/code/details/dateparse-lua
 
local wdays = { 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
                'Saturday', 'Sunday' }
 
local months = { 'January', 'February', 'March', 'April', 'May', 'June', 'July',
                 'August', 'September', 'October', 'November', 'December' }
 
local wdays_by_name, months_by_name = {}
if true then
   local function index_by_name(array)
      local dict = {}
      for i,name in pairs(array) do
         name = name:lower()
         dict[name] = i
         dict[name:sub(1,3)] = i  -- Abbrev.
      end
      return dict
   end
   wdays_by_name  = index_by_name(wdays)
   months_by_name = index_by_name(months)
end
 
-- Validate week-day names and abbreviations.
local function lookup_wday(s)
   local wday = wdays_by_name[s:lower()]
   if not wday then error('expected week-day, got "'..s..'"') end
   return wday
end
 
-- Translate month names and abbreviations to numbers.  E.g., Jan -> 1.
local function lookup_month(s)
   local month = months_by_name[s:lower()]
   if not month then error('expected month, got "'..s..'"') end
   return month
end
 
-- If we find PM (or P), we need to adjust the hour that was read.
local function fix_hour(AM,PM)
   return function(s,d)
             if s:upper() == PM then
                if d.hour ~= 12 then d.hour = d.hour + 12 end
             elseif s:upper() == AM then
                if d.hour == 12 then d.hour = 0 end
             else
                error('expected '..AM..' or '..PM..', got "'..s..'"')
             end
          end
end
 
-- Time zone information can be parsed and stored in the date/time
-- table.  It is not used to adjust the time value returned.
local known_tzs = {
   ACDT='+10:30', ACST='+09:30', ACT ='+08:00', ADT  ='-03:00', AEDT ='+11:00',
   AEST='+10:00', AFT ='+04:30', AKDT='-08:00', AKST ='-09:00', AMST ='+05:00',
   AMT ='+04:00', ART ='-03:00', AST ='+03:00', AST  ='+04:00', AST  ='+03:00',
   AST ='-04:00', AWDT='+09:00', AWST='+08:00', AZOST='-01:00', AZT  ='+04:00',
   BDT ='+08:00', BIOT='+06:00', BIT ='-12:00', BOT  ='-04:00', BRT  ='-03:00',
   BST ='+06:00', BST ='+01:00', BTT ='+06:00', CAT  ='+02:00', CCT  ='+06:30',
   CDT ='-05:00', CEDT='+02:00', CEST='+02:00', CET  ='+01:00', CHAST='+12:45',
   CIST='-08:00', CKT ='-10:00', CLST='-03:00', CLT  ='-04:00', COST ='-04:00',
   COT ='-05:00', CST ='-06:00', CST ='+08:00', CVT  ='-01:00', CXT  ='+07:00',
   CHST='+10:00', DFT ='+01:00', EAST='-06:00', EAT  ='+03:00', ECT  ='-04:00',
   ECT ='-05:00', EDT ='-04:00', EEDT='+03:00', EEST ='+03:00', EET  ='+02:00',
   EST ='-05:00', FJT ='+12:00', FKST='-03:00', FKT  ='-04:00', GALT ='-06:00',
   GET ='+04:00', GFT ='-03:00', GILT='+12:00', GIT  ='-09:00', GMT  ='+00:00',
   GST ='-02:00', GYT ='-04:00', HADT='-09:00', HAST ='-10:00', HKT  ='+08:00',
   HMT ='+05:00', HST ='-10:00', IRKT='+08:00', IRST ='+03:30', IST  ='+05:30',
   IST ='+01:00', IST ='+02:00', JST ='+09:00', KRAT ='+07:00', KST  ='+09:00',
   LHST='+10:30', LINT='+14:00', MAGT='+11:00', MDT  ='-06:00', MIT  ='-09:30',
   MSD ='+04:00', MSK ='+03:00', MST ='+08:00', MST  ='-07:00', MST  ='+06:30',
   MUT ='+04:00', NDT ='-02:30', NFT ='+11:30', NPT  ='+05:45', NST  ='-03:30',
   NT  ='-03:30', OMST='+06:00', PDT ='-07:00', PETT ='+12:00', PHOT ='+13:00',
   PKT ='+05:00', PST ='-08:00', PST ='+08:00', RET  ='+04:00', SAMT ='+04:00',
   SAST='+02:00', SBT ='+11:00', SCT ='+04:00', SLT  ='+05:30', SST  ='-11:00',
   SST ='+08:00', TAHT='-10:00', THA ='+07:00', UTC  ='+00:00', UYST ='-02:00',
   UYT ='-03:00', VET ='-04:30', VLAT='+10:00', WAT  ='+01:00', WEDT ='+01:00',
   WEST='+01:00', WET ='+00:00', YAKT='+09:00', YEKT ='+05:00',
   -- US Millitary (for RFC-822)
   Z='+00:00', A='-01:00', M='-12:00', N='+01:00', Y='+12:00',
}
 
-- Compute the tz_offset in minute given (+/-)HH:MM or (+/-)HHMM.
local function parse_tz_offset(s,d)
   local sign, hour, min = s:match('([-+])(%d%d):?(%d%d)')
   d.tz = 'UTC'..s:gsub('([-+]%d%d):?(%d%d)', '%1:%2')
   return (d.tz_offset or 0) + (sign .. (hour*60 + min))
end
 
-- Set tz_offset given a time zone name.
local function parse_tz(s,d)
   local offset = known_tzs[s:upper()]
   if not offset then error('expected time zone, got "'..s..'"') end
   d.tz_offset = parse_tz_offset(offset,d)
   return s:upper()
end
 
-- HL7 timestamps can specify very accurate time values, up to one
-- tenth of a millisecond (four decimal points).
local function parse_sec_fraction(s)
   return tonumber('.'..s)
end
 
-- We do not want to pass the date table to tonumber(), just the string.
local function parseint(s)
   return tonumber(s)
end
 
-- The known date/time format codes.  The default action is
-- parseint(), since most values are just integers, exactly as we need
-- them.
local fmt_details = {
   yy   = { '%d%d', 'year',
            function(s)
               local year = tonumber(s) + 1900
               if year < 1969 then  -- POSIX
                  year = year + 100
               end
               return year
            end };
   yyyy = { '%d%d%d%d', 'year' };
   m    = { '%d+',    'month' };
   mm   = { '%d%d',   'month' };
   mmm  = { '%a%a%a', 'month', lookup_month }; -- Abbrev month name.
   mmmm = { '%a+',    'month', lookup_month }; -- Abbrev or full month.
   d    = { '%d+',  'day' };
   dd   = { '%d%d', 'day' };
   ddd  = { '%a%a%a', 'wday', lookup_wday };
   dddd = { '%a+',    'wday', lookup_wday };
   H    = { '%d+',  'hour' };
   HH   = { '%d%d', 'hour' };
   M    = { '%d+',  'min' };
   MM   = { '%d%d', 'min' };
   S    = { '%d+',  'sec' };
   SS   = { '%d%d', 'sec' };
   ssss = { '%d+',  'sec_fraction', parse_sec_fraction };
   t    = { '%a',   'A or P',   fix_hour('A', 'P' ) };
   tt   = { '%a%a', 'AM or PM', fix_hour('AM','PM') };
   zzzz = { '[-+]%d%d:?%d%d', 'tz_offset', parse_tz_offset };
   ZZZ  = { '%a+',            'tz',        parse_tz };
   [' '] = { '%s*', 'whitespace' }; -- Allow omission.
   [','] = { '%s*,?', 'a comma' };  -- Allow omission and leading whitespace.
   w     = { '%a+', 'a word' };     -- Value ignored.
   n     = { '%d+', 'a number' };   -- Value ignored.
}
 
-- Splits one part of a format string off; returns that and the rest.
local function split_fmt(fmt)
   local c = fmt:match('^(%a)')
   if c then
      return fmt:match('^('..c..'+)(.*)')
   elseif #fmt > 0 then
      return fmt:sub(1,1), fmt:sub(2)
   end
end
 
-- Parses the string, s, according to the format, fmt.
local function parse_date(s, fmt)
   local matched, d = '', {year=1969,day=1,month=1,hour=0,min=0,sec=0}
   local function fail(what, pattern)
      if pattern then what = what..' ('..pattern..')' end
      if matched ~= '' then what = what..' after "'..matched..'"' end
      error('expected '..what..', got "'..s..'"')
   end
   while fmt ~= '' do
      local head_fmt, rest_fmt = split_fmt(fmt)
      local pattern, field, fun = unpack(fmt_details[head_fmt] or {})
      local part, rest
      if pattern then
         part, rest = s:match('^('..pattern..')(.*)')
         if not part then fail(field,head_fmt) end
         d[field] = (fun or parseint)(part,d)
         matched = matched .. part
      elseif head_fmt:find('^%a') then
         error('unknown date/time pattern: '..head_fmt)
      elseif s:sub(1,#head_fmt) ~= head_fmt then
         fail('"'..head_fmt..'"')
      else
         matched = matched .. s:sub(1,#head_fmt)
         rest = s:sub(#head_fmt + 1)
      end
      s, fmt = rest, rest_fmt
   end
   if s ~= '' then fail('nothing') end
   return d
end
 
-- Expands all valid combinations of a string with optional areas
-- denoted by brackets.  E.g., "a[b]" expands to { "ab", "a" }.
local function expand_fmt(s, out)
   if not out then out = {} end
   local function add_fmt(s)
      local i, j = s:find('%b[]')
      if i then
         add_fmt(s:sub(1,i-1)..s:sub(i+1,j-1)..s:sub(j+1))
         add_fmt(s:sub(1,i-1)        ..        s:sub(j+1))
      else
         out[#out+1] = s
      end
   end
   add_fmt(s)
   return out
end
 
-- Date/time formats for the fuzzy parser.  These patterns must be
-- structurally unambiguous.  E.g., "mm/dd/yy" will match everything
-- that "yy/mm/dd" would, including "77/10/20".  This is intentional
-- as numerical differentiation is problematic.  E.g., which pattern
-- would match "02/03/04"?
local known_fmts = {}
if true then
   local templates = {
      -- HL7 Standard
      'yyyy[mm[dd[HHMM[SS[.ssss]]]]][zzzz]',
      -- Common formats (US)
      'm/d/yy[yy][ H:MM[:SS][ tt][ ZZZ]]',
      'yyyy-m-d[ w][ H:MM[:SS][ tt][ ZZZ]]',
      '[dddd, ]mmmm d[w], yyyy[ H:MM[:SS][ tt][ ZZZ]]',
      'H:MM[:SS][ tt][ ZZZ][, dddd], mmmm d[w], yyyy',
      -- Internet Standards (relaxed; RFC-822 and RFC-1123)
      '[dddd, ]d[w] mmmm yy[yy][ HH:MM[:SS][ tt][ ZZZ]]',
      '[ddd, ]dd mmm yy[yy] HH:MM:SS zzzz',
      -- The os.date('%c') Format
      '[dddd, ]mmmm [d]d, H:MM[:SS][ tt] yyyy',
      -- Other common formats
      'd-mmmm-yy[yy][ H:MM[:SS][ tt][ ZZZ]]',
   }
   for _,s in pairs(templates) do
      expand_fmt(s, known_fmts)
   end
end
 
-- The fuzzy date/time parser.  We try all the patterns we know, until
-- we find one that matches the given string.
local function fuzzy_parse(s)
   for _,fmt in pairs(known_fmts) do
      local ok, result = pcall(parse_date, s, fmt)
      if ok then
         return result
      end
   end
   error('unknown date/time: '..s, 3)
end
 
-- Once we find a matching date/time pattern, we just have to ensure
-- each value (e.g., minute) is in the allowed range.  If validation
-- fails, no other patterns are tried; see the note above as to why.
local function mktime(din,s)
   local t = os.time(din)
   if not t then
      error('invalid date/time: '..s)
   end
   local dout = os.date('*t',t)
   for _,k in pairs({'sec','min','hour','day','month','year'}) do
      if din[k] ~= dout[k] then
         error('invalid '..k..', '..din[k]..', in '..s, 3)
      end
   end
   for _,k in pairs({'tz','tz_offset','sec_fraction'}) do
      if din[k] then dout[k] = din[k] end
   end
   return t, dout
end


local function preprocessSingleArg(argName)
    -- If the argument exists and isn't blank, add it to the argument table.
    -- Blank arguments are treated as nil to match the behaviour of ParserFunctions.
    if origArgs[argName] and origArgs[argName] ~= '' then
        args[argName] = origArgs[argName]
    end
end

--
-- Public API
--
 
dateparse = {}
function p.parseSurfaceDate(frame)

    -- If called via #invoke, use the args passed into the invoking template.
    -- Otherwise, for testing purposes, assume args are being passed directly in.
    if frame == mw.getCurrentFrame() then
        origArgs = frame:getParent().args
    else
        origArgs = frame
    end

   preprocessSingleArg('datetime')
   preprocessSingleArg('format')

   s = args['datetime']
   fmt = args['format']

   if type(s) ~= 'string' or s == '' or s == 'NULL' then
      return nil
   end
   local function strip(s)
      return s:gsub('^%s+',''):gsub('%s+$',''):gsub('%s+',' ')
   end
   s = strip(s)
   if not fmt then
      d, dout = mktime(fuzzy_parse(s), s)
      return displaySurfaceTime(dout)
   end
   fmt = strip(fmt)
   local all_errors = {}
   for i,fmt in ipairs(expand_fmt(fmt)) do
      local ok, result = pcall(parse_date, s, fmt)
      if ok then return mktime(result,s) end
      local clean_message = result:match('^.-:.-:%s*(.*)') or result
      all_errors[i] = '"'..fmt..'" '..clean_message
   end
   error('"'..s..'" does not match:\n'..
         table.concat(all_errors, '\n'), 2)
end

return p