Модуль:SummaryII/parsers

Материал из свободной русской энциклопедии «Традиция»
Перейти к: навигация, поиск

Для документации этого модуля может быть создана страница Модуль:SummaryII/parsers/doc

--[[

	Модуль, поставляющий парсеры значений для SummaryII.

	Зависимости:
		Модуль:Class,
		lpeg
		Модуль:Re
		Модуль:SummaryII/service	
--]]

local strip				= require "Модуль:SummaryII/service".strip

local presets = {}

-- Preset for a property whose value is written as a wiki link ([[value|alias]]).
--
-- The signature is (property, additional, id) because m.parser calls every
-- preset with exactly these three arguments; with the former two-parameter
-- form the `additional` table was received as the id and the real id was lost.
--
-- @param property		property name; also used inside the output template.
-- @param additional	extra options; accepted for call compatibility, not used here.
-- @param id			optional parser id; defaults to strip (property).
-- @return the parser id, and a list holding one property descriptor.
function presets.page (property, additional, id)
	local descriptor = {
		name	= property
		-- Frame markers of a value written in link form:
	  , open	= '[['
	  , escape	= ':'
	  , pipe	= '|'
	  , close	= ']]'
		-- Output template; the @...@ placeholders are presumably filled in by
		-- the consumer of the parse result (not part of this module).
	  , output	= '@prefix@[[' .. property .. '::@value@|@alias@]]@suffix@@separator@'
	}
	return id or strip (property), { descriptor }
end		-- function presets.page (property, additional, id)

-- Preset for a plain string property; the descriptor it builds has the same
-- frame markers and output template as the one built for pages.
--
-- The signature is (property, additional, id) because m.parser calls every
-- preset with exactly these three arguments; with the former two-parameter
-- form the `additional` table was received as the id and the real id was lost.
--
-- @param property		property name; also used inside the output template.
-- @param additional	extra options; accepted for call compatibility, not used here.
-- @param id			optional parser id; defaults to strip (property).
-- @return the parser id, and a list holding one property descriptor.
function presets.string (property, additional, id)
	local descriptor = {
		name	= property
		-- Frame markers of a value written in link form:
	  , open	= '[['
	  , escape	= ':'
	  , pipe	= '|'
	  , close	= ']]'
		-- Output template; the @...@ placeholders are presumably filled in by
		-- the consumer of the parse result (not part of this module).
	  , output	= '@prefix@[[' .. property .. '::@value@|@alias@]]@suffix@@separator@'
	}
	return id or strip (property), { descriptor }
end		-- function presets.string (property, additional, id)

-- Preset for a numeric quantity with an optional unit.
--
-- The value is described by an `re` grammar that normalises the number
-- (removes spaces between digit groups, replaces the decimal mark, rewrites
-- "×10^n" style exponents to "e…") and appends a unit.
--
-- @param property		property name; also used inside the output template.
-- @param additional	optional table of options:
--							units			list of unit strings that may follow the number;
--							preunits		list of unit strings that may precede the number;
--							default_unit	unit substituted when none of `units` matches.
-- @param id			optional parser id; defaults to strip (property).
-- @return the parser id, and a list holding one property descriptor.
function presets.quantity (property, additional, id)
	-- Tolerate a missing options table instead of failing on indexing nil.
	additional = additional or {}
	local units		= additional.units or {}
	local preunits	= additional.preunits or {}

	-- Decimal mark: the second character of 1.1 formatted by the content language.
	local decimal = mw.ustring.sub (mw.getContentLanguage ():formatNum (1.1), 2, 2)

	-- Offer the "preunit float" branch only when there is something to match:
	-- with an empty list the preunit rule matches the empty string, which would
	-- always select this branch and never append a unit.
	local top_rule = (#preunits > 0 and 'preunit float / ' or '') .. 'float unit'

	-- Explicit unit alternatives precede the empty-match fallback. With no
	-- units, nothing is emitted before the fallback: a leading empty literal
	-- ("") would always match first and default_unit would never be applied.
	local unit_choices = #units > 0
		and ('"' .. table.concat (units, '" / "') .. '" / ')
		or ''

	return id or strip (property), {{
		name	= property
	  , re		=
		  ' quantity		<- {~ ' .. top_rule .. ' ~}\n' .. [==[
			float			<- sign? spaced_int ( decimal int )? exponent?
			sign			<- [+-] remove_spaces
			spaced_int		<- int (remove_spaces int)*
			int				<- %d+
			exponent		<- [eE] -> 'e' sign? int
							 / '' -> 'e' remove_spaces ( '×' / '*' ) -> '' remove_spaces '10' -> ''
									( '<sup>' -> '' sign? int '</sup>' -> '' / ( '^' / '**' ) -> '' sign? int )
			remove_spaces	<- ( !%nl %s )* -> ''
			one_space		<- ( !%nl %s )* -> ' '
			decimal			<- [.,] -> "]==] .. decimal .. '"\n'
		.. 'unit			<- one_space ( ' .. unit_choices .. '"" -> "' .. (additional.default_unit or '') .. '" )\n'
		.. 'preunit			<- ( "' .. table.concat (preunits, '" / "') .. '" ) remove_spaces'
	  , open = '[[', escape = ':', pipe = '|', close = ']]'
	  , output = '@prefix@[[' .. property .. '::@value@|@alias@]]@suffix@@separator@'
	}}
end		-- function presets.quantity (property, additional, id)


-- Preset for a date split into a day-and-month part and a year part, plus an
-- inferred combined date property.
--
-- The signature is (genitive, additional, id) because m.parser calls every
-- preset with (property, additional, id); with the former two-parameter form
-- the `additional` table was received as the id and the real id was lost.
--
-- @param genitive		the word(s) completing the property names
--						'День …', 'Год …' and 'Дата …'.
-- @param additional	extra options; accepted for call compatibility, not used here.
-- @param id			optional parser id; defaults to strip ('Дата' .. genitive).
-- @return the parser id, and a list of three property descriptors
--		   (day, year, and the inferred full date).
function presets.date (genitive, additional, id)
	local lang = mw.getContentLanguage ()

	-- Builds an ordered choice of quoted month names in the genitive case
	-- ('xg' format code) for the given English month names.
	local function months (...)
		local quoted = {}
		for _, english in ipairs { ... } do
			quoted [#quoted + 1] = '"' .. lang:formatDate ('xg', english) .. '"'
		end
		return table.concat (quoted, ' / ')
	end

	-- Months grouped by the largest day number they accept:
	local month31 = months ('January', 'March', 'May', 'July', 'August', 'October', 'December')
	local month30 = months ('April', 'June', 'September', 'November')
	local month29 = months ('February')

	-- The literals '31' and '30' must be tried BEFORE the shorter rules: PEG
	-- choice is ordered and does not backtrack into an alternative that has
	-- already succeeded, so with day29 first the single digit '3' was consumed
	-- and the month name then failed on the remaining '0' or '1'.
	local day = [==[
		day			<- {~ day31 one_space month31 / day30 one_space month30 / day29 one_space month29 ~}
		one_space	<- ( !%nl %s )* -> ' '
		day31		<- '31' / day30
		day30		<- '30' / day29
		day29		<- [12] [0-9] / '0'? [1-9]
	]==] .. '\n'
	.. 'month31		<- ' .. month31 .. '\n'
	.. 'month30		<- ' .. month30 .. '\n'
	.. 'month29		<- ' .. month29

	-- Year: digits, an optional "year" word (removed) and an optional era;
	-- AD markers are removed, BC markers are normalised to 'до н.э.'.
	local year = [==[
		year		<- {~ number yr? era? ~}
		number		<- %d+
		yr			<- ( %s* ( 'г.' / 'года' / 'гг.' ) ) -> ''
		era			<- AD / BC
		AD			<- ( %s* ( 'н.' %s* 'э.' / 'от' %s* 'Р.' %s* 'Х.' / 'AD' ) ) -> ''
		BC			<- one_space ( 'до' %s* 'н.' %s* 'э.' / 'до' %s* 'Р.' %s* 'Х.' / 'BC' ) -> 'до н.э.'
		one_space	<- ( !%nl %s )* -> ' '
	]==]

	return id or strip ('Дата' .. genitive), {
		{	name		= 'День ' .. genitive
		  , re			= day
		  , open 		= '[[', escape = ':', pipe = '|', close = ']]'
		  , output 		= '@prefix@[[День ' .. genitive .. '::@value@|@alias@]]@suffix@@separator@'
		}	-- { name = 'День ' .. genitive ... }
	  , {	name		= 'Год ' .. genitive
		  , re			= year
		  , open 		= '[[', escape = ':', pipe = '|', close = ']]'
		  , output 		= '@prefix@[[Год ' .. genitive .. '::@value@|@alias@]]@suffix@@separator@'
		}	-- { name = 'Год ' .. genitive ... }
		-- Inferred property: not parsed from the text, composed from day and year.
	  , {	name		= 'Дата ' .. genitive
		  , property	= '@День ' .. genitive .. '@ @Год ' .. genitive .. '@'
		  , output		= '{{#set:Дата ' .. genitive .. ' = @День ' .. genitive .. '@ @Год ' .. genitive .. '@}}'
		  , inferred	= true
		}	-- { name = 'Дата ' .. genitive ... }
	}	-- return {...}
end		-- function presets.date (genitive, additional, id)

-- Preset for a URL-valued property, optionally written as an external link
-- ([url alias]).
--
-- The `re` grammar accepts an http/https URL; when the protocol is missing,
-- the default protocol followed by "://" is substituted in front of the domain.
--
-- @param property		property name; also used inside the output template.
-- @param additional	optional table of options:
--							default_protocol	protocol used when the value has none
--												(defaults to 'http').
-- @param id			optional parser id; defaults to strip (property).
-- @return the parser id, and a list holding one property descriptor.
function presets.URL (property, additional, id)
	-- Tolerate a missing options table instead of failing on indexing nil.
	local default_protocol = additional and additional.default_protocol or 'http'

	return id or strip (property), {{
		name	= property
	  , re		=
		   'url			<- {~ ( protocol "://" / "" -> "' .. default_protocol .. '://" )'
	 .. [==[
			domain port? path? args? ~}
			protocol	<- 'http' 's'?
			domain		<- word ( '.' word )+
			word		<- ( ![.,;:?/%s] . )+
			port		<- ':' %d+
			path		<- ( ![,;:?%s] . )+
			args		<- '?' ( ![,;%s] . )+
		]==]
		-- External-link framing: [url alias]; no escape character is defined.
	  , open = '[', pipe = ' ', close = ']'
	  , output = '@prefix@[[' .. property .. '::@value@|@alias@]]@suffix@@separator@'
	}}
end		-- function presets.URL (property, additional, id)

-- end of presets

-- Class Parser

-- Class factory from the project's Class module; used below to create Parser.
local Class = require ('Модуль:Class').create

-- Accelerators: local aliases for the lpeg library, a few ready-made
-- patterns, and the capture constructors used by the Parser methods.
local lpeg		= lpeg
local any		= lpeg.P (1)		-- matches exactly one character
local always	= lpeg.P (true)		-- always matches, consumes nothing
local never		= lpeg.P (false)	-- never matches
local P			= lpeg.P
local C			= lpeg.C
local Cc		= lpeg.Cc
local Ct		= lpeg.Ct
local Cg		= lpeg.Cg
local Cs		= lpeg.Cs			-- not referenced elsewhere in the visible code

local spaces			= lpeg.S '  \n' ^ 1
local optional_spaces	= lpeg.S '  ' ^ 0
-- NOTE(review): double_colon is not referenced elsewhere in the visible code.
local double_colon		= '::'
-- Digit class from lpeg's locale table; used for numeric parameter indices.
local digit 			= lpeg.locale ().digit
  
-- Compiler for textual `re` grammars (see Parser:format).
local re				= require ('Модуль:Re').compile
-- NOTE(review): lpeg.serialize is presumably a host-provided extension — confirm.
local serialise_lpeg	= lpeg.serialize.save

-- NOTE(review): gsub is not referenced elsewhere in the visible code.
local gsub				= mw.ustring.gsub


-- Parser class, created through the Class factory with no predefined
-- attributes; its constructor and methods are attached below.
local Parser = Class {
	-- Attributes:
}	-- 	local Parser = Class {...}
	-- Constructor:
-- Builds the LPEG pattern of the parser and stores it in self.lpeg.
--
-- Each non-inferred property gets two variants: a "framed" one, delimited by
-- its open/close markers, and an "unframed" one, where open and close always
-- match. The final pattern is:
--		id capture * parameter name * '=' * (framed chain + unframed chain)
-- followed, when inferred properties exist, by the captures that carry them.
--
-- @param id			value injected as the 'id' capture.
-- @param params		list of parameter name variants (see Parser:param).
-- @param properties	list of property descriptors, as produced by the presets.
--						Non-inferred descriptors are modified in place
--						(`escape` default and `no` are set).
-- @param separators	pattern separating values; stored in self.separators.
function Parser:_init (id, params, properties, separators)

	self.separators = separators
	-- "Any character" for unframed values: narrowed below so that it cannot
	-- start at the opening marker of any framed property.
	local unframed_any = any

	-- Framed properties:
	local framed_properties		= {}
	for i, property in ipairs (properties) do
		if not property.inferred then
			-- Defaults for properties:
			property.escape	= property.escape	or ''
			property.no		= i

			-- Framed property format:
			local framed = mw.clone (property)
			framed.open_lpeg	= P (framed.open	or '')
			framed.close_lpeg	= P (framed.close	or '')
			framed.pipe_lpeg	= P (framed.pipe	or '')
			-- An empty escape means "no restriction": P ('') always matches, so
			-- its negation in Parser:format would never match; `never` is
			-- passed instead, whose negation always matches.
			local cant_start	= framed.escape ~= '' and P (framed.escape) or never
			framed.format		= self:format (framed.re
											 , any
											 , cant_start
											 , framed.close_lpeg + framed.pipe_lpeg)
			-- Append rather than store at index i: an inferred property in the
			-- middle of the list would leave a hole, and the ipairs loops over
			-- this list would stop there. The original position stays in `no`.
			framed_properties [#framed_properties + 1] = framed

			unframed_any = unframed_any - framed.open_lpeg
		else
			-- An internally inferred property, like date <- day year:
			self.inject		= (self.inject or '') .. property.output
			self.inferred	= self.inferred or {}
			self.inferred [property.name] = property.property
		end	-- if not property.inferred
	end		-- for i, property in ipairs (properties) do

	-- Second pass: unframed formats need the final unframed_any computed above.
	local unframed_properties	= {}
	for _, property in ipairs (properties) do
		if not property.inferred then
			-- Unframed property formats:
			local unframed = mw.clone (property)
			unframed.open_lpeg	= always
			unframed.close_lpeg	= always
			unframed.format		= self:format (unframed.re, unframed_any, spaces, separators)
			unframed_properties [#unframed_properties + 1] = unframed
		end	-- if not property.inferred
	end

	-- TODO: hash id.
	self.lpeg = self:capture ('id', nil, id)
			  * self:param (params) * '=' * (
					self:property_chain (framed_properties,		any)
				  + self:property_chain (unframed_properties,	unframed_any)
				)
	if self.inferred then
		self.lpeg = self.lpeg * self:inject_inferred ()
	end
end	-- function Parser:_init (id, params, properties, separators)

-- Builds the pattern matching a parameter name: any of the given variants
-- (after strip), optionally followed by an index.
--
-- @param variants	list of parameter names.
-- @return an LPEG pattern; the index, if present, is captured as 'index'.
function Parser:param (variants)
	-- Ordered choice over all name variants:
	local names = never
	for _, variant in ipairs (variants) do
		names = names + P (strip (variant))
	end

	-- An index at the end of the parameter name: either digits with an
	-- optional trailing hyphen, or any non-empty text in parentheses.
	local plain_index		= self:capture ('index', digit ^ 1 * P"-" ^ -1)
	local bracketed_index	= P"(" * self:capture ('index', (any - ')') ^ 1)  * ')'
	local optional_index	= (plain_index + bracketed_index) ^ -1

	return names * optional_index
end		-- function Parser:param (variants)

-- Builds the captures that carry inferred properties: the concatenated output
-- templates under 'inject' and the name-to-template table under 'inferred'.
--
-- @return an LPEG pattern producing both named captures.
function Parser:inject_inferred ()
	local injected_output	= self:capture ('inject', nil, self.inject)
	local inferred_table	= Cg (Cc (self.inferred), 'inferred')
	return injected_output * inferred_table
end		-- function Parser:inject_inferred ()

-- Builds the pattern for one or more property values, each with its affixes,
-- followed by the tail running to the end of input.
--
-- @param properties	list of prepared property descriptors.
-- @param any			pattern playing the role of "any character" in this chain.
-- @return an LPEG pattern.
function Parser:property_chain (properties, any)
	local one_or_more	= self:any_property_with_affixes (properties, any) ^ 1
	local remainder		= self:tail (any)
	return one_or_more * remainder
end		-- function Parser:property_chain (properties, any)
	
-- Builds an ordered choice over all properties, each wrapped with its prefix,
-- suffix and separator captures.
--
-- Side effect: stores the value pattern of every descriptor in its `value` field.
--
-- @param properties	list of prepared property descriptors.
-- @param any			pattern playing the role of "any character".
-- @return an LPEG pattern.
function Parser:any_property_with_affixes (properties, any)
	-- Pass I: every value pattern must be known first, because the filler
	-- between values is "any character that does not start any value".
	local filler = any
	for _, descriptor in ipairs (properties) do
		local value = self:value (descriptor.format, descriptor.open_lpeg, descriptor.pipe_lpeg, descriptor.close_lpeg, any)
		descriptor.value = value
		filler = filler - value
	end

	-- Pass II: combine the per-property patterns into one ordered choice.
	local alternatives = never
	for _, descriptor in ipairs (properties) do
		local with_affixes = self:property_with_affixes (
			descriptor.no
		  ,	descriptor.name
		  , descriptor.value
		  , filler
		  , descriptor.output
		)
		alternatives = alternatives + with_affixes
	end
	return alternatives
end		-- function Parser:any_property_with_affixes (properties, any)

-- Builds the pattern of a single value: opening marker, the value itself
-- (captured as 'processed'), an optional piped alias, closing marker.
--
-- @param format		pattern of the value proper.
-- @param open_lpeg		pattern of the opening marker.
-- @param pipe_lpeg		pattern of the alias separator, or nil for no alias part.
-- @param close_lpeg	pattern of the closing marker.
-- @param any			pattern playing the role of "any character".
-- @return an LPEG pattern.
function Parser:value (format, open_lpeg, pipe_lpeg, close_lpeg, any)
	-- 'processed' is a table whose first item is the whole matched text,
	-- followed by whatever captures the format itself produces.
	local processed = self:capture ('processed', Ct (C (format)))

	-- Without a pipe pattern the alias part degenerates to "always matches".
	local alias_part = true
	if pipe_lpeg then
		local alias = self:capture ('alias', (any - close_lpeg) ^ 0)
		alias_part = (pipe_lpeg * alias) ^ -1
	end

	return open_lpeg * processed * alias_part * close_lpeg
end		-- function Parser:value (format, open_lpeg, pipe_lpeg, close_lpeg, any)
	
-- Wraps a value pattern with its surrounding text and metadata, collected
-- into one table capture.
--
-- @param no			ordinal of the property; stored as a string under 'no'.
-- @param name			property name; stored under 'property'.
-- @param value			pattern of the value.
-- @param non_value		pattern of a character that does not start any value.
-- @param output		output template; stored under 'output'.
-- @return an LPEG pattern producing a table with the fields prefix, suffix,
--		   separator, property, no, output, plus those captured by `value`.
function Parser:property_with_affixes (no, name, value, non_value, output)
	local separators = self.separators

	-- Text before the value, and text after it up to the next separator:
	local before	= self:capture ('prefix',	non_value ^ 0)
	local after		= self:capture ('suffix',	(non_value - separators) ^ 0)
	local divider	= self:capture ('separator',	separators ^ 0)

	-- Constant captures describing the property:
	local metadata	= self:capture ('property',	nil,	name)
					* self:capture ('no',		nil,	tostring (no))
					* self:capture ('output',	nil,	output)

	return Ct (before * value * after * divider * metadata)
end		-- function Parser:property_with_affixes (no, name, value, non_value, output)

-- Builds a named (group) capture.
--
-- @param label		name of the capture.
-- @param where		pattern to capture; defaults to the always-matching pattern.
-- @param inject	optional replacement applied to the pattern with the `/`
--					operator, so that the capture yields this value.
-- @return an LPEG group capture named `label`.
function Parser:capture (label, where, inject)
	local pattern = where or always
	if inject then
		pattern = pattern / inject
	end
	return Cg (pattern, label)
end		-- function Parser:capture (label, where, inject)

-- Builds the pattern capturing the remaining text as 'tail' and requiring the
-- end of input right after it.
--
-- @param any	pattern playing the role of "any character".
-- @return an LPEG pattern.
function Parser:tail (any)
	local remainder		= self:capture ('tail', any ^ 0)
	local end_of_input	= P (-1)
	return remainder * end_of_input
end		-- function Parser:tail (any)

-- Chooses the pattern for a value.
--
-- @param format		nil, a ready LPEG pattern, or an `re` grammar to compile.
-- @param any			pattern playing the role of "any character".
-- @param cant_start	pattern the default value must not start with.
-- @param stop			pattern at which the default value ends.
-- @return `format` itself when it is an LPEG pattern; the compiled grammar
--		   when it is anything else truthy; otherwise the default: one or more
--		   `any` up to `stop`, not starting with `cant_start`.
function Parser:format (format, any, cant_start, stop)
	if format then
		if lpeg.type (format) then
			return format
		end
		local compiled = re (format)
		if compiled then
			return compiled
		end
	end
	-- Default format:
	return -cant_start * (any - stop) ^ 1
end		-- Parser:format (format, any, cant_start, stop)

-- Serialises the compiled pattern of this parser.
--
-- @return whatever the lpeg serialiser returns for self.lpeg.
function Parser:serialise ()
	local compiled = self.lpeg
	return serialise_lpeg (compiled)
end		-- function Parser:serialise ()

-- end of class Parser.

-- Exports
local m = {}

-- Creates a parser for a parameter of the given datatype.
--
-- @param params		list of parameter name variants.
-- @param datatype		key of a preset in `presets`.
-- @param property		property name, passed to the preset.
-- @param separators	pattern separating values, passed to the Parser.
-- @param additional	datatype-specific options, passed to the preset.
-- @param id			optional parser id, passed to the preset.
-- @return a Parser instance.
-- Raises an error naming the datatype when no preset exists for it, instead
-- of the opaque "attempt to call a nil value".
function m.parser (params, datatype, property, separators, additional, id)
	local preset = presets [datatype]
	if not preset then
		error ('SummaryII/parsers: unknown datatype "' .. tostring (datatype) .. '"', 2)
	end
	-- Every preset is called with the same three arguments.
	local parser_id, properties = preset (property, additional, id)
	return Parser (parser_id, params, properties, separators)
end		-- function m.parser (params, datatype, property, separators, additional, id)

return m