Модуль:Ru-common

Для документации этого модуля может быть создана страница Модуль:Ru-common/doc
--[[
This module holds some commonly used functions for Russian language.
It's generally for use from other modules, not #invoke, although it can
be invoked from a template using export.main().

NOTE NOTE NOTE: All functions assume that transliteration (but not Russian)
has had its acute and grave accents decomposed using export.decompose().
This is the first thing that should be done to all user-specified
transliteration and any transliteration we compute that we expect to work with.
]]

local export = {}

local lang = require("Module:languages2").getByCode("ru")

local strutils = require("Module:string utilities")

local u = mw.ustring.char
local rfind = mw.ustring.find
local rsubn = mw.ustring.gsub
local rmatch = mw.ustring.match
local rsplit = mw.text.split
local ulower = mw.ustring.lower
local uupper = mw.ustring.upper
local usub = mw.ustring.sub

local AC = u(0x0301) -- combining acute accent
local GR = u(0x0300) -- combining grave accent
local CFLEX = u(0x0302) -- combining circumflex
local BREVE = u(0x0306) -- combining breve
local DIA = u(0x0308) -- combining diaeresis
local CARON = u(0x030C) -- combining caron

-- the combining accents treated as "any accent" below: acute, grave,
-- diaeresis, breve and caron (the circumflex is not part of this set)
export.accent = AC .. GR .. DIA .. BREVE .. CARON
-- pattern fragment matching zero or more of the accents above
export.opt_accent = "[" .. export.accent .. "]*"
-- any composed Cyrillic vowel with grave accent
export.composed_grave_vowel = "ѐЀѝЍ"
-- any Cyrillic vowel except ёЁ (includes the composed grave vowels)
export.vowel_no_jo = "аеиоуяэыюіѣѵАЕИОУЯЭЫЮІѢѴ" .. export.composed_grave_vowel
-- any Cyrillic vowel, including ёЁ
export.vowel = export.vowel_no_jo .. "ёЁ"
-- any vowel in transliteration
export.tr_vowel = "aeěɛiouyAEĚƐIOUY"
-- any consonant in transliteration, omitting soft/hard sign
export.tr_cons_no_sign = "bcčdfghjklmnpqrsštvwxzžBCČDFGHJKLMNPQRSŠTVWXZŽ"
-- any consonant in transliteration, including soft/hard sign
export.tr_cons = export.tr_cons_no_sign .. "ʹʺ"
-- pattern fragment for any consonant in transliteration, including
-- soft/hard sign, optionally followed by any accent(s)
export.tr_cons_acc_re = "[" .. export.tr_cons .. "]" .. export.opt_accent
-- any Cyrillic consonant except sibilants and ц (soft and hard signs are
-- listed here too)
export.cons_except_sib_c = "бдфгйклмнпрствхзьъБДФГЙКЛМНПРСТВХЗЬЪ"
-- Cyrillic sibilant consonants
export.sib = "шщчжШЩЧЖ"
-- Cyrillic sibilant consonants and ц
export.sib_c = export.sib .. "цЦ"
-- any Cyrillic consonant
export.cons = export.cons_except_sib_c .. export.sib_c
-- Cyrillic velar consonants
export.velar = "кгхКГХ"
-- uppercase Cyrillic letters (vowels as well as consonants); used to
-- detect whether a piece of text contains a capital letter
export.uppercase = "АЕИОУЯЭЫЁЮІѢѴБДФГЙКЛМНПРСТВХЗЬЪШЩЧЖЦ"

-- Thin wrapper over rsubn() (mw.ustring.gsub) that yields only the
-- substituted string; the replacement count is dropped by the parentheses.
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end

-- Entry point that lets the module be used from a template via #invoke.
-- frame.args[1] names a member of this module:
--   * a function is called with frame.args[2] and frame.args[3];
--   * a table is indexed with frame.args[2];
--   * any other value (e.g. one of the exported character-class strings)
--     is returned as is.
-- Raises an error if no member with that name exists.
function export.main(frame)
	local args = frame.args
	local member = export[args[1]]
	if member == nil then
		error("No such export: " .. tostring(args[1]))
	end
	local kind = type(member)
	if kind == "function" then
		return member(args[2], args[3])
	elseif kind == "table" then
		return member[args[2]]
	end
	-- Indexing a string would go through the string metatable and give
	-- nil (or a string library function), so return plain values directly.
	return member
end

-- Choose the form of the preposition (о, об or обо) that goes before the
-- phrase PHR. Leading punctuation, whitespace and control characters are
-- skipped; the decision is based on the first word that follows them.
-- The algorithm is mainly inherited from w:ru:template:Обо.
function export.obo(phr)
	local sep = "[%p%s%c]"
	local first_word = rmatch(phr, sep .. "*(.-)" .. sep)
	if not first_word then
		-- no separator after the first word: take everything up to the end
		first_word = rmatch(phr, sep .. "*(.-)$")
	end
	if not first_word then
		return nil
	end

	-- a fixed list of words that take "обо"
	local padded = ' ' .. ulower(first_word) .. ' '
	if string.find(" всей всём всех мне ", padded, 1, true) then
		return 'обо'
	end

	local head = usub(first_word, 1, 2)
	local wants_ob
	if head == uupper(head) then
		-- first two characters are unchanged by uppercasing: treated as an
		-- abbreviation, judged by its first letter
		wants_ob = rmatch(head, "^[ЙУНФЫАРОЛЭСМИRYUIOASFHLXNMÖÜÄΑΕΟΥΩ]")
	else
		wants_ob = rmatch(uupper(first_word), "^[АОЭИУЫAOIEÖÜÄΑΕΟΥΩ]")
	end
	return wants_ob and 'об' or 'о'
end

-- Apply Proto-Slavic iotation to the end of STEM. This is the change that
-- is effected by a Slavic -j- after a consonant. SHCH selects the outcome
-- of final "т": "щ" if SHCH is "щ", otherwise "ч".
function export.iotation(stem, shch)
	-- normally "т" is iotated as "ч" but there are many verbs that are
	-- iotated with "щ"
	local t_outcome = "ч"
	if shch == "щ" then
		t_outcome = "щ"
	end

	-- Rules are applied in this order; "ск"/"ст" have to be tried before
	-- the single-letter rules for "к" and "т".
	local rules = {
		{"[сх]$", "ш"},
		{"ск$", "щ"},
		{"ст$", "щ"},
		{"[кц]$", "ч"},
		{"т$", t_outcome},
		{"[гдз]$", "ж"},
		{"([бвмпф])$", "%1л"},
	}
	for _, rule in ipairs(rules) do
		stem = rsub(stem, rule[1], rule[2])
	end
	return stem
end

-- Does connected Cyrillic text need accents? TEXT is split on whitespace
-- and the result is true as soon as one word is found that carries no
-- stress mark and has more than one vowel; otherwise false.
function export.needs_accents(text)
	for _, word in ipairs(rsplit(text, "%s")) do
		if export.is_unstressed(word) and not export.is_monosyllabic(word) then
			return true
		end
	end
	return false
end

-- Truthy if the Cyrillic word carries a stress mark: an acute, a
-- diaeresis, or the letter ё/Ё (which is inherently stressed). An
-- uncomposed diaeresis occurs e.g. in the plural of сѣдло.
-- Returns the result of rfind() unchanged.
function export.is_stressed(word)
	local stress_class = "[" .. AC .. DIA .. "ёЁ]"
	return rfind(word, stress_class)
end

-- True if the Cyrillic word has no stress mark (acute or diaeresis, the
-- latter including ё/Ё); the boolean negation of is_stressed().
function export.is_unstressed(word)
	if export.is_stressed(word) then
		return false
	end
	return true
end

-- Truthy if the Cyrillic word is stressed on its last syllable, i.e. no
-- vowel follows either a ё/Ё or a vowel marked with acute or diaeresis.
function export.is_ending_stressed(word)
	local no_vowel_to_end = "[^" .. export.vowel .. "]*$"
	local pos = rfind(word, "[ёЁ]" .. no_vowel_to_end)
	if pos then
		return pos
	end
	return (rfind(word, "[" .. export.vowel .. "][" .. AC .. DIA .. "]" .. no_vowel_to_end))
end

-- Truthy if the Cyrillic word has two or more stresses (acute or
-- diaeresis). ё/Ё is first rewritten as е + acute so that it is counted
-- like any other stressed vowel.
function export.is_multi_stressed(word)
	local stressed_vowel = "[" .. export.vowel .. "][" .. AC .. DIA .. "]"
	local normalized = rsub(word, "[ёЁ]", "е" .. AC)
	return rfind(normalized, stressed_vowel .. ".*" .. stressed_vowel)
end

-- Truthy if the Cyrillic word is stressed on its first syllable: the
-- first vowel is ё/Ё or is directly followed by an acute accent.
function export.is_beginning_stressed(word)
	local leading_non_vowels = "^[^" .. export.vowel .. "]*"
	local pos = rfind(word, leading_non_vowels .. "[ёЁ]")
	if pos then
		return pos
	end
	return (rfind(word, leading_non_vowels .. "[" .. export.vowel .. "]" .. AC))
end

-- True if the Cyrillic word contains no vowel at all
function export.is_nonsyllabic(word)
	local vowel_pos = rfind(word, "[" .. export.vowel .. "]")
	return not vowel_pos
end

-- True if the Cyrillic word has at most one vowel; this also covers
-- non-syllabic stems such as льд-
function export.is_monosyllabic(word)
	local any_vowel = "[" .. export.vowel .. "]"
	local two_vowels = rfind(word, any_vowel .. ".*" .. any_vowel)
	return not two_vowels
end

-- Lookup table used by export.decompose(): maps a base letter followed by
-- a combining accent to the precomposed character that should be kept as
-- a single unit. Beware of look-alike letters: some keys are Cyrillic and
-- some are Latin.
local recomposer = {
	["и" .. BREVE] = "й",
	["И" .. BREVE] = "Й",
	["е" .. DIA] = "ё", -- WARNING: these two keys are Cyrillic е and Е
	["Е" .. DIA] = "Ё",
	["e" .. CARON] = "ě", -- WARNING: these two keys are Latin e and E
	["E" .. CARON] = "Ě",
	["c" .. CARON] = "č",
	["C" .. CARON] = "Č",
	["s" .. CARON] = "š",
	["S" .. CARON] = "Š",
	["z" .. CARON] = "ž",
	["Z" .. CARON] = "Ž",
	-- the remaining entries are used in ru-pron:
	["ж" .. BREVE] = "ӂ",
	["Ж" .. BREVE] = "Ӂ",
	["j" .. CFLEX] = "ĵ",
	["J" .. CFLEX] = "Ĵ",
	["j" .. CARON] = "ǰ",
	-- no composed uppercase equivalent of J-caron
	["ʒ" .. CARON] = "ǯ",
	["Ʒ" .. CARON] = "Ǯ",
}

-- Decompose acute, grave, etc. on letters (esp. Latin) into individual
-- character + combining accent. But recompose Cyrillic and Latin characters
-- that we want to treat as units and get caught in the crossfire. We mostly
-- want acute and grave decomposed; perhaps should just explicitly decompose
-- those and no others.
--
-- TEXT is first converted to NFD; then every "letter + breve/diaeresis/
-- caron/circumflex" pair that has an entry in the recomposer table is
-- replaced by its precomposed form. Pairs without an entry are left
-- decomposed (a table lookup that yields nil keeps the original match).
function export.decompose(text)
	text = mw.ustring.toNFD(text)
	-- CFLEX has to be part of the class: the recomposer table contains
	-- j/J + circumflex entries, which could never match without it.
	text = rsub(text, ".[" .. BREVE .. DIA .. CARON .. CFLEX .. "]", recomposer)
	return text
end

-- Raise an assertion error if TEXT still contains a precomposed Latin
-- vowel with acute, grave or diaeresis, i.e. if it was not passed through
-- export.decompose() first.
function export.assert_decomposed(text)
	local composed_pos = rfind(text, "[áéíóúýàèìòùỳäëïöüÿÁÉÍÓÚÝÀÈÌÒÙỲÄËÏÖÜŸ]")
	assert(not composed_pos)
end

-- Transliterate TEXT with the Russian language object, then run the
-- result through export.decompose() so its accents are decomposed.
function export.translit(text)
	local latin = lang:transliterate(text)
	return export.decompose(latin)
end

-- Convert TEXT to NFC, recomposing acutes and graves into the preceding
-- vowels. Probably not necessary.
function export.recompose(text)
	local composed = mw.ustring.toNFC(text)
	return composed
end

-- Maps each precomposed Cyrillic vowel with grave accent to the plain
-- vowel followed by a combining grave.
local grave_decomposer = {}
for composed, base in pairs({["ѐ"] = "е", ["Ѐ"] = "Е", ["ѝ"] = "и", ["Ѝ"] = "И"}) do
	grave_decomposer[composed] = base .. GR
end

-- Decompose precomposed Cyrillic characters with grave accent. Nothing
-- similar is needed for the acute accent, as there are no precomposed
-- Cyrillic characters with acute, and it would be undesirable for the
-- precomposed ё and Ё.
function export.decompose_grave(word)
	local composed_grave_class = "[ѐЀѝЍ]"
	return (rsub(word, composed_grave_class, grave_decomposer))
end

-- Replacement table that removes grave accents: the combining grave maps
-- to nothing and the composed Cyrillic grave vowels map to plain vowels.
local grave_deaccenter = {}
grave_deaccenter[GR] = ""
grave_deaccenter["ѐ"] = "е"
grave_deaccenter["Ѐ"] = "Е"
grave_deaccenter["ѝ"] = "и"
grave_deaccenter["Ѝ"] = "И"

-- Same as grave_deaccenter, but also removes the combining acute.
local deaccenter = mw.clone(grave_deaccenter)
deaccenter[AC] = ""

-- Remove acute and grave accents from WORD and, if given, from the
-- transliteration TR. Diaeresis is not affected, neither the composed
-- one in ёЁ nor an uncomposed one after ѣ.
-- Returns the de-accented word and the de-accented TR (nil if TR was not
-- supplied).
function export.remove_accents(word, tr)
	local plain_word = rsub(word, "[" .. AC .. GR .. "ѐЀѝЍ]", deaccenter)
	local plain_tr = nil
	if tr then
		plain_tr = rsub(tr, "[" .. AC .. GR .. "]", deaccenter)
	end
	return plain_word, plain_tr
end

-- Remove acute and grave accents, but only when WORD is monosyllabic;
-- otherwise WORD and TR are returned untouched. Diaeresis (composed or
-- uncomposed) is kept because it indicates a change in vowel quality,
-- which still applies to monosyllabic words.
function export.remove_monosyllabic_accents(word, tr)
	if not export.is_monosyllabic(word) then
		return word, tr
	end
	return export.remove_accents(word, tr)
end

-- Replacement table that removes every stress mark: everything handled by
-- deaccenter, plus ё/Ё (turned into е/Е) and the combining diaeresis.
local destresser = mw.clone(deaccenter)
for jo, je in pairs({["ё"] = "е", ["Ё"] = "Е"}) do
	destresser[jo] = je
end
destresser[DIA] = ""

-- Subfunction of split_syllables(). CAPTURES is a list of the form
-- CONSONANT, VOWEL, CONSONANT, VOWEL, ..., CONSONANT, where CONSONANT is
-- zero or more consonants and VOWEL is exactly one vowel plus any
-- following accent(s). Each CONSONANT is joined with the VOWEL after it to
-- form a syllable, and the trailing CONSONANT is appended to the last
-- syllable. A one-element list is returned as is.
local function combine_captures(captures)
	local count = #captures
	if count == 1 then
		return captures
	end
	local syllables = {}
	local i = 1
	while i <= count - 1 do
		syllables[#syllables + 1] = captures[i] .. captures[i + 1]
		i = i + 2
	end
	local last = #syllables
	syllables[last] = syllables[last] .. captures[count]
	return syllables
end

-- Split Russian text RU and transliteration TR into syllables. Syllables
-- end with vowel + accent(s), except for the last one, which also takes
-- any trailing consonants. TR must already be decomposed. Raises an error
-- if the two do not yield the same number of syllables.
local function split_syllables(ru, tr)
	export.assert_decomposed(tr)
	-- Split into alternating consonant/vowel sequences, as described in
	-- combine_captures(). capturing_split() is like rsplit() but also
	-- includes any capturing groups of the split pattern in its result.
	local function syllabify(text, vowels)
		local vowel_re = "([" .. vowels .. "]" .. export.opt_accent .. ")"
		return combine_captures(strutils.capturing_split(text, vowel_re))
	end
	local rusyllables = syllabify(ru, export.vowel)
	local trsyllables = syllabify(tr, export.tr_vowel)
	if #rusyllables ~= #trsyllables then
		error("Russian " .. ru .. " doesn't have same number of syllables as translit " .. tr)
	end
	return rusyllables, trsyllables
end

-- Apply j correction to transliteration TR: je becomes e after a
-- consonant, jo becomes o after a sibilant, and ju becomes u after a hard
-- sibilant. In every rule the j between the two captures is dropped.
function export.j_correction(tr)
	local patterns = {
		"([" .. export.tr_cons_no_sign .. "]" .. export.opt_accent ..")[Jj]([EeĚě])",
		"([žščŽŠČ])[Jj]([Oo])",
		"([žšŽŠ])[Jj]([Uu])",
	}
	for _, pattern in ipairs(patterns) do
		tr = rsub(tr, pattern, "%1%2")
	end
	return tr
end

-- Remove every stress mark from RU: grave, acute and diaeresis are
-- deleted, ё/Ё become е/Е, and composed grave vowels become plain vowels.
local function make_unstressed_ru(ru)
	local stress_class = "[" .. GR .. AC .. DIA .. "ёЁѐЀѝЍ]"
	return (rsub(ru, stress_class, destresser))
end

-- Remove all stress marks (acute, grave, diaeresis) from RU and, if given,
-- from the transliteration TR. Returns the unstressed RU and the
-- unstressed TR (nil if TR was not supplied).
function export.make_unstressed(ru, tr)
	if not tr then
		return make_unstressed_ru(ru), nil
	end
	-- In the presence of TR, we need to do things the hard way: Splitting
	-- into syllables and only converting Latin o to e opposite a ё.
	-- (Declared local so the syllable lists do not leak into globals.)
	local rusyl, trsyl = split_syllables(ru, tr)
	for i = 1, #rusyl do
		if rfind(rusyl[i], "[ёЁ]") then
			trsyl[i] = rsub(trsyl[i], "[Oo]", {["O"] = "E", ["o"] = "e"})
		end
		rusyl[i] = make_unstressed_ru(rusyl[i])
		-- the same helper works on the translit as it only affects accents there
		trsyl[i] = make_unstressed_ru(trsyl[i])
	end
	-- Also need to apply j correction as otherwise we'll have je after cons, etc.
	return table.concat(rusyl, ""),
		export.j_correction(table.concat(trsyl, ""))
end

-- Make the last stressed position (acute, diaeresis, or ё/Ё) of WORD
-- unstressed; graves are left alone.
-- This helper is declared ahead of export.remove_jo() on purpose: a local
-- function is only visible to code that follows its declaration, so a use
-- placed before it would look up a (nonexistent) global instead.
local function make_unstressed_once_ru(word)
	local marks = AC .. DIA .. "ёЁ"
	return rsub(word, "([" .. marks .. "])([^" .. marks .. "]*)$",
		function(x, rest) return destresser[x] .. rest end, 1)
end

-- Remove diaeresis from Cyrillic text: ё/Ё become е/Е and a combining
-- diaeresis is deleted. Acute and grave accents are not touched.
local function remove_jo_ru(word)
	return rsub(word, "[" .. DIA .. "ёЁ]", destresser)
end

-- Remove diaeresis stress marks only. Returns the modified RU and the
-- modified transliteration TR (nil if TR was not supplied).
function export.remove_jo(ru, tr)
	if not tr then
		return remove_jo_ru(ru), nil
	end
	-- In the presence of TR, we need to do things the hard way: Splitting
	-- into syllables and only converting Latin o to e opposite a ё.
	local rusyl, trsyl = split_syllables(ru, tr)
	for i = 1, #rusyl do
		local has_jo = rfind(rusyl[i], "[ёЁ]")
		if has_jo then
			trsyl[i] = rsub(trsyl[i], "[Oo]", {["O"] = "E", ["o"] = "e"})
		end
		-- Only destress the translit of syllables whose Cyrillic loses a
		-- diaeresis, so that stresses on other syllables stay in sync
		-- between RU and TR.
		if has_jo or rfind(rusyl[i], DIA) then
			trsyl[i] = make_unstressed_once_ru(trsyl[i])
		end
		rusyl[i] = remove_jo_ru(rusyl[i])
	end
	-- Also need to apply j correction as otherwise we'll have je after cons, etc.
	return table.concat(rusyl, ""),
		export.j_correction(table.concat(trsyl, ""))
end

-- Make the last stressed syllable (acute or diaeresis) unstressed; leave
-- graves alone. Returns the modified RU and TR (TR is nil if it was not
-- supplied). If TR is given and NOCONCAT is truthy, the two lists of
-- individual syllables are returned instead, without j correction.
function export.make_unstressed_once(ru, tr, noconcat)
	if not tr then
		return make_unstressed_once_ru(ru), nil
	end
	-- In the presence of TR, we need to do things the hard way, as with
	-- make_unstressed(). (Declared local so the syllable lists do not leak
	-- into globals.)
	local rusyl, trsyl = split_syllables(ru, tr)
	-- walk backwards to find the last stressed syllable
	for i = #rusyl, 1, -1 do
		if export.is_stressed(rusyl[i]) then
			if rfind(rusyl[i], "[ёЁ]") then
				trsyl[i] = rsub(trsyl[i], "[Oo]", {["O"] = "E", ["o"] = "e"})
			end
			rusyl[i] = make_unstressed_once_ru(rusyl[i])
			-- the same helper works on the translit as it only affects accents there
			trsyl[i] = make_unstressed_once_ru(trsyl[i])
			break
		end
	end
	if noconcat then
		return rusyl, trsyl
	end
	-- Also need to apply j correction as otherwise we'll have je after cons
	return table.concat(rusyl, ""),
		export.j_correction(table.concat(trsyl, ""))
end

-- Make the first stressed position (acute, diaeresis, or ё/Ё) of WORD
-- unstressed; graves are left alone.
local function make_unstressed_once_at_beginning_ru(word)
	local marks = AC .. DIA .. "ёЁ"
	local function destress(rest, x)
		return rest .. destresser[x]
	end
	return rsub(word, "^([^" .. marks .. "]*)([" .. marks .. "])", destress, 1)
end

-- Make the first stressed syllable (acute or diaeresis) unstressed; leave
-- graves alone. Returns the modified RU and TR (TR is nil if it was not
-- supplied). If TR is given and NOCONCAT is truthy, the two lists of
-- individual syllables are returned instead, without j correction.
function export.make_unstressed_once_at_beginning(ru, tr, noconcat)
	if not tr then
		return make_unstressed_once_at_beginning_ru(ru), nil
	end
	-- In the presence of TR, we need to do things the hard way, as with
	-- make_unstressed(). (Declared local so the syllable lists do not leak
	-- into globals.)
	local rusyl, trsyl = split_syllables(ru, tr)
	-- walk forwards to find the first stressed syllable
	for i = 1, #rusyl do
		if export.is_stressed(rusyl[i]) then
			if rfind(rusyl[i], "[ёЁ]") then
				trsyl[i] = rsub(trsyl[i], "[Oo]", {["O"] = "E", ["o"] = "e"})
			end
			rusyl[i] = make_unstressed_once_at_beginning_ru(rusyl[i])
			-- the same helper works on the translit as it only affects accents there
			trsyl[i] = make_unstressed_once_at_beginning_ru(trsyl[i])
			break
		end
	end
	if noconcat then
		return rusyl, trsyl
	end
	-- Also need to apply j correction as otherwise we'll have je after cons
	return table.concat(rusyl, ""),
		export.j_correction(table.concat(trsyl, ""))
end

-- Subfunction of make_ending_stressed() and make_beginning_stressed(),
-- which add an acute accent to a syllable that may already have a grave
-- accent. Wherever a grave (combining, or composed into a Cyrillic vowel)
-- ends up next to an acute, the grave is removed and the acute kept.
-- Returns the corrected WORD and TR (nil if TR was not supplied).
function export.correct_grave_acute_clash(word, tr)
	local function keep_acute_only(grave)
		return grave_deaccenter[grave] .. AC
	end
	word = rsub(word, "([" .. GR .. "ѐЀѝЍ])" .. AC, keep_acute_only)
	word = rsub(word, AC .. GR, AC)
	if tr then
		tr = rsub(tr, GR .. AC, AC)
		tr = rsub(tr, AC .. GR, AC)
		return word, tr
	end
	return word, nil
end

-- Cyrillic-only worker for export.make_ending_stressed(): move the last
-- primary stress of WORD onto its final syllable.
local function make_ending_stressed_ru(word)
	if not export.is_ending_stressed(word) then
		-- Destress the last stressed syllable
		local destressed = make_unstressed_once_ru(word)
		-- Add an acute to the vowel of the last syllable
		local final_vowel = "([" .. export.vowel_no_jo .. "])([^" .. export.vowel .. "]*)$"
		local accented = rsub(destressed, final_vowel, "%1" .. AC .. "%2")
		-- If that put an acute and a grave next to each other, remove the grave
		return export.correct_grave_acute_clash(accented)
	end
	-- Already ending stressed: return the word as is so we don't mess up ё
	return word
end

-- Remove the last primary stress from the word and put it on the final
-- syllable. Leave grave accents alone except in the last syllable.
-- If final syllable already has primary stress, do nothing.
-- Returns the modified RU and TR (TR is nil if it was not supplied).
function export.make_ending_stressed(ru, tr)
	if not tr then
		return make_ending_stressed_ru(ru), nil
	end
	-- If already ending stressed, just return ru/tr so we don't mess up ё
	if export.is_ending_stressed(ru) then
		return ru, tr
	end
	-- Destress the last stressed syllable; pass in "noconcat" so we get
	-- the individual syllables back. (Declared local so the syllable lists
	-- do not leak into globals.)
	local rusyl, trsyl = export.make_unstressed_once(ru, tr, "noconcat")
	local last_ru, last_tr = #rusyl, #trsyl
	-- Add an acute to the last syllable of both Russian and translit
	rusyl[last_ru] = rsub(rusyl[last_ru], "([" .. export.vowel_no_jo .. "])",
		"%1" .. AC)
	trsyl[last_tr] = rsub(trsyl[last_tr], "([" .. export.tr_vowel .. "])",
		"%1" .. AC)
	-- If that caused an acute and grave next to each other, remove the grave
	rusyl[last_ru], trsyl[last_tr] =
		export.correct_grave_acute_clash(rusyl[last_ru], trsyl[last_tr])
	-- j correction didn't get applied in make_unstressed_once because
	-- we short-circuited it and made it return lists of syllables
	return table.concat(rusyl, ""),
		export.j_correction(table.concat(trsyl, ""))
end

-- Cyrillic-only worker for export.make_beginning_stressed(): move the
-- first primary stress of WORD onto its initial syllable.
local function make_beginning_stressed_ru(word)
	if not export.is_beginning_stressed(word) then
		-- Destress the first stressed syllable
		local destressed = make_unstressed_once_at_beginning_ru(word)
		-- Add an acute to the vowel of the first syllable
		local first_vowel = "^([^" .. export.vowel .. "]*)([" .. export.vowel_no_jo .. "])"
		local accented = rsub(destressed, first_vowel, "%1%2" .. AC)
		-- If that put an acute and a grave next to each other, remove the grave
		return export.correct_grave_acute_clash(accented)
	end
	-- Already beginning stressed: return the word as is so we don't mess up ё
	return word
end

-- Remove the first primary stress from the word and put it on the initial
-- syllable. Leave grave accents alone except in the first syllable.
-- If initial syllable already has primary stress, do nothing.
-- Returns the modified RU and TR (TR is nil if it was not supplied).
function export.make_beginning_stressed(ru, tr)
	if not tr then
		return make_beginning_stressed_ru(ru), nil
	end
	-- If already beginning stressed, just return ru/tr so we don't mess up ё
	if export.is_beginning_stressed(ru) then
		return ru, tr
	end
	-- Destress the first stressed syllable; pass in "noconcat" so we get
	-- the individual syllables back. (Declared local so the syllable lists
	-- do not leak into globals.)
	local rusyl, trsyl = export.make_unstressed_once_at_beginning(ru, tr, "noconcat")
	-- Add an acute to the first syllable of both Russian and translit
	rusyl[1] = rsub(rusyl[1], "([" .. export.vowel_no_jo .. "])",
		"%1" .. AC)
	trsyl[1] = rsub(trsyl[1], "([" .. export.tr_vowel .. "])",
		"%1" .. AC)
	-- If that caused an acute and grave next to each other, remove the grave
	rusyl[1], trsyl[1] = export.correct_grave_acute_clash(rusyl[1], trsyl[1])
	-- j correction didn't get applied in make_unstressed_once_at_beginning
	-- because we short-circuited it and made it return lists of syllables
	return table.concat(rusyl, ""),
		export.j_correction(table.concat(trsyl, ""))
end

-- Used for tracking and categorization: maps a lowercase Cyrillic letter
-- to the list of type labels that apply to a stem ending in that letter.
-- Looked up by export.get_stem_trailing_letter_type(). Declared local so
-- that it does not become an accidental global.
local trailing_letter_type = {
	["ш"] = {"sibilant", "cons"},
	["щ"] = {"sibilant", "cons"},
	["ч"] = {"sibilant", "cons"},
	["ж"] = {"sibilant", "cons"},
	["ц"] = {"c", "cons"},
	["к"] = {"velar", "cons"},
	["г"] = {"velar", "cons"},
	["х"] = {"velar", "cons"},
	["ь"] = {"soft-cons", "cons"},
	["ъ"] = {"hard-cons", "cons"},
	["й"] = {"palatal", "cons"},
	["а"] = {"vowel", "hard-vowel"},
	["я"] = {"vowel", "soft-vowel"},
	["э"] = {"vowel", "hard-vowel"},
	["е"] = {"vowel", "soft-vowel"},
	["ѣ"] = {"vowel", "soft-vowel"},
	["и"] = {"i", "vowel", "soft-vowel"},
	["і"] = {"i", "vowel", "soft-vowel"},
	["ѵ"] = {"i", "vowel", "soft-vowel"},
	["ы"] = {"vowel", "hard-vowel"},
	["о"] = {"vowel", "hard-vowel"},
	["ё"] = {"vowel", "soft-vowel"},
	["у"] = {"vowel", "hard-vowel"},
	["ю"] = {"vowel", "soft-vowel"},
}

-- Return the list of type labels for the last letter of STEM, after
-- removing acute/grave accents and lowercasing. Letters without an entry
-- in trailing_letter_type are classified as {"hard-cons", "cons"}.
function export.get_stem_trailing_letter_type(stem)
	local plain_stem = export.remove_accents(stem)
	local last_letter = ulower(usub(plain_stem, -1))
	return trailing_letter_type[last_letter] or {"hard-cons", "cons"}
end

-- Reduce stem by eliminating the "epenthetic" vowel. Applies to
-- nominative singular masculine 2nd-declension hard and soft, and
-- 3rd-declension feminine in -ь (e.g. любовь). STEM should be the
-- result after calling detect_stem_type(), but with final -й if
-- present.
--
-- STEM must end in о/е/ё (optionally with an acute) followed by one or
-- more consonants; TR, if given, must have the matching shape in
-- transliteration. Returns the reduced stem and reduced TR (nil if TR was
-- not supplied), or nil, nil if the stem cannot be reduced.
function export.reduce_stem(stem, tr)
	local pre, letter, post = rmatch(stem, "^(.*)([оОеЕёЁ])" .. AC .. "?([" .. export.cons .. "]+)$")
	if not pre then
		return nil, nil
	end
	local pretr, posttr
	if tr then
		local _
		pretr, _, posttr = rmatch(tr, "^(.*)([oOeE])" .. AC .. "?([" .. export.tr_cons .. "][" .. export.tr_cons .. export.accent .. "]*)$")
		if not pretr then
			return nil, nil -- should not happen unless tr is really messed up
		end
	end
	if letter == "О" or letter == "о" then
		-- FIXME, what about when the accent is on the removed letter?
		if post == "й" or post == "Й" then
			-- FIXME, is this correct?
			return nil, nil
		end
		letter = ""
	else
		-- е/ё: decide what (if anything) takes the place of the vowel
		local is_upper = rfind(post, "[" .. export.uppercase .. "]")
		if rfind(pre, "[" .. export.vowel .. "]" .. AC .. "?$") then
			-- after a vowel the removed letter is replaced by й
			letter = is_upper and "Й" or "й"
		elseif post == "й" or post == "Й" then
			-- a final й is dropped and the vowel is replaced by ь
			letter = is_upper and "Ь" or "ь"
			post = ""
			-- drop the corresponding consonant from the translit as well,
			-- otherwise the Cyrillic would end in ь but the translit in ʹj
			if tr then
				posttr = ""
			end
		elseif (rfind(post, "[" .. export.velar .. "]$") and
			 rfind(pre, "[" .. export.cons_except_sib_c .. "]$")) or
			(rfind(post, "[^йЙ" .. export.velar .. "]$") and
			 rfind(pre, "[лЛ]$")) then
			letter = is_upper and "Ь" or "ь"
		else
			letter = ""
		end
	end
	stem = pre .. letter .. post
	if tr then
		tr = pretr .. export.translit(letter) .. posttr
		-- the following is necessary to deal with cases where ё gets
		-- replaced with ь
		tr = rsub(tr, "[jJ]ʹ", "ʹ")
	end
	return stem, tr
end

-- Generate the dereduced stem given STEM and EPENTHETIC_STRESS (which
-- indicates whether the epenthetic vowel should be stressed); this is
-- without any terminating non-syllabic ending, which is added if needed by
-- the calling function. TR is the optional transliteration of STEM.
-- Returns the dereduced stem and TR, or nil, nil if unable to dereduce
-- (the stem has to end in two consonants).
function export.dereduce_stem(stem, tr, epenthetic_stress)
	if epenthetic_stress then
		stem, tr = export.make_unstressed_once(stem, tr)
	end

	-- FIXME!!! Deal with this special case
	--if not (z.stem_type == 'soft' and _.equals(z.stress_type, {'b', 'f'}) -- we should ignore asterix for 2*b and 2*f (so to process it just like 2b or 2f)
	--		 or _.contains(z.specific, '(2)') and _.equals(z.stem_type, {'velar', 'letter-ц', 'vowel'}))  -- and also the same for (2)-specific and 3,5,6 stem-types
	--then

	-- I think this corresponds to our -ья and -ье types, which we
	-- handle separately
	--if z.stem_type == 'vowel' then  -- 1).
	--	if _.equals(z.stress_type, {'b', 'c', 'e', 'f', "f'", "b'" }) then  -- gen_pl ending stressed  -- TODO: special vars for that
	--		z.stems['gen_pl'] = _.replace(z.stems['gen_pl'], 'ь$', 'е́')
	--	else
	--		z.stems['gen_pl'] = _.replace(z.stems['gen_pl'], 'ь$', 'и')
	--	end
	--end

	-- split off the last two consonants of the stem (and of the translit)
	local one_cons = "([" .. export.cons .. "])"
	local pre, letter, post = rmatch(stem, "^(.*)" .. one_cons .. one_cons .. "$")
	local pretr, lettertr, posttr
	if tr then
		local one_tr_cons = "(" .. export.tr_cons_acc_re .. ")"
		pretr, lettertr, posttr = rmatch(tr, "^(.*)" .. one_tr_cons .. one_tr_cons .. "$")
	end
	if not pre or (tr and not pretr) then
		-- the second case should not happen unless tr is really messed up
		return nil, nil
	end

	-- the epenthetic vowel is capitalized if the final consonant is
	local is_upper = rfind(post, "[" .. export.uppercase .. "]")
	local function cased(upper, lower)
		return is_upper and upper or lower
	end

	local velar_class = "[" .. export.velar .. "]"
	local epvowel
	if rfind(letter, "[ьйЬЙ]") then
		-- ь/й is replaced by the epenthetic vowel
		letter = ""
		lettertr = ""
		if rfind(post, "[цЦ]$") or not epenthetic_stress then
			epvowel = cased("Е", "е")
		else
			epvowel = cased("Ё", "ё")
		end
	elseif rfind(letter, "[" .. export.cons_except_sib_c .. "]") and rfind(post, velar_class) or
			rfind(letter, velar_class) then
		epvowel = cased("О", "о")
	elseif post == "ц" or post == "Ц" then
		epvowel = cased("Е", "е")
	elseif epenthetic_stress then
		if rfind(letter, "[" .. export.sib .. "]") then
			epvowel = cased("О" .. AC, "о" .. AC)
		else
			epvowel = cased("Ё", "ё")
		end
	else
		epvowel = cased("Е", "е")
	end
	assert(epvowel)

	stem = pre .. letter .. epvowel .. post
	if tr then
		tr = pretr .. lettertr .. export.translit(epvowel) .. posttr
		tr = export.j_correction(tr)
	end

	if epenthetic_stress then
		stem, tr = export.make_ending_stressed(stem, tr)
	end
	return stem, tr
end

return export

-- For Vim, so we get 4-space tabs
-- vim: set ts=4 sw=4 noet:
Модуль:Ru-common

Навигация

Поиск