模組:Zh-usex

下列說明文檔位於Module:Zh-usex/doc。^[編輯]

{{zh-x}}基於本模塊，例子見模板文檔。本模塊用到的數據位於Module:zh-usex/data。

a，o，e 開頭的音節連接在其它音節後面的時候，如果音節的界限發生混淆，用隔音符號（’）隔開，例如：pi ’ao（皮襖）。 [現代標準漢語，繁體]
a，o，e 开头的音节连接在其它音节后面的时候，如果音节的界限发生混淆，用隔音符号（’）隔开，例如：pi ’ao（皮袄）。 [現代標準漢語，簡體]
出自：1958, 汉语拼音方案，第五章
a, o, e kāitóu de yīnjié liánjiē zài qítā yīnjié hòumiàn de shíhou, rúguǒ yīnjié de jièxiàn fāshēng hùnxiáo, yòng géyīn fúhào (’) gékāi, lìrú: pi’ ao (pí'ǎo). [漢語拼音]
室外非常寒冷，大家都把身體蜷縮起來保暖。 [現代標準漢語，繁體]
室外非常寒冷，大家都把身体蜷缩起来保暖。 [現代標準漢語，簡體]
Shìwài fēicháng hánlěng, dàjiā dōu bǎ shēntǐ quánsuō qǐlái bǎonuǎn. [漢語拼音]
謀生／谋生 ― móushēng ―
恩來／恩来 ― ēnlái ―
人手一冊／人手一册 ― rénshǒu yīcè ―
武林高手 ― wǔlíngāoshǒu ―
有一 e5 歐巴桑去美國 chit4-tho5，欲去便所 e5 時，因為 m7 捌字，煞行入去查甫 e0 彼間，無外久，一 e5 阿督仔行入去，隨擱闖出來，一直喝講：「I am sorry，I am sorry。」尾 a0，彼e5 阿婆仔行出來氣chua3chua3 講：「夭壽哦！一 e5 阿督仔真無禮貌，行入來人 e5 便所，也擱怪人門「抑 m7 鎖咧！」 [臺語，繁體]
有一 e5 欧巴桑去美国 chit4-tho5，欲去便所 e5 时，因为 m7 捌字，煞行入去查甫 e0 彼间，无外久，一 e5 阿督仔行入去，随搁闯出来，一直喝讲：「I am sorry，I am sorry。」尾 a0，彼e5 阿婆仔行出来气chua3chua3 讲：「夭寿哦！一 e5 阿督仔真无礼貌，行入来人 e5 便所，也搁怪人门「抑 m7 锁咧！」 [臺語，簡體]
出自：曹麗華 (ed.), 笑詼一則抑m7鎖咧
Ū chi̍t ê o͘-bá-sáng khì Bí-kok chhit-thô, beh khì piān-só͘ ê sî, in-ūi m̄ bat-jī, soah kiâⁿ ji̍p-khì cha-po͘ ê hit keng, bô-gōa-kú, chi̍t ê a-tok-á kiâⁿ ji̍p-khì, sûi koh chhoàng chhut-lâi, it-ti̍t hoah kóng: “I am sorry, I am sorry.” Bóe--á, hit-ê a-pô-á kiâⁿ chhut-lâi khì-chhòachhòa kóng: “Iáu-siū ô͘! Chi̍t ê a-tok-á chin bô lé-māu, kiâⁿ ji̍p-lâi lâng ê piān-só͘, iá-koh koài lâng mn̂g “a̍h m̄ só--leh!” [白話字]
其子服之，令人狂狼放宕，故名。 [文言文，繁體和簡體]
出自：公元1578年，李時珍，《本草綱目》
Qí zǐ fú zhī, lìng rén kuángláng fàngdàng, gù míng. [漢語拼音]
陳塸／陈𫭟 ― Chén'ōu ―
𥔲嘉 ― Èjiā ―

自私自利性 ― zìsīzìlìxìng ―
第一次世界大戰／第一次世界大战 ― Dì-yī Cì Shìjiè Dàzhàn ―
歐美日韓／欧美日韩 ― Ōu-Měi-Rì-Hán ―
前奧巴馬粉／前奥巴马粉 ― qián-Àobāmǎfěn ―
青-青 [臺語] ― chheⁿ-chheⁿ [白話字] ― 美麗

local export = {}

local m_zh = require("Module:zh")
local m_languages = require("Module:languages")
local m_string_utils = require("Module:string utilities")

local find = m_string_utils.find
local gsub = m_string_utils.gsub
local match = m_string_utils.match
local split = m_string_utils.split
local sub = m_string_utils.sub
local upper = m_string_utils.upper

-- Use this when the actual title needs to be known.
-- (export.show reads its nsText to decide whether to accept |pagename= and
-- whether to emit categories.)
local actual_title = mw.title.getCurrentTitle()

-- Use this when testcases need to be able to override the title (for bolding,
-- for instance).
-- export.show reassigns both `title` and `PAGENAME` when |pagename= is given.
local title = actual_title
-- Prefers a pre-existing global PAGENAME if one is defined; otherwise falls
-- back to the text of the current title.
local PAGENAME = PAGENAME or title.text

-- Read-only data tables shared by the functions below.
local data = mw.loadData("Module:Zh-usex/data")
-- Lookup keyed by punctuation token; the value is what is placed in the
-- transcription for that token.
local punctuation = data.punctuation
-- Keyed by the |ref= value; export.show reads entry [1] as a default variety
-- code and entry [2] as the citation text to display.
local ref_list = data.ref_list
-- Per-variety overrides, indexed as [norm_code][word or character]; consulted
-- by get_tr before falling back to m_zh.check_pron.
local pron_correction = data.pron_correction
local polysyllable_pron_correction = data.polysyllable_pron_correction

-- Closing counterpart of the `<span ...>-{<!-- -->` prefixes
-- (zh_format_start_simp / zh_format_start_trad) built in export.show.
local zh_format_end = "<!-- -->}-</span>"

-- Character class used by export.show to detect characters that are still
-- present in a generated transcription (i.e. were not transliterated).
-- The commented-out line is the alternative that would take the class from
-- Module:scripts instead of hard-coding the ranges.
--local Han_pattern = "[" .. require("Module:scripts").getByCode("Hani"):getCharacters() .. "]"
local Han_pattern = "[一-鿿㐀-䶿﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧-﨩𠀀-𪛟𪜀-𮹟𰀀-𲎯]"
-- Byte-level pattern for one UTF-8 encoded character: a byte outside
-- \128-\191 followed by any number of bytes inside \128-\191.
local UTF8_char = '[^\128-\191][\128-\191]*'
-- Same as UTF8_char, but the first byte may not be "[" or "]" either.
local UTF8_char2 = '[^[%]\128-\191][\128-\191]*' -- not "[" or "]"

-- Shared `combine` for the varieties whose syllables are joined by hyphens:
-- keeps only the part of the reading before the first "/" and prefixes it
-- with "-".
local function hyphenated_first_reading(t)
	return "-" .. t:gsub("/.+", "")
end

-- Per-variety settings for segmenting an example and building its
-- transcription.  Fields:
--   segment_c      characters (as the inside of a pattern class) that delimit
--                  segments/links in the example text
--   separator_conv maps each separator run found in the input to the text
--                  placed between the corresponding transcription pieces
--   link_ignore    characters (as the inside of a pattern class) that are
--                  stripped from the displayed text and skipped when cutting
--                  words for transcription
--   tr_cap         whether a "^" capitalisation marker is honoured
--   combine        post-processes one looked-up reading before it is placed
--                  in the transcription (absent for `default`)
local tr_data = {
	cmn = {
		segment_c = " %-",
		separator_conv = {
			[""] = "",
			[" "] = " ",
			["-"] = "",
			["--"] = "-",
		},
		link_ignore = "\1.^",
		tr_cap = true,
		combine = function(t)
			-- temporary substitute of the apostrophe
			return t:gsub("^%f[aoe\195-\199]", "\3")
		end,
	},

	yue = {
		segment_c = " ",
		separator_conv = {
			[""] = "",
			[" "] = " ",
		},
		link_ignore = "\1",
		tr_cap = false,
		combine = function(t)
			-- drop everything from the first comma onwards
			return t:gsub(",.+", "")
		end,
	},

	["nan-hbl"] = {
		segment_c = " ~",
		separator_conv = {
			[""] = "",
			[" "] = " ",
			["~"] = "-",
		},
		link_ignore = "\1%%.^",
		tr_cap = true,
		combine = hyphenated_first_reading,
	},

	hak = {
		segment_c = " ~",
		separator_conv = {
			[""] = "",
			[" "] = " ",
			["~"] = "-",
		},
		link_ignore = "\1.^",
		tr_cap = true,
		combine = hyphenated_first_reading,
	},

	default = {
		segment_c = " ",
		separator_conv = {
			[""] = "",
			[" "] = " ",
		},
		link_ignore = "\1",
		tr_cap = false,
	},
}

-- Generate the transcription of one segment's display text.
--   display   - display text of the segment; may contain "\1" (converted to a
--               space here), "[x]" override brackets, "字{tr}" inline
--               transcriptions and <b>...</b> tags
--   norm_code - variety code; must be a key of tr_data that defines `combine`
-- Returns the transcription, with <b> and </b> still encoded as \1 and \2.
local function get_tr(display, norm_code)
	local rules = tr_data[norm_code]
	local stored_tr, stored_tr_n = {}, 1     -- transcriptions given inline via {…}
	local stored_punc, stored_punc_n = {}, 1 -- replacements for known punctuation
	-- a word is a run of anything except ignored characters, spaces, the
	-- </b> marker and the "{" / "}" placeholders
	local word_pattern = "[^" .. rules.link_ignore .. " \2{}" .. "]+"

	-- 1. \1 becomes a plain space.
	local text = display:gsub("\1", " ")

	-- 2. Drop "[x]" override brackets.
	text = text:gsub("%[" .. UTF8_char2 .. "%]", "")

	-- 3. Remember each inline transcription and leave "{" in its place.
	text = text:gsub("(" .. UTF8_char .. "){([^{}]*)}", function(char, given_tr)
		local kept
		if char:find("^%w$") then
			kept = given_tr
		else
			kept = rules.combine(given_tr)
		end
		stored_tr[stored_tr_n] = kept
		stored_tr_n = stored_tr_n + 1
		return "{"
	end)

	-- 4. Remember known punctuation tokens and leave "}" in their place.
	text = text:gsub("%f[^ ][^ ]+%f[ ]", function(token)
		local replacement = punctuation[token]
		if not replacement then
			return token
		end
		stored_punc[stored_punc_n] = replacement
		stored_punc_n = stored_punc_n + 1
		return "}"
	end)

	-- 5. Encode bold tags as single control characters for further processing.
	text = text:gsub("<b>", "\1")
	text = text:gsub("</b>", "\2")

	-- 6. Transcribe word by word.
	text = text:gsub(word_pattern, function(word)
		-- whole-word corrections take priority
		local reading = polysyllable_pron_correction[norm_code][word]
			or pron_correction[norm_code][word]
		if reading then
			return reading
		end
		-- check_pron is passed 1 only for a single character, 0 otherwise
		local length = word:find("^" .. UTF8_char .. "$") and 1 or 0
		reading = m_zh.check_pron(word, norm_code, length, true)
		if reading then
			return rules.combine(reading)
		end
		-- no reading for the word as a whole: go character by character
		return word:gsub(UTF8_char, function(ch)
			local ch_reading = pron_correction[norm_code][ch]
			if ch_reading then
				return ch_reading
			end
			ch_reading = m_zh.check_pron(ch, norm_code, 1, true)
			return ch_reading and rules.combine(ch_reading) or ch
		end)
	end)

	-- 7. Variety-specific handling of "." in the result.
	if norm_code == "cmn" then
		text = text:gsub("%.%.", "-")
	end
	if norm_code ~= "yue" then
		text = text:gsub("%.", " ")
	end

	-- 8. Put the remembered transcriptions and punctuation back, in order.
	stored_tr_n, stored_punc_n = 0, 0
	text = text:gsub("{", function()
		stored_tr_n = stored_tr_n + 1
		return stored_tr[stored_tr_n]
	end)
	text = text:gsub("}", function()
		stored_punc_n = stored_punc_n + 1
		return stored_punc[stored_punc_n]
	end)

	-- 9. For yue, add a space after each letter + tone-number group.
	if norm_code == "yue" then
		text = text:gsub("[a-z][1-9]%-?[1-9]?", "%0 ")
	end
	return text
end

-- Build a wikilink to the 漢語 section of a page.
--   target  - page to link to; an empty string means "use the display text"
--   display - link label (may contain <b> tags)
-- Returns the "[[page#漢語|label]]" wikitext.
local function make_link(target, display)
	local page = target
	if target == "" and display then
		page = display
	end
	-- bold tags belong in the label only, never in the page name
	local clean_page = page:gsub("</?b>", "")
	return "[[" .. clean_page .. "#漢語|" .. display .. "]]"
end

-- Apply conv_fun to text while honouring "x[y]" overrides: the character in
-- front of a bracket is replaced by the bracketed character verbatim, and
-- every stretch of text outside such overrides is passed through conv_fun.
--   conv_fun - function(string) -> string
--   text     - text to convert
local function convert(conv_fun, text)
	local override_pattern = "([^%[%]]*)" .. UTF8_char2 .. "%[(" .. UTF8_char2 .. ")%]"
	-- The "A[A]" sentinel makes the trailing stretch of text match the
	-- pattern too; it leaves a single "A" at the end, which is cut off below.
	local padded = text .. "A[A]"
	local converted = padded:gsub(override_pattern, function(plain, override)
		return conv_fun(plain) .. override
	end)
	return converted:sub(1, -2)
end

-- Entry point: format a Chinese usage example / quotation.
--
-- Read from frame.args (the #invoke call):
--   category - used in the category name; defaults to "使用例"
--   variety  - fallback variety code when parent argument 3 is absent
-- Read from frame:getParent().args (the template call), as declared in
-- `params` below:
--   1 (required) example text, 2 translation, 3 variety code, lit, tr,
--   ref/r, inline, audio/a, collapsed, nocat, tr_nocap, simp, and pagename
--   (only accepted when the current page is in the Template namespace).
--
-- Returns the wikitext for the example: a multi-line <dl class="zhusex">
-- layout or a single-line "text ― transcription ― translation" layout,
-- optionally followed by a category link.
--
-- Raises an error if the variety is not found in the data module, if a
-- separator in the example is not allowed for the variety, or if |simp= is
-- used where it is not permitted.
function export.show(frame)
	local params = {
		[1] = { required = true },	-- example
		[2] = {},					-- translation
		[3] = {},					-- variety
		lit = {},
		tr = {},
		ref = {}, r = { alias_of = "ref" },
		inline = {},
		audio = {}, a = { alias_of = "audio" },
		collapsed = { type = "boolean" },
		-- Allow specifying pagename in testcases on documentation page.
		pagename = actual_title.nsText == "Template" and {} or nil,
		nocat = { type = "boolean" },
		tr_nocap = { type = "boolean" },
		simp = { type = "boolean" }
	}
	
	local category = frame.args["category"] or "使用例" -- or error("Please specify the category.")
	
	local args, unrecognized_args = require("Module:parameters").process(frame:getParent().args, params, true)
	
	if args.pagename then
		-- Override the title used for bolding/self-link detection
		-- (|pagename= is only accepted in the Template namespace, see params).
		title = mw.title.new(args.pagename)
		PAGENAME = title.text
	end
	
	-- PAGENAME is spliced into Lua patterns and replacement strings when
	-- auto-bolding below, so escape pattern magic characters and "%" first;
	-- otherwise a title containing e.g. "-" or "." would match the wrong text.
	local pagename_pattern = PAGENAME:gsub("[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0")
	local pagename_repl = PAGENAME:gsub("%%", "%%%%")
	
	local example = args[1] or error("Example unspecified.")
	local translation = args[2]
	local literal = args["lit"]
	local reference = args["ref"]
	local manual_tr = args["tr"]
	local inline = args["inline"]
	local audio_file = args["audio"]
	local collapsed = args["collapsed"]
	local simp = args["simp"]
	-- number of UTF-8 lead bytes in \194-\244, i.e. of multi-byte characters
	local original_length = example:gsub("[^\194-\244]+",""):len()
	local variety = args[3] or frame.args["variety"] or (ref_list[reference] and ref_list[reference][1] or false) or "cmn"
	local variety_data = data.varieties_by_code[variety] or data.varieties_by_old_code[variety] or error("Variety " .. variety .. " not recognized.")
	-- unpack() doesn't work here because the data was loaded using mw.loadData()
	local std_code, norm_code, desc, tr_desc = variety_data[2], variety_data[3], variety_data[4], variety_data[5]
	norm_code = norm_code or std_code
	variety = std_code
	
	local lang_obj_wikt = m_languages.getByCode(variety, 3, "allow etym")
	
	if next(unrecognized_args) then
		--[[Special:WhatLinksHere/Wiktionary:Tracking/zh-usex/unrecognized arg]]
		require("Module:debug").track_unrecognized_args(unrecognized_args, "zh-usex")
	end
	
	if reference then
		require("Module:debug").track("zh-usex/ref")
	end
	
	if example:find("[%(%)]") then
		require("Module:debug").track("zh-usex/parentheses")
	end
	
	if example:find("&#") then
		require("Module:debug").track("zh-usex/html")
	end
	
	-- future escape character?
	if example:find("`") then
		require("Module:debug").track("zh-usex/backtick")
	end
	if example:find("  ") then
		require("Module:debug").track("zh-usex/double-space")
	end
	
	if (norm_code == "nan-hbl" or norm_code:find("^hak")) and example:find("%-") then
		require("Module:debug").track("zh-usex/hyphen")
	end
	
	if example:find("%w%{") then
		require("Module:debug").track("zh-usex/rom-text")
	end
	
	if not translation or translation == '' then -- per standard [[Module:usex]]
		-- translation = '<small>(please add an English translation of this ' .. (category == "quotations" and "quotation" or "usage example") .. ')</small> [[Category:' .. lang_obj_wikt:getFullName() .. (category == "詞語搭配" and  "用例" or category) .. "轉寫請求" .. ']]'
	end
	
	-- should we generate the other (simp/trad) form
	-- (in the end, only actually display if the converted text is different)
	local do_conv = true
	if norm_code == "vi" or norm_code == "ko" then
		do_conv = false
	end
	local conv_fun = m_zh.ts
	if simp then
		if category ~= "引言" then error("parameter simp cannot be true in [[Template:zh-x]] or [[Template:zh-co]].") end
		if norm_code == "vi" or norm_code == "ko" or norm_code == "lzh" or variety == "yue-HK" or variety == "cmn-TW" or
				variety == "nan-hbl-TW" or variety == "lzh-cmn-TW" or variety == "hak-hai" or variety == "hak-dab" or
				variety == "hak-zha" then
			error(("Parameter simp= cannot be specified for variety '%s'"):format(variety))
		end
		conv_fun = m_zh.st
	end
	
	-- should we generate the transcription
	local generate_tr = false
	if tr_data[norm_code] then
		if manual_tr then
			require("Module:debug").track("zh-usex/manual-tr")
		else
			generate_tr = true
		end
	end
	
	local boldify = false
	-- automatically boldify pagetitle if nothing is in bold
	if not example:find("'''") and not punctuation[PAGENAME] then
		boldify = true
	end
	
	-- tidying up the example, making it ready for transcription
	example = gsub(example, "[？！，。、“”…；：‘’|（）「」『』—《》〈〉【】·　．～]", " %0 ")
	example = example:gsub("—  —", "——") -- double em-dash (to be converted to single em-dash later)
		:gsub("<br */?>"," <br> ") -- process linebreaks
		:gsub("^ *",""):gsub(" *$",""):gsub("  +"," ") -- process spaces
		:gsub("%[%[(.-)%]%]%f[^%]]",function(a) -- process [[]]
			return a:gsub(" ","\1")
		end)
		:gsub("'''([^']+)'''", "<b>%1</b>") -- normalise bold syntax
		:gsub("%^<b>","<b>^")
		:gsub("</b>(%["..UTF8_char2.."%])","%1</b>")
		:gsub("</b>({[^{}]*})","%1</b>")
	
	-- parsing: convert "-", "--", "---" to "-", "..", "--" respectively
	-- so that "-" is the character that delimits links
	-- further explanation will use the replacement result to refer to the commands
	if norm_code == "cmn" then
		example = example:gsub("%-+",{["--"]="..",["---"]="--"})
		if example:find("%-[^%-%s]+\\") then
			require("Module:debug").track("zh-usex/extra-pinyin")
		end
	end

	local regex_data = tr_data[norm_code] or tr_data.default
	local segment_c = regex_data.segment_c -- the characters that delimit links
	local separator_conv = regex_data.separator_conv -- the table for separator mapping
	local link_ignore = regex_data.link_ignore -- the characters that do not affect links
	local tr_cap = regex_data.tr_cap -- transliteration can be capitalised
	local segment_regex = "(["..segment_c.."]*)([^"..segment_c.."]+)" -- the regex that matches each segment and the separator before it
	
	local cache = {} -- store the result of each segment
	local trad_text = ""
	local simp_text = ""
	-- generate the transliteration
	-- but store the results in the cache
	-- and also build up trad_text and simp_text
	local tr_text = example:gsub(segment_regex, function(separator,seg)
		separator = separator_conv[separator] or error('Invalid separator: "'..separator..'"')
		if cache[seg] then
			trad_text = trad_text .. cache[seg].trad
			simp_text = simp_text .. cache[seg].simp
			return separator..cache[seg].tr
		end
		
		if punctuation[seg] then
			cache[seg] = {
				trad = seg,
				simp = seg,
				tr = punctuation[seg]
			}
			trad_text = trad_text .. seg
			simp_text = simp_text .. seg
			return separator..punctuation[seg]
		end
		
		-- "@" anywhere in the segment suppresses the link
		local generate_link = 0
		seg, generate_link = seg:gsub("@","")
		generate_link = (generate_link == 0)
		
		local target, display = "", seg
		local pos = seg:find("\\",1,true)
		if generate_link and pos then
			-- move formatting from start of target to display
			-- e.g. <b>^甲\乙 --> 甲\<b>^乙
			local bold = ""
			local caret = ""
			local start = 1
			if seg:sub(1,3) == "<b>" then
				bold,start = "<b>",4
			end
			-- Compare only the single character at `start`; comparing the
			-- whole remainder of the segment could never equal "^" here.
			if tr_cap and seg:sub(start,start) == "^" then
				caret,start = "^",start+1
			end
			target, display = seg:sub(start,pos-1), bold..caret..seg:sub(pos+1,-1)
			if target:find("</?b>") then -- Check for bold tags in target.
				require("Module:debug").track("zh-usex/bold-target")
			end
		end
		
		target = target:gsub("\1","")
		local target_trad = target:gsub("%["..UTF8_char2.."%]","")
		local target_simp = do_conv and convert(conv_fun, target)
		
		local occurrences = 0
		if boldify then
			display, occurrences = display:gsub(pagename_pattern,"<b>"..pagename_repl.."</b>")
		end
		if occurrences > 0 then
			-- undo bolding inside "[x]" overrides, then renormalise the
			-- position of "^", "[x]" and "{tr}" relative to the bold tags
			display = display:gsub("%[<b>"..pagename_pattern.."</b>%]","["..pagename_repl.."]")
				:gsub("%^<b>","<b>^")
				:gsub("</b>(%["..UTF8_char2.."%])","%1</b>")
				:gsub("</b>({[^{}]*})","%1</b>")
		end
		
		local display_derom = display:gsub("{[^{}]*}","")
			:gsub("["..link_ignore.."]+","")
		local display_trad = display_derom:gsub("%["..UTF8_char2.."%]","")
		local display_simp = do_conv and convert(conv_fun, display_derom) or ""
		local seg_tr = generate_tr and get_tr(display, norm_code) or ""
		
		-- never link to the page itself; bold it instead if nothing was bolded
		if display_trad:gsub("</?b>","") == PAGENAME or target_trad == PAGENAME then
			generate_link = false
			if boldify and occurrences == 0 then
				display_trad = "<b>" .. display_trad .. "</b>"
				display_simp = "<b>" .. display_simp .. "</b>"
				seg_tr = "\1" .. seg_tr .. "\2"
			end
		end
		
		local seg_trad = generate_link and make_link(target_trad, display_trad) or display_trad
		local seg_simp = generate_link and do_conv and make_link(target_simp, display_simp) or display_simp
		cache[seg] = {
			trad = seg_trad,
			simp = seg_simp,
			tr = seg_tr
		}
		trad_text = trad_text .. seg_trad
		simp_text = simp_text .. seg_simp
		return separator..seg_tr
	end)
	
	if trad_text == simp_text then
		do_conv = false
		simp_text = nil
	end
	
	if not trad_text:find("</?b>") then
		require("Module:debug").track("zh-usex/no-bold")
	end
	
	-- format generated tr
	-- at this point we have three temporary substitutions:
	-- <b>:\1, </b>:\2, ':\3
	if generate_tr then
		if norm_code == "cmn" then -- format apostrophe
			tr_text = tr_text
				:gsub("%f[^%z -]([\1\2^]*)\3", "%1")
				:gsub("\1\3","\3\1") -- <b>' → '<b>
				:gsub("^\3","\3^")   -- ^'   → '^ (shouldn't occur)
		elseif norm_code == "nan-hbl" or norm_code == "hak" then -- format hyphens
			tr_text = tr_text
				:gsub("%^%-","-^")
				:gsub("\1%-","-\1") -- <b>-  → -<b>
				:gsub("%-\2","\2-") -- -</b> → </b>-
				:gsub("%f[^%z ]%-%f[^%z %-]","") -- "-chhek" at beginning -> "chhek"
				:gsub("%f[%z %-]%-%f[%z ]","") -- "shi-" at the end -> "shi"
				:gsub("%-+","-")
				:gsub("%-?%%%-?", "--")
		end
		tr_text = tr_text:gsub("[\1\2\3]",{["\1"]="<b>",["\2"]="</b>",["\3"]="&#39;"})
		
		if find(tr_text, Han_pattern) then
			require("Module:debug").track("zh-usex/character without transliteration")
		end
	end

	local tag_start = " <span style=\"color:darkgreen; font-size:x-small;\">&#91;" -- HTML entity since "[[[w:MSC|MSC]]" is interpreted poorly
	local tag_end = "&#93;</span>"
	
	local simp_link = "[[w:简体中文|簡體]]"
	local trad_link = "[[w:繁体中文|繁體]]"
	if simp then
		simp_link, trad_link = trad_link, simp_link
	end
	
	-- insert a space between two adjacent links that both border on Latin letters
	local auto_spaces
	trad_text, auto_spaces = trad_text:gsub("([a-zA-Z]%]%])(%[%[[a-zA-Z])", "%1 %2")
	simp_text = do_conv and simp_text:gsub("([a-zA-Z]%]%])(%[%[[a-zA-Z])", "%1 %2") or false
	local phonetic = manual_tr or (generate_tr and tr_text)

	if auto_spaces > 0 then
		require("Module:debug").track("zh-usex/auto-spaces")
	end
	
	-- overall transcription formatting
	if phonetic then
		phonetic = gsub(phonetic, " </b>", "</b> ")
		phonetic = gsub(phonetic, "  ", " ")
		if norm_code == "yue" or norm_code == "zhx-tai" or norm_code == "csp" or norm_code == "nan-tws" or norm_code == "nan-hnm" or
			norm_code == "zhx-sic" or norm_code == "cjy" or norm_code == "hsn" or norm_code == "gan" or
			norm_code == "yue-dgx" or norm_code == "yue-yjx" or norm_code == "yue-ylx" or
			variety == "hak-mei" then
			phonetic = gsub(phonetic, "([a-zê]+)([1-9%-]+)", "%1<sup>%2</sup>") -- superscript tones
		end
		phonetic = gsub(phonetic, " ([,%.?!;:’”)])", "%1") -- remove excess spaces from punctuation
		phonetic = gsub(phonetic, "([‘“(]) ", "%1")
		phonetic = phonetic:gsub(" <br> ", "<br>")
		if not manual_tr then
			if norm_code == "nan-hbl" then
				phonetic = gsub(phonetic, " +%-%-", "--")
			end
		end

		-- capitalisation
		if not manual_tr then
			if norm_code == "yue" or norm_code == "zhx-tai" or norm_code == "cjy" or norm_code == "hsn" or
				norm_code == "cmn-wuh" or norm_code == "nan-tws" or norm_code == "wxa" or norm_code == "wuu" or
				variety == "hak-mei" then
				args.tr_nocap = true
			end
			if not args.tr_nocap and match(example, "[。？！]") then
				phonetic = "^" .. gsub(phonetic, "([%.?!]) ", "%1 ^")
			end
			if not args.tr_nocap then
				phonetic = gsub(phonetic, "([%.%?%!][”’]) (.)", "%1 ^%2")
				phonetic = gsub(phonetic, "<br>(.)", "<br>^%1")
				phonetic = gsub(phonetic, ": ([“‘])(.)", ": %1^%2")
			end
			-- "^" marks the next character for upper-casing; keep it inside
			-- the bold tag, apply it, then drop any markers that remain
			phonetic = gsub(phonetic, "%^<b>", "<b>^")
			phonetic = gsub(phonetic, "%^+.", upper)
			phonetic = gsub(phonetic, "%^", "")
		end

		if norm_code == "wuu" then
			local wuu_pron = require("Module:wuu-pron")
			if phonetic:find(":") then
				phonetic = "''" .. wuu_pron.wugniu_format(phonetic:sub(4)) .. "''"
			else
				require("Module:debug").track("zh-usex/wuu-wikt")
				phonetic = "''" .. wuu_pron.wugniu_format(wuu_pron.wikt_to_wugniu(phonetic)) .. "''"
			end
		elseif norm_code == "cmn-wuh" or norm_code == "wxa" then
			phonetic = "<span class=\"IPA\">[" .. phonetic .. "]</span>"

		elseif norm_code == "cdo" then
			local cdo_pron = require("Module:cdo-pron")
			phonetic = "<i>" .. phonetic .. "</i>" ..
				(not match(phonetic, "-[^ ]+-[^ ]+-[^ ]+-")
					and " / <span class=\"IPA\"><small>[" .. cdo_pron.sentence(phonetic) .. "]</small></span>"
					or "")

		else
			phonetic = "<i>" .. phonetic .. "</i>"
		end
		phonetic = "<span lang=\"zh-Latn\" style=\"color:#404D52\">" .. phonetic .. "</span>"
	end
	
	local collapse_start, collapse_end, collapse_tag, collapse_border_div, collapse_border_div_end = '', '', '', '', ''
	local simplified_start = '<br>'
	if collapsed then
		collapse_start = '<span class="vsHide">'
		collapse_end = '</span>'
		collapse_tag = '<span class="vsToggleElement" style="color:darkgreen; font-size:x-small;padding-left:10px"></span>'
		collapse_border_div = '<div class="vsSwitcher" data-toggle-category="usage examples" style="border-left: 1px solid #930; border-left-width: 2px; padding-left: 0.8em;">'
		collapse_border_div_end = '</div>'
		simplified_start = '<hr>'
	end
	
	-- Output fragments.  These are deliberately local: as globals they would
	-- keep their value from a previous call in the same Lua state and leak
	-- into calls that do not set them.
	local cat, trad_tag, simp_tag, ts_tag, tr_tag, audio, divider
	
	if actual_title.nsText == '' and (not args.nocat) then -- fixme: probably categorize only if text contains the actual word
		if reference then
			cat = "[[Category:有引文的" .. lang_obj_wikt:getFullName() .. "詞]]"
		else
			cat = "[[Category:有" .. category .. "的" .. lang_obj_wikt:getFullName() .. "詞]]"
		end
	end
	
	local zh_format_start_simp = "<span lang=\"zh-Hans\" class=\"Hans\">-{<!-- -->"
	local zh_format_start_trad = "<span lang=\"zh-Hant\" class=\"Hant\">-{<!-- -->"
	if simp then zh_format_start_simp, zh_format_start_trad = zh_format_start_trad, zh_format_start_simp end
	
	-- indentation, font and identity tags
	if ((norm_code == "cmn" and original_length > 7)
			or (norm_code ~= "cmn" and original_length > 5)
			or reference
			or collapsed
			or (match(example, "[，。？！、：；　]") and norm_code == "wuu")
			or (norm_code == "cdo" and original_length > 3)
			or ((inline or "") ~= "")) then
		-- multi-line layout

		trad_text = zh_format_start_trad .. trad_text .. zh_format_end

		if not phonetic and translation then
			translation = "<i>" .. translation .. "</i>"
		end

		if phonetic  then
			phonetic = "<dd>" .. collapse_start .. phonetic
			if translation then
				translation = "<dd>" .. translation .. "</dd>"
			end
			tr_tag = tag_start .. tr_desc .. tag_end .. collapse_end .. "</dd>"
		elseif translation then
			translation = "<dd>" .. translation .. "</dd>"
		end

		if audio_file then
			audio = "<dd>[[File:" .. audio_file .. "]]</dd>"
		end
		
		if do_conv then
			trad_tag = collapse_start .. tag_start .. desc .. "，" .. trad_link .. tag_end .. collapse_end .. collapse_tag
			simp_text = simplified_start .. collapse_start .. zh_format_start_simp .. simp_text .. zh_format_end
			simp_tag = tag_start .. desc .. "，" .. simp_link .. tag_end .. collapse_end
		elseif norm_code == "vi" or norm_code == "ko" then
			trad_tag = collapse_start .. tag_start .. desc .."，" .. trad_link .. tag_end .. collapse_end .. collapse_tag
		else
			trad_tag = collapse_start .. tag_start .. desc .."，" .. trad_link .. "和" .. simp_link .. tag_end .. collapse_end .. collapse_tag
		end

		if reference then
			reference = "<dd>" .. collapse_start .. "<small>出自：" ..
				(ref_list[reference] and ref_list[reference][2] or reference) .. "</small>" .. collapse_end .. "</dd>"
		end

		return collapse_border_div .. "<dl class=\"zhusex\">" .. trad_text .. trad_tag .. (simp_text or "") .. (simp_tag or "") .. (reference or "") ..
			(phonetic and phonetic .. tr_tag or "") .. (audio or "") .. (translation or "") .. "</dl>" .. (cat or "") .. collapse_border_div_end

	else
		-- single-line layout
		trad_text = zh_format_start_trad .. trad_text .. zh_format_end
		divider = "&nbsp; ―&nbsp; "

		if variety ~= "cmn" then
			ts_tag = tag_start .. desc .. tag_end
			tr_tag = tag_start .. tr_desc .. tag_end
		end

		-- translation is optional; only italicise it when it exists
		if not phonetic and translation then
			translation = "<i>" .. translation .. "</i>"
		end

		if do_conv then
			simp_text = "<span lang=\"zh-Hani\" class=\"Hani\">／</span>" .. zh_format_start_simp .. simp_text .. zh_format_end
		end

		if audio_file then
			audio = " [[File:" .. audio_file .. "]]"
		end

		return trad_text .. (simp_text or "") .. (ts_tag or "") .. divider ..
			(phonetic and phonetic .. (tr_tag or "") .. (audio or "") .. divider or "") .. (translation or "") .. (literal and "（字面義為“" .. literal .. "”）" or "") ..
			(cat or "")
	end
end

-- function export.migrate(text, translation, ref)
-- 	if type(text) == "table" then
-- 		if not text.args or not text.args[1] then
-- 			text = text:getParent()
-- 		end
-- 		if text.args[2] and text.args[2] ~= '' then
-- 			ref = text.args[1]
-- 			translation = text.args[3]
-- 			text = text.args[2]
-- 		else
-- 			text = text.args[1]
-- 		end
-- 	end
-- 	text = text:gsub('^[%*#: \n]+', ''):gsub('[ \n]+$', ''):gsub(' +', '　'):gsub('\n+', '<br>'):gsub('|', '\\'):gsub('\'\'\'%[%[', ' '):gsub('%]%]\'\'\'', ' '):gsub('%]%]%[%[', ' '):gsub('%]%]', ''):gsub('%[%[', '')
-- :gsub('\'\'\'', ''):gsub(',', '，'):gsub('!', '！'):gsub('%?', '？')
-- 	if translation then
-- 		if ref and ref ~= '' then
-- 			return '{{zh-x|' .. text .. '|' .. translation .. '|ref=' .. ref .. '}}'
-- 		else
-- 			return '{{zh-x|' .. text .. '|' .. translation .. '}}'
-- 		end
-- 	else
-- 		return text
-- 	end
-- end

return export