local export = {}

local namespace = mw.title.getCurrentTitle().nsText
local substring = mw.ustring.sub

--[[
	Debugging helper: forwards its arguments to mw.log, but only when the
	current page is in the "Module" namespace.
]]
local function log(...)
	if namespace == "Module" then
		mw.log(...)
	end
end

--[[
	The number of characters or ideographic description sequences that must
	follow each ideographic description character (IDC).
]]
local IDchars = {
	["⿰"] = 2,
	["⿱"] = 2,
	["⿲"] = 3,
	["⿳"] = 3,
	["⿴"] = 2,
	["⿵"] = 2,
	["⿶"] = 2,
	["⿷"] = 2,
	["⿸"] = 2,
	["⿹"] = 2,
	["⿺"] = 2,
	["⿻"] = 2,
	--[[
	-- in future perhaps: https://www.unicode.org/L2/L2018/18012-irgn2273-four-new-idcs.pdf
	[mw.ustring.char(0x2FFC)] = 2,
	[mw.ustring.char(0x2FFD)] = 2,
	[mw.ustring.char(0x2FFE)] = 1,
	[mw.ustring.char(0x2FFF)] = 1,
	--]]
}

--[[
	Finds the end of the ideographic description sequence (IDS) that begins
	with the IDC `IDchar` located at character index `i` of `text`.

	Each component of the sequence is either a single character or, when that
	character is itself an IDC, a nested IDS, which is handled by recursion.

	Returns the character index of the last character of the IDS, or nil if
	any argument is missing, if `IDchar` is not a known IDC, or if the text
	ends before every expected component has been found.
]]
local function findEndOfIDS(text, IDchar, i)
	if not (text and IDchar and i) then
		return nil
	end
	
	-- Number of components expected after the current IDC.
	local components = IDchars[IDchar]
	if not components then
		return nil
	end
	
	local j = i
	for _ = 1, components do
		j = j + 1
		
		local char = substring(text, j, j)
		
		if char == "" then
			-- Ran off the end of the text: the sequence is incomplete.
			return nil
		elseif IDchars[char] then
			j = findEndOfIDS(text, char, j)
			if not j then
				--[[
					The nested sequence is incomplete, so this one is too.
					Bail out now rather than doing arithmetic on nil in the
					next iteration.
				]]
				return nil
			end
		end
	end
	
	-- Every expected component was found; j is the last index of the IDS.
	return j
end

--[[
	Looks up the sortkey for `codepoint` in the data module that covers it.

	`start` is the first codepoint of the range that `codepoint` belongs to
	(13312 or 131072); each data module covers 500 consecutive codepoints.

	If `returnModule` is truthy, returns the name of the data module instead
	of loading it. Otherwise returns the module's entry for `codepoint`, or
	nil if the module cannot be loaded or has no such entry.
]]
local function getFromModule(codepoint, start, returnModule)
	--[=[
		The sortkey modules handle two sets of codepoints.
		The first set runs from [[Module:zh-sortkey/data/001]]
		to [[Module:zh-sortkey/data/056]], then there is a gap of
		90134 codepoints. The second set runs from
		[[Module:zh-sortkey/data/057]] to [[Module:zh-sortkey/data/177]].
	]=]
	local moduleStart = {
		[13312] = 1,
		[131072] = 57,
	}
	
	--[[
		Floor explicitly: the quotient is usually fractional, and "%d" must
		not be left to truncate it implicitly (later Lua versions reject
		non-integral numbers there).
	]]
	local moduleIndex = math.floor((codepoint - start) / 500) + moduleStart[start]
	local moduleName = string.format("Module:zh-sortkey/data/%03d", moduleIndex)
	-- log(codepoint .. ": data module: " .. moduleName)
	
	if returnModule then
		return moduleName
	end
	
	local success, data = pcall(mw.loadData, moduleName)
	if success then
		-- log("success! ... " .. codepoint .. ": " .. tostring(data[codepoint]))
		return data[codepoint]
	end
	
	-- log("failure: " .. codepoint .. " (" .. mw.ustring.char(codepoint) .. ")")
	return nil
end

--[[
	Returns the sortkey data for a character.

	`char` is either a string (its first character is used) or a numeric
	codepoint. If `returnModule` is truthy, the name of the data module that
	covers the character is returned instead of the data itself.

	Returns nil when the codepoint lies outside the two supported ranges
	(13312-40938 and 131072-191456) or when no data is found for it.

	Raises an error if `char` is neither a non-empty string nor a number.
]]
function export.getData(char, returnModule)
	if type(char) == "string" then
		-- Yields nil for an empty string, which is rejected just below.
		char = mw.ustring.codepoint(char)
	end
	
	if type(char) ~= "number" then
		error("getData must operate on a single character or codepoint.")
	end
	-- log(char, mw.ustring.char(char))
	
	if char >= 13312 and char <= 40938 then
		return getFromModule(char, 13312, returnModule)
	elseif char >= 131072 and char <= 191456 then
		return getFromModule(char, 131072, returnModule)
	end
	
	-- log("not in range: " .. char .. " (" .. mw.ustring.char(char) .. ")")
	return nil
end

--[[
	Builds a sortkey for `text`.

	`text` is returned unchanged if `lang` is given and is not "zh", "vi" or
	"ja", or if `sc` is given and is not "Hani". Otherwise every character
	that has sortkey data is replaced by that data, every valid ideographic
	description sequence listed in [[Module:zh-sortkey/data/unsupported]] is
	replaced by its sortkey, and all other characters are kept as they are.
]]
function export.makeSortKey(text, lang, sc)
	local allowed_langs = {
		zh = true,
		vi = true,
		ja = true,
	}
	
	if lang and not allowed_langs[lang] then
		return text
	end
	
	if sc and sc ~= "Hani" then
		return text
	end
	
	local parts = {}
	-- text is never modified below, so its length is computed only once.
	local length = mw.ustring.len(text)
	local i = 1
	while i <= length do
		local character = substring(text, i, i)
		--[=[
			If we encounter an ideographic description character (IDC),
			find out if it begins a valid ideographic description sequence (IDS).
			
			If the IDS is valid and a sortkey for it is listed in
			[[Module:zh-sortkey/data/unsupported]], then return
			the sortkey, and move to the next character after the
			IDS.
			
			Otherwise, ignore the IDC and move to the next character after it.
			
			If the IDS is valid and no sortkey for it is found, track it.
		]=]
		if IDchars[character] then
			local j = findEndOfIDS(text, character, i)
			local IDS, data
			if j then
				IDS = substring(text, i, j)
				data = mw.loadData("Module:zh-sortkey/data/unsupported")[IDS]
			end
			
			if IDS and not data then
				require("Module:debug").track("zh-sortkey/IDS-without-sortkey")
				mw.log("ideographic description sequence without sortkey: " .. IDS)
			end
			
			if IDS and data then
				table.insert(parts, data)
				-- Skip the rest of the IDS; the increment below moves past it.
				i = j
			else
				table.insert(parts, character)
			end
		else
			table.insert(parts, export.getData(character) or character)
		end
		i = i + 1
	end
	
	return table.concat(parts)
end

return export