-- Documentation for this module may be created at Module:POTY/parser/doc

-- Code

require('strict')

local p = {}

-- Normalizes a featured picture ID (fpID) to the canonical form YYYY-MM/ooo.
--
-- The fpID is usually provided in the format
-- YYYY-MM/ordinal
-- with YYYY being the four-digit year, MM being the month, either zero-padded
-- or not (e.g. 2015-01 or 2015-1 are both valid), and ordinal being an ordinal
-- number in the month the image was featured. The ordinal number is not a
-- fully-qualified ID, instead it's a weak entity that, together with the year
-- and month, creates a UNIQUE Identifier.
--
-- @param fpID The raw ID string; may be nil
-- @return The ID with the month zero-padded to 2 digits and the ordinal to 3
--         digits, or a '[fpID parse error]' message string when the input is
--         missing or does not have the expected shape
local function normalizeFeaturedPictureID(fpID)
	local parseError = '[fpID parse error]: Expected format YYYY-MM/ooo'

	if type(fpID) ~= 'string' then
		return parseError
	end

	-- The pattern is deliberately unanchored, so surrounding whitespace (or
	-- other text) is skipped without an explicit trim.
	local yyyy, mm, ordinal = fpID:match('(%d%d%d%d)%-(%d+)/(%d+)')
	if not yyyy then
		-- Without this guard string.format would raise on the nil captures
		-- instead of reporting a parse error.
		return parseError
	end

	-- %d (rather than %u) because %u is rejected by Lua 5.3+; the captures
	-- are digit-only, so the output is the same.
	local normalized = string.format('%s-%02d/%03d', yyyy, tonumber(mm), tonumber(ordinal))

	-- Padding never shortens a field, so e.g. a three-digit month is still
	-- malformed after formatting and has to be rejected here.
	if not normalized:find('%d%d%d%d%-%d%d/%d%d%d') then
		return parseError
	end
	return normalized
end

-- Turns a gallery file reference into a bare, display-style file name.
--
-- Removes a leading 'File:' / 'file:' namespace prefix and replaces every
-- underscore with a space.
--
-- @param fn The file reference (string)
-- @return The normalized file name, and nothing else: string.gsub also yields
--         a replacement count, which must not leak to callers (it would turn
--         into a stray extra argument in calls like table.insert(t, f(x))).
local function normalizeFileName(fn)
	local name = fn:gsub('^[Ff]ile:', '')
	name = name:gsub('_', ' ')
	return name
end

-- Builds a placeholder heading descriptor.
-- @return A new table with the same fields as the heading entries produced by
--         p.parse (hDesc and hId), both set to the string 'dummy'
function p.getDummyHeading()
	local heading = {}
	heading.hDesc = 'dummy'
	heading.hId = 'dummy'
	return heading
end

-- Parses a candidates page into per-category candidate lists.
--
-- Missing a real WikitextDOM parser, the page is cut into sections at its
-- '== ... ==' headings with Lua patterns. A section that contains a
-- </gallery> tag is read line by line as 'file name|fpID'.
--
-- @param title Page title; when nil or empty the 2013 candidates page is used
-- @return A table whose keys are category ids (taken from the heading link
--         target) and whose values are lists of { name = ..., fpId = ... }
-- @return A list with one { hDesc = ..., hId = ... } entry per heading found,
--         in page order (both fields are nil for headings that are not links
--         of the expected form)
function p.parse(title)
	-- '#title' is a number and therefore always truthy, so emptiness has to be
	-- tested by comparison.
	if not title or title == '' then title = 'Commons:Picture of the Year/2013/Candidates' end
	-- mw.title.new returns nil for an invalid title.
	local titleObj = mw.title.new(title)
	local wikitext = titleObj and titleObj:getContent()
	
	-- Page empty, invalid, or does not exist?
	if not wikitext or 0 == #wikitext then return {}, {} end
	
	local headingPattern = '\n==.-==%s'
	local wikitextPartsRaw = mw.text.split( wikitext, headingPattern )
	local headings, headingDescs = {}, {}
	local headingCount = 0
	local result = {}
	
	for h in wikitext:gmatch( headingPattern ) do
		local hId, hDesc = h:match('\n== ?%[%[[^\n]+/(.-)|(.-)%]%].-==%s')
		-- Store by explicit index: table.insert(headings, nil) would append
		-- nothing and shift every later heading against its section.
		headingCount = headingCount + 1
		headings[headingCount] = hId
		table.insert(headingDescs, { hDesc=hDesc, hId=hId })
	end
	
	for i, v in ipairs(wikitextPartsRaw) do
		-- Part i is the text following heading i-1; part 1 precedes the first
		-- heading and so has no category. Sections without a usable category
		-- id are skipped (a nil table key would raise an error).
		local category = headings[i-1]
		if category and v:find( '</gallery>', 1, true ) then
			v = v:gsub('%s*<gallery.->%s*', '')
			v = v:gsub('%s*</.-gallery>%s*', '')
			v = v:gsub('%s*<!%-%-.-%-%->', '')
			
			local candidates = {}
			for _, line in ipairs(mw.text.split( v, '%s*\n%s*' )) do
				line = mw.text.trim(line)
				-- Blank lines (e.g. left behind by a removed comment) are not
				-- candidates.
				if line ~= '' then
					local m = mw.text.split( line, '|' )
					local name = normalizeFileName(mw.text.trim(m[1]))
					table.insert(candidates, { name = name, fpId = normalizeFeaturedPictureID(m[2]) })
				end
			end
			result[category] = candidates
		end
	end
	
	return result, headingDescs
end

-- Parses a results gallery page into a list of winners.
--
-- Only gallery lines of the form 'file name|caption' whose caption contains
-- '{{POTY/caption' are reported.
--
-- @param title Page title; when nil or empty the 2013 round-2 results gallery
--        is used
-- @return A list of { name = ..., caption = ... } tables; empty when the page
--         is missing, empty, or contains no </gallery> tag
function p.parseWinners(title)
	-- '#title' is a number and therefore always truthy, so emptiness has to be
	-- tested by comparison.
	if not title or title == '' then title = 'Commons:Picture of the Year/2013/Results/R2/gallery' end
	-- mw.title.new returns nil for an invalid title.
	local titleObj = mw.title.new(title)
	local v = titleObj and titleObj:getContent()
	
	local winners = {}
	
	-- Page empty or does not exist, or it holds no gallery at all?
	if not v or 0 == #v or not v:find( '</gallery>', 1, true ) then
		return winners
	end
	
	v = v:gsub('%s*<gallery.->%s*', '')
	v = v:gsub('%s*</[^>]-gallery>%s*', '')
	v = v:gsub('%s*<!%-%-.-%-%->', '')
	
	for _, y in ipairs(mw.text.split( v, '%s*\n%s*' )) do
		local f, c = y:match( '([^|]-)%|(.+)' )
		if c then
			c = mw.text.trim(c)
			-- string.find returns nil when there is no match, so its result is
			-- used as a boolean rather than compared with a number. Plain mode
			-- keeps the template name from being read as a pattern.
			if c:find('{{POTY/caption', 1, true) then
				table.insert(winners, { name = normalizeFileName(mw.text.trim(f)), caption = c })
			end
		end
	end
	
	return winners
end


-- Parses a sets page, where every <gallery> block lists the files of one set.
--
-- @param title Page title; when nil or empty the 2013 candidate sets page is
--        used
-- @return A table mapping each file name (without 'File:' prefix, underscores
--         replaced by spaces) to the list of all file names in its gallery.
--         Files of the same gallery share one list table. Empty when the page
--         is missing or empty.
function p.parseSets(title)
	-- '#title' is a number and therefore always truthy, so emptiness has to be
	-- tested by comparison.
	if not title or title == '' then title = 'Commons:Picture of the Year/2013/Candidates/Sets' end
	
	-- mw.title.new returns nil for an invalid title.
	local titleObj = mw.title.new(title)
	local wikitext = titleObj and titleObj:getContent()
	local setsByTitle = {}
	
	-- Page empty or does not exist?
	if not wikitext or 0 == #wikitext then return setsByTitle end
	
	for g in wikitext:gmatch( '<gallery.->(.-)</.-gallery>' ) do
		local set = {}
		-- Remove comments before anything else and on the whole gallery body:
		-- a comment may span several lines or contain a '|', and cutting the
		-- caption first would leave a dangling '<!--' behind as a file name.
		local body = g:gsub('<!%-%-.-%-%->', '')
		for _, f in ipairs(mw.text.split( body, '\n' )) do
			-- Trim first so that indented lines still lose their prefix.
			f = mw.text.trim(f)
			f = f:gsub('^[Ff]ile:', '')
			f = f:gsub('%|.-$', '')
			f = f:gsub('_', ' ')
			f = mw.text.trim(f)
			if #f > 0 then
				table.insert(set, f)
				setsByTitle[f] = set
			end
		end
	end
	
	return setsByTitle
end

return p