-- Module:Cite taxon/utilities
-- (Lua module from Wikipedia, the free encyclopedia)
-- Module table; the functions attached to it below are what this file returns.
local p = {}

-- Map an empty capture to nil so that "no authority" is represented one way.
local function blank_to_nil(text)
	if text ~= "" then return text end
	return nil
end

-- Clean one raw input line according to the selected source format.
-- @param line     raw line from the list
-- @param compare  optional Lua pattern; lines not starting with it are skipped
-- @param source   lower-cased `option` argument (or nil)
-- @param mode     `mode` argument (or nil)
-- @return the cleaned line, and true when the caller must drop the line
local function clean_source_line(line, compare, source, mode)
	local skip = false
	line = mw.text.trim(line)

	if line == "" then skip = true end                        -- ignore blank lines
	-- NOTE: compare is interpreted as a Lua pattern, so magic characters in it
	-- (e.g. "." or "-") are not matched literally.
	if compare and not line:find("^" .. compare) then skip = true end

	-- The extra parentheses around gsub(...) discard its second result (the
	-- replacement count), which must not reach mw.text.trim as a charset.
	if source == "worms" then                                 -- WoRMS list
		if mode == "genus" then
			line = (line:gsub("^Genus ", ""))                 -- delete leading "Genus"
		else
			line = (line:gsub("^Species ", ""))               -- delete leading "Species"
		end
		if line:find("accepted as") then skip = true end      -- name "accepted as" something else
	end

	if source == "gbif" then                                  -- GBIF list (rank on alternate lines)
		if line == "Species" or line == "Unranked" then skip = true end
	end

	if source == "tpl" then                                   -- tpl list
		if line:find("Accepted") then
			-- keep only the text before "Accepted"; the whitespace character
			-- before "Accepted" is copied verbatim from the original pattern
			-- (it appears to be a tab)
			line = mw.text.trim((line:gsub("^(.+)	Accepted.+", "%1")))
		else
			skip = true                                       -- not marked Accepted
		end
	end

	if source == "algaebase" then                             -- algaebase list
		if line:find("C$") then
			line = mw.text.trim((line:gsub("C$", "")))        -- keep rows ending in "C", minus the flag
		else
			skip = true
		end
	end

	return line, skip
end

-- Split a cleaned line into its name parts.
-- Patterns are tried in priority order and are intentionally NOT anchored to
-- the start of the line: the match may begin after a leading word.
-- Every pattern below either ends in ".*" or can only match at the end of the
-- line once the preceding pattern has failed, so string.match yields the same
-- (single) result the earlier gmatch loops produced.
-- @return nil when no pattern matches, otherwise a table with fields
--         genus, sep, species, infrasep, subspecies, authority, italics
local function parse_name_line(line, mode)
	if mode == "genus" or mode == "taxon" then                -- form: taxon authority
		local taxon, author = string.match(line, "(%S+) (.*)")
		if not taxon then return nil end
		local italics = "''"
		if mode == "taxon" then italics = "" end              -- plain taxa are not italicised
		return {
			genus = taxon, sep = "", species = "",
			authority = blank_to_nil(author), italics = italics,
		}
	end

	-- genus species <infraspecific rank>. subspecies [authority]
	local genus, species, infrasep, subspecies, authority =
		string.match(line, "(%S+) (%S+)( [svfb][ubsparomiv]*%. )(%S+) (.*)")
	if not genus then
		genus, species, infrasep, subspecies =
			string.match(line, "(%S+) (%S+)( [svfb][ubsparomiv]*%. )(%S+)")
	end
	if genus then
		return {
			genus = genus, sep = " ", species = species,
			infrasep = infrasep, subspecies = subspecies,
			authority = blank_to_nil(authority), italics = "''",
		}
	end

	-- genus × species [authority]
	local sep = " × "
	genus, species, authority = string.match(line, "(%S+) × (%S+) (.*)")
	if not genus then
		genus, species = string.match(line, "(%S+) × (%S+).*")
	end

	-- genus species [authority]
	if not genus then
		sep = " "
		genus, species, authority = string.match(line, "(%S+) (%S+) (.*)")
	end
	if not genus then
		genus, species = string.match(line, "(%S+) (%S+).*")
	end
	if not genus then return nil end

	return {
		genus = genus, sep = sep, species = species,
		authority = blank_to_nil(authority), italics = "''",
	}
end

-- Turn a newline-separated list of taxon names into a wikitext bullet list.
--
-- Arguments are read from the frame first, then from the parent frame:
--   1        the list (falls back to the `taxa` parameter)
--   expand   if set, {{small}} is expanded with frame:expandTemplate;
--            otherwise the literal "{{small|...}}" wikitext is emitted
--   compare  Lua pattern; only lines beginning with it are kept
--   option   source format: "worms", "gbif", "tpl" or "algaebase" (any case)
--   mode     "genus" or "taxon" for "name authority" lines; anything else
--            parses species-level names
--   nolink   if set, names are not wikilinked
-- Any supplied value counts as "set", including the empty string.
--
-- @return wikitext: one "*" item per kept line, an error span for each line
--         that could not be parsed, or "" when no line produced output.
p.format_species_list = function(frame, taxa)
	-- getParent() can return nil (no parent frame); fall back to an empty table
	local parent = frame:getParent()
	local parent_args = parent and parent.args or {}
	local function get_arg(key)
		return frame.args[key] or parent_args[key]
	end

	local list    = get_arg(1) or taxa
	local expand  = get_arg('expand')
	local compare = get_arg('compare')
	local option  = get_arg('option')
	local mode    = get_arg('mode')
	local nolink  = get_arg('nolink')

	if not list then
		-- previously this fell through to mw.text.trim(nil) and raised a script error
		return '<span class="error">no list of names supplied</span>'
	end

	local source = option and string.lower(option)
	local output = {}                                         -- formatted list items

	for _, raw in ipairs(mw.text.split(mw.text.trim(list), "\n")) do
		local line, skip = clean_source_line(raw, compare, source, mode)
		local parsed = parse_name_line(line, mode)

		if parsed and not skip then
			local name
			local species_name = parsed.genus .. parsed.sep .. parsed.species

			if parsed.subspecies then
				name = "''" .. species_name .. "''" .. parsed.infrasep .. "''" .. parsed.subspecies .. "''"
				if not nolink then                            -- piped link to the full trinomial
					name = "[[" .. species_name .. parsed.infrasep .. parsed.subspecies .. "|" .. name .. "]]"
				end
			else
				name = species_name
				if not nolink then name = "[[" .. name .. "]]" end
				name = parsed.italics .. name .. parsed.italics
			end

			if parsed.authority and expand then
				name = name .. " " .. frame:expandTemplate{ title = 'small', args = { parsed.authority } }
			elseif parsed.authority then
				name = name .. " {{small|" .. parsed.authority .. "}}"
			end

			output[#output + 1] = name
		elseif not skip then
			output[#output + 1] = '<span class="error">unsupported format: expects "genus species authority"</span>'
		end
	end

	-- avoid emitting a lone "*" (an empty bullet) when every line was skipped
	if #output == 0 then return "" end
	return "*" .. table.concat(output, "\n*")
end

-- Turn a newline-separated list of "genus species authority" lines into a
-- wikitext bullet list of italicised, wikilinked names.
--
-- Arguments are read from the frame first, then from the parent frame:
--   1       the list
--   expand  if set, {{small}} is expanded with frame:expandTemplate;
--           otherwise the literal "{{small|...}}" wikitext is emitted
--
-- @return wikitext: one "*" item per non-blank line (an error span for lines
--         that match no pattern), or "" when there is nothing to list.
p.format_taxon_list = function(frame)
	-- getParent() can return nil (no parent frame); fall back to an empty table
	local parent = frame:getParent()
	local parent_args = parent and parent.args or {}

	local list = frame.args[1] or parent_args[1]
	local expand = frame.args['expand'] or parent_args['expand']

	if not list then
		-- previously this fell through to mw.text.trim(nil) and raised a script error
		return '<span class="error">no list of names supplied</span>'
	end

	-- Tried in order, most specific first; each captures
	-- genus, separator, species and (for two of them) authority.
	local patterns = {
		"(%S+)( × )(%S+) (.*)",          -- genus × species authority
		"(%S+)( × )(%S+).*",             -- genus × species
		"(%S+)( )(%S+) (.*)",            -- genus species authority
		"(%S+)( )(%S+).*",               -- genus species
	}

	local output = {}                    -- formatted list items

	-- "\n" is a literal separator, so request a plain (non-pattern) split
	for _, line in ipairs(mw.text.split(mw.text.trim(list), "\n", true)) do
		if line:find("%S") then          -- ignore blank lines, as format_species_list does
			local genus, separator, species, authority
			-- ipairs, not pairs: the priority order of the patterns matters
			for _, pattern in ipairs(patterns) do
				genus, separator, species, authority = string.match(line, pattern)
				if genus then break end
			end

			if genus then
				if authority == "" then authority = nil end   -- nothing after the space
				local name = "''[[" .. genus .. separator .. species .. "]]''"
				-- a space separates name and authority, matching format_species_list
				if authority and expand then
					name = name .. " " .. frame:expandTemplate{ title = 'small', args = { authority } }
				elseif authority then
					name = name .. " {{small|" .. authority .. "}}"
				end
				output[#output + 1] = name
			else
				output[#output + 1] = '<span class="error">unsupported format: expects "genus species authority"</span>'
			end
		end
	end

	-- avoid emitting a lone "*" (an empty bullet) for an empty list
	if #output == 0 then return "" end
	return "*" .. table.concat(output, "\n*")
end
-- Hand the module table, with its two formatting functions, back to the caller.
return p