Usage

{{#invoke:Import table|''import''
|page=
|config=
|common=
}}

This module is designed to import data from tables in Wikipedia articles into Wikidata. The first column of the table must be the name of the item.

  • If this is a link (or a redirect) to an existing article which has a corresponding Wikidata item, then the data will be imported into this item.
  • If the first column contains a valid identifier for a Wikidata item (e.g. Q123456) then the data will be imported into this item.
  • If this is plain text or a redlink, then the data will be imported into a new Wikidata item.

Parameters

  • page - the name of the article/page to parse, e.g. |page=List of dams in South Africa
  • config - details about what type of data is held in each column of the table formatted as type-property-option. Currently recognised types are:
    • label - the name of the article
    • wikilink - a link to an article which should be the value of the property, e.g. wikilink-P131
    • quantity - specify the unit in the third parameter, e.g. quantity-P2048-Q11573
    • year - a 4-digit year, e.g. year-P571
    • text - any text to import into a string datatype, e.g. text-P3562
    • coord - coordinate position, inside the {{coord}} template, e.g. coord-P625
    • donotuse (or any other unrecognised type) - indicates a column that will not be imported
  • common - a set of values that every item in the list should have, formatted in pairs as property-value, e.g. |common=P31-Q12323,P17-Q258

Notes

  • The module will not import a claim if Wikidata already has a statement for that property, regardless of whether the existing value is the same as or different from the one being imported, and even if the existing value is marked as deprecated.

require("strict")
local p = {}

--- Extract the link target from a piece of wikitext and try to resolve it to an entity id.
-- The target is the text after "[[" up to the first "|" or "]]".
-- @param label wikitext of a table cell (may be nil)
-- @return the value returned by Module:ResolveEntityId's _id for the link
--         target, or nil when the text contains no wikilink
-- @return the link target when a wikilink was found, otherwise the input unchanged
local function resolveqid(label)
	-- Loaded on every call, before any early return, exactly as the lookup is needed below.
	local lookup = require("Module:ResolveEntityId")._id
	if not label then
		return nil, nil
	end
	-- Piped link first ([[target|text]]), then a plain link ([[target]]).
	local target = string.match(label, '%[%[([^%|%]]+)%|')
	if not target then
		target = string.match(label, '%[%[([^%|%]]+)%]%]')
	end
	if not target then
		-- No link at all: hand the text back untouched and report no id.
		return nil, label
	end
	local id = lookup(target)
	return id, target
end

--- Strip reference tags from a table cell and trim surrounding whitespace.
-- Each reference is removed individually, so text that sits between two
-- references is kept.
-- @param text wikitext of a single table cell
-- @return the cell text without <ref .../> and <ref ...>...</ref> markup, trimmed
local function tidystring(text)
	-- Self-closing references go first; otherwise the shortest-match pattern
	-- below could pair a <ref name=x /> with the </ref> of a later reference.
	local tidy = mw.ustring.gsub(text, "<ref[^>]*/>", "")
	-- Paired references: ".-" matches as little as possible, so each <ref ...> is
	-- closed by its own </ref> rather than by the last one in the cell.
	tidy = mw.ustring.gsub(tidy, "<ref[^>]*>.-</ref>", "")
	tidy = mw.text.trim(tidy) -- trim spaces
	return tidy
end

--- Turn the table on a wiki page into a list of import commands.
-- The output uses CREATE / LAST / Len / Den / S143 tokens separated by "|" and
-- terminated by "<br>" (this looks like QuickStatements V1 syntax - confirm
-- against the tool the output is pasted into).
--
-- Arguments read from the frame:
--   page   - title of the page whose table is parsed (required)
--   config - comma-separated column descriptions, each "type-property-option";
--            recognised types are wikilink, year, quantity, text and coord,
--            any other type makes the column be ignored (required)
--   common - optional comma-separated "property-value" pairs added to every
--            newly created item
--
-- Columns are skipped when no property is configured for them or when the
-- existing entity already has any statement for that property.
--
-- @param frame the Scribunto frame object
-- @return the generated commands as one string, or a short message when the
--         arguments are missing or the page/table cannot be read
function p.import(frame)
	local tab = "|" -- field separator in the generated commands
	local wikiqid = "Q328" -- QID for English Wikipedia, emitted after S143 on every statement
	local eol = "<br>" -- end of line string
	local coord2text = require("Module:Coordinates/sandbox")._coord2text
	local args = frame.args or frame:getParent().args
	-- "|page=" with nothing after it arrives as an empty string, which is truthy in Lua.
	if not args.page or args.page == "" then
		return "No page specified."
	end
	if not args.config or args.config == "" then
		return "No configuration."
	end

	-- config[n] = { type, property, option } for the n-th table column.
	local config = {}
	for c1, col in ipairs(mw.text.split(args.config, ",", true)) do
		config[c1] = mw.text.split(mw.text.trim(col), "-", true)
	end

	-- commonlist keeps the pairs in the order given; commonindex maps property -> value.
	local commonlist, commonindex = {}, {}
	if args.common then
		for _, prop in ipairs(mw.text.split(args.common, ",", true)) do
			local pair = mw.text.split(mw.text.trim(prop), "-", true)
			-- Ignore malformed pairs (no property or no value) instead of failing later.
			if pair[1] ~= "" and pair[2] and pair[2] ~= "" then
				commonlist[#commonlist + 1] = pair
				commonindex[pair[1]] = pair[2]
			end
		end
	end

	local title = mw.title.new(args.page)
	local content = title and title:getContent() -- read page specified
	if not content then
		return "Page not found."
	end
	content = string.match(content, "%{%|(.+)%|%}") -- keep table only
	if not content then
		return "No table found."
	end

	-- Split into rows on row-separator lines only ("|-" at the start of a line,
	-- plus any attributes after it). A "|-" inside a cell, e.g. a negative
	-- template argument such as {{coord|-33.9|18.4}}, must not start a new row.
	local rows = mw.text.split(content, "\n%|%-[^\n]*")
	table.remove(rows, 1) -- remove table definition
	table.remove(rows, 1) -- remove heading row

	-- Commands are collected in a table and joined once at the end.
	local out = {}
	local function emit(...)
		out[#out + 1] = table.concat({ ... }, tab) .. eol
	end

	for _, row in ipairs(rows) do
		-- use \n| for column breaks, then split the table row into columns
		local columns = mw.text.split((string.gsub(row, "||", "\n|")), "\n|", true)
		table.remove(columns, 1) -- remove content before the first \n| character
		local label = columns[1]
		if label then
			label = tidystring(label)
			local qid
			qid, label = resolveqid(label) -- resolve qid if first column is link
			if not qid then
				qid = string.match(label, "Q%d+") -- check if QID is specified in first column
			end
			-- A row with an empty first cell and no id would create an unlabeled item; skip it.
			if qid or label ~= "" then
				local entity
				if qid then
					entity = mw.wikibase.getEntity(qid)
				else
					emit("CREATE")
					qid = "LAST" -- creating new item, so use LAST
					emit(qid, "Len", '"' .. label .. '"')
					if commonindex["P31"] then -- create auto-description
						local description = mw.wikibase.getLabel(commonindex["P31"])
						if description then -- getLabel may find no label
							if commonindex["P17"] then -- add country
								local country = mw.wikibase.getLabel(commonindex["P17"])
								if country then
									description = description .. ' in ' .. country
								end
							end
							emit(qid, "Den", '"' .. description .. '"')
						end
					end
					for _, pair in ipairs(commonlist) do
						emit(qid, pair[1], pair[2])
					end
				end

				local function addstatement(prop, val)
					emit(qid, prop, val, "S143", wikiqid)
				end

				for cn, col in ipairs(columns) do
					local spec = config[cn]
					local kind = spec and spec[1]
					local prop = spec and spec[2]
					-- Import only when a property is configured and the existing
					-- entity (if any) has no statement for it yet.
					if prop and prop ~= "" and not (entity and entity:getAllStatements(prop)[1]) then
						col = tidystring(col)
						if kind == "wikilink" then
							local val = resolveqid(col)
							if val then
								addstatement(prop, val)
							end
						elseif kind == "year" then
							local val = string.match(col, "%d%d%d%d")
							if val then
								addstatement(prop, "+" .. val .. "-00-00T00:00:00Z/9")
							end
						elseif kind == "quantity" then
							-- remove any commas, then extract value, possibly from inside convert template
							local val = string.match((string.gsub(col, ",", "")), "%d+%.?%d*")
							if val then
								local unit = spec[3]
								if unit and unit ~= "" then
									-- the unit is written without its leading "Q"
									val = val .. "U" .. (string.gsub(unit, "^[Qq]", ""))
								end
								addstatement(prop, val)
							end
						elseif kind == "text" then
							if col ~= "" then
								addstatement(prop, '"' .. col .. '"')
							end
						elseif kind == "coord" then
							if col ~= "" then
								local val = mw.getCurrentFrame():preprocess(col)
								local lat = coord2text(val, "lat")
								local long = coord2text(val, "long")
								-- NOTE(review): assumes _coord2text returns nil when it
								-- cannot parse the cell - confirm against Module:Coordinates.
								if lat and long then
									addstatement(prop, "@" .. lat .. "/" .. long)
								end
							end
						end
					end
				end
			end
		end
	end
	return table.concat(out)
end

return p