Home
Random
Recent changes
Special pages
Community portal
Preferences
About Stockhub
Disclaimers
Search
User menu
Talk
Contributions
Create account
Log in
Editing
Module:Sandbox/PHansen/URLutil
Warning:
You are not logged in. Your IP address will be publicly visible if you make any edits. If you
log in
or
create an account
, your edits will be attributed to your username, along with other benefits.
Anti-spam check. Do
not
fill this in!
-- From [[:de:Modul:URLutil]]
-- Via [[:en:User:PHansen/URLutil]]
-- Descriptions
--   en: [[:de:Wikipedia:Lua/Modul/URLutil/en]]
--   de: [[:de:Wikipedia:Lua/Modul/URLutil/de]]
-- Test    : [[:de:Wikipedia:Lua/Modul/URLutil/Test]]
-- Wikidata: [[:d:Q10859193]]

--[=[ URLutil 2014-09-20
Utilities for URL etc. on www.
* getAuthority()
* getFragment()
* getHost()
* getLocation()
* getPath()
* getPort()
* getQuery()
* getQueryTable()
* getRelativePath()
* getScheme()
* getTLD()
* getTop2domain()
* getTop3domain()
* getURIScheme()
* isAuthority()
* isDomain()
* isDomainExample()
* isDomainInt()
* isHost()
* isIP()
* isIPlocal()
* isIPv4()
* isIPv6()
* isMailAddress()
* isMailLink()
* isProtocolDialog()
* isProtocolMW()
* isProtocolWiki()
* isResourceURL()
* isSuspiciousURL()
* isUnescapedURL()
* isWebURL()
* wikiEscapeURL()
Only [[dotted decimal]] notation for IPv4 supported.
Does not support dotted hexadecimal, dotted octal, or single-number formats.
IPv6 URL (bracketed) not yet implemented; might need wiki syntax escaping anyway.
]=]



-- table for export
local URLutil = {}



-- Return the lowercased scheme of a URI (without colon), "//" for a
-- protocol-relative reference, or false if neither is present.
-- A scheme needs at least two letters followed by a colon.
URLutil.getURIScheme = function ( uri )
    if type( uri ) == "string" then
        local prot, colon, slashes = uri:match( "^%s*([a-zA-Z]*)(:?)(/?/?)" )
        if #colon == 1 and #prot >= 2 then
            return prot:lower()
        elseif #slashes == 2 and #prot == 0 then
            return "//"
        end
    end
    return false
end -- getURIScheme()



-- Return the last 2 (default) or 3 (mode == 3) dot-separated labels of the
-- host of url, or false if the host is unavailable or does not match.
local getTopDomain = function ( url, mode )
    local r = URLutil.getHost( url )
    if r then
        -- The opening parenthesis of the capture is supplied below.
        local pattern = "[%w%%]+%.%a[%w-]*%a)$"
        if mode == 3 then
            pattern = "[%w%%]+%." .. pattern
        end
        -- Prefix a dot so that the first label is also preceded by one.
        r = mw.ustring.match( "." .. r,  "%.(" .. pattern )
        if not r then
            r = false
        end
    else
        r = false
    end
    return r
end -- getTopDomain()



-- Return the lowercased "host" or "host:port" of url.
-- Returns false if url is not a string, and nil if url is a string from
-- which no valid authority can be extracted.
URLutil.getAuthority = function ( url )
    local r
    if type( url ) == "string" then
        local colon, host, port
        local pattern = "^%s*%w*:?//([%w%.%%-]+)(:?)([%d]*)/"
        local s = mw.text.decode( url )
        -- Cut off any fragment; search starts at position 6.
        local i = s:find( "#", 6, true )
        if i then
            s = s:sub( 1,  i - 1 )  ..  "/"
        else
            -- Guarantee the trailing slash the pattern requires.
            s = s .. "/"
        end
        host, colon, port = mw.ustring.match( s, pattern )
        if URLutil.isHost( host ) then
            host = mw.ustring.lower( host )
            if colon == ":" then
                if port:find( "^[1-9]" ) then
                    r = ( host .. ":" .. port )
                end
            elseif #port == 0 then
                r = host
            end
        end
    else
        r = false
    end
    return r
end -- URLutil.getAuthority()



-- Return the fragment of url (text after the first "#", without the "#").
-- decode: optional string; "%" applies percent-decoding, "WIKI" first maps
--         ".XX" to "%XX" and "_" to space, then percent-decodes.
-- Returns false if url has no "#", nil if url is not a string.
URLutil.getFragment = function ( url, decode )
    local r
    if type( url ) == "string" then
        local s = mw.text.decode( url )
        local i = s:find( "#", 1, true )
        if i then
            r = mw.text.trim( s:sub( i ) ):sub( 2 )
            if type( decode ) == "string" then
                local encoding = mw.text.trim( decode )
                local launch
                if encoding == "%" then
                    launch = true
                elseif encoding == "WIKI" then
                    r = r:gsub( "%.(%x%x)", "%%%1" )
                         :gsub( "_", " " )
                    launch = true
                end
                if launch then
                    r = mw.uri.decode( r, "PATH" )
                end
            end
        else
            r = false
        end
    else
        r = nil
    end
    return r
end -- URLutil.getFragment()



-- Return the host part of the authority of url (port stripped),
-- or the falsy result of getAuthority() if there is none.
URLutil.getHost = function ( url )
    local r = URLutil.getAuthority( url )
    if r then
        r = mw.ustring.match( r, "^([%w%.%%-]+):?[%d]*$" )
    end
    return r
end -- URLutil.getHost()



-- Return url trimmed, entity-decoded and without fragment.
-- Returns false for an empty string or a string starting with "#",
-- nil if url is not a string.
URLutil.getLocation = function ( url )
    local r
    if type( url ) == "string" then
        r = mw.text.trim( url )
        if r == "" then
            r = false
        else
            local i
            r = mw.text.decode( r )
            i = r:find( "#", 1, true )
            if i then
                if i == 1 then
                    r = false
                else
                    r = r:sub( 1,  i - 1 )
                end
            end
        end
    else
        r = nil
    end
    return r
end -- URLutil.getLocation()



-- Return the relative path of url without query and fragment,
-- or the falsy result of getRelativePath().
URLutil.getPath = function ( url )
    local r = URLutil.getRelativePath( url )
    if r then
        local s = r:match( "^([^%?]*)%?" )
        if s then
            r = s
        end
        s = r:match( "^([^#]*)#" )
        if s then
            r = s
        end
    end
    return r
end -- URLutil.getPath()



-- Return the port of url as a number, false if the authority carries no
-- port, or the falsy result of getAuthority() if there is no authority.
URLutil.getPort = function ( url )
    local r = URLutil.getAuthority( url )
    if r then
        r = r:match( ":([1-9][0-9]*)$" )
        if r then
            r = tonumber( r )
        else
            r = false
        end
    end
    return r
end -- URLutil.getPort()



-- Return the query string of url (text after the first "?"), or the value
-- of a single parameter.
-- key:       optional string; name of the parameter whose value is wanted.
-- separator: optional string; one of & ; , /  (default "&"), only used
--            together with key.
-- Returns false if nothing was found, or the falsy result of getLocation().
URLutil.getQuery = function ( url, key, separator )
    local r = URLutil.getLocation( url )
    if r then
        r = r:match( "^[^%?]*%?(.+)$" )
        if r then
            if type( key ) == "string" then
                local single = mw.text.trim( key )
                local sep = "&"
                local s, scan
                if type( separator ) == "string" then
                    s = mw.text.trim( separator )
                    if s:match( "^[&;,/]$" ) then
                        sep = s
                    end
                end
                -- The key is spliced into a Lua pattern: escape every
                -- punctuation character so that it is matched literally.
                single = single:gsub( "%p", "%%%0" )
                -- Wrap the query in separators so that every parameter,
                -- including first and last, is delimited on both sides.
                s = string.format( "%s%s%s", sep, r, sep )
                scan = string.format( "%s%s=([^%s]*)%s",
                                      sep, single, sep, sep )
                r = s:match( scan )
            end
        end
        if not r then
            r = false
        end
    end
    return r
end -- URLutil.getQuery()



-- Return the query of url as a table mapping parameter name to value.
-- Components without "=" after their first character map to false.
-- separator: optional string; one of & ; , /  (default "&").
-- Returns the falsy result of getQuery() if there is no query.
URLutil.getQueryTable = function ( url, separator )
    local r = URLutil.getQuery( url )
    if r then
        local sep = "&"
        local n, parts, s, set
        if type( separator ) == "string" then
            s = mw.text.trim( separator )
            if s:match( "^[&;,/]$" ) then
                sep = s
            end
        end
        parts = mw.text.split( r, sep, true )
        n = #parts
        r = { }
        for i = 1, n do
            s = parts[ i ]
            if s:find( "=", 2, true ) then
                s, set = s:match( "^([^=]+)=(.*)$" )
                if s then
                    r[ s ] = set
                end
            else
                r[ s ] = false
            end
        end -- for i
    end
    return r
end -- URLutil.getQueryTable()



-- Return everything following the authority of url, starting with "/".
-- Returns "/" for a resource URL without path, false if nothing could be
-- determined, nil if url is not a string.
URLutil.getRelativePath = function ( url )
    local r
    if type( url ) == "string" then
        local s = url:match( "^%s*[a-zA-Z]*://(.*)$" )
        if s then
            -- Skip the authority.
            s = s:match( "[^/]+(/.*)$" )
        else
            local x
            x, s = url:match( "^%s*(/?)(/.*)$" )
            if x == "/" then
                -- Protocol-relative: skip "/authority".
                s = s:match( "/[^/]+(/.*)$" )
            end
        end
        if s then
            r = mw.text.trim( s )
        elseif URLutil.isResourceURL( url ) then
            r = "/"
        else
            r = false
        end
    else
        r = nil
    end
    return r
end -- URLutil.getRelativePath()



-- Return the lowercased scheme including "://", or "//" for a
-- protocol-relative URL. A scheme needs more than two letters here.
-- Returns false if none was found, nil if url is not a string.
URLutil.getScheme = function ( url )
    local r
    if type( url ) == "string" then
        local pattern = "^%s*([a-zA-Z]*)(:?)(//)"
        local prot, colon, slashes = url:match( pattern )
        r = false
        if slashes == "//" then
            if colon == ":" then
                if #prot > 2 then
                    r = prot:lower() .. "://"
                end
            elseif #prot == 0 then
                r = "//"
            end
        end
    else
        r = nil
    end
    return r
end -- URLutil.getScheme()



-- Return the top-level domain of the host of url, false if the host does
-- not end in one, or the falsy result of getHost().
URLutil.getTLD = function ( url )
    local r = URLutil.getHost( url )
    if r then
        r = mw.ustring.match( r, "[%w]+%.(%a[%w-]*%a)$" )
        if not r then
            r = false
        end
    end
    return r
end -- URLutil.getTLD()



-- Return the last two labels of the host of url, or false.
URLutil.getTop2domain = function ( url )
    return getTopDomain( url, 2 )
end -- URLutil.getTop2domain()



-- Return the last three labels of the host of url, or false.
URLutil.getTop3domain = function ( url )
    return getTopDomain( url, 3 )
end -- URLutil.getTop3domain()



-- Check whether s is "host" or "host:port" with a valid host and, if a
-- colon is present, a port consisting of digits not starting with 0.
-- Returns the truthy result of isHost() on success, false otherwise,
-- nil if s is not a string.
URLutil.isAuthority = function ( s )
    local r
    if type( s ) == "string" then
        local pattern = "^%s*([%w%.%%-]+)(:?)(%d*)%s*$"
        local host, colon, port = mw.ustring.match( s, pattern )
        if not host then
            r = false
        elseif colon == ":" then
            -- A colon requires a port; the host is only consulted once
            -- the port has been accepted, so a bad port is not masked.
            if port:match( "^[1-9][0-9]*$" ) then
                r = URLutil.isHost( host )
            else
                r = false
            end
        elseif port ~= "" then
            r = false
        else
            r = URLutil.isHost( host )
        end
    else
        r = nil
    end
    return r
end -- URLutil.isAuthority()



-- Check whether s looks like a domain name (labels plus alphabetic TLD).
-- Returns true, false if it contains "..", and nil if s is not a string,
-- does not match the pattern, or does not start with an alphanumeric.
URLutil.isDomain = function ( s )
    local r
    if type( s ) == "string" then
        local scan = "^%s*([%w%.%%-]+%w)%.(%a[%w-]*%a)%s*$"
        -- Only the first capture (everything before the TLD) is needed.
        s = mw.ustring.match( s, scan )
        if type( s ) == "string" then
            if mw.ustring.find( s, "^%w" ) then
                if mw.ustring.find( s, "..", 1, true ) then
                    r = false
                else
                    r = true
                end
            end
        end
    else
        r = nil
    end
    return r
end -- URLutil.isDomain()



-- Check whether the host of url is an RFC 2606 example domain:
-- example.com example.net example.org example.edu
-- Returns true or false.
URLutil.isDomainExample = function ( url )
    local r = getTopDomain( url, 2 )
    if r then
        local s = r:lower():match( "^example%.([a-z][a-z][a-z])$" )
        if s then
            r = ( s == "com" or
                  s == "edu" or
                  s == "net" or
                  s == "org" )
        else
            r = false
        end
    end
    return r
end -- URLutil.isDomainExample()



-- Check for an Internationalized Domain Name (Punycode).
-- Returns true if the host contains characters outside "!" to "~" or has
-- a label starting with "xn--"; false otherwise; the falsy result of
-- getHost() if there is no host.
URLutil.isDomainInt = function ( url )
    local r = URLutil.getHost( url )
    if r then
        if r:match( "^[!-~]+$" ) then
            -- Prefix a dot so that the first label is also checked.
            local s = "." .. r
            if s:find( ".xn--", 1, true ) then
                r = true
            else
                r = false
            end
        else
            r = true
        end
    end
    return r
end -- URLutil.isDomainInt()



-- Check whether s is a domain name or an IP address.
-- Returns true for a domain, 4 or 6 for an IP address, false otherwise.
URLutil.isHost = function ( s )
    return URLutil.isDomain( s ) or URLutil.isIP( s )
end -- URLutil.isHost()



-- Return 4 for an IPv4 address, 6 for an IPv6 address, false otherwise.
URLutil.isIP = function ( s )
    return URLutil.isIPv4( s ) and 4 or URLutil.isIPv6( s ) and 6
end -- URLutil.isIP()



-- Check whether s is a local IPv4 address:
-- according to RFC 1918, RFC 1122; even any 0.0.0.0 (RFC 5735).
-- Returns true or false; false if s is not a string.
URLutil.isIPlocal = function ( s )
    local r = false
    local num
    if type( s ) ~= "string" then
        return false
    end
    -- Every range handled below has a first octet starting with 0 or 1.
    num = s:match( "^ *([01][0-9]*)%." )
    if num then
        num = tonumber( num )
        if num == 0 then
            r = s:match( "^ *0+%.[0-9]+%.[0-9]+%.[0-9]+ *$" )
        elseif num == 10  or  num == 127 then
            -- loopback; private/local host: 127.0.0.1
            r = URLutil.isIPv4( s )
        elseif num == 169 then
            -- 169.254.*.*
            -- NOTE(review): not reported as local; confirm whether
            --               link-local addresses should be included.
        elseif num == 172 then
            -- 172.(16...31).*.*
            num = s:match( "^ *0*172%.([0-9]+)%." )
            if num then
                num = tonumber( num )
                if num >= 16  and  num <= 31 then
                    r = URLutil.isIPv4( s )
                end
            end
        elseif num == 192 then
            -- 192.168.*.*
            num = s:match( "^ *0*192%.([0-9]+)%." )
            if num then
                num = tonumber( num )
                if num == 168 then
                    r = URLutil.isIPv4( s )
                end
            end
        end
    end
    -- Normalize match results (string / nil) to a boolean.
    return r and true or false
end -- URLutil.isIPlocal()



-- Check whether s is an IPv4 address in dotted decimal notation; the first
-- octet must not start with 0 and every octet must be below 256.
-- Returns true or false.
URLutil.isIPv4 = function ( s )
    local function legal( n )
              return ( tonumber( n ) < 256 )
          end
    local r = false
    if type( s ) == "string" then
        local p1, p2, p3, p4 = s:match( "^%s*([1-9][0-9]?[0-9]?)%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%.([12]?[0-9]?[0-9])%s*$" )
        if p1 and p2 and p3 and p4 then
            r = legal( p1 ) and legal( p2 ) and legal( p3 ) and legal( p4 )
        end
    end
    return r
end -- URLutil.isIPv4()



-- Check whether s is an IPv6 address (unbracketed; colons and hex digits
-- only, so surrounding whitespace is rejected). Returns true or false.
URLutil.isIPv6 = function ( s )
    local dcolon, groups
    if type( s ) ~= "string"
        or s:len() == 0
        or s:find( "[^:%x]" ) -- only colon and hex digits are legal chars
        or s:find( "^:[^:]" ) -- can begin or end with :: but not with single :
        or s:find( "[^:]:$" )
        or s:find( ":::" )
    then
        return false
    end
    s = mw.text.trim( s )
    s, dcolon = s:gsub( "::", ":" )
    if dcolon > 1 then
        return false
    end -- at most one ::
    s = s:gsub( "^:?", ":" ) -- prepend : if needed, upper
    s, groups = s:gsub( ":%x%x?%x?%x?", "" ) -- remove valid groups, and count them
    return ( ( dcolon == 1 and groups < 8 ) or
             ( dcolon == 0 and groups == 8 ) )
        and ( s:len() == 0 or
              ( dcolon == 1 and s == ":" ) ) -- might be one dangling : if original ended with ::
end -- URLutil.isIPv6()



-- Check whether s is a mail address "local@domain" with a valid domain.
-- Returns the result of isDomain() on the domain part, or false if s is
-- not a string.
URLutil.isMailAddress = function ( s )
    if type( s ) == "string" then
        s = mw.ustring.match( s, "^%s*[%w%.%%_-]+@([%w%.%%-]+)%s*$" )
        return URLutil.isDomain( s )
    end
    return false
end -- URLutil.isMailAddress()



-- Check whether s is a "mailto:" link (case-insensitive scheme) followed
-- by a valid mail address.
URLutil.isMailLink = function ( s )
    if type( s ) == "string" then
        local addr
        s, addr = mw.ustring.match( s, "^%s*([Mm][Aa][Ii][Ll][Tt][Oo]):(%S[%w%.%%_-]*@[%w%.%%-]+)%s*$" )
        if type( s ) == "string" then
            if s:lower() == "mailto" then
                return URLutil.isMailAddress( addr )
            end
        end
    end
    return false
end -- URLutil.isMailLink()



-- Check whether prot ("scheme", "scheme:", "scheme://" or "//") is
-- contained in supplied, a space-separated and space-surrounded list of
-- lowercase scheme names. A bare "//" is always accepted.
local function isProtocolAccepted( prot, supplied )
    if type( prot ) == "string" then
        local scheme, colon, slashes = mw.ustring.match( prot, "^%s*([a-zA-Z]*)(:?)(/?/?)%s*$" )
        -- A single slash is never valid.
        if slashes ~= "/" then
            if scheme == "" then
                if colon ~= ":" and slashes == "//" then
                    return true
                end
            elseif colon == ":" or slashes == "" then
                local s = supplied:match( " " .. scheme:lower() .. " " )
                if type( s ) == "string" then
                    return true
                end
            end
        end
    end
    return false
end -- isProtocolAccepted()



-- Check whether prot is one of the schemes in the list below.
URLutil.isProtocolMW = function ( prot )
    return isProtocolAccepted( prot, " http https ftp ftps ssh sftp irc ircs xmpp sip sips gopher telnet nntp worldwind mailto tel sms news svn git mms bitcoin magnet urn geo " )
end -- URLutil.isProtocolMW()



-- Check whether prot is one of: mailto irc ircs ssh telnet
URLutil.isProtocolDialog = function ( prot )
    return isProtocolAccepted( prot, " mailto irc ircs ssh telnet " )
end -- URLutil.isProtocolDialog()



-- Check whether prot is one of:
-- ftp ftps git http https nntp sftp svn worldwind
URLutil.isProtocolWiki = function ( prot )
    return isProtocolAccepted( prot, " ftp ftps git http https nntp sftp svn worldwind " )
end -- URLutil.isProtocolWiki()



-- Check whether url has one of the schemes // http:// https:// ftp://
-- sftp://, a valid authority, and no whitespace between non-whitespace.
URLutil.isResourceURL = function ( url )
    local scheme = URLutil.getScheme( url )
    if scheme then
        local s = " // http:// https:// ftp:// sftp:// "
        s = s:find( string.format( " %s ", scheme ) )
        if s then
            if URLutil.getAuthority( url ) then
                if not url:match( "%S%s+%S" ) then
                    return true
                end
            end
        end
    end
    return false
end -- URLutil.isResourceURL()



-- Check whether url is not a resource URL or contains something unusual:
-- "@" in the authority, two apostrophes, "[" "|" "]", one of the
-- characters U+2009-U+200F, U+202A-U+202F, U+2060, or a trailing "." / ",".
-- Returns true if suspicious, false otherwise.
URLutil.isSuspiciousURL = function ( url )
    if URLutil.isResourceURL( url ) then
        local s = URLutil.getAuthority( url )
        local pat = "[%[|%]" ..
                    mw.ustring.char( 8201, 45, 8207, 8234, 45, 8239, 8288 )
                    .. "]"
        -- The class holds multi-byte characters and ranges between them;
        -- it has to be evaluated per code point, not per byte.
        if s:find( "@" )
           or url:find( "''" )
           or mw.ustring.find( url, pat )
           or url:find( "[%.,]$" ) then
            return true
        end
        -- TODO  zero width character ??
        return false
    end
    return true
end -- URLutil.isSuspiciousURL()



-- Check whether url is a web URL containing "[", "|" or "]".
-- Always returns false if trailing is a string.
URLutil.isUnescapedURL = function ( url, trailing )
    if type( trailing ) ~= "string" then
        if URLutil.isWebURL( url ) then
            if url:match( "[%[|%]]" ) then
                return true
            end
        end
    end
    return false
end -- URLutil.isUnescapedURL()



-- Check whether url has a scheme and an authority and no whitespace
-- between non-whitespace characters.
URLutil.isWebURL = function ( url )
    if URLutil.getScheme( url ) and URLutil.getAuthority( url ) then
        if not url:match( "%S%s+%S" ) then
            return true
        end
    end
    return false
end -- URLutil.isWebURL()



-- Replace "[", "|" and "]" in url by numeric character references, so
-- that the URL can be used within wiki syntax.
-- A non-string url is returned unchanged.
URLutil.wikiEscapeURL = function ( url )
    if type( url ) == "string"  and  url:find( "[%[|%]]" ) then
        -- Parentheses drop the substitution count returned by gsub.
        url = ( url:gsub( "%[", "&#91;" )
                   :gsub( "|", "&#124;" )
                   :gsub( "%]", "&#93;" ) )
    end
    return url
end -- URLutil.wikiEscapeURL()



-- Provide template access and expose URLutil table to require
-- Each wrapper reads its arguments from frame.args and returns a string
-- suitable for wikitext: "" stands for false / not available.

local p = {}

function p.getURIScheme( frame )
    return URLutil.getURIScheme( frame.args[ 1 ] ) or ""
end
function p.getAuthority( frame )
    return URLutil.getAuthority( frame.args[ 1 ] ) or ""
end
function p.getFragment( frame )
    local r = URLutil.getFragment( frame.args[ 1 ], frame.args[ 2 ] )
    if r then
        r = "#" .. r
    else
        r = ""
    end
    return r
end
function p.getHost( frame )
    return URLutil.getHost( frame.args[ 1 ] ) or ""
end
function p.getLocation( frame )
    return URLutil.getLocation( frame.args[ 1 ] ) or ""
end
function p.getPath( frame )
    return URLutil.getPath( frame.args[ 1 ] ) or ""
end
function p.getPort( frame )
    return URLutil.getPort( frame.args[ 1 ] ) or ""
end
function p.getQuery( frame )
    local r
    local key = frame.args[ 2 ]
    if key then
        key = mw.text.trim( key )
        if key == "" then
            key = nil
        end
    end
    r = URLutil.getQuery( frame.args[ 1 ], key, frame.args[ 3 ] )
    if r then
        -- The whole query is returned with its leading "?".
        if not key then
            r = "?" .. r
        end
    else
        r = ""
    end
    return r
end
function p.getRelativePath( frame )
    return URLutil.getRelativePath( frame.args[ 1 ] ) or ""
end
function p.getScheme( frame )
    return URLutil.getScheme( frame.args[ 1 ] ) or ""
end
function p.getTLD( frame )
    return URLutil.getTLD( frame.args[ 1 ] ) or ""
end
function p.getTop2domain( frame )
    return URLutil.getTop2domain( frame.args[ 1 ] ) or ""
end
function p.getTop3domain( frame )
    return URLutil.getTop3domain( frame.args[ 1 ] ) or ""
end
function p.isAuthority( frame )
    return URLutil.isAuthority( frame.args[ 1 ] ) and "1" or ""
end
function p.isDomain( frame )
    return URLutil.isDomain( frame.args[ 1 ] ) and "1" or ""
end
function p.isDomainExample( frame )
    return URLutil.isDomainExample( frame.args[ 1 ] ) and "1" or ""
end
function p.isDomainInt( frame )
    return URLutil.isDomainInt( frame.args[ 1 ] ) and "1" or ""
end
function p.isHost( frame )
    return URLutil.isHost( frame.args[ 1 ] ) and "1" or ""
end
function p.isIP( frame )
    return URLutil.isIP( frame.args[ 1 ] ) or ""
end
function p.isIPlocal( frame )
    return URLutil.isIPlocal( frame.args[ 1 ] ) and "1" or ""
end
function p.isIPv4( frame )
    return URLutil.isIPv4( frame.args[ 1 ] ) and "1" or ""
end
function p.isIPv6( frame )
    return URLutil.isIPv6( frame.args[ 1 ] ) and "1" or ""
end
function p.isMailAddress( frame )
    return URLutil.isMailAddress( frame.args[ 1 ] ) and "1" or ""
end
function p.isMailLink( frame )
    return URLutil.isMailLink( frame.args[ 1 ] ) and "1" or ""
end
function p.isProtocolMW( frame )
    return URLutil.isProtocolMW( frame.args[ 1 ] ) and "1" or ""
end
function p.isProtocolDialog( frame )
    return URLutil.isProtocolDialog( frame.args[ 1 ] ) and "1" or ""
end
function p.isProtocolWiki( frame )
    return URLutil.isProtocolWiki( frame.args[ 1 ] ) and "1" or ""
end
function p.isResourceURL( frame )
    return URLutil.isResourceURL( frame.args[ 1 ] ) and "1" or ""
end
function p.isSuspiciousURL( frame )
    return URLutil.isSuspiciousURL( frame.args[ 1 ] ) and "1" or ""
end
function p.isUnescapedURL( frame )
    return URLutil.isUnescapedURL( frame.args[ 1 ], frame.args[ 2 ] ) and "1" or ""
end
function p.isWebURL( frame )
    return URLutil.isWebURL( frame.args[ 1 ] ) and "1" or ""
end
function p.wikiEscapeURL( frame )
    return URLutil.wikiEscapeURL( frame.args[ 1 ] ) or ""
end
-- Give Lua callers access to the URLutil table via require().
function p.URLutil()
    return URLutil
end

return p
Summary:
Please note that all contributions to Stockhub may be edited, altered, or removed by other contributors. If you do not want your writing to be edited mercilessly, then do not submit it here.
You are also promising us that you wrote this yourself, or copied it from a public domain or similar free resource (see
Stockhub:Copyrights
for details).
Do not submit copyrighted work without permission!
Cancel
Editing help
(opens in new window)
Template used on this page:
Module:Sandbox/PHansen/URLutil/doc
(
edit
)