uri-tools.js 4.48 KB
Newer Older
1
// =============================================================================
2
// Library to deal with URI.
3
// =============================================================================
4

5 6
.pragma library

7 8 9 10 11 12
// Options.

// When true, the URI pattern also accepts text starting with `www.` in place
// of a `scheme:` prefix. This is not standard URI syntax, but it is helpful.
var SUPPORTS_URL = true

13
// Level 0. --------------------------------------------------------------------
14

15
// `pct-encoded`: a percent sign followed by exactly two hexadecimal digits.
var URI_PCT_ENCODED = "%[A-Fa-f\\d]{2}"
16
// `port`: any number of decimal digits, including none.
var URI_PORT = "\\d*"
17
// `scheme`: a letter followed by any number of word characters, `+`, `.` or `-`.
// The hyphen is escaped with a double backslash so that the escape survives the
// string literal and reaches the RegExp; with a single backslash it is dropped
// and `+-.` becomes a character range that also accepts `,`.
// NOTE(review): `\w` also lets `_` through, which the RFC 3986 scheme grammar
// does not list; it is retained here so existing matches stay unchanged.
var URI_SCHEME = '[a-zA-Z][\\w+.\\-]*'
18
// `sub-delims`: one of the characters ! $ & ' ( ) * + , ; =
var URI_SUB_DELIMS = "[!$&'()*+,;=]"
19
// `unreserved`: a word character (letter, digit, `_`), `.`, `~` or `-`.
// The hyphen is placed last and escaped with a double backslash so it is an
// explicit literal in the RegExp source. With a single backslash the escape is
// dropped by the string literal, leaving `\w-.`, which only works through the
// legacy treatment of a class escape as a range bound and is a syntax error
// when the pattern is compiled with the `u` flag.
var URI_UNRESERVED = '[\\w._~\\-]'
20

21
// Level 1. --------------------------------------------------------------------
22 23 24 25 26 27 28 29 30

// Host: zero or more unreserved, percent-encoded or sub-delimiter characters.
// Both groups are capturing ones (the sibling patterns use `(?:`); they are
// left as is because they determine the capture numbering of any regex built
// from this string.
var URI_HOST = '((' +
  [URI_UNRESERVED, URI_PCT_ENCODED, URI_SUB_DELIMS].join('|') +
')*)'

31
// `pchar`: a single path character, i.e. an unreserved, percent-encoded or
// sub-delimiter character, or one of `:` and `@`.
var URI_PCHAR = '(?:' +
  URI_UNRESERVED +
  '|' + URI_PCT_ENCODED +
  '|' + URI_SUB_DELIMS +
  '|' + '[:@]' +
')'

38 39 40 41 42 43
// `userinfo`: zero or more unreserved, percent-encoded or sub-delimiter
// characters, or `:`.
var URI_USERINFO = '(?:' +
  [URI_UNRESERVED, URI_PCT_ENCODED, URI_SUB_DELIMS, ':'].join('|') +
')*'
44

45
// Level 2. --------------------------------------------------------------------
46

47 48 49
// `authority`: an optional `userinfo@` prefix, the host, then an optional
// `:port` suffix.
var URI_AUTHORITY = [
  '(?:', URI_USERINFO, '@)?',
  URI_HOST,
  '(?::', URI_PORT, ')?'
].join('')
50

51 52 53 54
// `fragment`: zero or more path characters, `/` or `?`.
var URI_FRAGMENT = '(?:' + [URI_PCHAR, '[/?]'].join('|') + ')*'
55

56 57 58 59
// `query`: zero or more path characters, `/` or `?`.
var URI_QUERY = '(?:' + URI_PCHAR + '|[/?])*'
60

61 62
// `segment`: zero or more path characters.
var URI_SEGMENT = URI_PCHAR.concat('*')
// `segment-nz`: one or more path characters.
var URI_SEGMENT_NZ = URI_PCHAR.concat('+')
63

64
// Level 3. --------------------------------------------------------------------
65

66
// `path-abempty`: zero or more `/segment` parts, so it may be empty.
var URI_PATH_ABEMPTY = ['(?:/', URI_SEGMENT, ')*'].join('')
67

68 69
// `path-absolute`: a leading `/`, optionally followed by a non-empty segment
// and any number of further `/segment` parts.
var URI_PATH_ABSOLUTE =
  '/(?:' + URI_SEGMENT_NZ + '(?:/' + URI_SEGMENT + ')*)?'
70

71 72
// `path-rootless`: a non-empty segment followed by any number of `/segment`
// parts.
var URI_PATH_ROOTLESS = [URI_SEGMENT_NZ, '(?:/', URI_SEGMENT, ')*'].join('')
73

74
// Level 4. --------------------------------------------------------------------
75

76
// `path-empty` not used.
77
// `hier-part`: either `//` + authority + path-abempty, or an absolute path,
// or a rootless path. The alternatives are tried in that order.
var URI_HIER_PART = '(?:' +
  '//' + URI_AUTHORITY + URI_PATH_ABEMPTY +
  '|' + URI_PATH_ABSOLUTE +
  '|' + URI_PATH_ROOTLESS +
')'

83
// Level 5. --------------------------------------------------------------------
84 85

// Regex source matching a URI, following RFC 3986.
// Many features are not supported, such as the IP address host formats.
87 88 89 90 91
// Full URI pattern source: a prefix, the hierarchical part, then an optional
// `?query` and an optional `#fragment`. The prefix is `scheme:`; when
// SUPPORTS_URL is true, a literal `www.` is accepted as an alternative prefix.
var URI = [
  SUPPORTS_URL
    ? '(?:' + URI_SCHEME + ':|www\\.)'
    : URI_SCHEME + ':',
  URI_HIER_PART,
  '(?:\\?' + URI_QUERY + ')?',
  '(?:#' + URI_FRAGMENT + ')?'
].join('')
92

93 94
// Global (`g` flag) regular expression compiled from the `URI` pattern source.
// Note: a `g` regex keeps its position in `lastIndex` between `test`/`exec`
// calls, so reset `lastIndex` to 0 before reusing it that way on a new string.
var URI_REGEX = new RegExp(URI, 'g')

95
// =============================================================================
96

97
/* TODO: Add support for:

   URI-reference = URI / relative-ref

   absolute-URI  = scheme ":" hier-part [ "?" query ]

   relative-ref  = relative-part [ "?" query ] [ "#" fragment ]

   relative-part = "//" authority path-abempty
                 / path-absolute
                 / path-noscheme
                 / path-empty

   host          = IP-literal / IPv4address / reg-name

   IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"

   IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )

   IPv6address   =                            6( h16 ":" ) ls32
                 /                       "::" 5( h16 ":" ) ls32
                 / [               h16 ] "::" 4( h16 ":" ) ls32
                 / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
                 / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
                 / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
                 / [ *4( h16 ":" ) h16 ] "::"              ls32
                 / [ *5( h16 ":" ) h16 ] "::"              h16
                 / [ *6( h16 ":" ) h16 ] "::"

   h16           = 1*4HEXDIG
   ls32          = ( h16 ":" h16 ) / IPv4address
   IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet

   dec-octet     = DIGIT                 ; 0-9
                 / %x31-39 DIGIT         ; 10-99
                 / "1" 2DIGIT            ; 100-199
                 / "2" %x30-34 DIGIT     ; 200-249
                 / "25" %x30-35          ; 250-255

   reg-name      = *( unreserved / pct-encoded / sub-delims )

   path          = path-abempty    ; begins with "/" or is empty
                 / path-absolute   ; begins with "/" but not "//"
                 / path-noscheme   ; begins with a non-colon segment
                 / path-rootless   ; begins with a segment
                 / path-empty      ; zero characters

   path-noscheme = segment-nz-nc *( "/" segment )

   segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
                 ; non-zero-length segment without any colon ":"

   reserved      = gen-delims / sub-delims
   gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
*/