From de0318bc4cd3d6e781396c4bae86e4b2d6ea8817 Mon Sep 17 00:00:00 2001 From: Rob Wu Date: Wed, 28 Aug 2013 17:35:23 +0200 Subject: [PATCH] Refactored; move URL parser to separate function --- lib/cors-anywhere.js | 177 ++++++++++++++++++++++++------------------- 1 file changed, 99 insertions(+), 78 deletions(-) diff --git a/lib/cors-anywhere.js b/lib/cors-anywhere.js index 0fd6941..cf5ba6a 100644 --- a/lib/cors-anywhere.js +++ b/lib/cors-anywhere.js @@ -36,7 +36,7 @@ function showUsage(headers, response) { * @param hostname {string} Host name (excluding port) of requested resource. * @return {boolean} Whether the requested resource can be accessed. */ -function hasNoContent(hostname) { +function isValidHostName(hostname) { return !( regexp_tld.test(hostname) || net.isIPv4(hostname) || @@ -84,46 +84,83 @@ function isForbidden(host) { * @param req {ServerRequest} Incoming http request * @param res {ServerResponse} Outgoing (proxied) http request * @param proxy {HttpProxy} - * @param full_url {string} Canonical URL of outgoing (proxied) http request. - * @param isRequestedOverHttps {boolean} Whether the incoming request originates from https + * @param location {object} See parseURL + * @param requestState.proxyBaseUrl {string} Base URL of the CORS API endpoint. */ -function proxyRequest(req, res, proxy, full_url, isRequestedOverHttps, proxyOptions) { - if (isForbidden(proxyOptions.host)) { - res.writeHead(403, 'Refused to visit', withCORS({'Location': full_url}, req)); +function proxyRequest(req, res, proxy, location, requestState) { + if (isForbidden(location.hostname)) { + res.writeHead(403, 'Refused to visit', withCORS({'Location': location.full_url}, req)); return; } - var realHost = req.headers.host; + req.url = location.pathAndQueryString; // Let the "Host" header be the host part of the path (including port, if specified). - req.headers.host = full_url.split('/', 3)[2]; + req.headers.host = location.host; // "Allow observer to modify headers or abort response" // https://github.com/nodejitsu/node-http-proxy/blob/ebbba73e/lib/node-http-proxy/http-proxy.js#L321-L322 - proxy.on('proxyResponse', function(req, res, response) { - withCORS(response.headers, req); + proxy.once('proxyResponse', function(req, res, response) { var statusCode = response.statusCode; // Handle redirects if (statusCode === 301 || statusCode === 302 || statusCode === 303 || statusCode === 307 || statusCode === 308) { var locationHeader = response.headers['location']; if (locationHeader) { - response.headers['location'] = (isRequestedOverHttps ? 'https://' : 'http://') + realHost + '/' + - url.resolve(full_url, locationHeader); + locationHeader = url.resolve(location.full_url, locationHeader); + response.headers['location'] = requestState.proxyBaseUrl + '/' + locationHeader; } } + withCORS(response.headers, req); // Don't slip through cookies delete response.headers['set-cookie']; delete response.headers['set-cookie2']; - response.headers['x-request-url'] = full_url; + response.headers['x-request-url'] = location.full_url; }); // Start proxying the request - proxy.proxyRequest(req, res, proxyOptions); + proxy.proxyRequest(req, res, { + host: location.hostname, + port: location.port, + target: { + https: location.isHttps + } + }); } +/** + * @param req_url {string} The requested URL (scheme is optional). + * @return {object} Strings: full_url, host, hostname, pathAndQueryString + * Number: port + * boolean: isHttps + */ +function parseURL(req_url) { + var match = req_url.match(/^(?:(https?:)?\/\/)?(([^\/?]+?)(?::(\d{0,5})(?=[\/?]|$))?)([\/?][\S\s]*|$)/i); + // ^^^^^^^ ^^^^^^^^ ^^^^^^^ ^^^^^^^^^^^^ + // 1:protocol 3:hostname 4:port 5:path + query string + // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + // 2:host + if (!match) { + return null; + } + var isHttps = (match[1] && match[1].toLowerCase()) === 'https:'; + var location = { + full_url: match[0], + isHttps: isHttps, + host: match[2], + hostname: match[3], + port: match[4] ? +match[4] : (isHttps ? 443 : 80), + pathAndQueryString: match[5] + }; + + if (!match[1]) { // Scheme is omitted. + location.full_url = (location.port === 443 ? 'https:' : 'http:') + location.full_url.replace(/^(?!\/)/, '//'); + } + return location; +} + // Request handler factory var getHandler = exports.getHandler = function(options) { var corsAnywhere = { @@ -162,71 +199,55 @@ var getHandler = exports.getHandler = function(options) { res.writeHead(200, cors_headers); res.end(); return; - } else { - // Actual request. First, extract the desired URL from the request: - var full_url, host, hostname, port, path, match, isHttps; - match = req.url.match(/^\/(?:(https?:)?\/\/)?(([^\/?]+?)(?::(\d{0,5})(?=[\/?]|$))?)([\/?][\S\s]*|$)/i); - // ^^^^^^^ ^^^^^^^^ ^^^^^^^ ^^^^^^^^^^^^ - // 1:protocol 3:hostname 4:port 5:path + query string - // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - // 2:host - if (!match || (match[2].indexOf('.') === -1 && match[2].indexOf(':') === -1)) { - if (match && match[2] === 'iscorsneeded') { - // Is CORS needed? This path is provided so that API consumers can test whether it's necessary - // to use CORS. The server's reply is always No, because if they can read it, then CORS headers - // are not necessary. - res.writeHead(200, {'Content-Type': 'text/plain'}); - res.end('no'); - } else { - // Incorrect usage. Show how to do it correctly. - showUsage(cors_headers, res); - } - return; - } else if (match[4] > 65535) { - // Port is higher than 65535 - res.writeHead(400, 'Invalid port', cors_headers); - res.end('Invalid port: ' + match[4]); - return; - } else if ( hasNoContent(match[3]) ) { - // Don't even try to proxy invalid hosts (such as /favicon.ico, /robots.txt) - res.writeHead(404, 'Invalid host', cors_headers); - res.end('Invalid host: ' + match[3]); - return; - } else if (!hasRequiredHeaders(req.headers)) { - res.writeHead(400, 'Header required', cors_headers); - res.end('Missing required request header. Must specify one of: ' + corsAnywhere.requireHeader); - return; - } else { - full_url = match[0].substr(1); - isHttps = (match[1] && match[1].toLowerCase()) === 'https:'; - host = match[2]; - hostname = match[3]; - // Read port from input: : / 443 if https / 80 by default - port = match[4] ? +match[4] : (isHttps ? 443 : 80); - path = match[5]; - - if (!match[1]) { - if (full_url.charAt(0) !== '/') full_url = '//' + full_url; - full_url = (port === 443 ? 'https:' : 'http:') + full_url; - } - } - // Change the requested path: - req.url = path; - - var isRequestedOverHttps = req.connection.encrypted || /^\s*https/.test(req.headers['x-forwarded-proto']); - - corsAnywhere.removeHeaders.forEach(function(header) { - delete req.headers[header]; - }); - - proxyRequest(req, res, proxy, full_url, isRequestedOverHttps, { - host: hostname, - port: port, - target: { - https: isHttps - } - }); } + + var location = parseURL(req.url.slice(1)); + + if (!location) { + // Invalid API call. Show how to correctly use the API + showUsage(cors_headers, res); + return; + } + + if (!hasRequiredHeaders(req.headers)) { + res.writeHead(400, 'Header required', cors_headers); + res.end('Missing required request header. Must specify one of: ' + corsAnywhere.requireHeader); + return; + } + + if (location.host === 'iscorsneeded') { + // Is CORS needed? This path is provided so that API consumers can test whether it's necessary + // to use CORS. The server's reply is always No, because if they can read it, then CORS headers + // are not necessary. + res.writeHead(200, {'Content-Type': 'text/plain'}); + res.end('no'); + return; + } + + if (location.port > 65535) { + // Port is higher than 65535 + res.writeHead(400, 'Invalid port', cors_headers); + res.end('Port number too large: ' + location.port); + return; + } + + if (isValidHostName(location.hostname)) { + // Don't even try to proxy invalid hosts (such as /favicon.ico, /robots.txt) + res.writeHead(404, 'Invalid host', cors_headers); + res.end('Invalid host: ' + location.hostname); + return; + } + + var isRequestedOverHttps = req.connection.encrypted || /^\s*https/.test(req.headers['x-forwarded-proto']); + var proxyBaseUrl = (isRequestedOverHttps ? 'https://' : 'http://') + req.headers.host; + + corsAnywhere.removeHeaders.forEach(function(header) { + delete req.headers[header]; + }); + + proxyRequest(req, res, proxy, location, { + proxyBaseUrl: proxyBaseUrl + }); }; };