Refactored; move URL parser to separate function

This commit is contained in:
Rob Wu
2013-08-28 17:35:23 +02:00
parent c37e496dee
commit de0318bc4c

View File

@@ -36,7 +36,7 @@ function showUsage(headers, response) {
* @param hostname {string} Host name (excluding port) of requested resource.
* @return {boolean} Whether the requested resource can be accessed.
*/
function hasNoContent(hostname) {
function isValidHostName(hostname) {
return !(
regexp_tld.test(hostname) ||
net.isIPv4(hostname) ||
@@ -84,46 +84,83 @@ function isForbidden(host) {
* @param req {ServerRequest} Incoming http request
* @param res {ServerResponse} Outgoing (proxied) http request
* @param proxy {HttpProxy}
* @param full_url {string} Canonical URL of outgoing (proxied) http request.
* @param isRequestedOverHttps {boolean} Whether the incoming request originates from https
* @param location {object} See parseURL
* @param requestState.proxyBaseUrl {string} Base URL of the CORS API endpoint.
*/
function proxyRequest(req, res, proxy, full_url, isRequestedOverHttps, proxyOptions) {
if (isForbidden(proxyOptions.host)) {
res.writeHead(403, 'Refused to visit', withCORS({'Location': full_url}, req));
function proxyRequest(req, res, proxy, location, requestState) {
if (isForbidden(location.hostname)) {
res.writeHead(403, 'Refused to visit', withCORS({'Location': location.full_url}, req));
return;
}
var realHost = req.headers.host;
req.url = location.pathAndQueryString;
// Let the "Host" header be the host part of the path (including port, if specified).
req.headers.host = full_url.split('/', 3)[2];
req.headers.host = location.host;
// "Allow observer to modify headers or abort response"
// https://github.com/nodejitsu/node-http-proxy/blob/ebbba73e/lib/node-http-proxy/http-proxy.js#L321-L322
proxy.on('proxyResponse', function(req, res, response) {
withCORS(response.headers, req);
proxy.once('proxyResponse', function(req, res, response) {
var statusCode = response.statusCode;
// Handle redirects
if (statusCode === 301 || statusCode === 302 || statusCode === 303 || statusCode === 307 || statusCode === 308) {
var locationHeader = response.headers['location'];
if (locationHeader) {
response.headers['location'] = (isRequestedOverHttps ? 'https://' : 'http://') + realHost + '/' +
url.resolve(full_url, locationHeader);
locationHeader = url.resolve(location.full_url, locationHeader);
response.headers['location'] = requestState.proxyBaseUrl + '/' + locationHeader;
}
}
withCORS(response.headers, req);
// Don't slip through cookies
delete response.headers['set-cookie'];
delete response.headers['set-cookie2'];
response.headers['x-request-url'] = full_url;
response.headers['x-request-url'] = location.full_url;
});
// Start proxying the request
proxy.proxyRequest(req, res, proxyOptions);
proxy.proxyRequest(req, res, {
host: location.hostname,
port: location.port,
target: {
https: location.isHttps
}
});
}
/**
 * Split the URL requested through the proxy into the parts needed to forward it.
 *
 * The scheme is optional. When it is omitted, "https:" is assumed if the port
 * is explicitly 443 and "http:" otherwise, and full_url is completed accordingly.
 *
 * @param req_url {string} The requested URL (scheme is optional).
 * @return {object|null} null if req_url cannot be parsed as a URL, otherwise:
 *   Strings: full_url, host, hostname, pathAndQueryString
 *   Number: port
 *   boolean: isHttps
 */
function parseURL(req_url) {
  var match = req_url.match(/^(?:(https?:)?\/\/)?(([^\/?]+?)(?::(\d{0,5})(?=[\/?]|$))?)([\/?][\S\s]*|$)/i);
  //                              ^^^^^^^          ^^^^^^^^      ^^^^^^^                ^^^^^^^^^^^^
  //                             1:protocol       3:hostname     4:port                 5:path + query string
  //                                              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  //                                              2:host
  if (!match) {
    return null;
  }
  var hasScheme = Boolean(match[1]);
  if (!hasScheme && /^https?:/i.test(req_url)) {
    // The input starts with a scheme, yet the pattern did not capture it. This
    // happens for a scheme without a host (e.g. "http:///path"), where the
    // pattern backtracks and mistakes "http" for the hostname. Reject it.
    return null;
  }
  var hasExplicitHttps = hasScheme && match[1].toLowerCase() === 'https:';
  // Port from input: :<port> / 443 if https / 80 by default.
  // An empty port ("host:") is treated the same as an omitted port.
  var port = match[4] ? +match[4] : (hasExplicitHttps ? 443 : 80);
  // Without a scheme, port 443 implies https. Keep isHttps in agreement with
  // the scheme that is put in front of full_url below.
  var isHttps = hasScheme ? hasExplicitHttps : port === 443;
  var full_url = match[0];
  if (!hasScheme) {
    // Scheme is omitted; "//" may be omitted as well.
    if (full_url.charAt(0) !== '/') {
      full_url = '//' + full_url;
    }
    full_url = (isHttps ? 'https:' : 'http:') + full_url;
  }
  return {
    full_url: full_url,
    isHttps: isHttps,
    host: match[2],
    hostname: match[3],
    port: port,
    pathAndQueryString: match[5]
  };
}
// Request handler factory
var getHandler = exports.getHandler = function(options) {
var corsAnywhere = {
@@ -162,71 +199,55 @@ var getHandler = exports.getHandler = function(options) {
res.writeHead(200, cors_headers);
res.end();
return;
} else {
// Actual request. First, extract the desired URL from the request:
var full_url, host, hostname, port, path, match, isHttps;
match = req.url.match(/^\/(?:(https?:)?\/\/)?(([^\/?]+?)(?::(\d{0,5})(?=[\/?]|$))?)([\/?][\S\s]*|$)/i);
// ^^^^^^^ ^^^^^^^^ ^^^^^^^ ^^^^^^^^^^^^
// 1:protocol 3:hostname 4:port 5:path + query string
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
// 2:host
if (!match || (match[2].indexOf('.') === -1 && match[2].indexOf(':') === -1)) {
if (match && match[2] === 'iscorsneeded') {
// Is CORS needed? This path is provided so that API consumers can test whether it's necessary
// to use CORS. The server's reply is always No, because if they can read it, then CORS headers
// are not necessary.
res.writeHead(200, {'Content-Type': 'text/plain'});
res.end('no');
} else {
// Incorrect usage. Show how to do it correctly.
showUsage(cors_headers, res);
}
return;
} else if (match[4] > 65535) {
// Port is higher than 65535
res.writeHead(400, 'Invalid port', cors_headers);
res.end('Invalid port: ' + match[4]);
return;
} else if ( hasNoContent(match[3]) ) {
// Don't even try to proxy invalid hosts (such as /favicon.ico, /robots.txt)
res.writeHead(404, 'Invalid host', cors_headers);
res.end('Invalid host: ' + match[3]);
return;
} else if (!hasRequiredHeaders(req.headers)) {
res.writeHead(400, 'Header required', cors_headers);
res.end('Missing required request header. Must specify one of: ' + corsAnywhere.requireHeader);
return;
} else {
full_url = match[0].substr(1);
isHttps = (match[1] && match[1].toLowerCase()) === 'https:';
host = match[2];
hostname = match[3];
// Read port from input: :<port> / 443 if https / 80 by default
port = match[4] ? +match[4] : (isHttps ? 443 : 80);
path = match[5];
if (!match[1]) {
if (full_url.charAt(0) !== '/') full_url = '//' + full_url;
full_url = (port === 443 ? 'https:' : 'http:') + full_url;
}
}
// Change the requested path:
req.url = path;
var isRequestedOverHttps = req.connection.encrypted || /^\s*https/.test(req.headers['x-forwarded-proto']);
corsAnywhere.removeHeaders.forEach(function(header) {
delete req.headers[header];
});
proxyRequest(req, res, proxy, full_url, isRequestedOverHttps, {
host: hostname,
port: port,
target: {
https: isHttps
}
});
}
var location = parseURL(req.url.slice(1));
if (!location) {
// Invalid API call. Show how to correctly use the API
showUsage(cors_headers, res);
return;
}
if (!hasRequiredHeaders(req.headers)) {
res.writeHead(400, 'Header required', cors_headers);
res.end('Missing required request header. Must specify one of: ' + corsAnywhere.requireHeader);
return;
}
if (location.host === 'iscorsneeded') {
// Is CORS needed? This path is provided so that API consumers can test whether it's necessary
// to use CORS. The server's reply is always No, because if they can read it, then CORS headers
// are not necessary.
res.writeHead(200, {'Content-Type': 'text/plain'});
res.end('no');
return;
}
if (location.port > 65535) {
// Port is higher than 65535
res.writeHead(400, 'Invalid port', cors_headers);
res.end('Port number too large: ' + location.port);
return;
}
if (isValidHostName(location.hostname)) {
// Don't even try to proxy invalid hosts (such as /favicon.ico, /robots.txt)
res.writeHead(404, 'Invalid host', cors_headers);
res.end('Invalid host: ' + location.hostname);
return;
}
var isRequestedOverHttps = req.connection.encrypted || /^\s*https/.test(req.headers['x-forwarded-proto']);
var proxyBaseUrl = (isRequestedOverHttps ? 'https://' : 'http://') + req.headers.host;
corsAnywhere.removeHeaders.forEach(function(header) {
delete req.headers[header];
});
proxyRequest(req, res, proxy, location, {
proxyBaseUrl: proxyBaseUrl
});
};
};