From 8568c06b176dcd0abd956f8432d72d92ceac9248 Mon Sep 17 00:00:00 2001 From: Rob Wu Date: Fri, 19 Feb 2016 00:22:20 +0100 Subject: [PATCH] Support proxying through a proxy - #37 Automatically respects proxy headers from environment variables, using https://github.com/Rob--W/proxy-from-env --- README.md | 4 +++ lib/cors-anywhere.js | 31 ++++++++++++++---- package.json | 4 ++- test/setup.js | 6 ++++ test/test.js | 75 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 113 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index be144e6..1788b40 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,10 @@ The module exports two properties: `getHandler` and `createServer`. The following options are recognized by both methods: +* function `getProxyForUrl` - If set, specifies which intermediate proxy to use for a given URL. + If the return value is void, a direct request is sent. The default implementation is + [`proxy-from-env`](https://github.com/Rob--W/proxy-from-env), which respects the standard proxy + environment variables (e.g. `https_proxy`, `no_proxy`, etc.). * array of strings `originBlacklist` - If set, requests whose origin is listed are blocked. Example: `['https://bad.example.com', 'http://bad.example.com']` * array of strings `originWhitelist` - If set, requests whose origin is not listed are blocked. 
diff --git a/lib/cors-anywhere.js b/lib/cors-anywhere.js index 4f8da67..01c2388 100644 --- a/lib/cors-anywhere.js +++ b/lib/cors-anywhere.js @@ -7,6 +7,8 @@ var httpProxy = require('http-proxy'); var net = require('net'); var url = require('url'); var regexp_tld = require('./regexp-top-level-domain'); +var getProxyForUrl = require('proxy-from-env').getProxyForUrl; +var requiresPort = require('requires-port'); var help_file = __dirname + '/help.txt'; var help_text; @@ -74,15 +76,29 @@ function withCORS(headers, request) { */ function proxyRequest(req, res, proxy) { var location = req.corsAnywhereRequestState.location; - req.url = location.path; - // Start proxying the request - proxy.web(req, res, { - changeOrigin: true, + var proxyOptions = { + changeOrigin: false, prependPath: false, - target: location - }); + target: location, + headers: { + host: requiresPort(location.port, location.protocol) && !/:\d*$/.test(location.host) ? + location.host + ':' + location.port : + location.host, + }, + }; + var proxyThroughUrl = req.corsAnywhereRequestState.getProxyForUrl(location.href); + if (proxyThroughUrl) { + proxyOptions.target = proxyThroughUrl; + proxyOptions.toProxy = true; + // If a proxy URL was set, req.url must be an absolute URL. Then the request will not be sent + // directly to the proxied URL, but through another proxy. 
+ req.url = location.href; + } + + // Start proxying the request + proxy.web(req, res, proxyOptions); } /** @@ -97,6 +113,7 @@ function proxyRequest(req, res, proxy) { * @param req {IncomingMessage} Incoming HTTP request, augmented with property corsAnywhereRequestState * @param req.corsAnywhereRequestState {object} * @param req.corsAnywhereRequestState.location {object} See parseURL + * @param req.corsAnywhereRequestState.getProxyForUrl {function} See proxyRequest * @param req.corsAnywhereRequestState.proxyBaseUrl {string} Base URL of the CORS API endpoint * @param req.corsAnywhereRequestState.maxRedirects {number} Maximum number of redirects * @param req.corsAnywhereRequestState.redirectCount_ {number} Internally used to count redirects @@ -196,6 +213,7 @@ function parseURL(req_url) { // Request handler factory var getHandler = exports.getHandler = function(options, proxy) { var corsAnywhere = { + getProxyForUrl: getProxyForUrl, // Function that specifies the proxy to use maxRedirects: 5, // Maximum number of redirects to be followed. originBlacklist: [], // Requests from these origins will be blocked. originWhitelist: [], // If non-empty, requests not from an origin in this list will be blocked. 
@@ -300,6 +318,7 @@ var getHandler = exports.getHandler = function(options, proxy) { req.corsAnywhereRequestState = { location: location, + getProxyForUrl: corsAnywhere.getProxyForUrl, maxRedirects: corsAnywhere.maxRedirects, proxyBaseUrl: proxyBaseUrl }; diff --git a/package.json b/package.json index 8d93806..f90a3fc 100644 --- a/package.json +++ b/package.json @@ -21,7 +21,9 @@ ], "main": "./lib/cors-anywhere.js", "dependencies": { - "http-proxy": "1.11.1" + "http-proxy": "1.11.1", + "proxy-from-env": "0.0.1", + "requires-port": "1.0.0" }, "devDependencies": { "mocha": "~2.2.4", diff --git a/test/setup.js b/test/setup.js index fb87d9b..62f3903 100644 --- a/test/setup.js +++ b/test/setup.js @@ -93,6 +93,12 @@ nock('http://example.com') .replyWithError('throw node') ; +nock('https://example.com') + .persist() + .get('/') + .reply(200, 'Response from https://example.com') +; + echoheaders('http://example.com'); echoheaders('http://example.com:1337'); echoheaders('https://example.com'); diff --git a/test/test.js b/test/test.js index 493853e..667bfb8 100644 --- a/test/test.js +++ b/test/test.js @@ -3,6 +3,7 @@ require('./setup'); var createServer = require('../').createServer; var request = require('supertest'); var path = require('path'); +var http = require('http'); var fs = require('fs'); var assert = require('assert'); @@ -563,3 +564,77 @@ describe('httpProxyOptions.xfwd=false', function() { }, done); }); }); + +describe('httpProxyOptions.getProxyForUrl', function() { + var proxy_server; + var proxy_url; + before(function() { + // Using a real server instead of a mock because Nock can't mock proxies. 
+ proxy_server = http.createServer(function(req, res) { + res.end(req.method + ' ' + req.url + ' Host=' + req.headers.host); + }); + proxy_url = 'http://127.0.0.1:' + proxy_server.listen(0).address().port; + + cors_anywhere = createServer({ + httpProxyOptions: { + xfwd: false + } + }); + cors_anywhere_port = cors_anywhere.listen(0).address().port; + }); + afterEach(function() { + // Assuming that they were not set before. + delete process.env.https_proxy; + delete process.env.http_proxy; + delete process.env.no_proxy; + }); + after(function(done) { + proxy_server.close(function() { + done(); + }); + }); + after(stopServer); + + it('http_proxy should be respected for matching domains', function(done) { + process.env.http_proxy = proxy_url; + + request(cors_anywhere) + .get('/http://example.com') + .expect('Access-Control-Allow-Origin', '*') + .expect(200, 'GET http://example.com/ Host=example.com', done); + }); + + it('http_proxy should be ignored for http URLs', function(done) { + process.env.http_proxy = proxy_url; + request(cors_anywhere) + .get('/https://example.com') + .expect('Access-Control-Allow-Origin', '*') + .expect(200, 'Response from https://example.com', done); + }); + + it('https_proxy should be respected for matching domains', function(done) { + process.env.https_proxy = proxy_url; + + request(cors_anywhere) + .get('/https://example.com') + .expect('Access-Control-Allow-Origin', '*') + .expect(200, 'GET https://example.com/ Host=example.com', done); + }); + + it('https_proxy should be ignored for http URLs', function(done) { + process.env.https_proxy = proxy_url; + request(cors_anywhere) + .get('/http://example.com') + .expect('Access-Control-Allow-Origin', '*') + .expect(200, 'Response from example.com', done); + }); + + it('https_proxy + no_proxy should not intercept requests in no_proxy', function(done) { + process.env.https_proxy = proxy_url; + process.env.no_proxy = 'example.com:443'; + request(cors_anywhere) + .get('/https://example.com') + 
.expect('Access-Control-Allow-Origin', '*') + .expect(200, 'Response from https://example.com', done); + }); +});