From 17aebb61025825feda76c87562862f6bdaf9a0c1 Mon Sep 17 00:00:00 2001 From: Nicholas Bollweg Date: Sat, 25 Jul 2015 13:11:15 -0400 Subject: [PATCH] various URL fixing mechanisms for #480 --- nbviewer/providers/base.py | 20 +++++++++++++++++--- nbviewer/providers/dropbox/handlers.py | 2 +- nbviewer/providers/url/handlers.py | 9 ++++++--- nbviewer/tests/test_utils.py | 2 +- nbviewer/utils.py | 13 ++++++++----- 5 files changed, 33 insertions(+), 13 deletions(-) diff --git a/nbviewer/providers/base.py b/nbviewer/providers/base.py index a6b2fe27..5b1002e4 100644 --- a/nbviewer/providers/base.py +++ b/nbviewer/providers/base.py @@ -17,10 +17,10 @@ try: # py3 from http.client import responses - from urllib.parse import urlparse + from urllib.parse import urlparse, urlunparse except ImportError: from httplib import responses - from urlparse import urlparse + from urlparse import urlparse, urlunparse from tornado import ( gen, @@ -66,8 +66,22 @@ def initialize(self, format=None, format_prefix=""): # Overloaded methods def redirect(self, url, *args, **kwargs): + purl = urlparse(url) + + eurl = urlunparse(( + purl.scheme, + purl.netloc, + "/".join([ + url_escape(url_unescape(p), plus=False) + for p in purl.path.split("/") + ]), + purl.params, + purl.query, + purl.fragment + )) + return super(BaseHandler, self).redirect( - "/".join(map(url_escape, url.split("/"))), + eurl, *args, **kwargs ) diff --git a/nbviewer/providers/dropbox/handlers.py b/nbviewer/providers/dropbox/handlers.py index d90fcf2d..d35e9232 100644 --- a/nbviewer/providers/dropbox/handlers.py +++ b/nbviewer/providers/dropbox/handlers.py @@ -7,6 +7,6 @@ def uri_rewrites(rewrites=[]): return rewrites + [ - (r'^http(s?)://www.dropbox.com/(sh?)/(.+)$', + (r'^http(s?)://www.dropbox.com/(sh?)/(.+?)(\?dl=.)*$', u'/url{0}/dl.dropbox.com/{1}/{2}'), ] diff --git a/nbviewer/providers/url/handlers.py b/nbviewer/providers/url/handlers.py index f3dff792..cbb997df 100644 --- a/nbviewer/providers/url/handlers.py +++ b/nbviewer/providers/url/handlers.py @@ -19,6 +19,7 @@ web, ) from tornado.log import app_log +from tornado.escape import url_unescape from ...utils import ( quote, @@ -35,15 +36,17 @@ class URLHandler(RenderingHandler): """Renderer for /url or /urls""" @cached @gen.coroutine - def get(self, secure, url): + def get(self, secure, netloc, url): proto = 'http' + secure + netloc = url_unescape(netloc) if '/?' in url: url, query = url.rsplit('/?', 1) else: query = None - remote_url = u"{}://{}".format(proto, quote(url)) + remote_url = u"{}://{}/{}".format(proto, netloc, quote(url)) + if query: remote_url = remote_url + '?' + query if not url.endswith('.ipynb'): @@ -95,7 +98,7 @@ def default_handlers(handlers=[]): """Tornado handlers""" return handlers + [ - (r'/url([s]?)/(.*)', URLHandler), + (r'/url([s]?)/([^/]+)/(.*)', URLHandler), ] diff --git a/nbviewer/tests/test_utils.py b/nbviewer/tests/test_utils.py index 489ec8c4..a371cd3a 100644 --- a/nbviewer/tests/test_utils.py +++ b/nbviewer/tests/test_utils.py @@ -39,7 +39,7 @@ def test_transform_ipynb_uri(): u'/url/dl.dropbox.com/s/bar/baz.qux'), ( u'https://www.dropbox.com/s/zip/baz.qux', u'/urls/dl.dropbox.com/s/zip/baz.qux'), - ( u'https://www.dropbox.com/sh/mhviow274da2wly/CZKwRRcA0k/nested/furthernested/User%2520Interface.ipynb', + ( u'https://www.dropbox.com/sh/mhviow274da2wly/CZKwRRcA0k/nested/furthernested/User%2520Interface.ipynb?dl=1', u'/urls/dl.dropbox.com/sh/mhviow274da2wly/CZKwRRcA0k/nested/furthernested/User%2520Interface.ipynb'), # URL ('https://example.org/ipynb', diff --git a/nbviewer/utils.py b/nbviewer/utils.py index a15ed78b..6ca69685 100644 --- a/nbviewer/utils.py +++ b/nbviewer/utils.py @@ -75,15 +75,18 @@ def transform_ipynb_uri(value, rewrite_providers=None): rewrite_providers = rewrite_providers or default_rewrites uri_rewrite_dict.update(provider_uri_rewrites(rewrite_providers)) + for reg, rewrite in uri_rewrite_dict.items(): + matches = re.match(reg, value) + if matches: + value = rewrite.format(*matches.groups()) + break + # encode query parameters as last url part if '?' in value: value, query = value.split('?', 1) value = '%s/%s' % (value, quote('?' + query)) - - for reg, rewrite in uri_rewrite_dict.items(): - matches = re.match(reg, value) - if matches: - return rewrite.format(*matches.groups()) + + return value # get_encoding_from_headers from requests.utils (1.2.3) # (c) 2013 Kenneth Reitz