From 25e9980af8dfb378e2f4726071d7a36b89771cc5 Mon Sep 17 00:00:00 2001
From: "Robin H. Johnson" <robbat2@gentoo.org>
Date: Fri, 19 Jun 2015 06:09:09 +0000
Subject: [PATCH] website: refactor and add redirect testcases, so far 3
 failures out of 27 new tests.

Signed-off-by: Robin H. Johnson <robbat2@gentoo.org>
---
 requirements.txt                      |   1 +
 s3tests/common.py                     |  17 ++
 s3tests/functional/test_s3_website.py | 269 ++++++++++++++++++++++++--
 3 files changed, 268 insertions(+), 19 deletions(-)
diff --git a/requirements.txt b/requirements.txt
index 7f1348a..beced13 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,3 +9,4 @@ requests ==0.14.0
 pytz >=2011k
 ordereddict
 httplib2
+lxml
diff --git a/s3tests/common.py b/s3tests/common.py
index a9ab28e..7b5cb47 100644
--- a/s3tests/common.py
+++ b/s3tests/common.py
@@ -5,6 +5,7 @@ import os
 import random
 import string
 import yaml
+from lxml import etree
 
 s3 = bunch.Bunch()
 config = bunch.Bunch()
@@ -251,3 +252,19 @@ def with_setup_kwargs(setup, teardown=None):
 #def test_gen():
 #    yield _test_gen, '1'
 #    yield _test_gen
+
+
+def normalize_xml_whitespace(xml, pretty_print=True):
+    """Re-serialize xml with whitespace-only text/tails dropped and the rest trimmed."""
+    def _tidy(value):
+        # Absent or whitespace-only content becomes None; anything else is
+        # stripped and has its embedded line breaks removed.
+        if value is None or not value.strip():
+            return None
+        return value.strip().replace("\n","").replace("\r","")
+
+    tree = etree.fromstring(xml.encode(encoding='ascii'))
+    for node in tree.iter('*'):
+        node.text = _tidy(node.text)
+        node.tail = _tidy(node.tail)
+    return etree.tostring(tree, encoding="utf-8", xml_declaration=True, pretty_print=pretty_print)
diff --git a/s3tests/functional/test_s3_website.py b/s3tests/functional/test_s3_website.py
index ca6444d..85a4e2b 100644
--- a/s3tests/functional/test_s3_website.py
+++ b/s3tests/functional/test_s3_website.py
@@ -4,6 +4,7 @@ import collections
 import nose
 import string
 import random
+from pprint import pprint
 
 from urlparse import urlparse
 
@@ -20,12 +21,15 @@ from . import (
     )
 
 from ..common import with_setup_kwargs
+from ..common import normalize_xml_whitespace
+
+IGNORE_FIELD = 'IGNORETHIS'
 
 WEBSITE_CONFIGS_XMLFRAG = {
-        'IndexDoc': '<IndexDocument><Suffix>${IndexDocument_Suffix}</Suffix></IndexDocument>',
-        'IndexDocErrorDoc': '<IndexDocument><Suffix>${IndexDocument_Suffix}</Suffix></IndexDocument><ErrorDocument><Key>${ErrorDocument_Key}</Key></ErrorDocument>',
-        'RedirectAll': '<RedirectAllRequestsTo><HostName>${RedirectAllRequestsTo_HostName}</HostName></RedirectAllRequestsTo>',
-        'RedirectAll+Protocol': '<RedirectAllRequestsTo><HostName>${RedirectAllRequestsTo_HostName}</HostName><Protocol>${RedirectAllRequestsTo_Protocol}</Protocol></RedirectAllRequestsTo>',
+        'IndexDoc': '<IndexDocument><Suffix>${IndexDocument_Suffix}</Suffix></IndexDocument>${RoutingRules}',
+        'IndexDocErrorDoc': '<IndexDocument><Suffix>${IndexDocument_Suffix}</Suffix></IndexDocument><ErrorDocument><Key>${ErrorDocument_Key}</Key></ErrorDocument>${RoutingRules}',
+        'RedirectAll': '<RedirectAllRequestsTo><HostName>${RedirectAllRequestsTo_HostName}</HostName></RedirectAllRequestsTo>${RoutingRules}',
+        'RedirectAll+Protocol': '<RedirectAllRequestsTo><HostName>${RedirectAllRequestsTo_HostName}</HostName><Protocol>${RedirectAllRequestsTo_Protocol}</Protocol></RedirectAllRequestsTo>${RoutingRules}',
         }
 
 def make_website_config(xml_fragment):
@@ -34,23 +38,40 @@ def make_website_config(xml_fragment):
     """
     return '<?xml version="1.0" encoding="UTF-8"?><WebsiteConfiguration xmlns="http://doc.s3.amazonaws.com/doc/2006-03-01/">' + xml_fragment + '</WebsiteConfiguration>'
 
-def get_website_url(proto, bucket, path):
+def get_website_url(**kwargs):
     """
     Return the URL to a website page
     """
+    # Keyword arguments: proto (default 'http'), bucket, hostname and path
+    # (default '/').  An explicit hostname is used as-is; without one the
+    # host is "<bucket>.<website domain>".
+    proto = kwargs.get('proto', 'http')
+    bucket = kwargs.get('bucket')
+    hostname = kwargs.get('hostname')
+    path = kwargs.get('path', '/')
+    if bucket is None and hostname is None:
+        # Otherwise the host would silently become "None.<domain>".
+        raise ValueError('get_website_url() needs bucket= or hostname=')
+
     domain = config['main']['host']
     if('s3website_domain' in config['main']):
         domain = config['main']['s3website_domain']
     elif('s3website_domain' in config['alt']):
         domain = config['DEFAULT']['s3website_domain']
+    if hostname is None:
+        hostname = '%s.%s' % (bucket, domain)
     path = path.lstrip('/')
-    return "%s://%s.%s/%s" % (proto, bucket, domain, path)
+    return "%s://%s/%s" % (proto, hostname, path)
 
 def _test_website_populate_fragment(xml_fragment, fields):
+    fields = dict(fields)  # copy, so the caller's dict is left untouched
+    if fields.get('RoutingRules'):
+        fields['RoutingRules'] = '<RoutingRules>%s</RoutingRules>' % (fields['RoutingRules'],)
     f = {
           'IndexDocument_Suffix': choose_bucket_prefix(template='index-{random}.html', max_len=32),
           'ErrorDocument_Key': choose_bucket_prefix(template='error-{random}.html', max_len=32),
           'RedirectAllRequestsTo_HostName': choose_bucket_prefix(template='{random}.{random}.com', max_len=32),
+          'RoutingRules': ''
         }
     f.update(fields)
     xml_fragment = string.Template(xml_fragment).safe_substitute(**f)
@@ -58,10 +79,15 @@ def _test_website_populate_fragment(xml_fragment, fields):
 
 def _test_website_prep(bucket, xml_template, hardcoded_fields = {}):
     xml_fragment, f = _test_website_populate_fragment(xml_template, hardcoded_fields)
-    config_xml = make_website_config(xml_fragment)
-    print(config_xml)
-    bucket.set_website_configuration_xml(config_xml)
-    eq (config_xml, bucket.get_website_configuration_xml())
+    config_xml1 = make_website_config(xml_fragment)
+    bucket.set_website_configuration_xml(config_xml1)
+    config_xml1 = normalize_xml_whitespace(config_xml1, pretty_print=True) # Do it late, so the system gets weird whitespace
+    # Read the stored configuration back and normalize it the same way.
+    config_xml2 = bucket.get_website_configuration_xml()
+    config_xml2 = normalize_xml_whitespace(config_xml2, pretty_print=True) # For us to read
+    # After normalization the round-tripped XML must equal what was uploaded.
+    eq (config_xml1, config_xml2)
+    f['WebsiteConfiguration'] = config_xml2
     return f
 
 def __website_expected_reponse_status(res, status, reason):
@@ -70,15 +96,19 @@ def __website_expected_reponse_status(res, status, reason):
     if not isinstance(reason, collections.Container):
         reason = set([reason])
 
-    ok(res.status in status, 'HTTP status code mismatch')
-    ok(res.reason in reason, 'HTTP reason mismatch')
+    if status is not IGNORE_FIELD:
+        ok(res.status in status, 'HTTP code was %s should be %s' % (res.status, status))
+    if reason is not IGNORE_FIELD:
+        ok(res.reason in reason, 'HTTP reason was %s should be %s' % (res.reason, reason))
 
 def _website_expected_error_response(res, bucket_name, status, reason, code):
     body = res.read()
     print(body)
     __website_expected_reponse_status(res, status, reason)
-    ok('<li>Code: '+code+'</li>' in body, 'HTML should contain "Code: %s" ' % (code, ))
-    ok(('<li>BucketName: %s</li>' % (bucket_name, )) in body, 'HTML should contain bucket name')
+    if code is not IGNORE_FIELD:
+        ok('<li>Code: '+code+'</li>' in body, 'HTML should contain "Code: %s" ' % (code, ))
+    if bucket_name is not IGNORE_FIELD:
+        ok(('<li>BucketName: %s</li>' % (bucket_name, )) in body, 'HTML should contain bucket name')
 
 def _website_expected_redirect_response(res, status, reason, new_url):
     body = res.read()
@@ -89,7 +119,7 @@ def _website_expected_redirect_response(res, status, reason, new_url):
     ok(len(body) == 0, 'Body of a redirect should be empty')
 
 def _website_request(bucket_name, path, method='GET'):
-    url = get_website_url('http', bucket_name, path)
+    url = get_website_url(proto='http', bucket=bucket_name, path=path)
     print("url", url)
 
     o = urlparse(url)
@@ -179,8 +209,8 @@ def test_website_private_bucket_list_public_index():
 @attr('s3website')
 def test_website_private_bucket_list_empty():
     bucket = get_new_bucket()
-    bucket.set_canned_acl('private')
     f = _test_website_prep(bucket, WEBSITE_CONFIGS_XMLFRAG['IndexDoc'])
+    bucket.set_canned_acl('private')
 
     res = _website_request(bucket.name, '')
     _website_expected_error_response(res, bucket.name, 403, 'Forbidden', 'AccessDenied')
@@ -517,8 +547,7 @@ def test_website_private_bucket_list_private_index_gooderrordoc():
     errorhtml.delete()
     bucket.delete()
 
-# ------ redirect tests
-
+# ------ RedirectAll tests
 @attr(resource='bucket')
 @attr(method='get')
 @attr(operation='list')
@@ -570,10 +599,212 @@ def test_website_bucket_private_redirectall_path_upgrade():
 
     pathfragment = choose_bucket_prefix(template='/{random}', max_len=16)
 
-    res = _website_request(bucket.name, +pathfragment)
+    res = _website_request(bucket.name, pathfragment)
     # RGW returns "302 Found" per RFC2616
     # S3 returns 302 Moved Temporarily per RFC1945
     new_url = 'https://%s%s' % (f['RedirectAllRequestsTo_HostName'], pathfragment)
     _website_expected_redirect_response(res, 302, ['Found', 'Moved Temporarily'], new_url)
 
     bucket.delete()
+
+# RoutingRules
+ROUTING_RULES = {
+    'empty': '',
+    'AmazonExample1': \
+"""
+    <RoutingRule>
+    <Condition>
+      <KeyPrefixEquals>docs/</KeyPrefixEquals>
+    </Condition>
+    <Redirect>
+      <ReplaceKeyPrefixWith>documents/</ReplaceKeyPrefixWith>
+    </Redirect>
+    </RoutingRule>
+""",
+    'AmazonExample1+Protocol=https': \
+"""
+    <RoutingRule>
+    <Condition>
+      <KeyPrefixEquals>docs/</KeyPrefixEquals>
+    </Condition>
+    <Redirect>
+      <Protocol>https</Protocol>
+      <ReplaceKeyPrefixWith>documents/</ReplaceKeyPrefixWith>
+    </Redirect>
+    </RoutingRule>
+""",
+    'AmazonExample1+Protocol=https+Hostname=xyzzy': \
+"""
+    <RoutingRule>
+    <Condition>
+      <KeyPrefixEquals>docs/</KeyPrefixEquals>
+    </Condition>
+    <Redirect>
+      <Protocol>https</Protocol>
+      <HostName>xyzzy</HostName>
+      <ReplaceKeyPrefixWith>documents/</ReplaceKeyPrefixWith>
+    </Redirect>
+    </RoutingRule>
+""",
+    'AmazonExample1+Protocol=http2': \
+"""
+    <RoutingRule>
+    <Condition>
+      <KeyPrefixEquals>docs/</KeyPrefixEquals>
+    </Condition>
+    <Redirect>
+      <Protocol>http2</Protocol>
+      <ReplaceKeyPrefixWith>documents/</ReplaceKeyPrefixWith>
+    </Redirect>
+    </RoutingRule>
+""",
+   'AmazonExample2': \
+"""
+    <RoutingRule>
+    <Condition>
+       <KeyPrefixEquals>images/</KeyPrefixEquals>
+    </Condition>
+    <Redirect>
+      <ReplaceKeyWith>folderdeleted.html</ReplaceKeyWith>
+    </Redirect>
+    </RoutingRule>
+""",
+   'AmazonExample2+HttpRedirectCode=314': \
+"""
+    <RoutingRule>
+    <Condition>
+       <KeyPrefixEquals>images/</KeyPrefixEquals>
+    </Condition>
+    <Redirect>
+      <HttpRedirectCode>314</HttpRedirectCode>
+      <ReplaceKeyWith>folderdeleted.html</ReplaceKeyWith>
+    </Redirect>
+    </RoutingRule>
+""",
+   'AmazonExample3': \
+"""
+    <RoutingRule>
+    <Condition>
+      <HttpErrorCodeReturnedEquals>404</HttpErrorCodeReturnedEquals >
+    </Condition>
+    <Redirect>
+      <HostName>ec2-11-22-333-44.compute-1.amazonaws.com</HostName>
+      <ReplaceKeyPrefixWith>report-404/</ReplaceKeyPrefixWith>
+    </Redirect>
+    </RoutingRule>
+""",
+   'AmazonExample3+KeyPrefixEquals': \
+"""
+    <RoutingRule>
+    <Condition>
+      <KeyPrefixEquals>images/</KeyPrefixEquals>
+      <HttpErrorCodeReturnedEquals>404</HttpErrorCodeReturnedEquals>
+    </Condition>
+    <Redirect>
+      <HostName>ec2-11-22-333-44.compute-1.amazonaws.com</HostName>
+      <ReplaceKeyPrefixWith>report-404/</ReplaceKeyPrefixWith>
+    </Redirect>
+    </RoutingRule>
+""",
+}
+
+ROUTING_RULES_TESTS = [
+  dict(xml=dict(RoutingRules=ROUTING_RULES['empty']), url='', location=None, code=200),
+  dict(xml=dict(RoutingRules=ROUTING_RULES['empty']), url='/', location=None, code=200), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['empty']), url='/x', location=None, code=404), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample1']), url='/', location=None, code=200), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample1']), url='/x', location=None, code=404), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample1']), url='/docs/', location=dict(proto='http',bucket='{bucket_name}',path='/documents/'), code=301), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample1']), url='/docs/x', location=dict(proto='http',bucket='{bucket_name}',path='/documents/x'), code=301), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample1+Protocol=https']), url='/', location=None, code=200), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample1+Protocol=https']), url='/x', location=None, code=404), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample1+Protocol=https']), url='/docs/', location=dict(proto='https',bucket='{bucket_name}',path='/documents/'), code=301), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample1+Protocol=https']), url='/docs/x', location=dict(proto='https',bucket='{bucket_name}',path='/documents/x'), code=301), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample1+Protocol=http2']), url='/', location=None, code=200), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample1+Protocol=http2']), url='/x', location=None, code=404), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample1+Protocol=http2']), url='/docs/', location=dict(proto='http2',bucket='{bucket_name}',path='/documents/'), code=301), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample1+Protocol=http2']), url='/docs/x', location=dict(proto='http2',bucket='{bucket_name}',path='/documents/x'), code=301), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample1+Protocol=https+Hostname=xyzzy']), url='/', location=None, code=200), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample1+Protocol=https+Hostname=xyzzy']), url='/x', location=None, code=404), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample1+Protocol=https+Hostname=xyzzy']), url='/docs/', location=dict(proto='https',hostname='xyzzy',path='/documents/'), code=301), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample1+Protocol=https+Hostname=xyzzy']), url='/docs/x', location=dict(proto='https',hostname='xyzzy',path='/documents/x'), code=301), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample2']), url='/images/', location=dict(proto='http',bucket='{bucket_name}',path='/folderdeleted.html'), code=301), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample2']), url='/images/x', location=dict(proto='http',bucket='{bucket_name}',path='/folderdeleted.html'), code=301), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample2+HttpRedirectCode=314']), url='/images/', location=dict(proto='http',bucket='{bucket_name}',path='/folderdeleted.html'), code=314), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample2+HttpRedirectCode=314']), url='/images/x', location=dict(proto='http',bucket='{bucket_name}',path='/folderdeleted.html'), code=314), 
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample3']), url='/x', location=dict(proto='http',hostname='ec2-11-22-333-44.compute-1.amazonaws.com',path='/report-404/x'), code=301),
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample3']), url='/images/x', location=dict(proto='http',hostname='ec2-11-22-333-44.compute-1.amazonaws.com',path='/report-404/images/x'), code=301),
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample3+KeyPrefixEquals']), url='/x', location=None, code=404),
+  dict(xml=dict(RoutingRules=ROUTING_RULES['AmazonExample3+KeyPrefixEquals']), url='/images/x', location=dict(proto='http',hostname='ec2-11-22-333-44.compute-1.amazonaws.com',path='/report-404/x'), code=301),
+]
+
+def routing_setup():
+    """
+    Fixture for routing_check: a public-read website bucket holding
+    public-read index and error documents.  Returns the website fields plus
+    'bucket' and 'obj' (everything created, in creation order).
+    """
+    bucket = get_new_bucket()
+    created = [bucket]
+    fields = _test_website_prep(bucket, WEBSITE_CONFIGS_XMLFRAG['IndexDocErrorDoc'])
+    bucket.set_canned_acl('public-read')
+    # Upload both documents; each key is recorded so teardown can delete it.
+    for field, template in (
+            ('IndexDocument_Suffix', '<html><h1>Index</h1><body>{random}</body></html>'),
+            ('ErrorDocument_Key', '<html><h1>Error</h1><body>{random}</body></html>')):
+        key = bucket.new_key(fields[field])
+        created.append(key)
+        html = choose_bucket_prefix(template=template, max_len=64)
+        key.set_contents_from_string(html)
+        key.set_canned_acl('public-read')
+    result = {'obj': created, 'bucket': bucket}
+    result.update(fields)
+    return result
+
+def routing_teardown(**kwargs):
+  for o in reversed(kwargs['obj']):
+    print('Deleting', str(o))
+    o.delete()
+  
+           
+@with_setup_kwargs(setup=routing_setup, teardown=routing_teardown)
+def routing_check(*args, **kwargs):
+    """
+    Run one ROUTING_RULES_TESTS case against the bucket from routing_setup.
+
+    args[0] is the case dict (xml, url, location, code); kwargs is what
+    routing_setup returned.
+    """
+    bucket = kwargs['bucket']
+    case = args[0]
+    pprint(case)
+    # Case fields go on top of the setup fields (index/error document keys).
+    xml_fields = kwargs.copy()
+    xml_fields.update(case['xml'])
+    pprint(xml_fields)
+    f = _test_website_prep(bucket, WEBSITE_CONFIGS_XMLFRAG['IndexDocErrorDoc'], hardcoded_fields=xml_fields)
+    res = _website_request(bucket.name, case['url'])
+    print(f['WebsiteConfiguration'])
+    new_url = case['location']
+    if new_url is not None:
+        new_url = get_website_url(**new_url).format(bucket_name=bucket.name)
+    code = case['code']
+    if 200 <= code < 300:
+        eq(res.status, code)
+        # getheader() yields a string; convert before the numeric comparison.
+        ok(int(res.getheader('Content-Length', -1)) > 0)
+    elif 300 <= code < 400:
+        _website_expected_redirect_response(res, code, IGNORE_FIELD, new_url)
+    elif code >= 400:
+        _website_expected_error_response(res, bucket.name, code, IGNORE_FIELD, IGNORE_FIELD)
+    else:
+        raise AssertionError('unexpected code in test case: %r' % (code,))
+
+@attr('xml')
+def testGEN_routing():
+
+    for t in ROUTING_RULES_TESTS:
+        yield routing_check, t
+
+    
+