From 2c710811fac9879be11be5735d58cb5fdc057ae9 Mon Sep 17 00:00:00 2001 From: Albin Antony Date: Mon, 3 Oct 2022 18:18:02 +0530 Subject: [PATCH 01/17] s3select: json output serialization Signed-off-by: Albin Antony --- s3tests_boto3/functional/test_s3select.py | 274 +++++++++++++++++++--- 1 file changed, 236 insertions(+), 38 deletions(-) diff --git a/s3tests_boto3/functional/test_s3select.py b/s3tests_boto3/functional/test_s3select.py index feb6720..c3cb72f 100644 --- a/s3tests_boto3/functional/test_s3select.py +++ b/s3tests_boto3/functional/test_s3select.py @@ -2,6 +2,7 @@ import pytest import random import string import re +import json from botocore.exceptions import ClientError import uuid @@ -88,7 +89,7 @@ def test_generate_where_clause(): single_line_csv = create_random_csv_object(1,1) bucket_name = "test" obj_name = get_random_string() #"single_line_csv.csv" - upload_csv_object(bucket_name,obj_name,single_line_csv) + upload_object(bucket_name,obj_name,single_line_csv) for _ in range(100): generate_s3select_where_clause(bucket_name,obj_name) @@ -100,7 +101,7 @@ def test_generate_projection(): single_line_csv = create_random_csv_object(1,1) bucket_name = "test" obj_name = get_random_string() #"single_line_csv.csv" - upload_csv_object(bucket_name,obj_name,single_line_csv) + upload_object(bucket_name,obj_name,single_line_csv) for _ in range(100): generate_s3select_expression_projection(bucket_name,obj_name) @@ -209,7 +210,57 @@ def create_random_csv_object_null(rows,columns,col_delim=",",record_delim="\n",c return result -def upload_csv_object(bucket_name,new_key,obj): +def create_random_json_object(rows,columns,col_delim=",",record_delim="\n",csv_schema=""): + result = "{\"root\" : [" + result += record_delim + if len(csv_schema)>0 : + result = csv_schema + record_delim + + for _ in range(rows): + row = "" + num = 0 + row += "{" + for _ in range(columns): + num += 1 + row = row + "\"c" + str(num) + "\"" + ": " "{}{}".format(random.randint(0,1000),col_delim) + row = row[:-1] + row += "}" + row += "," + result += row + record_delim + + result = result[:-2] + result += record_delim + result += "]" + "}" + + return result + +def csv_to_json(obj, field_split=",",row_split="\n",csv_schema=""): + result = "{\"root\" : [" + result += row_split + if len(csv_schema)>0 : + result = csv_schema + row_split + + for rec in obj.split(row_split): + row = "" + num = 0 + row += "{" + for col in rec.split(field_split): + if col == "": + break + num += 1 + row = row + "\"c" + str(num) + "\"" + ": " "{}{}".format(col,field_split) + row = row[:-1] + row += "}" + row += "," + result += row + row_split + + result = result[:-5] + result += row_split + result += "]" + "}" + + return result + +def upload_object(bucket_name,new_key,obj): client = get_client() client.create_bucket(Bucket=bucket_name) @@ -259,6 +310,7 @@ def run_s3select(bucket,key,query,column_delim=",",row_delim="\n",quot_char='"', end = event['End'] result.append(end.copy()) return result + def run_s3select_output(bucket,key,query, quot_field, op_column_delim = ",", op_row_delim = "\n", column_delim=",", op_quot_char = '"', op_esc_char = '\\', row_delim="\n",quot_char='"',esc_char='\\',csv_header_info="NONE"): s3 = get_client() @@ -279,6 +331,27 @@ def run_s3select_output(bucket,key,query, quot_field, op_column_delim = ",", op_ return result +def run_s3select_json(bucket,key,query, op_row_delim = "\n"): + + s3 = get_client() + + r = s3.select_object_content( + Bucket=bucket, + Key=key, + ExpressionType='SQL', + InputSerialization = {"JSON": 
{"Type": "DOCUMENT"}}, + OutputSerialization = {"JSON": {}}, + Expression=query,) + #Record delimiter optional in output serialization + + result = "" + for event in r['Payload']: + if 'Records' in event: + records = event['Records']['Payload'].decode('utf-8') + result += records + + return result + def remove_xml_tags_from_result(obj): result = "" for rec in obj.split("\n"): @@ -314,22 +387,147 @@ def test_count_operation(): bucket_name = "test" num_of_rows = 1234 obj_to_load = create_random_csv_object(num_of_rows,10) - upload_csv_object(bucket_name,csv_obj_name,obj_to_load) + upload_object(bucket_name,csv_obj_name,obj_to_load) res = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from s3object;") ).replace(",","") s3select_assert_result( num_of_rows, int( res )) +@pytest.mark.s3select +def test_count_json_operation(): + json_obj_name = get_random_string() + bucket_name = "test" + num_of_rows = 1 + obj_to_load = create_random_json_object(num_of_rows,10) + upload_object(bucket_name,json_obj_name,obj_to_load) + res = remove_xml_tags_from_result(run_s3select_json(bucket_name,json_obj_name,"select count(0) from s3object[*];")) + s3select_assert_result( 1, int(res)) + + res = remove_xml_tags_from_result(run_s3select_json(bucket_name,json_obj_name,"select count(0) from s3object[*].root;")) + s3select_assert_result( 1, int(res)) + + obj_to_load = create_random_json_object(3,10) + upload_object(bucket_name,json_obj_name,obj_to_load) + res = remove_xml_tags_from_result(run_s3select_json(bucket_name,json_obj_name,"select count(0) from s3object[*].root;")) + s3select_assert_result( 3, int(res)) + +@pytest.mark.s3select +def test_json_column_sum_min_max(): + csv_obj = create_random_csv_object(10000,10) + + json_obj = csv_to_json(csv_obj); + + json_obj_name = get_random_string() + bucket_name = "test" + upload_object(bucket_name,json_obj_name,json_obj) + + json_obj_name_2 = get_random_string() + bucket_name_2 = "testbuck2" + upload_object(bucket_name_2,json_obj_name_2,json_obj) + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select min(_1.c1) from s3object[*].root;") ).replace(",","") + list_int = create_list_of_int( 1 , csv_obj ) + res_target = min( list_int ) + + s3select_assert_result( int(res_s3select), int(res_target)) + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select min(_1.c4) from s3object[*].root;") ).replace(",","") + list_int = create_list_of_int( 4 , csv_obj ) + res_target = min( list_int ) + + s3select_assert_result( int(res_s3select), int(res_target)) + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select avg(_1.c6) from s3object[*].root;") ).replace(",","") + list_int = create_list_of_int( 6 , csv_obj ) + res_target = float(sum(list_int ))/10000 + + s3select_assert_result( float(res_s3select), float(res_target)) + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select max(_1.c4) from s3object[*].root;") ).replace(",","") + list_int = create_list_of_int( 4 , csv_obj ) + res_target = max( list_int ) + + s3select_assert_result( int(res_s3select), int(res_target)) + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select max(_1.c7) from s3object[*].root;") ).replace(",","") + list_int = create_list_of_int( 7 , csv_obj ) + res_target = max( list_int ) + + s3select_assert_result( int(res_s3select), int(res_target)) + + res_s3select = 
remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select sum(_1.c4) from s3object[*].root;") ).replace(",","") + list_int = create_list_of_int( 4 , csv_obj ) + res_target = sum( list_int ) + + s3select_assert_result( int(res_s3select), int(res_target)) + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select sum(_1.c7) from s3object[*].root;") ).replace(",","") + list_int = create_list_of_int( 7 , csv_obj ) + res_target = sum( list_int ) + + s3select_assert_result( int(res_s3select) , int(res_target) ) + + # the following queries, validates on *random* input an *accurate* relation between condition result,sum operation and count operation. + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name_2,json_obj_name_2,"select count(0),sum(_1.c1),sum(_1.c2) from s3object[*].root where (_1.c1-_1.c2) = 2;" ) ) + count,sum1,sum2 = res_s3select.split(",") + + s3select_assert_result( int(count)*2 , int(sum1)-int(sum2 ) ) + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select count(0),sum(_1.c1),sum(_1.c2) from s3object[*].root where (_1.c1-_1.c2) = 4;" ) ) + count,sum1,sum2 = res_s3select.split(",") + + s3select_assert_result( int(count)*4 , int(sum1)-int(sum2) ) + +@pytest.mark.s3select +def test_json_nullif_expressions(): + + json_obj = create_random_json_object(10000,10) + + json_obj_name = get_random_string() + bucket_name = "test" + upload_object(bucket_name,json_obj_name,json_obj) + + res_s3select_nullif = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select count(0) from s3object[*].root where nullif(_1.c1,_1.c2) is null ;") ).replace("\n","") + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select count(0) from s3object[*].root where _1.c1 = _1.c2 ;") ).replace("\n","") + + s3select_assert_result( res_s3select_nullif, res_s3select) + + res_s3select_nullif = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select (nullif(_1.c1,_1.c2) is null) from s3object[*].root ;") ).replace("\n","") + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select (_1.c1 = _1.c2) from s3object[*].root ;") ).replace("\n","") + + s3select_assert_result( res_s3select_nullif, res_s3select) + + res_s3select_nullif = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select count(0) from s3object[*].root where not nullif(_1.c1,_1.c2) is null ;") ).replace("\n","") + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select count(0) from s3object[*].root where _1.c1 != _1.c2 ;") ).replace("\n","") + + s3select_assert_result( res_s3select_nullif, res_s3select) + + res_s3select_nullif = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select (nullif(_1.c1,_1.c2) is not null) from s3object[*].root ;") ).replace("\n","") + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select (_1.c1 != _1.c2) from s3object[*].root ;") ).replace("\n","") + + s3select_assert_result( res_s3select_nullif, res_s3select) + + res_s3select_nullif = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select count(0) from s3object[*].root where nullif(_1.c1,_1.c2) = _1.c1 ;") ).replace("\n","") + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select count(0) from s3object[*].root where _1.c1 
!= _1.c2 ;") ).replace("\n","") + + s3select_assert_result( res_s3select_nullif, res_s3select) + + @pytest.mark.s3select def test_column_sum_min_max(): csv_obj = create_random_csv_object(10000,10) csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) csv_obj_name_2 = get_random_string() bucket_name_2 = "testbuck2" - upload_csv_object(bucket_name_2,csv_obj_name_2,csv_obj) + upload_object(bucket_name_2,csv_obj_name_2,csv_obj) res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select min(int(_1)) from s3object;") ).replace(",","") list_int = create_list_of_int( 1 , csv_obj ) @@ -391,7 +589,7 @@ def test_nullif_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_nullif = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from s3object where nullif(_1,_2) is null ;") ).replace("\n","") @@ -425,7 +623,7 @@ def test_nullif_expressions(): csv_obj = create_random_csv_object_null(10000,10) - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_nullif = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(*) from s3object where nullif(_1,null) is null;") ).replace("\n","") @@ -446,7 +644,7 @@ def test_nulliftrue_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_nullif = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from s3object where (nullif(_1,_2) is null) = true ;") ).replace("\n","") @@ -473,7 +671,7 @@ def test_is_not_null_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_null = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(*) from s3object where nullif(_1,_2) is not null ;") ).replace("\n","") @@ -494,7 +692,7 @@ def test_lowerupper_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select lower("AB12cd$$") from s3object ;') ).replace("\n","") @@ -512,7 +710,7 @@ def test_in_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from s3object where int(_1) in(1);')).replace("\n","") @@ -581,7 +779,7 @@ def test_true_false_in_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from s3object where (int(_1) in(1)) = true;')).replace("\n","") @@ -626,7 +824,7 @@ def test_like_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + 
upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_like = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where _1 like "%aeio%";')).replace("\n","") @@ -713,7 +911,7 @@ def test_truefalselike_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_like = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where (_1 like "%aeio%") = true;')).replace("\n","") @@ -758,7 +956,7 @@ def test_nullif_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_nullif = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from stdin where nullif(_1,_2) is null ;") ).replace("\n","") @@ -785,7 +983,7 @@ def test_lowerupper_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select lower("AB12cd$$") from stdin ;') ).replace("\n","") @@ -803,7 +1001,7 @@ def test_in_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from stdin where int(_1) in(1);')).replace("\n","") @@ -842,7 +1040,7 @@ def test_like_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from stdin where _1 like "%aeio%";')).replace("\n","") @@ -889,7 +1087,7 @@ def test_complex_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select min(int(_1)),max(int(_2)),min(int(_3))+1 from s3object;")).replace("\n","") @@ -925,7 +1123,7 @@ def test_alias(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_alias = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select int(_1) as a1, int(_2) as a2 , (a1+a2) as a3 from s3object where a3>100 and a3<300;") ).replace(",","") @@ -944,7 +1142,7 @@ def test_alias_cyclic_refernce(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_alias = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select int(_1) as a1,int(_2) as a2, a1+a4 as a3, a5+a1 as a4, int(_3)+a3 as a5 from s3object;") ) @@ -963,7 +1161,7 @@ def test_datetime(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_date_time = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(0) 
from s3object where extract(year from to_timestamp(_1)) > 1950 and extract(year from to_timestamp(_1)) < 1960;') ) @@ -994,7 +1192,7 @@ def test_true_false_datetime(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_date_time = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(0) from s3object where (extract(year from to_timestamp(_1)) > 1950) = true and (extract(year from to_timestamp(_1)) < 1960) = true;') ) @@ -1027,7 +1225,7 @@ def test_csv_parser(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) # return value contain comma{,} res_s3select_alias = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select _6 from s3object;") ).replace("\n","") @@ -1068,7 +1266,7 @@ def test_csv_definition(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) # purpose of tests is to parse correctly input with different csv defintions res = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from s3object;","|","\t") ).replace(",","") @@ -1098,7 +1296,7 @@ def test_schema_definition(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) # ignoring the schema on first line and retrieve using generic column number res_ignore = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select _1,_3 from s3object;",csv_header_info="IGNORE") ).replace("\n","") @@ -1129,7 +1327,7 @@ def test_when_then_else_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select case when cast(_1 as int)>100 and cast(_1 as int)<200 then "(100-200)" when cast(_1 as int)>200 and cast(_1 as int)<300 then "(200-300)" else "NONE" end from s3object;') ).replace("\n","") @@ -1158,7 +1356,7 @@ def test_coalesce_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where char_length(_3)>2 and char_length(_4)>2 and cast(substring(_3,1,2) as int) = cast(substring(_4,1,2) as int);') ).replace("\n","") @@ -1180,7 +1378,7 @@ def test_cast_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where cast(_3 as int)>999;') ).replace("\n","") @@ -1206,7 +1404,7 @@ def test_version(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_version = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select version() from s3object;") ).replace("\n","") @@ -1219,7 +1417,7 @@ def test_trim_expressions(): csv_obj_name = 
get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_trim = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where trim(_1) = "aeiou";')).replace("\n","") @@ -1258,7 +1456,7 @@ def test_truefalse_trim_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_trim = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where trim(_1) = "aeiou" = true;')).replace("\n","") @@ -1297,7 +1495,7 @@ def test_escape_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_escape = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where _1 like "%_ar" escape "%";')).replace("\n","") @@ -1318,7 +1516,7 @@ def test_case_value_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_case = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select case cast(_1 as int) when cast(_2 as int) then "case_1_1" else "case_2_2" end from s3object;')).replace("\n","") @@ -1333,7 +1531,7 @@ def test_bool_cast_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_cast = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where cast(int(_1) as bool) = true ;')).replace("\n","") @@ -1348,7 +1546,7 @@ def test_progress_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) obj_size = len(csv_obj.encode('utf-8')) @@ -1376,7 +1574,7 @@ def test_output_serial_expressions(): csv_obj_name = get_random_string() bucket_name = "test" - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_1 = remove_xml_tags_from_result( run_s3select_output(bucket_name,csv_obj_name,"select _1, _2 from s3object where nullif(_1,_2) is null ;", "ALWAYS") ).replace("\n",",").replace(",","") From 3437cda73df02dd9f4f417b9e3b18396b77a8045 Mon Sep 17 00:00:00 2001 From: Yuval Lifshitz Date: Wed, 25 Jan 2023 11:12:20 +0200 Subject: [PATCH 02/17] better error handling in the STS tests also, give more accurate instruction on how to run the tests Signed-off-by: Yuval Lifshitz --- README.rst | 9 +++++ s3tests_boto3/functional/test_sts.py | 58 ++++++++++++++++++++++------ 2 files changed, 56 insertions(+), 11 deletions(-) diff --git a/README.rst b/README.rst index f2b9818..cf9e702 100644 --- a/README.rst +++ b/README.rst @@ -52,6 +52,15 @@ You can run only the boto3 tests with:: This section contains some basic tests for the AssumeRole, GetSessionToken and AssumeRoleWithWebIdentity API's. The test file is located under ``s3tests_boto3/functional``. 
+To run the STS tests, the vstart cluster should be started with the following parameter (in addition to any parameters already used with it):: + + vstart.sh -o rgw_sts_key=abcdefghijklmnop -o rgw_s3_auth_use_sts=true + +Note that the ``rgw_sts_key`` can be set to anything that is 128 bits in length. +After the cluster is up the following command should be executed:: + + radosgw-admin caps add --tenant=testx --uid="9876543210abcdef0123456789abcdef0123456789abcdef0123456789abcdef" --caps="roles=*" + You can run only the sts tests (all the three API's) with:: S3TEST_CONF=your.conf tox s3tests_boto3/functional/test_sts.py diff --git a/s3tests_boto3/functional/test_sts.py b/s3tests_boto3/functional/test_sts.py index 0229dbd..8969167 100644 --- a/s3tests_boto3/functional/test_sts.py +++ b/s3tests_boto3/functional/test_sts.py @@ -56,6 +56,7 @@ log = logging.getLogger(__name__) def create_role(iam_client,path,rolename,policy_document,description,sessionduration,permissionboundary,tag_list=None): role_err=None + role_response = None if rolename is None: rolename=get_parameter_name() if tag_list is None: @@ -68,6 +69,7 @@ def create_role(iam_client,path,rolename,policy_document,description,sessiondura def put_role_policy(iam_client,rolename,policyname,role_policy): role_err=None + role_response = None if policyname is None: policyname=get_parameter_name() try: @@ -78,6 +80,7 @@ def put_role_policy(iam_client,rolename,policyname,role_policy): def put_user_policy(iam_client,username,policyname,policy_document): role_err=None + role_response = None if policyname is None: policyname=get_parameter_name() try: @@ -222,11 +225,17 @@ def test_assume_role_allow(): policy_document = "{\"Version\":\"2012-10-17\",\"Statement\":[{\"Effect\":\"Allow\",\"Principal\":{\"AWS\":[\"arn:aws:iam:::user/"+sts_user_id+"\"]},\"Action\":[\"sts:AssumeRole\"]}]}" (role_error,role_response,general_role_name)=create_role(iam_client,'/',None,policy_document,None,None,None) - assert role_response['Role']['Arn'] == 'arn:aws:iam:::role/'+general_role_name+'' + if role_response: + assert role_response['Role']['Arn'] == 'arn:aws:iam:::role/'+general_role_name+'' + else: + assert False, role_error role_policy = "{\"Version\":\"2012-10-17\",\"Statement\":{\"Effect\":\"Allow\",\"Action\":\"s3:*\",\"Resource\":\"arn:aws:s3:::*\"}}" (role_err,response)=put_role_policy(iam_client,general_role_name,None,role_policy) - assert response['ResponseMetadata']['HTTPStatusCode'] == 200 + if response: + assert response['ResponseMetadata']['HTTPStatusCode'] == 200 + else: + assert False, role_err resp=sts_client.assume_role(RoleArn=role_response['Role']['Arn'],RoleSessionName=role_session_name) assert resp['ResponseMetadata']['HTTPStatusCode'] == 200 @@ -256,11 +265,17 @@ def test_assume_role_deny(): policy_document = "{\"Version\":\"2012-10-17\",\"Statement\":[{\"Effect\":\"Allow\",\"Principal\":{\"AWS\":[\"arn:aws:iam:::user/"+sts_user_id+"\"]},\"Action\":[\"sts:AssumeRole\"]}]}" (role_error,role_response,general_role_name)=create_role(iam_client,'/',None,policy_document,None,None,None) - assert role_response['Role']['Arn'] == 'arn:aws:iam:::role/'+general_role_name+'' + if role_response: + assert role_response['Role']['Arn'] == 'arn:aws:iam:::role/'+general_role_name+'' + else: + assert False, role_error role_policy = "{\"Version\":\"2012-10-17\",\"Statement\":{\"Effect\":\"Deny\",\"Action\":\"s3:*\",\"Resource\":\"arn:aws:s3:::*\"}}" (role_err,response)=put_role_policy(iam_client,general_role_name,None,role_policy) - assert 
response['ResponseMetadata']['HTTPStatusCode'] == 200 + if response: + assert response['ResponseMetadata']['HTTPStatusCode'] == 200 + else: + assert False, role_err resp=sts_client.assume_role(RoleArn=role_response['Role']['Arn'],RoleSessionName=role_session_name) assert resp['ResponseMetadata']['HTTPStatusCode'] == 200 @@ -290,11 +305,17 @@ def test_assume_role_creds_expiry(): policy_document = "{\"Version\":\"2012-10-17\",\"Statement\":[{\"Effect\":\"Allow\",\"Principal\":{\"AWS\":[\"arn:aws:iam:::user/"+sts_user_id+"\"]},\"Action\":[\"sts:AssumeRole\"]}]}" (role_error,role_response,general_role_name)=create_role(iam_client,'/',None,policy_document,None,None,None) - assert role_response['Role']['Arn'] == 'arn:aws:iam:::role/'+general_role_name+'' + if role_response: + assert role_response['Role']['Arn'] == 'arn:aws:iam:::role/'+general_role_name+'' + else: + assert False, role_error role_policy = "{\"Version\":\"2012-10-17\",\"Statement\":{\"Effect\":\"Allow\",\"Action\":\"s3:*\",\"Resource\":\"arn:aws:s3:::*\"}}" (role_err,response)=put_role_policy(iam_client,general_role_name,None,role_policy) - assert response['ResponseMetadata']['HTTPStatusCode'] == 200 + if response: + assert response['ResponseMetadata']['HTTPStatusCode'] == 200 + else: + assert False, role_err resp=sts_client.assume_role(RoleArn=role_response['Role']['Arn'],RoleSessionName=role_session_name,DurationSeconds=900) assert resp['ResponseMetadata']['HTTPStatusCode'] == 200 @@ -329,12 +350,18 @@ def test_assume_role_deny_head_nonexistent(): policy_document = '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"AWS":["arn:aws:iam:::user/'+sts_user_id+'"]},"Action":["sts:AssumeRole"]}]}' (role_error,role_response,general_role_name)=create_role(iam_client,'/',None,policy_document,None,None,None) - assert role_response['Role']['Arn'] == 'arn:aws:iam:::role/'+general_role_name + if role_response: + assert role_response['Role']['Arn'] == 'arn:aws:iam:::role/'+general_role_name + else: + assert False, role_error # allow GetObject but deny ListBucket role_policy = '{"Version":"2012-10-17","Statement":{"Effect":"Allow","Action":"s3:GetObject","Principal":"*","Resource":"arn:aws:s3:::*"}}' (role_err,response)=put_role_policy(iam_client,general_role_name,None,role_policy) - assert response['ResponseMetadata']['HTTPStatusCode'] == 200 + if response: + assert response['ResponseMetadata']['HTTPStatusCode'] == 200 + else: + assert False, role_err resp=sts_client.assume_role(RoleArn=role_response['Role']['Arn'],RoleSessionName=role_session_name) assert resp['ResponseMetadata']['HTTPStatusCode'] == 200 @@ -367,12 +394,18 @@ def test_assume_role_allow_head_nonexistent(): policy_document = '{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"AWS":["arn:aws:iam:::user/'+sts_user_id+'"]},"Action":["sts:AssumeRole"]}]}' (role_error,role_response,general_role_name)=create_role(iam_client,'/',None,policy_document,None,None,None) - assert role_response['Role']['Arn'] == 'arn:aws:iam:::role/'+general_role_name + if role_response: + assert role_response['Role']['Arn'] == 'arn:aws:iam:::role/'+general_role_name + else: + assert False, role_error # allow GetObject and ListBucket role_policy = '{"Version":"2012-10-17","Statement":{"Effect":"Allow","Action":["s3:GetObject","s3:ListBucket"],"Principal":"*","Resource":"arn:aws:s3:::*"}}' (role_err,response)=put_role_policy(iam_client,general_role_name,None,role_policy) - assert response['ResponseMetadata']['HTTPStatusCode'] == 200 + if response: + assert 
response['ResponseMetadata']['HTTPStatusCode'] == 200 + else: + assert False, role_err resp=sts_client.assume_role(RoleArn=role_response['Role']['Arn'],RoleSessionName=role_session_name) assert resp['ResponseMetadata']['HTTPStatusCode'] == 200 @@ -418,7 +451,10 @@ def test_assume_role_with_web_identity(): role_policy = "{\"Version\":\"2012-10-17\",\"Statement\":{\"Effect\":\"Allow\",\"Action\":\"s3:*\",\"Resource\":\"arn:aws:s3:::*\"}}" (role_err,response)=put_role_policy(iam_client,general_role_name,None,role_policy) - assert response['ResponseMetadata']['HTTPStatusCode'] == 200 + if response: + assert response['ResponseMetadata']['HTTPStatusCode'] == 200 + else: + assert False, role_err resp=sts_client.assume_role_with_web_identity(RoleArn=role_response['Role']['Arn'],RoleSessionName=role_session_name,WebIdentityToken=token) assert resp['ResponseMetadata']['HTTPStatusCode'] == 200 From a536dd0e88ac908cde07bf312c8db6fe4a0c9b69 Mon Sep 17 00:00:00 2001 From: Casey Bodley Date: Thu, 16 Feb 2023 21:15:19 -0500 Subject: [PATCH 03/17] boto3: list_versions() omits empty KeyMarker/VersionIdMarker Signed-off-by: Casey Bodley --- s3tests_boto3/functional/__init__.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/s3tests_boto3/functional/__init__.py b/s3tests_boto3/functional/__init__.py index 22b136c..a65b54c 100644 --- a/s3tests_boto3/functional/__init__.py +++ b/s3tests_boto3/functional/__init__.py @@ -82,18 +82,13 @@ def get_objects_list(bucket, client=None, prefix=None): # generator function that returns object listings in batches, where each # batch is a list of dicts compatible with delete_objects() def list_versions(client, bucket, batch_size): - key_marker = '' - version_marker = '' + kwargs = {'Bucket': bucket, 'MaxKeys': batch_size} truncated = True while truncated: - listing = client.list_object_versions( - Bucket=bucket, - KeyMarker=key_marker, - VersionIdMarker=version_marker, - MaxKeys=batch_size) + listing = client.list_object_versions(**kwargs) - key_marker = listing.get('NextKeyMarker') - version_marker = listing.get('NextVersionIdMarker') + kwargs['KeyMarker'] = listing.get('NextKeyMarker') + kwargs['VersionIdMarker'] = listing.get('NextVersionIdMarker') truncated = listing['IsTruncated'] objs = listing.get('Versions', []) + listing.get('DeleteMarkers', []) From 5914eb2005bcdfc27dfa6221744c58e967a44e76 Mon Sep 17 00:00:00 2001 From: "Robin H. Johnson" Date: Tue, 31 Jan 2023 16:39:39 -0800 Subject: [PATCH 04/17] test_post_object_upload_size_rgw_chunk_size_bug: new testcase `ERR_TOO_SMALL` is wrongly returned if all of the following are true, - the get_data returns multiple items (chunks) - the length of the last item is smaller than the POST Policy's min value for content-length-range. The check should be `(ofs < min_len)` instead of `(len < min_len)` This is further confirmed by the next line of `s->obj_size = ofs` Move the `int len` scope inside loop to try and prevent the bug in future. The bug was refactored in 2016, but was introduced in Oct 2012, when this functionality was first added to RGW in commit 7bb3504d3f0974e9863f536e9af0ce8889d6888f. Reference: https://github.com/ceph/ceph/blob/933a42f9af349b3b222270e7f19f1fe151d89e8e/src/rgw/rgw_op.cc#L4474-L4513 Reference: https://github.com/ceph/ceph/commit/7bb3504d3f0974e9863f536e9af0ce8889d6888f Signed-off-by: Robin H. 
Johnson --- s3tests_boto3/functional/test_s3.py | 47 +++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/s3tests_boto3/functional/test_s3.py b/s3tests_boto3/functional/test_s3.py index e309c48..db080e9 100644 --- a/s3tests_boto3/functional/test_s3.py +++ b/s3tests_boto3/functional/test_s3.py @@ -2818,6 +2818,53 @@ def test_post_object_upload_size_below_minimum(): r = requests.post(url, files=payload, verify=get_config_ssl_verify()) assert r.status_code == 400 +def test_post_object_upload_size_rgw_chunk_size_bug(): + # Test for https://tracker.ceph.com/issues/58627 + # TODO: if this value is different in Teuthology runs, this would need tuning + # https://github.com/ceph/ceph/blob/main/qa/suites/rgw/verify/striping%24/stripe-greater-than-chunk.yaml + _rgw_max_chunk_size = 4 * 2**20 # 4MiB + min_size = _rgw_max_chunk_size + max_size = _rgw_max_chunk_size * 3 + # [(chunk),(small)] + test_payload_size = _rgw_max_chunk_size + 200 # extra bit to push it over the chunk boundary + # it should be valid when we run this test! + assert test_payload_size > min_size + assert test_payload_size < max_size + + bucket_name = get_new_bucket() + client = get_client() + + url = _get_post_url(bucket_name) + utc = pytz.utc + expires = datetime.datetime.now(utc) + datetime.timedelta(seconds=+6000) + + policy_document = {"expiration": expires.strftime("%Y-%m-%dT%H:%M:%SZ"),\ + "conditions": [\ + {"bucket": bucket_name},\ + ["starts-with", "$key", "foo"],\ + {"acl": "private"},\ + ["starts-with", "$Content-Type", "text/plain"],\ + ["content-length-range", min_size, max_size],\ + ]\ + } + + test_payload = 'x' * test_payload_size + + json_policy_document = json.JSONEncoder().encode(policy_document) + bytes_json_policy_document = bytes(json_policy_document, 'utf-8') + policy = base64.b64encode(bytes_json_policy_document) + aws_secret_access_key = get_main_aws_secret_key() + aws_access_key_id = get_main_aws_access_key() + + signature = base64.b64encode(hmac.new(bytes(aws_secret_access_key, 'utf-8'), policy, hashlib.sha1).digest()) + + payload = OrderedDict([ ("key" , "foo.txt"),("AWSAccessKeyId" , aws_access_key_id),\ + ("acl" , "private"),("signature" , signature),("policy" , policy),\ + ("Content-Type" , "text/plain"),('file', (test_payload))]) + + r = requests.post(url, files=payload, verify=get_config_ssl_verify()) + assert r.status_code == 204 + def test_post_object_empty_conditions(): bucket_name = get_new_bucket() client = get_client() From 5219b86db9677983a299be9065bf9e846333c49a Mon Sep 17 00:00:00 2001 From: Casey Bodley Date: Fri, 24 Feb 2023 11:43:52 -0500 Subject: [PATCH 05/17] iam: add back missing fails_on_dbstore tags Fixes: https://tracker.ceph.com/issues/58762 Signed-off-by: Casey Bodley --- s3tests_boto3/functional/test_iam.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/s3tests_boto3/functional/test_iam.py b/s3tests_boto3/functional/test_iam.py index 0995f97..fa44357 100644 --- a/s3tests_boto3/functional/test_iam.py +++ b/s3tests_boto3/functional/test_iam.py @@ -477,6 +477,7 @@ def test_allow_bucket_actions_in_user_policy(): @pytest.mark.user_policy @pytest.mark.test_of_iam +@pytest.mark.fails_on_dbstore def test_deny_bucket_actions_in_user_policy(): client = get_iam_client() s3_client = get_alt_client() @@ -551,6 +552,7 @@ def test_allow_object_actions_in_user_policy(): @pytest.mark.user_policy @pytest.mark.test_of_iam +@pytest.mark.fails_on_dbstore def test_deny_object_actions_in_user_policy(): client = get_iam_client() s3_client_alt = get_alt_client() @@ 
-625,6 +627,7 @@ def test_allow_multipart_actions_in_user_policy():
 
 @pytest.mark.user_policy
 @pytest.mark.test_of_iam
+@pytest.mark.fails_on_dbstore
 def test_deny_multipart_actions_in_user_policy():
     client = get_iam_client()
     s3_client = get_alt_client()
@@ -667,6 +670,7 @@ def test_deny_multipart_actions_in_user_policy():
 
 @pytest.mark.user_policy
 @pytest.mark.test_of_iam
+@pytest.mark.fails_on_dbstore
 def test_allow_tagging_actions_in_user_policy():
     client = get_iam_client()
     s3_client_alt = get_alt_client()
@@ -712,6 +716,7 @@ def test_allow_tagging_actions_in_user_policy():
 
 @pytest.mark.user_policy
 @pytest.mark.test_of_iam
+@pytest.mark.fails_on_dbstore
 def test_deny_tagging_actions_in_user_policy():
     client = get_iam_client()
     s3_client = get_alt_client()
@@ -763,6 +768,7 @@ def test_deny_tagging_actions_in_user_policy():
 
 @pytest.mark.user_policy
 @pytest.mark.test_of_iam
+@pytest.mark.fails_on_dbstore
 def test_verify_conflicting_user_policy_statements():
     s3client = get_alt_client()
     bucket = get_new_bucket(client=s3client)
@@ -794,6 +800,7 @@ def test_verify_conflicting_user_policy_statements():
 
 @pytest.mark.user_policy
 @pytest.mark.test_of_iam
+@pytest.mark.fails_on_dbstore
 def test_verify_conflicting_user_policies():
     s3client = get_alt_client()
     bucket = get_new_bucket(client=s3client)

From 3a0f1f0ead196c67721e94a19544aad19846f039 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Moritz=20R=C3=B6hrich?=
Date: Tue, 28 Feb 2023 12:19:54 +0100
Subject: [PATCH 06/17] QoL: Fix tox.ini syntax and other minor things
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Fix tox.ini syntax

  Modern tox versions require the expected environment variables to be
  listed one by one on separate lines in tox.ini

- Add `venv` to list of ignored names for git

  This is a common name for a local Python virtual environment. Less
  typing than `virtualenv`

- Add `tox` to requirements.txt

  Installing `tox` via `pip` has the advantage of including it in the
  virtual environment, thus avoiding trouble on operating systems
  shipping by default with python3.6 or older.

  It's also nice that `pip install -r requirements.txt` is now
  sufficient to set up the testing environment, after initializing the
  virtual environment with a modern-enough python version.

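  For example, assuming a recent-enough `python3` on the path (an
  illustrative sketch only; `./venv` matches the directory name added
  to .gitignore above)::

      # one-time setup of a local virtual environment
      python3 -m venv ./venv
      . ./venv/bin/activate
      pip install -r requirements.txt
      # run the suite via the tox now pinned in requirements.txt
      S3TEST_CONF=your.conf tox
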
Signed-off-by: Moritz Röhrich --- .gitignore | 1 + requirements.txt | 1 + tox.ini | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index bcbae80..2a246a8 100644 --- a/.gitignore +++ b/.gitignore @@ -10,5 +10,6 @@ /*.egg-info /virtualenv +/venv config.yaml diff --git a/requirements.txt b/requirements.txt index ac1d18f..7742d8f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ pytz >=2011k httplib2 lxml pytest +tox diff --git a/tox.ini b/tox.ini index 93fde38..e4a30e5 100644 --- a/tox.ini +++ b/tox.ini @@ -3,5 +3,7 @@ envlist = py [testenv] deps = -rrequirements.txt -passenv = S3TEST_CONF S3_USE_SIGV4 +passenv = + S3TEST_CONF + S3_USE_SIGV4 commands = pytest {posargs} From d976f47d747867b6dc1f5eedba4626dd670093b9 Mon Sep 17 00:00:00 2001 From: Soumya Koduri Date: Mon, 6 Mar 2023 19:48:01 +0530 Subject: [PATCH 07/17] dbstore: add back missing 'fails_on_dbstore' tag Mark testcase "test_lifecycle_expiration_header_and_tags_head" as fails_on_dbstore Signed-off-by: Soumya Koduri --- s3tests_boto3/functional/test_s3.py | 1 + 1 file changed, 1 insertion(+) diff --git a/s3tests_boto3/functional/test_s3.py b/s3tests_boto3/functional/test_s3.py index db080e9..8e06844 100644 --- a/s3tests_boto3/functional/test_s3.py +++ b/s3tests_boto3/functional/test_s3.py @@ -8350,6 +8350,7 @@ def test_lifecycle_expiration_header_tags_head(): @pytest.mark.lifecycle @pytest.mark.lifecycle_expiration +@pytest.mark.fails_on_dbstore def test_lifecycle_expiration_header_and_tags_head(): now = datetime.datetime.now(None) bucket_name = get_new_bucket() From 29b0e27e495fca140669f45cd51c7b172f41410a Mon Sep 17 00:00:00 2001 From: Soumya Koduri Date: Tue, 7 Mar 2023 15:17:47 +0530 Subject: [PATCH 08/17] lifecycle: Fix test_lifecycle_expiration_header_* testcases Few checks were incorrectly mapped when switched to 'assert'. This commit fixes the same. 
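Concretely, the affected lifecycle HEAD checks are meant to assert that
the helper evaluates to False (i.e. the object is not expected to carry
a matching expiration header at that point), e.g.:

    # as in the diff below; comparing against False restores the
    # intended polarity of the check
    response = client.head_object(Bucket=bucket_name, Key=key1)
    assert check_lifecycle_expiration_header(response, datetime.datetime.now(None), 'rule1', 1) == False

A bare 'assert' of the helper's return value, as introduced during the
assert conversion, tested the opposite condition.
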
Signed-off-by: Soumya Koduri --- s3tests_boto3/functional/test_s3.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/s3tests_boto3/functional/test_s3.py b/s3tests_boto3/functional/test_s3.py index 8e06844..e4c7052 100644 --- a/s3tests_boto3/functional/test_s3.py +++ b/s3tests_boto3/functional/test_s3.py @@ -8346,7 +8346,7 @@ def test_lifecycle_expiration_header_tags_head(): # stat the object, check header response = client.head_object(Bucket=bucket_name, Key=key1) assert response['ResponseMetadata']['HTTPStatusCode'] == 200 - assert check_lifecycle_expiration_header(response, datetime.datetime.now(None), 'rule1', 1) + assert check_lifecycle_expiration_header(response, datetime.datetime.now(None), 'rule1', 1) == False @pytest.mark.lifecycle @pytest.mark.lifecycle_expiration @@ -8392,7 +8392,7 @@ def test_lifecycle_expiration_header_and_tags_head(): # stat the object, check header response = client.head_object(Bucket=bucket_name, Key=key1) assert response['ResponseMetadata']['HTTPStatusCode'] == 200 - assert check_lifecycle_expiration_header(response, datetime.datetime.now(None), 'rule1', 1) + assert check_lifecycle_expiration_header(response, datetime.datetime.now(None), 'rule1', 1) == False @pytest.mark.lifecycle def test_lifecycle_set_noncurrent(): From 13a9bfc00aab2bec910f5ac6b5f63fa24e5f48dd Mon Sep 17 00:00:00 2001 From: Mark Kogan Date: Tue, 7 Mar 2023 14:08:51 +0000 Subject: [PATCH 09/17] test object PUT with chunked transfer enconding Before the RGW fix PR was responding with 411 instead of 200 RGW fix PR: https://github.com/ceph/ceph/pull/50235 Signed-off-by: Mark Kogan --- s3tests_boto3/functional/test_s3.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/s3tests_boto3/functional/test_s3.py b/s3tests_boto3/functional/test_s3.py index 8e06844..027fb8f 100644 --- a/s3tests_boto3/functional/test_s3.py +++ b/s3tests_boto3/functional/test_s3.py @@ -1573,6 +1573,19 @@ def test_object_write_to_nonexist_bucket(): assert error_code == 'NoSuchBucket' +def _ev_add_te_header(request, **kwargs): + request.headers.add_header('Transfer-Encoding', 'chunked') + +def test_object_write_with_chunked_transfer_encoding(): + bucket_name = get_new_bucket() + client = get_client() + + client.meta.events.register_first('before-sign.*.*', _ev_add_te_header) + response = client.put_object(Bucket=bucket_name, Key='foo', Body='bar') + + assert response['ResponseMetadata']['HTTPStatusCode'] == 200 + + def test_bucket_create_delete(): bucket_name = get_new_bucket() client = get_client() From bb27e04c456cc1b551d898a6814cda307f960de4 Mon Sep 17 00:00:00 2001 From: Casey Bodley Date: Sun, 19 Feb 2023 12:32:47 -0500 Subject: [PATCH 10/17] add test_get_object_torrent Signed-off-by: Casey Bodley --- s3tests_boto3/functional/test_s3.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/s3tests_boto3/functional/test_s3.py b/s3tests_boto3/functional/test_s3.py index e4c7052..b8c638f 100644 --- a/s3tests_boto3/functional/test_s3.py +++ b/s3tests_boto3/functional/test_s3.py @@ -12786,3 +12786,24 @@ def test_sse_s3_encrypted_upload_1mb(): @pytest.mark.fails_on_dbstore def test_sse_s3_encrypted_upload_8mb(): _test_sse_s3_encrypted_upload(8*1024*1024) + +def test_get_object_torrent(): + client = get_client() + bucket_name = get_new_bucket() + key = 'Avatar.mpg' + + file_size = 7 * 1024 * 1024 + data = 'A' * file_size + + client.put_object(Bucket=bucket_name, Key=key, Body=data) + + response = None + try: + response = client.get_object_torrent(Bucket=bucket_name, 
Key=key) + # if successful, verify the torrent contents are different from the body + assert data != _get_body(response) + except ClientError as e: + # accept 404 errors - torrent support may not be configured + status, error_code = _get_status_and_error_code(e.response) + assert status == 404 + assert error_code == 'NoSuchKey' From 97c0338adfa8c0d51b1c24ba69882218fc7c23e9 Mon Sep 17 00:00:00 2001 From: Casey Bodley Date: Sun, 12 Mar 2023 15:33:34 -0400 Subject: [PATCH 11/17] s3website: collections.Container removed from python3.10 Fixes: https://tracker.ceph.com/issues/58960 Signed-off-by: Casey Bodley --- s3tests/functional/test_s3_website.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/s3tests/functional/test_s3_website.py b/s3tests/functional/test_s3_website.py index 8cc04ed..641cd71 100644 --- a/s3tests/functional/test_s3_website.py +++ b/s3tests/functional/test_s3_website.py @@ -1,5 +1,5 @@ import sys -import collections +from collections.abc import Container import pytest import string import random @@ -159,9 +159,9 @@ def _test_website_prep(bucket, xml_template, hardcoded_fields = {}, expect_fail= return f def __website_expected_reponse_status(res, status, reason): - if not isinstance(status, collections.Container): + if not isinstance(status, Container): status = set([status]) - if not isinstance(reason, collections.Container): + if not isinstance(reason, Container): reason = set([reason]) if status is not IGNORE_FIELD: @@ -179,7 +179,7 @@ def _website_expected_default_html(**kwargs): v = kwargs[k] if isinstance(v, str): v = [v] - elif not isinstance(v, collections.Container): + elif not isinstance(v, Container): v = [v] for v2 in v: s = '
<li>%s: %s</li>' % (k,v2)
             s += '\n'
     return s
@@ -199,7 +199,7 @@ def _website_expected_error_response(res, bucket_name, status, reason, code, con
     if code is not IGNORE_FIELD:
         assert errorcode == code
 
-    if not isinstance(content, collections.Container):
+    if not isinstance(content, Container):
         content = set([content])
     for f in content:
         if f is not IGNORE_FIELD and f is not None:

From 787dc6bd43d11acaec6ec8e7b942296410b254f3 Mon Sep 17 00:00:00 2001
From: Casey Bodley
Date: Mon, 13 Mar 2023 13:00:25 -0400
Subject: [PATCH 12/17] boto3: multi-object-delete tests use client.delete_objects()

when the tests were converted from boto2, they were rewritten as loops
over client.delete_object(). switch back to multi-delete

Signed-off-by: Casey Bodley
---
 s3tests_boto3/functional/test_s3.py | 49 ++++++++++++-----------------
 1 file changed, 20 insertions(+), 29 deletions(-)

diff --git a/s3tests_boto3/functional/test_s3.py b/s3tests_boto3/functional/test_s3.py
index e4c7052..a7f7d42 100644
--- a/s3tests_boto3/functional/test_s3.py
+++ b/s3tests_boto3/functional/test_s3.py
@@ -7459,20 +7459,17 @@ def test_versioning_multi_object_delete():
     num_versions = 2
 
     (version_ids, contents) = create_multiple_versions(client, bucket_name, key, num_versions)
+    assert len(version_ids) == 2
 
-    response = client.list_object_versions(Bucket=bucket_name)
-    versions = response['Versions']
-    versions.reverse()
-
-    for version in versions:
-        client.delete_object(Bucket=bucket_name, Key=key, VersionId=version['VersionId'])
+    # delete both versions
+    objects = [{'Key': key, 'VersionId': v} for v in version_ids]
+    client.delete_objects(Bucket=bucket_name, Delete={'Objects': objects})
 
     response = client.list_object_versions(Bucket=bucket_name)
     assert not 'Versions' in response
 
     # now remove again, should all succeed due to idempotency
-    for version in versions:
-        client.delete_object(Bucket=bucket_name, Key=key, VersionId=version['VersionId'])
+    client.delete_objects(Bucket=bucket_name, Delete={'Objects': objects})
 
     response = client.list_object_versions(Bucket=bucket_name)
     assert not 'Versions' in response
@@ -7487,33 +7484,24 @@ def test_versioning_multi_object_delete_with_marker():
     num_versions = 2
 
     (version_ids, contents) = create_multiple_versions(client, bucket_name, key, num_versions)
+    assert len(version_ids) == num_versions
+    objects = [{'Key': key, 'VersionId': v} for v in version_ids]
 
-    client.delete_object(Bucket=bucket_name, Key=key)
-    response = client.list_object_versions(Bucket=bucket_name)
-    versions = response['Versions']
-    delete_markers = response['DeleteMarkers']
+    # create a delete marker
+    response = client.delete_object(Bucket=bucket_name, Key=key)
+    assert response['DeleteMarker']
+    objects += [{'Key': key, 'VersionId': response['VersionId']}]
 
-    version_ids.append(delete_markers[0]['VersionId'])
-    assert len(version_ids) == 3
-    assert len(delete_markers) == 1
-
-    for version in versions:
-        client.delete_object(Bucket=bucket_name, Key=key, VersionId=version['VersionId'])
-
-    for delete_marker in delete_markers:
-        client.delete_object(Bucket=bucket_name, Key=key, VersionId=delete_marker['VersionId'])
+    # delete all versions
+    client.delete_objects(Bucket=bucket_name, Delete={'Objects': objects})
 
     response = client.list_object_versions(Bucket=bucket_name)
     assert not 'Versions' in response
     assert not 'DeleteMarkers' in response
 
-    for version in versions:
-        client.delete_object(Bucket=bucket_name, Key=key, VersionId=version['VersionId'])
-
-    for delete_marker in delete_markers:
-        client.delete_object(Bucket=bucket_name, Key=key, 
VersionId=delete_marker['VersionId']) - # now remove again, should all succeed due to idempotency + client.delete_objects(Bucket=bucket_name, Delete={'Objects': objects}) + response = client.list_object_versions(Bucket=bucket_name) assert not 'Versions' in response assert not 'DeleteMarkers' in response @@ -7527,8 +7515,11 @@ def test_versioning_multi_object_delete_with_marker_create(): key = 'key' - response = client.delete_object(Bucket=bucket_name, Key=key) - delete_marker_version_id = response['VersionId'] + # use delete_objects() to create a delete marker + response = client.delete_objects(Bucket=bucket_name, Delete={'Objects': [{'Key': key}]}) + assert len(response['Deleted']) == 1 + assert response['Deleted'][0]['DeleteMarker'] + delete_marker_version_id = response['Deleted'][0]['DeleteMarkerVersionId'] response = client.list_object_versions(Bucket=bucket_name) delete_markers = response['DeleteMarkers'] From febbcc12c2cbe11d2261a4f6e5a6cac4b05c3ac8 Mon Sep 17 00:00:00 2001 From: Casey Bodley Date: Thu, 30 Mar 2023 14:27:27 -0400 Subject: [PATCH 13/17] test_sse_s3_default_multipart_upload verifies encryption header Signed-off-by: Casey Bodley --- s3tests_boto3/functional/test_s3.py | 1 + 1 file changed, 1 insertion(+) diff --git a/s3tests_boto3/functional/test_s3.py b/s3tests_boto3/functional/test_s3.py index b8c638f..fad9f95 100644 --- a/s3tests_boto3/functional/test_s3.py +++ b/s3tests_boto3/functional/test_s3.py @@ -12642,6 +12642,7 @@ def test_sse_s3_default_multipart_upload(): assert response['Metadata'] == metadata assert response['ResponseMetadata']['HTTPHeaders']['content-type'] == content_type + assert response['ResponseMetadata']['HTTPHeaders']['x-amz-server-side-encryption'] == 'AES256' body = _get_body(response) assert body == data From 89bbe654ca7af859f52b770dfbf40d95a7ea41be Mon Sep 17 00:00:00 2001 From: galsalomon66 Date: Wed, 5 Apr 2023 01:30:00 +0300 Subject: [PATCH 14/17] upon removing the payload tag, the response index should be changed also Signed-off-by: galsalomon66 --- s3tests_boto3/functional/test_s3select.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/s3tests_boto3/functional/test_s3select.py b/s3tests_boto3/functional/test_s3select.py index c3cb72f..8dabcc6 100644 --- a/s3tests_boto3/functional/test_s3select.py +++ b/s3tests_boto3/functional/test_s3select.py @@ -75,7 +75,7 @@ def generate_s3select_expression_projection(bucket_name,obj_name): epsilon = float(0.00001) # both results should be close (epsilon) - assert( abs(float(res.split("\n")[1]) - eval(e)) < epsilon ) + assert( abs(float(res.split("\n")[0]) - eval(e)) < epsilon ) @pytest.mark.s3select def get_random_string(): From bc2a3b0b705277e313b4b13532a921d5599ed167 Mon Sep 17 00:00:00 2001 From: galsalomon66 Date: Sun, 9 Apr 2023 15:02:24 +0300 Subject: [PATCH 15/17] modifying of the run_s3select routine; to handle the different statuses (progress,stats,end) Signed-off-by: galsalomon66 --- s3tests_boto3/functional/test_s3select.py | 36 +++++++++++++++-------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/s3tests_boto3/functional/test_s3select.py b/s3tests_boto3/functional/test_s3select.py index 8dabcc6..1a2ccaf 100644 --- a/s3tests_boto3/functional/test_s3select.py +++ b/s3tests_boto3/functional/test_s3select.py @@ -275,6 +275,7 @@ def run_s3select(bucket,key,query,column_delim=",",row_delim="\n",quot_char='"', s3 = get_client() result = "" + result_status = {} try: r = s3.select_object_content( Bucket=bucket, @@ -302,14 +303,18 @@ def 
run_s3select(bucket,key,query,column_delim=",",row_delim="\n",quot_char='"', result.append(records.copy()) if 'Progress' in event: progress = event['Progress'] - result.append(progress.copy()) + result_status['Progress'] = event['Progress'] if 'Stats' in event: stats = event['Stats'] - result.append(stats.copy()) + result_status['Stats'] = event['Stats'] if 'End' in event: end = event['End'] - result.append(end.copy()) - return result + result_status['End'] = event['End'] + + if progress == False: + return result + else: + return result,result_status def run_s3select_output(bucket,key,query, quot_field, op_column_delim = ",", op_row_delim = "\n", column_delim=",", op_quot_char = '"', op_esc_char = '\\', row_delim="\n",quot_char='"',esc_char='\\',csv_header_info="NONE"): @@ -1550,21 +1555,26 @@ def test_progress_expressions(): obj_size = len(csv_obj.encode('utf-8')) - res_s3select_response = run_s3select(bucket_name,csv_obj_name,"select sum(int(_1)) from s3object;",progress = True) - records_payload_size = len(remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name, 'select sum(int(_1)) from s3object;')).replace("\n","")) + result_status = {} + result_size = 0 - total_response = len(res_s3select_response) - + res_s3select_response,result_status = run_s3select(bucket_name,csv_obj_name,"select sum(int(_1)) from s3object;",progress = True) + + for rec in res_s3select_response: + result_size += len(rec['Payload']) + + records_payload_size = result_size + # To do: Validate bytes processed after supporting compressed data - s3select_assert_result(obj_size, res_s3select_response[total_response-3]['Details']['BytesScanned']) - s3select_assert_result(records_payload_size, res_s3select_response[total_response-3]['Details']['BytesReturned']) + s3select_assert_result(obj_size, result_status['Progress']['Details']['BytesScanned']) + s3select_assert_result(records_payload_size, result_status['Progress']['Details']['BytesReturned']) # stats response payload validation - s3select_assert_result(obj_size, res_s3select_response[total_response-2]['Details']['BytesScanned']) - s3select_assert_result(records_payload_size, res_s3select_response[total_response-2]['Details']['BytesReturned']) + s3select_assert_result(obj_size, result_status['Stats']['Details']['BytesScanned']) + s3select_assert_result(records_payload_size, result_status['Stats']['Details']['BytesReturned']) # end response - s3select_assert_result({}, res_s3select_response[total_response-1]) + s3select_assert_result({}, result_status['End']) @pytest.mark.s3select def test_output_serial_expressions(): From 008f5025f7a3672e638ff5531377baa32f342504 Mon Sep 17 00:00:00 2001 From: galsalomon66 Date: Mon, 10 Apr 2023 12:26:59 +0300 Subject: [PATCH 16/17] progress nessage is sent back upon processing the object, the change make sure it stay with the max result Signed-off-by: galsalomon66 --- s3tests_boto3/functional/test_s3select.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/s3tests_boto3/functional/test_s3select.py b/s3tests_boto3/functional/test_s3select.py index 1a2ccaf..bda6ff7 100644 --- a/s3tests_boto3/functional/test_s3select.py +++ b/s3tests_boto3/functional/test_s3select.py @@ -297,18 +297,19 @@ def run_s3select(bucket,key,query,column_delim=",",row_delim="\n",quot_char='"', result += records else: result = [] + max_progress_scanned = 0 for event in r['Payload']: if 'Records' in event: records = event['Records'] result.append(records.copy()) if 'Progress' in event: - progress = event['Progress'] - 
result_status['Progress'] = event['Progress'] + if(event['Progress']['Details']['BytesScanned'] > max_progress_scanned): + max_progress_scanned = event['Progress']['Details']['BytesScanned'] + result_status['Progress'] = event['Progress'] + if 'Stats' in event: - stats = event['Stats'] result_status['Stats'] = event['Stats'] if 'End' in event: - end = event['End'] result_status['End'] = event['End'] if progress == False: From e18ea7fac449a420093bbdcfb016197db9f3de0e Mon Sep 17 00:00:00 2001 From: Cory Snyder Date: Thu, 21 Jul 2022 18:33:51 +0000 Subject: [PATCH 17/17] Add test_versioning_concurrent_multi_object_delete Tests that concurrent multi-object delete requests which specify the same versioned object instances return successful object responses within response body. relates to: https://tracker.ceph.com/issues/56646 Signed-off-by: Cory Snyder --- s3tests_boto3/functional/test_s3.py | 33 +++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/s3tests_boto3/functional/test_s3.py b/s3tests_boto3/functional/test_s3.py index d325584..0d47f1c 100644 --- a/s3tests_boto3/functional/test_s3.py +++ b/s3tests_boto3/functional/test_s3.py @@ -1637,6 +1637,39 @@ def _make_objs_dict(key_names): objs_dict = {'Objects': objs_list} return objs_dict +def test_versioning_concurrent_multi_object_delete(): + num_objects = 5 + num_threads = 5 + bucket_name = get_new_bucket() + + check_configure_versioning_retry(bucket_name, "Enabled", "Enabled") + + key_names = ["key_{:d}".format(x) for x in range(num_objects)] + bucket = _create_objects(bucket_name=bucket_name, keys=key_names) + + client = get_client() + versions = client.list_object_versions(Bucket=bucket_name)['Versions'] + assert len(versions) == num_objects + objs_dict = {'Objects': [dict((k, v[k]) for k in ["Key", "VersionId"]) for v in versions]} + results = [None] * num_threads + + def do_request(n): + results[n] = client.delete_objects(Bucket=bucket_name, Delete=objs_dict) + + t = [] + for i in range(num_threads): + thr = threading.Thread(target = do_request, args=[i]) + thr.start() + t.append(thr) + _do_wait_completion(t) + + for response in results: + assert len(response['Deleted']) == num_objects + assert 'Errors' not in response + + response = client.list_objects(Bucket=bucket_name) + assert 'Contents' not in response + def test_multi_object_delete(): key_names = ['key0', 'key1', 'key2'] bucket_name = _create_objects(keys=key_names)