From 819dd5aa323664d26c5881905d10748e0b56cea5 Mon Sep 17 00:00:00 2001 From: Amit Prinz Setter Date: Sun, 29 Jan 2023 09:25:58 +0200 Subject: [PATCH 1/2] Use get_new_bucket_name() in select tests. Signed-off-by: Amit Prinz Setter --- s3tests_boto3/functional/test_s3select.py | 73 ++++++++++++----------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/s3tests_boto3/functional/test_s3select.py b/s3tests_boto3/functional/test_s3select.py index feb6720..9740e88 100644 --- a/s3tests_boto3/functional/test_s3select.py +++ b/s3tests_boto3/functional/test_s3select.py @@ -9,7 +9,8 @@ import uuid from . import ( configfile, setup_teardown, - get_client + get_client, + get_new_bucket_name ) import logging @@ -86,7 +87,7 @@ def test_generate_where_clause(): # create small csv file for testing the random expressions single_line_csv = create_random_csv_object(1,1) - bucket_name = "test" + bucket_name = get_new_bucket_name() obj_name = get_random_string() #"single_line_csv.csv" upload_csv_object(bucket_name,obj_name,single_line_csv) @@ -98,7 +99,7 @@ def test_generate_projection(): # create small csv file for testing the random expressions single_line_csv = create_random_csv_object(1,1) - bucket_name = "test" + bucket_name = get_new_bucket_name() obj_name = get_random_string() #"single_line_csv.csv" upload_csv_object(bucket_name,obj_name,single_line_csv) @@ -311,7 +312,7 @@ def create_list_of_int(column_pos,obj,field_split=",",row_split="\n"): @pytest.mark.s3select def test_count_operation(): csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() num_of_rows = 1234 obj_to_load = create_random_csv_object(num_of_rows,10) upload_csv_object(bucket_name,csv_obj_name,obj_to_load) @@ -324,7 +325,7 @@ def test_column_sum_min_max(): csv_obj = create_random_csv_object(10000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) csv_obj_name_2 = get_random_string() @@ -390,7 +391,7 @@ def test_nullif_expressions(): csv_obj = create_random_csv_object(10000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select_nullif = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from s3object where nullif(_1,_2) is null ;") ).replace("\n","") @@ -445,7 +446,7 @@ def test_nulliftrue_expressions(): csv_obj = create_random_csv_object(10000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select_nullif = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from s3object where (nullif(_1,_2) is null) = true ;") ).replace("\n","") @@ -472,7 +473,7 @@ def test_is_not_null_expressions(): csv_obj = create_random_csv_object(10000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select_null = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(*) from s3object where nullif(_1,_2) is not null ;") ).replace("\n","") @@ -493,7 +494,7 @@ def test_lowerupper_expressions(): csv_obj = create_random_csv_object(1,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select lower("AB12cd$$") from s3object ;') ).replace("\n","") @@ -511,7 +512,7 @@ def test_in_expressions(): csv_obj = create_random_csv_object(10000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from s3object where int(_1) in(1);')).replace("\n","") @@ -580,7 +581,7 @@ def test_true_false_in_expressions(): csv_obj = create_random_csv_object(10000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from s3object where (int(_1) in(1)) = true;')).replace("\n","") @@ -625,7 +626,7 @@ def test_like_expressions(): csv_obj = create_random_csv_object_string(1000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select_like = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where _1 like "%aeio%";')).replace("\n","") @@ -712,7 +713,7 @@ def test_truefalselike_expressions(): csv_obj = create_random_csv_object_string(1000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select_like = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where (_1 like "%aeio%") = true;')).replace("\n","") @@ -757,7 +758,7 @@ def test_nullif_expressions(): csv_obj = create_random_csv_object(10000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select_nullif = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from stdin where nullif(_1,_2) is null ;") ).replace("\n","") @@ -784,7 +785,7 @@ def test_lowerupper_expressions(): csv_obj = create_random_csv_object(1,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select lower("AB12cd$$") from stdin ;') ).replace("\n","") @@ -802,7 +803,7 @@ def test_in_expressions(): csv_obj = create_random_csv_object(10000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from stdin where int(_1) in(1);')).replace("\n","") @@ -841,7 +842,7 @@ def test_like_expressions(): csv_obj = create_random_csv_object_string(10000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from stdin where _1 like "%aeio%";')).replace("\n","") @@ -888,7 +889,7 @@ def test_complex_expressions(): csv_obj = create_random_csv_object(10000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select min(int(_1)),max(int(_2)),min(int(_3))+1 from s3object;")).replace("\n","") @@ -924,7 +925,7 @@ def test_alias(): csv_obj = create_random_csv_object(10000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select_alias = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select int(_1) as a1, int(_2) as a2 , (a1+a2) as a3 from s3object where a3>100 and a3<300;") ).replace(",","") @@ -943,7 +944,7 @@ def test_alias_cyclic_refernce(): csv_obj = create_random_csv_object(number_of_rows,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select_alias = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select int(_1) as a1,int(_2) as a2, a1+a4 as a3, a5+a1 as a4, int(_3)+a3 as a5 from s3object;") ) @@ -961,7 +962,7 @@ def test_datetime(): csv_obj = create_csv_object_for_datetime(10000,1) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) @@ -992,7 +993,7 @@ def test_true_false_datetime(): csv_obj = create_csv_object_for_datetime(10000,1) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) @@ -1025,7 +1026,7 @@ def test_csv_parser(): csv_obj = r',first,,,second,third="c31,c32,c33",forth="1,2,3,4",fifth=my_string=\"any_value\" \, my_other_string=\"aaaa\,bbb\" ,' + "\n" csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) @@ -1066,7 +1067,7 @@ def test_csv_definition(): csv_obj = create_random_csv_object(number_of_rows,10,"|","\t") csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) @@ -1096,7 +1097,7 @@ def test_schema_definition(): csv_obj = create_random_csv_object(number_of_rows,10,csv_schema="c1,c2,c3,c4,c5,c6,c7,c8,c9,c10") csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) @@ -1128,7 +1129,7 @@ def test_when_then_else_expressions(): csv_obj = create_random_csv_object(10000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select case when cast(_1 as int)>100 and cast(_1 as int)<200 then "(100-200)" when cast(_1 as int)>200 and cast(_1 as int)<300 then "(200-300)" else "NONE" end from s3object;') ).replace("\n","") @@ -1157,7 +1158,7 @@ def test_coalesce_expressions(): csv_obj = create_random_csv_object(10000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where char_length(_3)>2 and char_length(_4)>2 and cast(substring(_3,1,2) as int) = cast(substring(_4,1,2) as int);') ).replace("\n","") @@ -1179,7 +1180,7 @@ def test_cast_expressions(): csv_obj = create_random_csv_object(10000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where cast(_3 as int)>999;') ).replace("\n","") @@ -1204,7 +1205,7 @@ def test_version(): csv_obj = create_random_csv_object(number_of_rows,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) @@ -1218,7 +1219,7 @@ def test_trim_expressions(): csv_obj = create_random_csv_object_trim(10000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select_trim = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where trim(_1) = "aeiou";')).replace("\n","") @@ -1257,7 +1258,7 @@ def test_truefalse_trim_expressions(): csv_obj = create_random_csv_object_trim(10000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select_trim = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where trim(_1) = "aeiou" = true;')).replace("\n","") @@ -1296,7 +1297,7 @@ def test_escape_expressions(): csv_obj = create_random_csv_object_escape(10000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select_escape = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where _1 like "%_ar" escape "%";')).replace("\n","") @@ -1317,7 +1318,7 @@ def test_case_value_expressions(): csv_obj = create_random_csv_object(10000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select_case = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select case cast(_1 as int) when cast(_2 as int) then "case_1_1" else "case_2_2" end from s3object;')).replace("\n","") @@ -1332,7 +1333,7 @@ def test_bool_cast_expressions(): csv_obj = create_random_csv_object(10000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select_cast = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where cast(int(_1) as bool) = true ;')).replace("\n","") @@ -1347,7 +1348,7 @@ def test_progress_expressions(): csv_obj = create_random_csv_object(1000000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) obj_size = len(csv_obj.encode('utf-8')) @@ -1375,7 +1376,7 @@ def test_output_serial_expressions(): csv_obj = create_random_csv_object(10000,10) csv_obj_name = get_random_string() - bucket_name = "test" + bucket_name = get_new_bucket_name() upload_csv_object(bucket_name,csv_obj_name,csv_obj) res_s3select_1 = remove_xml_tags_from_result( run_s3select_output(bucket_name,csv_obj_name,"select _1, _2 from s3object where nullif(_1,_2) is null ;", "ALWAYS") ).replace("\n",",").replace(",","") From 2998ea91ebb114ff82aa19f97c28a522ba85adf4 Mon Sep 17 00:00:00 2001 From: galsalomon66 Date: Thu, 8 Jun 2023 10:07:54 +0300 Subject: [PATCH 2/2] instead of hard-coded bucket-name using get_bucket_name.merging PR #488 alphaprinz Signed-off-by: galsalomon66 --- s3tests_boto3/functional/test_s3select.py | 319 ++++++++++++++++++---- 1 file changed, 264 insertions(+), 55 deletions(-) diff --git a/s3tests_boto3/functional/test_s3select.py b/s3tests_boto3/functional/test_s3select.py index 9740e88..5c18073 100644 --- a/s3tests_boto3/functional/test_s3select.py +++ b/s3tests_boto3/functional/test_s3select.py @@ -2,6 +2,7 @@ import pytest import random import string import re +import json from botocore.exceptions import ClientError import uuid @@ -75,7 +76,7 @@ def generate_s3select_expression_projection(bucket_name,obj_name): epsilon = float(0.00001) # both results should be close (epsilon) - assert( abs(float(res.split("\n")[1]) - eval(e)) < epsilon ) + assert( abs(float(res.split("\n")[0]) - eval(e)) < epsilon ) @pytest.mark.s3select def get_random_string(): @@ -89,7 +90,7 @@ def test_generate_where_clause(): single_line_csv = create_random_csv_object(1,1) bucket_name = get_new_bucket_name() obj_name = get_random_string() #"single_line_csv.csv" - upload_csv_object(bucket_name,obj_name,single_line_csv) + upload_object(bucket_name,obj_name,single_line_csv) for _ in range(100): generate_s3select_where_clause(bucket_name,obj_name) @@ -101,7 +102,7 @@ def test_generate_projection(): single_line_csv = create_random_csv_object(1,1) bucket_name = get_new_bucket_name() obj_name = get_random_string() #"single_line_csv.csv" - upload_csv_object(bucket_name,obj_name,single_line_csv) + upload_object(bucket_name,obj_name,single_line_csv) for _ in range(100): generate_s3select_expression_projection(bucket_name,obj_name) @@ -210,7 +211,57 @@ def create_random_csv_object_null(rows,columns,col_delim=",",record_delim="\n",c return result -def upload_csv_object(bucket_name,new_key,obj): +def create_random_json_object(rows,columns,col_delim=",",record_delim="\n",csv_schema=""): + result = "{\"root\" : [" + result += record_delim + if len(csv_schema)>0 : + result = csv_schema + record_delim + + for _ in range(rows): + row = "" + num = 0 + row += "{" + for _ in range(columns): + num += 1 + row = row + "\"c" + str(num) + "\"" + ": " "{}{}".format(random.randint(0,1000),col_delim) + row = row[:-1] + row += "}" + row += "," + result += row + record_delim + + result = result[:-2] + result += record_delim + result += "]" + "}" + + return result + +def csv_to_json(obj, field_split=",",row_split="\n",csv_schema=""): + result = "{\"root\" : [" + result += row_split + if len(csv_schema)>0 : + result = csv_schema + row_split + + for rec in obj.split(row_split): + row = "" + num = 0 + row += "{" + for col in rec.split(field_split): + if col == "": + break + num += 1 + row = row + "\"c" + str(num) + "\"" + ": " "{}{}".format(col,field_split) + row = row[:-1] + row += "}" + row += "," + result += row + row_split + + result = result[:-5] + result += row_split + result += "]" + "}" + + return result + +def upload_object(bucket_name,new_key,obj): client = get_client() client.create_bucket(Bucket=bucket_name) @@ -225,6 +276,7 @@ def run_s3select(bucket,key,query,column_delim=",",row_delim="\n",quot_char='"', s3 = get_client() result = "" + result_status = {} try: r = s3.select_object_content( Bucket=bucket, @@ -246,20 +298,26 @@ def run_s3select(bucket,key,query,column_delim=",",row_delim="\n",quot_char='"', result += records else: result = [] + max_progress_scanned = 0 for event in r['Payload']: if 'Records' in event: records = event['Records'] result.append(records.copy()) if 'Progress' in event: - progress = event['Progress'] - result.append(progress.copy()) + if(event['Progress']['Details']['BytesScanned'] > max_progress_scanned): + max_progress_scanned = event['Progress']['Details']['BytesScanned'] + result_status['Progress'] = event['Progress'] + if 'Stats' in event: - stats = event['Stats'] - result.append(stats.copy()) + result_status['Stats'] = event['Stats'] if 'End' in event: - end = event['End'] - result.append(end.copy()) - return result + result_status['End'] = event['End'] + + if progress == False: + return result + else: + return result,result_status + def run_s3select_output(bucket,key,query, quot_field, op_column_delim = ",", op_row_delim = "\n", column_delim=",", op_quot_char = '"', op_esc_char = '\\', row_delim="\n",quot_char='"',esc_char='\\',csv_header_info="NONE"): s3 = get_client() @@ -280,6 +338,27 @@ def run_s3select_output(bucket,key,query, quot_field, op_column_delim = ",", op_ return result +def run_s3select_json(bucket,key,query, op_row_delim = "\n"): + + s3 = get_client() + + r = s3.select_object_content( + Bucket=bucket, + Key=key, + ExpressionType='SQL', + InputSerialization = {"JSON": {"Type": "DOCUMENT"}}, + OutputSerialization = {"JSON": {}}, + Expression=query,) + #Record delimiter optional in output serialization + + result = "" + for event in r['Payload']: + if 'Records' in event: + records = event['Records']['Payload'].decode('utf-8') + result += records + + return result + def remove_xml_tags_from_result(obj): result = "" for rec in obj.split("\n"): @@ -315,22 +394,147 @@ def test_count_operation(): bucket_name = get_new_bucket_name() num_of_rows = 1234 obj_to_load = create_random_csv_object(num_of_rows,10) - upload_csv_object(bucket_name,csv_obj_name,obj_to_load) + upload_object(bucket_name,csv_obj_name,obj_to_load) res = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from s3object;") ).replace(",","") s3select_assert_result( num_of_rows, int( res )) +@pytest.mark.s3select +def test_count_json_operation(): + json_obj_name = get_random_string() + bucket_name = get_new_bucket_name() + num_of_rows = 1 + obj_to_load = create_random_json_object(num_of_rows,10) + upload_object(bucket_name,json_obj_name,obj_to_load) + res = remove_xml_tags_from_result(run_s3select_json(bucket_name,json_obj_name,"select count(0) from s3object[*];")) + s3select_assert_result( 1, int(res)) + + res = remove_xml_tags_from_result(run_s3select_json(bucket_name,json_obj_name,"select count(0) from s3object[*].root;")) + s3select_assert_result( 1, int(res)) + + obj_to_load = create_random_json_object(3,10) + upload_object(bucket_name,json_obj_name,obj_to_load) + res = remove_xml_tags_from_result(run_s3select_json(bucket_name,json_obj_name,"select count(0) from s3object[*].root;")) + s3select_assert_result( 3, int(res)) + +@pytest.mark.s3select +def test_json_column_sum_min_max(): + csv_obj = create_random_csv_object(10000,10) + + json_obj = csv_to_json(csv_obj); + + json_obj_name = get_random_string() + bucket_name = get_new_bucket_name() + upload_object(bucket_name,json_obj_name,json_obj) + + json_obj_name_2 = get_random_string() + bucket_name_2 = "testbuck2" + upload_object(bucket_name_2,json_obj_name_2,json_obj) + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select min(_1.c1) from s3object[*].root;") ).replace(",","") + list_int = create_list_of_int( 1 , csv_obj ) + res_target = min( list_int ) + + s3select_assert_result( int(res_s3select), int(res_target)) + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select min(_1.c4) from s3object[*].root;") ).replace(",","") + list_int = create_list_of_int( 4 , csv_obj ) + res_target = min( list_int ) + + s3select_assert_result( int(res_s3select), int(res_target)) + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select avg(_1.c6) from s3object[*].root;") ).replace(",","") + list_int = create_list_of_int( 6 , csv_obj ) + res_target = float(sum(list_int ))/10000 + + s3select_assert_result( float(res_s3select), float(res_target)) + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select max(_1.c4) from s3object[*].root;") ).replace(",","") + list_int = create_list_of_int( 4 , csv_obj ) + res_target = max( list_int ) + + s3select_assert_result( int(res_s3select), int(res_target)) + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select max(_1.c7) from s3object[*].root;") ).replace(",","") + list_int = create_list_of_int( 7 , csv_obj ) + res_target = max( list_int ) + + s3select_assert_result( int(res_s3select), int(res_target)) + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select sum(_1.c4) from s3object[*].root;") ).replace(",","") + list_int = create_list_of_int( 4 , csv_obj ) + res_target = sum( list_int ) + + s3select_assert_result( int(res_s3select), int(res_target)) + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select sum(_1.c7) from s3object[*].root;") ).replace(",","") + list_int = create_list_of_int( 7 , csv_obj ) + res_target = sum( list_int ) + + s3select_assert_result( int(res_s3select) , int(res_target) ) + + # the following queries, validates on *random* input an *accurate* relation between condition result,sum operation and count operation. + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name_2,json_obj_name_2,"select count(0),sum(_1.c1),sum(_1.c2) from s3object[*].root where (_1.c1-_1.c2) = 2;" ) ) + count,sum1,sum2 = res_s3select.split(",") + + s3select_assert_result( int(count)*2 , int(sum1)-int(sum2 ) ) + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select count(0),sum(_1.c1),sum(_1.c2) from s3object[*].root where (_1.c1-_1.c2) = 4;" ) ) + count,sum1,sum2 = res_s3select.split(",") + + s3select_assert_result( int(count)*4 , int(sum1)-int(sum2) ) + +@pytest.mark.s3select +def test_json_nullif_expressions(): + + json_obj = create_random_json_object(10000,10) + + json_obj_name = get_random_string() + bucket_name = get_new_bucket_name() + upload_object(bucket_name,json_obj_name,json_obj) + + res_s3select_nullif = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select count(0) from s3object[*].root where nullif(_1.c1,_1.c2) is null ;") ).replace("\n","") + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select count(0) from s3object[*].root where _1.c1 = _1.c2 ;") ).replace("\n","") + + s3select_assert_result( res_s3select_nullif, res_s3select) + + res_s3select_nullif = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select (nullif(_1.c1,_1.c2) is null) from s3object[*].root ;") ).replace("\n","") + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select (_1.c1 = _1.c2) from s3object[*].root ;") ).replace("\n","") + + s3select_assert_result( res_s3select_nullif, res_s3select) + + res_s3select_nullif = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select count(0) from s3object[*].root where not nullif(_1.c1,_1.c2) is null ;") ).replace("\n","") + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select count(0) from s3object[*].root where _1.c1 != _1.c2 ;") ).replace("\n","") + + s3select_assert_result( res_s3select_nullif, res_s3select) + + res_s3select_nullif = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select (nullif(_1.c1,_1.c2) is not null) from s3object[*].root ;") ).replace("\n","") + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select (_1.c1 != _1.c2) from s3object[*].root ;") ).replace("\n","") + + s3select_assert_result( res_s3select_nullif, res_s3select) + + res_s3select_nullif = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select count(0) from s3object[*].root where nullif(_1.c1,_1.c2) = _1.c1 ;") ).replace("\n","") + + res_s3select = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select count(0) from s3object[*].root where _1.c1 != _1.c2 ;") ).replace("\n","") + + s3select_assert_result( res_s3select_nullif, res_s3select) + + @pytest.mark.s3select def test_column_sum_min_max(): csv_obj = create_random_csv_object(10000,10) csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) csv_obj_name_2 = get_random_string() bucket_name_2 = "testbuck2" - upload_csv_object(bucket_name_2,csv_obj_name_2,csv_obj) + upload_object(bucket_name_2,csv_obj_name_2,csv_obj) res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select min(int(_1)) from s3object;") ).replace(",","") list_int = create_list_of_int( 1 , csv_obj ) @@ -392,7 +596,7 @@ def test_nullif_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_nullif = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from s3object where nullif(_1,_2) is null ;") ).replace("\n","") @@ -426,7 +630,7 @@ def test_nullif_expressions(): csv_obj = create_random_csv_object_null(10000,10) - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_nullif = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(*) from s3object where nullif(_1,null) is null;") ).replace("\n","") @@ -447,7 +651,7 @@ def test_nulliftrue_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_nullif = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from s3object where (nullif(_1,_2) is null) = true ;") ).replace("\n","") @@ -474,7 +678,7 @@ def test_is_not_null_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_null = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(*) from s3object where nullif(_1,_2) is not null ;") ).replace("\n","") @@ -495,7 +699,7 @@ def test_lowerupper_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select lower("AB12cd$$") from s3object ;') ).replace("\n","") @@ -513,7 +717,7 @@ def test_in_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from s3object where int(_1) in(1);')).replace("\n","") @@ -582,7 +786,7 @@ def test_true_false_in_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from s3object where (int(_1) in(1)) = true;')).replace("\n","") @@ -627,7 +831,7 @@ def test_like_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_like = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where _1 like "%aeio%";')).replace("\n","") @@ -714,7 +918,7 @@ def test_truefalselike_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_like = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where (_1 like "%aeio%") = true;')).replace("\n","") @@ -759,7 +963,7 @@ def test_nullif_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_nullif = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from stdin where nullif(_1,_2) is null ;") ).replace("\n","") @@ -786,7 +990,7 @@ def test_lowerupper_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select lower("AB12cd$$") from stdin ;') ).replace("\n","") @@ -804,7 +1008,7 @@ def test_in_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from stdin where int(_1) in(1);')).replace("\n","") @@ -843,7 +1047,7 @@ def test_like_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from stdin where _1 like "%aeio%";')).replace("\n","") @@ -890,7 +1094,7 @@ def test_complex_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select min(int(_1)),max(int(_2)),min(int(_3))+1 from s3object;")).replace("\n","") @@ -926,7 +1130,7 @@ def test_alias(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_alias = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select int(_1) as a1, int(_2) as a2 , (a1+a2) as a3 from s3object where a3>100 and a3<300;") ).replace(",","") @@ -945,7 +1149,7 @@ def test_alias_cyclic_refernce(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_alias = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select int(_1) as a1,int(_2) as a2, a1+a4 as a3, a5+a1 as a4, int(_3)+a3 as a5 from s3object;") ) @@ -964,7 +1168,7 @@ def test_datetime(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_date_time = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(0) from s3object where extract(year from to_timestamp(_1)) > 1950 and extract(year from to_timestamp(_1)) < 1960;') ) @@ -995,7 +1199,7 @@ def test_true_false_datetime(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_date_time = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(0) from s3object where (extract(year from to_timestamp(_1)) > 1950) = true and (extract(year from to_timestamp(_1)) < 1960) = true;') ) @@ -1028,7 +1232,7 @@ def test_csv_parser(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) # return value contain comma{,} res_s3select_alias = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select _6 from s3object;") ).replace("\n","") @@ -1069,7 +1273,7 @@ def test_csv_definition(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) # purpose of tests is to parse correctly input with different csv defintions res = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from s3object;","|","\t") ).replace(",","") @@ -1099,7 +1303,7 @@ def test_schema_definition(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) # ignoring the schema on first line and retrieve using generic column number res_ignore = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select _1,_3 from s3object;",csv_header_info="IGNORE") ).replace("\n","") @@ -1130,7 +1334,7 @@ def test_when_then_else_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select case when cast(_1 as int)>100 and cast(_1 as int)<200 then "(100-200)" when cast(_1 as int)>200 and cast(_1 as int)<300 then "(200-300)" else "NONE" end from s3object;') ).replace("\n","") @@ -1159,7 +1363,7 @@ def test_coalesce_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where char_length(_3)>2 and char_length(_4)>2 and cast(substring(_3,1,2) as int) = cast(substring(_4,1,2) as int);') ).replace("\n","") @@ -1181,7 +1385,7 @@ def test_cast_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where cast(_3 as int)>999;') ).replace("\n","") @@ -1207,7 +1411,7 @@ def test_version(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_version = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select version() from s3object;") ).replace("\n","") @@ -1220,7 +1424,7 @@ def test_trim_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_trim = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where trim(_1) = "aeiou";')).replace("\n","") @@ -1259,7 +1463,7 @@ def test_truefalse_trim_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_trim = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where trim(_1) = "aeiou" = true;')).replace("\n","") @@ -1298,7 +1502,7 @@ def test_escape_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_escape = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where _1 like "%_ar" escape "%";')).replace("\n","") @@ -1319,7 +1523,7 @@ def test_case_value_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_case = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select case cast(_1 as int) when cast(_2 as int) then "case_1_1" else "case_2_2" end from s3object;')).replace("\n","") @@ -1334,7 +1538,7 @@ def test_bool_cast_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_cast = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where cast(int(_1) as bool) = true ;')).replace("\n","") @@ -1349,25 +1553,30 @@ def test_progress_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) obj_size = len(csv_obj.encode('utf-8')) - res_s3select_response = run_s3select(bucket_name,csv_obj_name,"select sum(int(_1)) from s3object;",progress = True) - records_payload_size = len(remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name, 'select sum(int(_1)) from s3object;')).replace("\n","")) + result_status = {} + result_size = 0 - total_response = len(res_s3select_response) - + res_s3select_response,result_status = run_s3select(bucket_name,csv_obj_name,"select sum(int(_1)) from s3object;",progress = True) + + for rec in res_s3select_response: + result_size += len(rec['Payload']) + + records_payload_size = result_size + # To do: Validate bytes processed after supporting compressed data - s3select_assert_result(obj_size, res_s3select_response[total_response-3]['Details']['BytesScanned']) - s3select_assert_result(records_payload_size, res_s3select_response[total_response-3]['Details']['BytesReturned']) + s3select_assert_result(obj_size, result_status['Progress']['Details']['BytesScanned']) + s3select_assert_result(records_payload_size, result_status['Progress']['Details']['BytesReturned']) # stats response payload validation - s3select_assert_result(obj_size, res_s3select_response[total_response-2]['Details']['BytesScanned']) - s3select_assert_result(records_payload_size, res_s3select_response[total_response-2]['Details']['BytesReturned']) + s3select_assert_result(obj_size, result_status['Stats']['Details']['BytesScanned']) + s3select_assert_result(records_payload_size, result_status['Stats']['Details']['BytesReturned']) # end response - s3select_assert_result({}, res_s3select_response[total_response-1]) + s3select_assert_result({}, result_status['End']) @pytest.mark.s3select def test_output_serial_expressions(): @@ -1377,7 +1586,7 @@ def test_output_serial_expressions(): csv_obj_name = get_random_string() bucket_name = get_new_bucket_name() - upload_csv_object(bucket_name,csv_obj_name,csv_obj) + upload_object(bucket_name,csv_obj_name,csv_obj) res_s3select_1 = remove_xml_tags_from_result( run_s3select_output(bucket_name,csv_obj_name,"select _1, _2 from s3object where nullif(_1,_2) is null ;", "ALWAYS") ).replace("\n",",").replace(",","")