Merge pull request #587 from ceph/fix_cherry_pick_tracker_65651

Fix cherry pick tracker 65651
This commit is contained in:
Casey Bodley 2024-09-12 17:04:35 -04:00 committed by GitHub
commit 78f4f3b5f4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -7,11 +7,14 @@ from botocore.exceptions import ClientError
from botocore.exceptions import EventStreamError
import uuid
import warnings
import traceback
from . import (
configfile,
setup_teardown,
get_client
get_client,
get_new_bucket_name
)
import logging
@ -88,19 +91,20 @@ def test_generate_where_clause():
# create small csv file for testing the random expressions
single_line_csv = create_random_csv_object(1,1)
bucket_name = "test"
bucket_name = get_new_bucket_name()
obj_name = get_random_string() #"single_line_csv.csv"
upload_object(bucket_name,obj_name,single_line_csv)
for _ in range(100):
generate_s3select_where_clause(bucket_name,obj_name)
@pytest.mark.s3select
def test_generate_projection():
# create small csv file for testing the random expressions
single_line_csv = create_random_csv_object(1,1)
bucket_name = "test"
bucket_name = get_new_bucket_name()
obj_name = get_random_string() #"single_line_csv.csv"
upload_object(bucket_name,obj_name,single_line_csv)
@ -111,12 +115,26 @@ def s3select_assert_result(a,b):
if type(a) == str:
a_strip = a.strip()
b_strip = b.strip()
if a=="" and b=="":
warnings.warn(UserWarning("{}".format("both results are empty, it may indicates a wrong input, please check the test input")))
## emit a second warning naming the caller (file, line, function) whose query produced the empty result.
stack = traceback.extract_stack(limit=2)
formatted_stack = traceback.format_list(stack)[0]
warnings.warn(UserWarning("{}".format(formatted_stack)))
return True
assert a_strip != ""
assert b_strip != ""
else:
if a=="" and b=="":
warnings.warn(UserWarning("{}".format("both results are empty, it may indicates a wrong input, please check the test input")))
## emit a second warning naming the caller (file, line, function) whose query produced the empty result.
stack = traceback.extract_stack(limit=2)
formatted_stack = traceback.format_list(stack)[0]
warnings.warn(UserWarning("{}".format(formatted_stack)))
return True
assert a != ""
assert b != ""
assert a == b
assert True
def create_csv_object_for_datetime(rows,columns):
result = ""
@ -400,7 +418,7 @@ def create_list_of_int(column_pos,obj,field_split=",",row_split="\n"):
@pytest.mark.s3select
def test_count_operation():
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
num_of_rows = 1234
obj_to_load = create_random_csv_object(num_of_rows,10)
upload_object(bucket_name,csv_obj_name,obj_to_load)
@ -411,7 +429,8 @@ def test_count_operation():
@pytest.mark.s3select
def test_count_json_operation():
json_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
num_of_rows = 1
obj_to_load = create_random_json_object(num_of_rows,10)
upload_object(bucket_name,json_obj_name,obj_to_load)
@ -433,7 +452,8 @@ def test_json_column_sum_min_max():
json_obj = csv_to_json(csv_obj);
json_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,json_obj_name,json_obj)
json_obj_name_2 = get_random_string()
@ -499,7 +519,8 @@ def test_json_nullif_expressions():
json_obj = create_random_json_object(10000,10)
json_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,json_obj_name,json_obj)
res_s3select_nullif = remove_xml_tags_from_result( run_s3select_json(bucket_name,json_obj_name,"select count(0) from s3object[*].root where nullif(_1.c1,_1.c2) is null ;") ).replace("\n","")
@ -538,7 +559,8 @@ def test_column_sum_min_max():
csv_obj = create_random_csv_object(10000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
csv_obj_name_2 = get_random_string()
@ -604,7 +626,8 @@ def test_nullif_expressions():
csv_obj = create_random_csv_object(10000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select_nullif = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from s3object where nullif(_1,_2) is null ;") ).replace("\n","")
@ -659,7 +682,8 @@ def test_nulliftrue_expressions():
csv_obj = create_random_csv_object(10000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select_nullif = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from s3object where (nullif(_1,_2) is null) = true ;") ).replace("\n","")
@ -686,7 +710,8 @@ def test_is_not_null_expressions():
csv_obj = create_random_csv_object(10000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select_null = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(*) from s3object where nullif(_1,_2) is not null ;") ).replace("\n","")
@ -707,7 +732,8 @@ def test_lowerupper_expressions():
csv_obj = create_random_csv_object(1,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select lower("AB12cd$$") from s3object ;') ).replace("\n","")
@ -725,7 +751,8 @@ def test_in_expressions():
csv_obj = create_random_csv_object(10000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from s3object where int(_1) in(1);')).replace("\n","")
@ -794,7 +821,11 @@ def test_true_false_in_expressions():
csv_obj = create_random_csv_object(10000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
## 1,2 must exist in first/second column (to avoid empty results)
csv_obj = csv_obj + "1,2,,,,,,,,,,\n"
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from s3object where (int(_1) in(1)) = true;')).replace("\n","")
@ -839,7 +870,8 @@ def test_like_expressions():
csv_obj = create_random_csv_object_string(1000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select_like = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where _1 like "%aeio%";')).replace("\n","")
@ -926,7 +958,8 @@ def test_truefalselike_expressions():
csv_obj = create_random_csv_object_string(1000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select_like = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where (_1 like "%aeio%") = true;')).replace("\n","")
@ -971,7 +1004,8 @@ def test_nullif_expressions():
csv_obj = create_random_csv_object(10000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select_nullif = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from stdin where nullif(_1,_2) is null ;") ).replace("\n","")
@ -998,7 +1032,8 @@ def test_lowerupper_expressions():
csv_obj = create_random_csv_object(1,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select lower("AB12cd$$") from stdin ;') ).replace("\n","")
@ -1016,7 +1051,8 @@ def test_in_expressions():
csv_obj = create_random_csv_object(10000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from stdin where int(_1) in(1);')).replace("\n","")
@ -1055,7 +1091,8 @@ def test_like_expressions():
csv_obj = create_random_csv_object_string(10000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from stdin where _1 like "%aeio%";')).replace("\n","")
@ -1102,7 +1139,8 @@ def test_complex_expressions():
csv_obj = create_random_csv_object(10000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select min(int(_1)),max(int(_2)),min(int(_3))+1 from s3object;")).replace("\n","")
@ -1138,7 +1176,8 @@ def test_alias():
csv_obj = create_random_csv_object(10000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select_alias = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select int(_1) as a1, int(_2) as a2 , (a1+a2) as a3 from s3object where a3>100 and a3<300;") ).replace(",","")
@ -1157,7 +1196,8 @@ def test_alias_cyclic_refernce():
csv_obj = create_random_csv_object(number_of_rows,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select_alias = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select int(_1) as a1,int(_2) as a2, a1+a4 as a3, a5+a1 as a4, int(_3)+a3 as a5 from s3object;") )
@ -1175,7 +1215,7 @@ def test_datetime():
csv_obj = create_csv_object_for_datetime(10000,1)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
@ -1206,7 +1246,7 @@ def test_true_false_datetime():
csv_obj = create_csv_object_for_datetime(10000,1)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
@ -1239,7 +1279,7 @@ def test_csv_parser():
csv_obj = r',first,,,second,third="c31,c32,c33",forth="1,2,3,4",fifth=my_string=\"any_value\" \, my_other_string=\"aaaa\,bbb\" ,' + "\n"
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
@ -1280,7 +1320,7 @@ def test_csv_definition():
csv_obj = create_random_csv_object(number_of_rows,10,"|","\t")
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
@ -1310,7 +1350,7 @@ def test_schema_definition():
csv_obj = create_random_csv_object(number_of_rows,10,csv_schema="c1,c2,c3,c4,c5,c6,c7,c8,c9,c10")
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
@ -1341,7 +1381,8 @@ def test_when_then_else_expressions():
csv_obj = create_random_csv_object(10000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select case when cast(_1 as int)>100 and cast(_1 as int)<200 then "(100-200)" when cast(_1 as int)>200 and cast(_1 as int)<300 then "(200-300)" else "NONE" end from s3object;') ).replace("\n","")
@ -1370,7 +1411,8 @@ def test_coalesce_expressions():
csv_obj = create_random_csv_object(10000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where char_length(_3)>2 and char_length(_4)>2 and cast(substring(_3,1,2) as int) = cast(substring(_4,1,2) as int);') ).replace("\n","")
@ -1392,7 +1434,8 @@ def test_cast_expressions():
csv_obj = create_random_csv_object(10000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where cast(_3 as int)>999;') ).replace("\n","")
@ -1417,7 +1460,7 @@ def test_version():
csv_obj = create_random_csv_object(number_of_rows,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
@ -1431,7 +1474,8 @@ def test_trim_expressions():
csv_obj = create_random_csv_object_trim(10000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select_trim = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where trim(_1) = "aeiou";')).replace("\n","")
@ -1470,7 +1514,8 @@ def test_truefalse_trim_expressions():
csv_obj = create_random_csv_object_trim(10000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select_trim = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where trim(_1) = "aeiou" = true;')).replace("\n","")
@ -1509,7 +1554,8 @@ def test_escape_expressions():
csv_obj = create_random_csv_object_escape(10000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select_escape = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where _1 like "%_ar" escape "%";')).replace("\n","")
@ -1530,7 +1576,8 @@ def test_case_value_expressions():
csv_obj = create_random_csv_object(10000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select_case = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select case cast(_1 as int) when cast(_2 as int) then "case_1_1" else "case_2_2" end from s3object;')).replace("\n","")
@ -1545,7 +1592,8 @@ def test_bool_cast_expressions():
csv_obj = create_random_csv_object(10000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select_cast = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from s3object where cast(int(_1) as bool) = true ;')).replace("\n","")
@ -1560,7 +1608,8 @@ def test_progress_expressions():
csv_obj = create_random_csv_object(1000000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
obj_size = len(csv_obj.encode('utf-8'))
@ -1593,7 +1642,8 @@ def test_output_serial_expressions():
csv_obj = create_random_csv_object(10000,10)
csv_obj_name = get_random_string()
bucket_name = "test"
bucket_name = get_new_bucket_name()
upload_object(bucket_name,csv_obj_name,csv_obj)
res_s3select_1 = remove_xml_tags_from_result( run_s3select_output(bucket_name,csv_obj_name,"select _1, _2 from s3object where nullif(_1,_2) is null ;", "ALWAYS") ).replace("\n",",").replace(",","")