adding aggregation tests

This commit is contained in:
gal salomon 2020-03-29 17:23:05 +03:00
parent 74daf86fe5
commit f42872fd53

View file

@ -146,20 +146,21 @@ def remove_xml_tags_from_result(obj):
result += rec + "\n" # remove by split result += rec + "\n" # remove by split
return result return result
def create_list_of_int(column_pos, obj):
    """Collect the integer values found in one column of a CSV string.

    Args:
        column_pos: 1-based position of the column to extract.
        obj: CSV text; records are separated by "\n" and fields by ",".

    Returns:
        A list with ``int(field)`` for every non-empty record that has a
        field at ``column_pos``, in record order. Records with fewer
        fields than ``column_pos`` contribute nothing.

    Raises:
        ValueError: if a selected field cannot be parsed by ``int``.
    """
    list_of_int = []
    for rec in obj.split("\n"):
        # Empty records (e.g. the one after a trailing newline) carry no data.
        if not rec:
            continue
        columns = rec.split(",")
        # column_pos is 1-based; rows that are too short are silently skipped.
        if 1 <= column_pos <= len(columns):
            list_of_int.append(int(columns[column_pos - 1]))
    return list_of_int
def test_count_operation(): def test_count_operation():
csv_obj_name = "csv_star_oper" csv_obj_name = "csv_star_oper"
bucket_name = "test" bucket_name = "test"
@ -170,22 +171,59 @@ def test_count_operation():
assert num_of_rows == int( res ) assert num_of_rows == int( res )
def test_column_sum_min_max():
    """Check s3select min/max/sum aggregates against Python's builtins.

    A random CSV object is uploaded to the "test" bucket; for each
    (query, column, aggregate) case the s3select result is compared with
    the same aggregate computed locally over that column of the CSV text.
    """
    csv_obj = create_random_csv_object(10000, 10)  # presumably 10000 rows x 10 columns
    csv_obj_name = "csv_10000x10"
    bucket_name = "test"
    upload_csv_object(bucket_name, csv_obj_name, csv_obj)

    # (s3select query, 1-based column position, equivalent Python aggregate)
    cases = (
        ("select min(int(_1)) from stdin;", 1, min),
        ("select min(int(_4)) from stdin;", 4, min),
        ("select max(int(_4)) from stdin;", 4, max),
        ("select max(int(_7)) from stdin;", 7, max),
        ("select sum(int(_4)) from stdin;", 4, sum),
        ("select sum(int(_7)) from stdin;", 7, sum),
    )

    for query, column, aggregate in cases:
        raw_result = run_s3select(bucket_name, csv_obj_name, query)
        # Strip the XML wrapping, then drop the commas left in the result text.
        res_s3select = remove_xml_tags_from_result(raw_result).replace(",", "")
        res_target = aggregate(create_list_of_int(column, csv_obj))
        assert int(res_s3select) == int(res_target)
def test_alias(): def test_alias():
# purpose: compare the results of two equivalent queries, one using aliases and the other without.
# this test sets aliases on 3 projections; the third projection uses another projection's alias, and the where clause also uses aliases.
# the test validates that the where-clause and the projections evaluate aliases correctly; bear in mind that each alias has its own cache,
# and that cache needs to be invalidated in time.
csv_obj = create_random_csv_object(10000,10) csv_obj = create_random_csv_object(10000,10)
csv_obj_name = "csv_10x10" csv_obj_name = "csv_10000x10"
bucket_name = "test" bucket_name = "test"
upload_csv_object(bucket_name,csv_obj_name,csv_obj) upload_csv_object(bucket_name,csv_obj_name,csv_obj)
@ -196,4 +234,3 @@ def test_alias():
assert res_s3select_alias == res_s3select_no_alias assert res_s3select_alias == res_s3select_no_alias