forked from TrueCloudLab/s3-tests
Merge pull request #472 from galsalomon66/fixes_related_to_new_csv_parser_2
new CSV parser has some impact on engine results
This commit is contained in:
commit
e4953a3b76
1 changed files with 161 additions and 19 deletions
|
@ -15,6 +15,9 @@ from . import (
|
||||||
import logging
|
import logging
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
|
import collections
|
||||||
|
collections.Callable = collections.abc.Callable
|
||||||
|
|
||||||
region_name = ''
|
region_name = ''
|
||||||
|
|
||||||
# recursive function for generating an arithmetical expression
|
# recursive function for generating an arithmetical expression
|
||||||
|
@ -68,10 +71,10 @@ def generate_s3select_expression_projection(bucket_name,obj_name):
|
||||||
res = remove_xml_tags_from_result( run_s3select(bucket_name,obj_name,"select " + e + " from s3object;",) ).replace(",","")
|
res = remove_xml_tags_from_result( run_s3select(bucket_name,obj_name,"select " + e + " from s3object;",) ).replace(",","")
|
||||||
|
|
||||||
# accuracy level
|
# accuracy level
|
||||||
epsilon = float(0.000001)
|
epsilon = float(0.00001)
|
||||||
|
|
||||||
# both results should be close (epsilon)
|
# both results should be close (epsilon)
|
||||||
assert (1 - (float(res.split("\n")[1]) / eval( e )) ) < epsilon
|
assert( abs(float(res.split("\n")[1]) - eval(e)) < epsilon )
|
||||||
|
|
||||||
@attr('s3select')
|
@attr('s3select')
|
||||||
def get_random_string():
|
def get_random_string():
|
||||||
|
@ -749,6 +752,136 @@ def test_truefalselike_expressions():
|
||||||
|
|
||||||
s3select_assert_result( res_s3select_like, res_s3select )
|
s3select_assert_result( res_s3select_like, res_s3select )
|
||||||
|
|
||||||
|
@attr('s3select')
|
||||||
|
def test_nullif_expressions():
|
||||||
|
|
||||||
|
csv_obj = create_random_csv_object(10000,10)
|
||||||
|
|
||||||
|
csv_obj_name = get_random_string()
|
||||||
|
bucket_name = "test"
|
||||||
|
upload_csv_object(bucket_name,csv_obj_name,csv_obj)
|
||||||
|
|
||||||
|
res_s3select_nullif = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from stdin where nullif(_1,_2) is null ;") ).replace("\n","")
|
||||||
|
|
||||||
|
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from stdin where _1 = _2 ;") ).replace("\n","")
|
||||||
|
|
||||||
|
nose.tools.assert_equal( res_s3select_nullif, res_s3select)
|
||||||
|
|
||||||
|
res_s3select_nullif = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from stdin where not nullif(_1,_2) is null ;") ).replace("\n","")
|
||||||
|
|
||||||
|
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from stdin where _1 != _2 ;") ).replace("\n","")
|
||||||
|
|
||||||
|
nose.tools.assert_equal( res_s3select_nullif, res_s3select)
|
||||||
|
|
||||||
|
res_s3select_nullif = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from stdin where nullif(_1,_2) = _1 ;") ).replace("\n","")
|
||||||
|
|
||||||
|
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select count(0) from stdin where _1 != _2 ;") ).replace("\n","")
|
||||||
|
|
||||||
|
nose.tools.assert_equal( res_s3select_nullif, res_s3select)
|
||||||
|
|
||||||
|
@attr('s3select')
|
||||||
|
def test_lowerupper_expressions():
|
||||||
|
|
||||||
|
csv_obj = create_random_csv_object(1,10)
|
||||||
|
|
||||||
|
csv_obj_name = get_random_string()
|
||||||
|
bucket_name = "test"
|
||||||
|
upload_csv_object(bucket_name,csv_obj_name,csv_obj)
|
||||||
|
|
||||||
|
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select lower("AB12cd$$") from stdin ;') ).replace("\n","")
|
||||||
|
|
||||||
|
nose.tools.assert_equal( res_s3select, "ab12cd$$")
|
||||||
|
|
||||||
|
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select upper("ab12CD$$") from stdin ;') ).replace("\n","")
|
||||||
|
|
||||||
|
nose.tools.assert_equal( res_s3select, "AB12CD$$")
|
||||||
|
|
||||||
|
@attr('s3select')
|
||||||
|
def test_in_expressions():
|
||||||
|
|
||||||
|
# purpose of test: the engine correctly processes IN expressions; each query using IN must return the same result as an equivalent query written without IN
|
||||||
|
csv_obj = create_random_csv_object(10000,10)
|
||||||
|
|
||||||
|
csv_obj_name = get_random_string()
|
||||||
|
bucket_name = "test"
|
||||||
|
upload_csv_object(bucket_name,csv_obj_name,csv_obj)
|
||||||
|
|
||||||
|
res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from stdin where int(_1) in(1);')).replace("\n","")
|
||||||
|
|
||||||
|
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from stdin where int(_1) = 1;')).replace("\n","")
|
||||||
|
|
||||||
|
nose.tools.assert_equal( res_s3select_in, res_s3select )
|
||||||
|
|
||||||
|
res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from stdin where int(_1) in(1,0);')).replace("\n","")
|
||||||
|
|
||||||
|
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from stdin where int(_1) = 1 or int(_1) = 0;')).replace("\n","")
|
||||||
|
|
||||||
|
nose.tools.assert_equal( res_s3select_in, res_s3select )
|
||||||
|
|
||||||
|
res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_2) from stdin where int(_2) in(1,0,2);')).replace("\n","")
|
||||||
|
|
||||||
|
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_2) from stdin where int(_2) = 1 or int(_2) = 0 or int(_2) = 2;')).replace("\n","")
|
||||||
|
|
||||||
|
nose.tools.assert_equal( res_s3select_in, res_s3select )
|
||||||
|
|
||||||
|
res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_2) from stdin where int(_2)*2 in(int(_3)*2,int(_4)*3,int(_5)*5);')).replace("\n","")
|
||||||
|
|
||||||
|
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_2) from stdin where int(_2)*2 = int(_3)*2 or int(_2)*2 = int(_4)*3 or int(_2)*2 = int(_5)*5;')).replace("\n","")
|
||||||
|
|
||||||
|
nose.tools.assert_equal( res_s3select_in, res_s3select )
|
||||||
|
|
||||||
|
res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from stdin where character_length(_1) = 2 and substring(_1,2,1) in ("3");')).replace("\n","")
|
||||||
|
|
||||||
|
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from stdin where _1 like "_3";')).replace("\n","")
|
||||||
|
|
||||||
|
nose.tools.assert_equal( res_s3select_in, res_s3select )
|
||||||
|
|
||||||
|
@attr('s3select')
|
||||||
|
def test_like_expressions():
|
||||||
|
|
||||||
|
csv_obj = create_random_csv_object_string(10000,10)
|
||||||
|
|
||||||
|
csv_obj_name = get_random_string()
|
||||||
|
bucket_name = "test"
|
||||||
|
upload_csv_object(bucket_name,csv_obj_name,csv_obj)
|
||||||
|
|
||||||
|
res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from stdin where _1 like "%aeio%";')).replace("\n","")
|
||||||
|
|
||||||
|
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name, 'select count(*) from stdin where substring(_1,11,4) = "aeio" ;')).replace("\n","")
|
||||||
|
|
||||||
|
nose.tools.assert_equal( res_s3select_in, res_s3select )
|
||||||
|
|
||||||
|
res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from stdin where _1 like "cbcd%";')).replace("\n","")
|
||||||
|
|
||||||
|
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name, 'select count(*) from stdin where substring(_1,1,4) = "cbcd";')).replace("\n","")
|
||||||
|
|
||||||
|
nose.tools.assert_equal( res_s3select_in, res_s3select )
|
||||||
|
|
||||||
|
res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from stdin where _3 like "%y[y-z]";')).replace("\n","")
|
||||||
|
|
||||||
|
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name, 'select count(*) from stdin where substring(_3,character_length(_3),1) between "y" and "z" and substring(_3,character_length(_3)-1,1) = "y";')).replace("\n","")
|
||||||
|
|
||||||
|
nose.tools.assert_equal( res_s3select_in, res_s3select )
|
||||||
|
|
||||||
|
res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from stdin where _2 like "%yz";')).replace("\n","")
|
||||||
|
|
||||||
|
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name, 'select count(*) from stdin where substring(_2,character_length(_2),1) = "z" and substring(_2,character_length(_2)-1,1) = "y";')).replace("\n","")
|
||||||
|
|
||||||
|
nose.tools.assert_equal( res_s3select_in, res_s3select )
|
||||||
|
|
||||||
|
res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from stdin where _3 like "c%z";')).replace("\n","")
|
||||||
|
|
||||||
|
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name, 'select count(*) from stdin where substring(_3,character_length(_3),1) = "z" and substring(_3,1,1) = "c";')).replace("\n","")
|
||||||
|
|
||||||
|
nose.tools.assert_equal( res_s3select_in, res_s3select )
|
||||||
|
|
||||||
|
res_s3select_in = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select count(*) from stdin where _2 like "%xy_";')).replace("\n","")
|
||||||
|
|
||||||
|
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name, 'select count(*) from stdin where substring(_2,character_length(_2)-1,1) = "y" and substring(_2,character_length(_2)-2,1) = "x";')).replace("\n","")
|
||||||
|
|
||||||
|
nose.tools.assert_equal( res_s3select_in, res_s3select )
|
||||||
|
|
||||||
|
|
||||||
@attr('s3select')
|
@attr('s3select')
|
||||||
def test_complex_expressions():
|
def test_complex_expressions():
|
||||||
|
|
||||||
|
@ -891,7 +1024,7 @@ def test_csv_parser():
|
||||||
# purpose: test default csv values(, \n " \ ), return value may contain meta-char
|
# purpose: test default csv values(, \n " \ ), return value may contain meta-char
|
||||||
# NOTE: the default meta-chars for s3select are also meta-chars in python, thus for one example a double \ is mandatory
|
# NOTE: the default meta-chars for s3select are also meta-chars in python, thus for one example a double \ is mandatory
|
||||||
|
|
||||||
csv_obj = ',first,,,second,third="c31,c32,c33",forth="1,2,3,4",fifth="my_string=\\"any_value\\" , my_other_string=\\"aaaa,bbb\\" ",' + "\n"
|
csv_obj = r',first,,,second,third="c31,c32,c33",forth="1,2,3,4",fifth=my_string=\"any_value\" \, my_other_string=\"aaaa\,bbb\" ,' + "\n"
|
||||||
csv_obj_name = get_random_string()
|
csv_obj_name = get_random_string()
|
||||||
bucket_name = "test"
|
bucket_name = "test"
|
||||||
|
|
||||||
|
@ -899,15 +1032,15 @@ def test_csv_parser():
|
||||||
|
|
||||||
# return value contains comma{,}
|
# return value contains comma{,}
|
||||||
res_s3select_alias = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select _6 from s3object;") ).replace("\n","")
|
res_s3select_alias = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select _6 from s3object;") ).replace("\n","")
|
||||||
s3select_assert_result( res_s3select_alias, 'third="c31,c32,c33"')
|
s3select_assert_result( res_s3select_alias, 'third=c31,c32,c33')
|
||||||
|
|
||||||
# return value contains comma{,}
|
# return value contains comma{,}
|
||||||
res_s3select_alias = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select _7 from s3object;") ).replace("\n","")
|
res_s3select_alias = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select _7 from s3object;") ).replace("\n","")
|
||||||
s3select_assert_result( res_s3select_alias, 'forth="1,2,3,4"')
|
s3select_assert_result( res_s3select_alias, 'forth=1,2,3,4')
|
||||||
|
|
||||||
# return value contains comma{,} and quote{"}; the escape-rule{\} by-passes the quote{"}, and the escape{\} is removed.
|
# return value contains comma{,} and quote{"}; the escape-rule{\} by-passes the quote{"}, and the escape{\} is removed.
|
||||||
res_s3select_alias = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select _8 from s3object;") ).replace("\n","")
|
res_s3select_alias = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select _8 from s3object;") ).replace("\n","")
|
||||||
s3select_assert_result( res_s3select_alias, 'fifth="my_string="any_value" , my_other_string="aaaa,bbb" "')
|
s3select_assert_result( res_s3select_alias, 'fifth=my_string="any_value" , my_other_string="aaaa,bbb" ')
|
||||||
|
|
||||||
# return NULL as first token
|
# return NULL as first token
|
||||||
res_s3select_alias = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select _1 from s3object;") ).replace("\n","")
|
res_s3select_alias = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select _1 from s3object;") ).replace("\n","")
|
||||||
|
@ -1246,31 +1379,40 @@ def test_output_serial_expressions():
|
||||||
bucket_name = "test"
|
bucket_name = "test"
|
||||||
upload_csv_object(bucket_name,csv_obj_name,csv_obj)
|
upload_csv_object(bucket_name,csv_obj_name,csv_obj)
|
||||||
|
|
||||||
res_s3select_1 = remove_xml_tags_from_result( run_s3select_output(bucket_name,csv_obj_name,"select _1, _2 from s3object where nullif(_1,_2) is null ;", "ALWAYS") ).replace("\n",",")
|
res_s3select_1 = remove_xml_tags_from_result( run_s3select_output(bucket_name,csv_obj_name,"select _1, _2 from s3object where nullif(_1,_2) is null ;", "ALWAYS") ).replace("\n",",").replace(",","")
|
||||||
|
|
||||||
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select _1, _2 from s3object where _1 = _2 ;") ).replace("\n",",")
|
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,"select _1, _2 from s3object where _1 = _2 ;") ).replace("\n",",")
|
||||||
|
|
||||||
res_s3select_list = res_s3select.split(',')
|
res_s3select_list = res_s3select.split(',')
|
||||||
res_s3select_final = (','.join('"' + item + '"' for item in res_s3select_list)).replace('""','') # remove empty result(first,last)
|
|
||||||
|
|
||||||
s3select_assert_result( res_s3select_1, res_s3select_final)
|
res_s3select_list.pop()
|
||||||
|
|
||||||
res_s3select_in = remove_xml_tags_from_result( run_s3select_output(bucket_name,csv_obj_name,'select int(_1) from s3object where (int(_1) in(int(_2)));', "ASNEEDED", '$', '#')).replace("\n","")
|
res_s3select_final = (''.join('"' + item + '"' for item in res_s3select_list))
|
||||||
|
|
||||||
|
s3select_assert_result( '""'+res_s3select_1+'""', res_s3select_final)
|
||||||
|
|
||||||
|
|
||||||
|
res_s3select_in = remove_xml_tags_from_result( run_s3select_output(bucket_name,csv_obj_name,'select int(_1) from s3object where (int(_1) in(int(_2)));', "ASNEEDED", '$', '#')).replace("\n","#") ## TODO why \n appears in output?
|
||||||
|
|
||||||
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from s3object where int(_1) = int(_2);')).replace("\n","#")
|
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from s3object where int(_1) = int(_2);')).replace("\n","#")
|
||||||
res_s3select = res_s3select[1:len(res_s3select)] # remove first redundant
|
|
||||||
res_s3select_final = res_s3select[0:len(res_s3select)-1] # remove last redundant
|
|
||||||
|
|
||||||
s3select_assert_result( res_s3select_in, res_s3select_final )
|
res_s3select_list = res_s3select.split('#')
|
||||||
|
|
||||||
|
res_s3select_list.pop()
|
||||||
|
|
||||||
|
res_s3select_final = (''.join(item + '#' for item in res_s3select_list))
|
||||||
|
|
||||||
|
|
||||||
|
s3select_assert_result(res_s3select_in , res_s3select_final )
|
||||||
|
|
||||||
|
|
||||||
res_s3select_quot = remove_xml_tags_from_result( run_s3select_output(bucket_name,csv_obj_name,'select int(_1) from s3object where (int(_1) in(int(_2)));', "ALWAYS", '$', '#')).replace("\n","")
|
res_s3select_quot = remove_xml_tags_from_result( run_s3select_output(bucket_name,csv_obj_name,'select int(_1) from s3object where (int(_1) in(int(_2)));', "ALWAYS", '$', '#')).replace("\n","")
|
||||||
|
|
||||||
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from s3object where int(_1) = int(_2);')).replace("\n","#")
|
res_s3select = remove_xml_tags_from_result( run_s3select(bucket_name,csv_obj_name,'select int(_1) from s3object where int(_1) = int(_2);')).replace("\n","#")
|
||||||
res_s3select = res_s3select[1:len(res_s3select)] # remove first redundant
|
|
||||||
res_s3select = res_s3select[0:len(res_s3select)-1] # remove last redundant
|
|
||||||
|
|
||||||
res_s3select_list = res_s3select.split('#')
|
res_s3select_list = res_s3select.split('#')
|
||||||
res_s3select_final = ('#'.join('"' + item + '"' for item in res_s3select_list)).replace('""','')
|
|
||||||
|
|
||||||
s3select_assert_result( res_s3select_quot, res_s3select_final )
|
res_s3select_list.pop()
|
||||||
|
|
||||||
|
res_s3select_final = (''.join('"' + item + '"' + '#' for item in res_s3select_list))
|
||||||
|
|
||||||
|
s3select_assert_result( '""#'+res_s3select_quot+'""#', res_s3select_final )
|
||||||
|
|
Loading…
Reference in a new issue