diff --git a/clickhouse/types/type_parser.cpp b/clickhouse/types/type_parser.cpp
index 82492412..c1df828c 100644
--- a/clickhouse/types/type_parser.cpp
+++ b/clickhouse/types/type_parser.cpp
@@ -173,6 +173,7 @@ bool TypeParser::Parse(TypeAst* type) {
                 type_->code = Type::String;
                 break;
             }
+            case Token::QuotedIdentifier:
             case Token::Name:
                 if (!type_->name.empty()) {
                     // A second Name token on the same element means the
@@ -260,6 +261,39 @@ TypeParser::Token TypeParser::NextToken() {
                 }
                 return Token{Token::QuotedString, StringView(cur_++, 1)};
             }
+            case '"':
+            case '`':
+            {
+                const auto quote = *cur_;
+                ++cur_;
+                // Recognised escape forms, where q is the opening quote
+                // character (the other quote character is never special,
+                // e.g. a doubled double-quote inside backticks stays two
+                // literal characters):
+                //   \q – backslash followed by the opening quote character
+                //   \\ – backslash followed by another backslash
+                //   qq – two consecutive opening quote characters
+                // Any other backslash sequence is copied through verbatim.
+                scratch_.clear();
+                for (; cur_ < end_; ++cur_) {
+                    if (*cur_ == '\\' && cur_ + 1 < end_ &&
+                        (*(cur_ + 1) == quote || *(cur_ + 1) == '\\')) {
+                        scratch_ += *(cur_ + 1);
+                        ++cur_;
+                    } else if (*cur_ == quote) {
+                        if (cur_ + 1 < end_ && *(cur_ + 1) == quote) {
+                            scratch_ += quote;
+                            ++cur_;
+                        } else {
+                            ++cur_;
+                            return Token{Token::QuotedIdentifier, StringView{scratch_}};
+                        }
+                    } else {
+                        scratch_ += *cur_;
+                    }
+                }
+                return Token{Token::Invalid, StringView()};
+            }
 
             default: {
                 const char* st = cur_;
diff --git a/clickhouse/types/type_parser.h b/clickhouse/types/type_parser.h
index 9cc29512..fb58ec46 100644
--- a/clickhouse/types/type_parser.h
+++ b/clickhouse/types/type_parser.h
@@ -62,6 +62,7 @@ class TypeParser {
             RPar,
             Comma,
             QuotedString, // string with quotation marks included
+            QuotedIdentifier, // identifier with quotes removed and escapes resolved
             EOS,
         };
 
@@ -84,6 +85,11 @@ class TypeParser {
 
     TypeAst* type_;
     std::stack open_elements_;
+    // Backing storage for QuotedIdentifier token values. NextToken() always
+    // writes the identifier here with the quotes removed and the escape
+    // sequences resolved; the returned StringView points into this string
+    // and is therefore valid only until the next NextToken() call.
+    std::string scratch_;
 };
 
 
diff --git a/clickhouse/types/types.cpp b/clickhouse/types/types.cpp
index e12342c8..ec38b5be 100644
--- a/clickhouse/types/types.cpp
+++ b/clickhouse/types/types.cpp
@@ -473,24 +473,51 @@ LowCardinalityType::LowCardinalityType(TypeRef nested_type) : Type(LowCardinalit
 LowCardinalityType::~LowCardinalityType() {
 }
 
+// Checks if `name` is a valid plain identifier (must not be quoted).
+// The condition for this is a match against `^[a-zA-Z_][0-9a-zA-Z_]*$`
+static bool IsPlainIdentifier(const std::string& name) {
+    if (name.empty()) return false;
+    auto is_alpha_or_under = [](char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; };
+    auto is_alnum_or_under = [&is_alpha_or_under](char c) { return is_alpha_or_under(c) || (c >= '0' && c <= '9'); };
+    if (!is_alpha_or_under(name[0])) return false;
+    for (size_t i = 1; i < name.size(); ++i)
+        if (!is_alnum_or_under(name[i])) return false;
+    return true;
+}
+
+// Appends a field name to `out`. Plain identifiers are appended as-is; any
+// other name is wrapped in backticks, with embedded backticks doubled and
+// embedded backslashes doubled, because a backslash starts an escape
+// sequence inside a quoted identifier.
+static void AppendFieldname(const std::string& name, std::string& out) {
+    if (IsPlainIdentifier(name)) {
+        out += name;
+        return;
+    }
+    out += '`';
+    for (char c : name) {
+        if (c == '`')
+            out += "``";
+        else if (c == '\\')
+            out += "\\\\";
+        else
+            out += c;
+    }
+    out += '`';
+}
+
 std::string TupleType::GetName() const {
     std::string result("Tuple(");
     bool has_complete_names = !item_names_.empty();
 
-    if (!item_types_.empty()) {
-        if (has_complete_names) {
-            result += item_names_[0] + " " + item_types_[0]->GetName();
-        } else {
-            result += item_types_[0]->GetName();
-        }
-    }
-
-    for (size_t i = 1; i < item_types_.size(); ++i) {
+    for (size_t i = 0; i < item_types_.size(); ++i) {
+        if (i > 0)
+            result += ", ";
         if (has_complete_names) {
-            result += ", " + item_names_[i] + " " + item_types_[i]->GetName();
-        } else {
-            result += ", " + item_types_[i]->GetName();
+            AppendFieldname(item_names_[i], result);
+            result += ' ';
         }
+        result += item_types_[i]->GetName();
     }
 
     result += ")";
diff --git a/ut/abnormal_column_names_test.cpp b/ut/abnormal_column_names_test.cpp
index 11868f73..c26cd782 100644
--- a/ut/abnormal_column_names_test.cpp
+++ b/ut/abnormal_column_names_test.cpp
@@ -74,8 +74,20 @@ INSTANTIATE_TEST_SUITE_P(ClientColumnNames, AbnormalColumnNamesClientTest,
             .SetSendRetries(1)
             .SetPingBeforeQuery(true)
             .SetCompressionMethod(CompressionMethod::None),
-        {"select 123,231,113", "select 'ABC','AAA','BBB','CCC'"},
-        {"123,231,113", "'ABC','AAA','BBB','CCC'"},
+        /* queries = */ {
+            "select 123,231,113",
+            "select 'ABC','AAA','BBB','CCC'",
+            "select 'A.B','C.D'",
+            "select 'A`B','C``D'",
+            "select 'A\\`B','C\\`\\`D'"
+        },
+        /* expected column names = */ {
+            "123,231,113",
+            "'ABC','AAA','BBB','CCC'",
+            "'A.B','C.D'",
+            "'A`B','C``D'",
+            "'A`B','C``D'"
+        },
     }
 ));
 
diff --git a/ut/columns_ut.cpp b/ut/columns_ut.cpp
index 3e931132..13a1731c 100644
--- a/ut/columns_ut.cpp
+++ b/ut/columns_ut.cpp
@@ -298,6 +298,15 @@ TEST(ColumnsCase, TupleSlice){
     ASSERT_EQ((*tuple2)[1]->As()->At(0), "3");
 }
 
+TEST(ColumnsCase, TupleWithQuotedFieldNames) {
+    auto col = CreateColumnByType("Tuple(`a.b` Int8, `c.d` String)");
+    ASSERT_NE(col, nullptr);
+    const auto& names = col->AsStrict()->Type()->As()->GetItemNames();
+    ASSERT_EQ(names.size(), 2u);
+    EXPECT_EQ(names[0], "a.b");
+    EXPECT_EQ(names[1], "c.d");
+}
+
 TEST(ColumnsCase, TimeAppend) {
     auto col = std::make_shared();
     col->Append(1);
diff --git a/ut/roundtrip_tests.cpp b/ut/roundtrip_tests.cpp
index 326420d3..9ff4edf3 100644
--- a/ut/roundtrip_tests.cpp
+++ b/ut/roundtrip_tests.cpp
@@ -255,6 +255,65 @@ TEST_P(RoundtripCase, TupleTNullableString) {
     EXPECT_TRUE(CompareRecursive(*col, *result_typed));
 }
 
+TEST_P(RoundtripCase, TupleWithQuotedFieldNames) {
+    auto col_a = std::make_shared(std::vector{1});
+    auto col_b = std::make_shared(std::vector{2});
+    auto col_c = std::make_shared(std::vector{3});
+    auto col = std::make_shared(
+        std::vector({col_a, col_b, col_c}),
+        std::vector{"a.a", "b`b", "c``c"}
+    );
+
+    auto result = RoundtripColumnValues(*client_, col)->AsStrict();
+    EXPECT_TRUE(CompareRecursive(*col->At(0), *result->At(0)));
+    EXPECT_TRUE(CompareRecursive(*col->At(1), *result->At(1)));
+    EXPECT_TRUE(CompareRecursive(*col->At(2), *result->At(2)));
+
+    const auto& names = result->Type()->As()->GetItemNames();
+    ASSERT_EQ(names.size(), 3u);
+    EXPECT_EQ(names[0], "a.a");
+    EXPECT_EQ(names[1], "b`b");
+    EXPECT_EQ(names[2], "c``c");
+}
+
+TEST_P(RoundtripCase, SelectTupleByFieldNames) {
+    auto col_a = std::make_shared(std::vector{1});
+    auto col_b = std::make_shared(std::vector{2});
+    auto col_c = std::make_shared(std::vector{3});
+    auto col = std::make_shared(
+        std::vector({col_a, col_b, col_c}),
+        std::vector{"a.a", "b`b", "c``c"}
+    );
+
+    // skip result, we will do it manually with a separate SELECT statement
+    RoundtripColumnValues(*client_, col)->AsStrict();
+
+    // NOTE: Each backtick must be escaped with either a backslash (written "\\" so the
+    // compiler turns it into "\") or a second backtick, "``". When we create or receive
+    // columns, TupleType::GetName() and the type parser do this for us, but when we write
+    // queries ourselves, the escaping has to be done manually.
+    client_->BeginSelect(
+        "SELECT "
+        " col.`a.a`, "
+        " col.`b``b`, col.`b\\`b`, "
+        " col.`c````c`, col.`c\\`\\`c` "
+        "FROM temporary_roundtrip_table "
+        "ORDER BY id");
+
+    Block last_block;
+    while (auto tmp = client_->NextBlock()) {
+        if (tmp->GetRowCount() > 0) {
+            last_block = *tmp;
+        }
+    }
+
+    EXPECT_TRUE(CompareRecursive(*col->At(0), *last_block.At(0)));
+    EXPECT_TRUE(CompareRecursive(*col->At(1), *last_block.At(1)));
+    EXPECT_TRUE(CompareRecursive(*col->At(1), *last_block.At(2)));
+    EXPECT_TRUE(CompareRecursive(*col->At(2), *last_block.At(3)));
+    EXPECT_TRUE(CompareRecursive(*col->At(2), *last_block.At(4)));
+}
+
 TEST_P(RoundtripCase, Map_TString_TNullableString) {
     using Key = ColumnString;
     using Value = ColumnNullableT;
diff --git a/ut/type_parser_ut.cpp b/ut/type_parser_ut.cpp
index f593de2c..561b1c9d 100644
--- a/ut/type_parser_ut.cpp
+++ b/ut/type_parser_ut.cpp
@@ -133,6 +133,84 @@ TEST(TypeParserCase, ParseNamedTuple) {
     ASSERT_EQ(ast.elements[1].code, Type::String);
 }
 
+TEST(TypeParserCase, ParseNamedTuple_BacktickQuotedFieldNames) {
+    TypeAst ast;
+    ASSERT_TRUE(TypeParser("Tuple(`a.b` Int8, `c.d` String)").Parse(&ast));
+    ASSERT_EQ(ast.meta, TypeAst::Tuple);
+    ASSERT_EQ(ast.elements.size(), 2u);
+
+    ASSERT_EQ(ast.elements[0].element_name, "a.b");
+    ASSERT_EQ(ast.elements[0].name, "Int8");
+    ASSERT_EQ(ast.elements[0].code, Type::Int8);
+
+    ASSERT_EQ(ast.elements[1].element_name, "c.d");
+    ASSERT_EQ(ast.elements[1].name, "String");
+    ASSERT_EQ(ast.elements[1].code, Type::String);
+}
+
+TEST(TypeParserCase, ParseNamedTuple_DoubleQuotedFieldNames) {
+    TypeAst ast;
+    ASSERT_TRUE(TypeParser("Tuple(\"a.b\" Int8, \"c.d\" String)").Parse(&ast));
+    ASSERT_EQ(ast.meta, TypeAst::Tuple);
+    ASSERT_EQ(ast.elements.size(), 2u);
+
+    ASSERT_EQ(ast.elements[0].element_name, "a.b");
+    ASSERT_EQ(ast.elements[0].name, "Int8");
+    ASSERT_EQ(ast.elements[0].code, Type::Int8);
+
+    ASSERT_EQ(ast.elements[1].element_name, "c.d");
+    ASSERT_EQ(ast.elements[1].name, "String");
+    ASSERT_EQ(ast.elements[1].code, Type::String);
+}
+
+TEST(TypeParserCase, ParseNamedTuple_UnterminatedQuote) {
+    TypeAst ast;
+    EXPECT_FALSE(TypeParser("Tuple(`a.b Int8)").Parse(&ast));
+    EXPECT_FALSE(TypeParser("Tuple(a.b` Int8)").Parse(&ast));
+}
+
+TEST(TypeParserCase, ParseNamedTuple_DoubledBacktickEscape) {
+    TypeAst ast;
+    ASSERT_TRUE(TypeParser("Tuple(`a``b` UInt8)").Parse(&ast));
+    ASSERT_EQ(ast.elements[0].element_name, "a`b");
+    ASSERT_EQ(ast.elements[0].code, Type::UInt8);
+}
+
+TEST(TypeParserCase, ParseNamedTuple_BackslashBacktickEscape) {
+    TypeAst ast;
+    ASSERT_TRUE(TypeParser("Tuple(`a\\`b` UInt8)").Parse(&ast));
+    ASSERT_EQ(ast.elements[0].element_name, "a`b");
+    ASSERT_EQ(ast.elements[0].code, Type::UInt8);
+}
+
+TEST(TypeParserCase, ParseNamedTuple_EscapedBackslash) {
+    TypeAst ast;
+    ASSERT_TRUE(TypeParser("Tuple(`a\\\\` UInt8)").Parse(&ast));
+    ASSERT_EQ(ast.elements[0].element_name, "a\\");
+    ASSERT_EQ(ast.elements[0].code, Type::UInt8);
+}
+
+TEST(TypeParserCase, ParseNamedTuple_DoubleQuoteNotEscape) {
+    TypeAst ast;
+    ASSERT_TRUE(TypeParser("Tuple(`a\"\"b` UInt8)").Parse(&ast));
+    ASSERT_EQ(ast.elements[0].element_name, "a\"\"b");
+    ASSERT_EQ(ast.elements[0].code, Type::UInt8);
+}
+
+TEST(TypeParserCase, ParseNamedTuple_DoubledDoubleQuoteEscape) {
+    TypeAst ast;
+    ASSERT_TRUE(TypeParser("Tuple(\"a\"\"b\" UInt8)").Parse(&ast));
+    ASSERT_EQ(ast.elements[0].element_name, "a\"b");
+    ASSERT_EQ(ast.elements[0].code, Type::UInt8);
+}
+
+TEST(TypeParserCase, ParseNamedTuple_BacktickNotEscape) {
+    TypeAst ast;
+    ASSERT_TRUE(TypeParser("Tuple(\"a``b\" UInt8)").Parse(&ast));
+    ASSERT_EQ(ast.elements[0].element_name, "a``b");
+    ASSERT_EQ(ast.elements[0].code, Type::UInt8);
+}
+
 TEST(TypeParserCase, ParseDecimal) {
     TypeAst ast;
     TypeParser("Decimal(12, 5)").Parse(&ast);