-
Notifications
You must be signed in to change notification settings - Fork 0
Adds initial scaffolding for the split interface #2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
e6632f4
b25dc17
1d96659
6a286de
557e286
1475000
ed47b98
0bfcecf
e96c545
bb60de3
4223276
236502b
0d0cdee
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
|
||
| #include <beman/str_split/config.hpp> | ||
| #include <beman/str_split/str_split.hpp> | ||
|
|
||
| #include <iostream> | ||
| #include <string_view> | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You can look at other repos for reference, but ultimately we add an `#ifdef` here for the modules version that conditionally does `import beman.str_split;` and `import std;` based on the macro. |
||
| using namespace std::literals::string_view_literals; | ||
|
|
||
| using ::beman::str_split::split_by_ascii_whitespace; | ||
| using ::beman::str_split::str_split_to; | ||
|
|
||
| int main() { | ||
| constexpr std::string_view text = "The quick brown fox jumps over the lazy dog"; | ||
|
|
||
| const std::vector<std::string_view> parts = str_split_to(text, split_by_ascii_whitespace()); | ||
| for (std::string_view part : parts) { | ||
| std::cout << part << std::endl; | ||
| } | ||
|
|
||
| return 0; | ||
| } | ||
This file was deleted.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,137 @@ | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
|
||
| #ifndef BEMAN_STR_SPLIT_STR_SPLIT_TO_HPP | ||
| #define BEMAN_STR_SPLIT_STR_SPLIT_TO_HPP | ||
|
|
||
| #include <beman/str_split/config.hpp> | ||
|
|
||
| #if BEMAN_STR_SPLIT_USE_MODULES() && !defined(BEMAN_STR_SPLIT_INCLUDED_FROM_INTERFACE_UNIT) | ||
|
|
||
| import beman.str_split; | ||
|
|
||
| #else | ||
|
|
||
| #if !BEMAN_STR_SPLIT_USE_MODULES() | ||
|
|
||
#include <concepts>
#include <iterator>
#include <ranges>
#include <string>
#include <string_view>
#include <type_traits>
#include <vector>
|
|
||
| #endif // !BEMAN_STR_SPLIT_USE_MODULES() | ||
|
|
||
| namespace beman::str_split { | ||
|
|
||
| //------------------------------------------------------------------------------ | ||
| // Concepts: | ||
| //------------------------------------------------------------------------------ | ||
|
|
||
| // TODO(aryann): Should the concepts be placed in a private namespace? | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These would be what we generally call |
||
|
|
||
| // A range of chars. | ||
| template <typename T> | ||
| concept char_range = std::ranges::input_range<T> && std::same_as<std::ranges::range_value_t<T>, char>; | ||
|
|
||
| // A type that cannot be converted to `std::string_view`. | ||
| template <typename T> | ||
| concept not_string_view_convertible = !std::convertible_to<T&&, std::string_view>; | ||
|
|
||
| template <typename T, typename Self> | ||
| concept different_from = !std::same_as<std::remove_cvref_t<T>, Self>; | ||
|
|
||
| //------------------------------------------------------------------------------ | ||
| // Patterns: | ||
| //------------------------------------------------------------------------------ | ||
|
|
||
| // Splits by a substring. | ||
| struct split_by { | ||
| public: | ||
| // Constructor for anything that can be converted to a `std::string_view`. | ||
| constexpr explicit split_by(std::string_view delimiter) : delimiter_(delimiter) {} | ||
|
|
||
| // Constructor for range of characters that are not `std::string_view` convertible. | ||
| template <std::ranges::input_range Range> | ||
| requires( | ||
| // Ensures the range's value type is `char`. Notably, this rejects ranges of other values types such as | ||
| // `int` and `unsigned char`. This requirement prevents narrowing conversions. | ||
| char_range<Range> && | ||
|
|
||
| // Ensures this constructor does not compete with the `std::string_view` overload. | ||
| not_string_view_convertible<Range> && | ||
|
|
||
| // Ensures this constructor does not hijack copy and move construction which would fail to compile with a | ||
| // difficult-to-read wall of errors. | ||
| different_from<Range, split_by>) | ||
| constexpr explicit split_by(Range&& range) : delimiter_(std::ranges::begin(range), std::ranges::end(range)) {} | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we could constrain this with: |
||
|
|
||
| // TODO(aryann): Here and below, implement a `find` member function that accepts the current "haystack" string and | ||
| // returns the position of the first match. We may also need to control the visibility of such a function. | ||
|
|
||
| private: | ||
| const std::string delimiter_; | ||
| }; | ||
|
|
||
| // Splits by the first matching character in a given character sequence. | ||
| struct split_by_first_of { | ||
| private: | ||
| const std::string chars_; | ||
| }; | ||
|
|
||
// Pattern that splits by a single delimiter character.
struct split_by_char {
  public:
    // Explicit so that a bare `char` never converts to a pattern implicitly.
    constexpr explicit split_by_char(char delimiter) : delimiter_{delimiter} {}

  private:
    const char delimiter_;
};
|
|
||
// Stateless tag type naming the ASCII-whitespace pattern. It carries no data.
struct split_by_ascii_whitespace {
};
|
|
||
| //------------------------------------------------------------------------------ | ||
| // Split functions: | ||
| //------------------------------------------------------------------------------ | ||
|
|
||
| // TODO(aryann): Consider an alternative approach where the split type is determined by the function name: | ||
| // | ||
| // * split(std::string_view): Equivalent to | ||
| // str_split(std::string_view, split_by_ascii_whitespace). | ||
| // | ||
| // * split(std::string_view, Range&&): Equivalent to | ||
| // str_split(std::string_view, split_by). | ||
| // | ||
| // * split_by_first_of(std::string_view, Range&&): Equivalent to | ||
| // str_split(std::string_view, split_by_first_of). | ||
| // | ||
|
|
||
// Output-iterator overload. Currently a placeholder: `text` and `delimiter` are not inspected, nothing is
// written through `dest`, and `dest` is handed back unchanged.
template <class OutputIt, class CharT, class Traits, class Delimiter>
auto str_split_to([[maybe_unused]] std::basic_string_view<CharT, Traits> text,
                  [[maybe_unused]] Delimiter&& delimiter,
                  OutputIt dest) -> OutputIt {
    return dest;
}
|
|
||
// Splits `text` into a caller-chosen container type, e.g. `str_split_to<std::deque<std::string>>(...)`.
//
// The work is delegated to the output-iterator overload through an insert iterator on a local container,
// which is then returned. Handing the container itself to that overload would pass it by value, so
// anything written there would land in a copy. `Container` must be default-constructible and support
// `insert(position, value)`.
template <class Container, class CharT, class Traits, class Delimiter>
auto str_split_to(std::basic_string_view<CharT, Traits> text, Delimiter&& delimiter) -> Container {
    Container container;
    str_split_to(text, delimiter, std::inserter(container, container.end()));
    return container;
}
|
|
||
// Splits `text` into `Container<std::basic_string_view<CharT, Traits>>`, where only the container template
// is named by the caller, e.g. `str_split_to<std::deque>(text, delimiter)`.
//
// Forwards to the overload that takes the full container type. An unqualified two-argument call could
// only reach the `std::vector` overload, whose result does not convert to other container types.
template <template <class...> class Container, class CharT, class Traits, class Delimiter>
auto str_split_to(std::basic_string_view<CharT, Traits> text, Delimiter&& delimiter)
    -> Container<std::basic_string_view<CharT, Traits>> {
    using result_type = Container<std::basic_string_view<CharT, Traits>>;
    return str_split_to<result_type>(text, delimiter);
}
|
|
||
// Default overload: the result type is a `std::vector` of views with the same character type and traits
// as `text`. Currently a placeholder that ignores both arguments and returns an empty vector.
template <class CharT, class Traits, class Delimiter>
auto str_split_to([[maybe_unused]] std::basic_string_view<CharT, Traits> text,
                  [[maybe_unused]] Delimiter&& delimiter)
    -> std::vector<std::basic_string_view<CharT, Traits>> {
    return {};
}
|
|
||
| // TODO(aryann): Add support for max splits. | ||
|
|
||
| } // namespace beman::str_split | ||
|
|
||
| #endif // BEMAN_STR_SPLIT_USE_MODULES() && | ||
| // !defined(BEMAN_STR_SPLIT_INCLUDED_FROM_INTERFACE_UNIT) | ||
|
|
||
| #endif // BEMAN_STR_SPLIT_STR_SPLIT_TO_HPP | ||
This file was deleted.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,97 @@ | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
|
||
| #include <array> | ||
| #include <deque> | ||
| #include <string_view> | ||
| #include <type_traits> | ||
| #include <vector> | ||
|
|
||
| #include <beman/str_split/config.hpp> | ||
| #include <beman/str_split/str_split_to.hpp> | ||
| #include <gmock/gmock.h> | ||
| #include <gtest/gtest.h> | ||
|
|
||
| namespace { | ||
|
|
||
| using namespace std::literals::string_view_literals; | ||
|
|
||
| using ::beman::str_split::split_by; | ||
| using ::beman::str_split::split_by_ascii_whitespace; | ||
| using ::beman::str_split::split_by_char; | ||
| using ::beman::str_split::str_split_to; | ||
| using ::testing::ElementsAre; | ||
|
|
||
| TEST(Delimiter, SplitBy) { | ||
| // `std:string-view`-convertible inputs: | ||
| EXPECT_THAT(str_split_to("my string"sv, split_by(" ")), ElementsAre()); | ||
| EXPECT_THAT(str_split_to("my string"sv, split_by("string")), ElementsAre()); | ||
| EXPECT_THAT(str_split_to("my string"sv, split_by(std::string("string"))), ElementsAre()); | ||
| EXPECT_THAT(str_split_to("my string"sv, split_by(std::string_view("string"))), ElementsAre()); | ||
|
|
||
| // Ranges: | ||
| EXPECT_THAT(str_split_to("my string"sv, split_by(std::vector<char>{'a', 'b', 'c'})), ElementsAre()); | ||
| EXPECT_THAT(str_split_to("my string"sv, split_by(std::array{'a', 'b', 'c'})), ElementsAre()); | ||
| } | ||
|
|
||
| TEST(Delimiter, SplitByChar) { | ||
| EXPECT_THAT(str_split_to("my string"sv, split_by_char(' ')), ElementsAre()); | ||
| EXPECT_THAT(str_split_to("my string"sv, split_by_char('s')), ElementsAre()); | ||
| } | ||
|
|
||
| TEST(Delimiter, SplitByAsciiWhitespace) { | ||
| EXPECT_THAT(str_split_to(""sv, split_by_ascii_whitespace()), ElementsAre()); | ||
| EXPECT_THAT(str_split_to("my string"sv, split_by_ascii_whitespace()), ElementsAre()); | ||
| } | ||
|
|
||
| TEST(StrSplitTo, StringViewVector) { | ||
| static_assert(std::is_same_v<decltype(str_split_to("my string"sv, split_by_ascii_whitespace())), | ||
| std::vector<std::string_view>>); | ||
|
|
||
| EXPECT_THAT(str_split_to("my string"sv, split_by_ascii_whitespace()), ElementsAre()); | ||
| } | ||
|
|
||
| TEST(StrSplitTo, StringViewContainer) { | ||
| static_assert(std::is_same_v<decltype(str_split_to<std::vector<std::string_view>>("my string"sv, | ||
| split_by_ascii_whitespace())), | ||
| std::vector<std::string_view>>); | ||
| static_assert(std::is_same_v<decltype(str_split_to<std::deque<std::string_view>>("my string"sv, | ||
| split_by_ascii_whitespace())), | ||
| std::deque<std::string_view>>); | ||
|
|
||
| EXPECT_THAT(str_split_to<std::vector<std::string_view>>("my string"sv, split_by_ascii_whitespace()), | ||
| ElementsAre()); | ||
| EXPECT_THAT(str_split_to<std::deque<std::string_view>>("my string"sv, split_by_ascii_whitespace()), ElementsAre()); | ||
| } | ||
|
|
||
| TEST(StrSplitTo, StringContainer) { | ||
| static_assert( | ||
| std::is_same_v<decltype(str_split_to<std::vector<std::string>>("my string"sv, split_by_ascii_whitespace())), | ||
| std::vector<std::string>>); | ||
| static_assert( | ||
| std::is_same_v<decltype(str_split_to<std::deque<std::string>>("my string"sv, split_by_ascii_whitespace())), | ||
| std::deque<std::string>>); | ||
|
|
||
| EXPECT_THAT(str_split_to<std::vector<std::string>>("my string"sv, split_by_ascii_whitespace()), ElementsAre()); | ||
| EXPECT_THAT(str_split_to<std::deque<std::string>>("my string"sv, split_by_ascii_whitespace()), ElementsAre()); | ||
| } | ||
|
|
||
| TEST(StrSplitTo, CharTypes) { | ||
| static_assert(std::is_same_v<decltype(str_split_to(std::wstring_view(L"my string"), split_by_ascii_whitespace())), | ||
| std::vector<std::wstring_view>>); | ||
| static_assert( | ||
| std::is_same_v<decltype(str_split_to(std::u8string_view(u8"my string"), split_by_ascii_whitespace())), | ||
| std::vector<std::u8string_view>>); | ||
| static_assert( | ||
| std::is_same_v<decltype(str_split_to(std::u16string_view(u"my string"), split_by_ascii_whitespace())), | ||
| std::vector<std::u16string_view>>); | ||
| static_assert( | ||
| std::is_same_v<decltype(str_split_to(std::u32string_view(U"my string"), split_by_ascii_whitespace())), | ||
| std::vector<std::u32string_view>>); | ||
|
|
||
| EXPECT_THAT(str_split_to(std::wstring_view(L"my string"), split_by_ascii_whitespace()), ElementsAre()); | ||
| EXPECT_THAT(str_split_to(std::u8string_view(u8"my string"), split_by_ascii_whitespace()), ElementsAre()); | ||
| EXPECT_THAT(str_split_to(std::u16string_view(u"my string"), split_by_ascii_whitespace()), ElementsAre()); | ||
| EXPECT_THAT(str_split_to(std::u32string_view(U"my string"), split_by_ascii_whitespace()), ElementsAre()); | ||
| } | ||
|
|
||
| } // namespace |
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any config headers should be included automatically by `str_split.hpp`, rendering this include unnecessary.