16#include <sdsl/suffix_trees.hpp>
45 template <semialphabet alphabet_t, text_layout text_layout_mode_, std::ranges::range text_t>
50 static_assert(std::ranges::bidirectional_range<text_t>,
"The text must model bidirectional_range.");
51 static_assert(std::convertible_to<range_innermost_value_t<text_t>, alphabet_t>,
52 "The alphabet of the text collection must be convertible to the alphabet of the index.");
53 static_assert(range_dimension_v<text_t> == 1,
"The input cannot be a text collection.");
55 if (std::ranges::empty(text))
60 static_assert(std::ranges::bidirectional_range<text_t>,
61 "The text collection must model bidirectional_range.");
62 static_assert(std::ranges::bidirectional_range<std::ranges::range_reference_t<text_t>>,
63 "The elements of the text collection must model bidirectional_range.");
64 static_assert(std::convertible_to<range_innermost_value_t<text_t>, alphabet_t>,
65 "The alphabet of the text collection must be convertible to the alphabet of the index.");
66 static_assert(range_dimension_v<text_t> == 2,
"The input must be a text collection.");
68 if (std::ranges::empty(text))
71 static_assert(alphabet_size<range_innermost_value_t<text_t>> <= 256,
"The alphabet is too big.");
81template <
typename index_t>
84template <
typename index_t>
85class bi_fm_index_cursor;
89template <semialphabet alphabet_t, text_layout text_layout_mode_, detail::sdsl_index sdsl_index_type_>
90class reverse_fm_index;
127 sdsl::csa_wt<sdsl::wt_blcd<sdsl::bit_vector,
128 sdsl::rank_support_v<>,
129 sdsl::select_support_scan<>,
130 sdsl::select_support_scan<0>>,
133 sdsl::sa_order_sa_sampling<>,
134 sdsl::isa_sampling<>,
135 sdsl::plain_byte_alphabet>;
214 template <
typename output_it_t,
typename sequence_t>
217 constexpr size_t sigma = alphabet_size<alphabet_t>;
220 constexpr auto warn_if_rank_out_of_range = [](uint8_t
const rank)
222 if (rank >= max_sigma - 1)
224 "character alphabets the last one/two values are reserved"
225 "(single sequence/collection).");
230 [&warn_if_rank_out_of_range](
auto const & chr)
233 if constexpr (sigma >= max_sigma)
234 warn_if_rank_out_of_range(rank);
236 (
void)warn_if_rank_out_of_range;
261 template <std::ranges::range text_t>
265 detail::fm_index_validator::validate<alphabet_t, text_layout_mode_>(text);
272 sdsl::int_vector<8> tmp_text(std::ranges::distance(text));
277 sdsl::construct_im(
index, tmp_text, 0);
286 template <std::ranges::range text_t>
290 detail::fm_index_validator::validate<alphabet_t, text_layout_mode_>(text);
294 for (
auto && t : text)
295 text_sizes.
push_back(std::ranges::distance(t));
297 size_t const number_of_texts{text_sizes.
size()};
302 if (number_of_texts == text_size)
305 constexpr auto sigma = alphabet_size<alphabet_t>;
310 sdsl::sd_vector_builder builder(text_size, number_of_texts);
311 size_t prefix_sum{0};
313 for (
auto &&
size : text_sizes)
315 builder.set(prefix_sum);
316 prefix_sum +=
size + 1;
324 sdsl::int_vector<8> tmp_text(text_size - (number_of_texts > 1));
326 constexpr uint8_t delimiter = sigma >= 255 ? 255 : sigma + 1;
328 auto copy_join_with = [](
auto output_it,
auto &&
collection)
332 auto const collection_sentinel = std::ranges::end(
collection);
333 if (collection_it == collection_sentinel)
339 for (; collection_it != collection_sentinel; ++collection_it)
341 *output_it = delimiter;
353 if (number_of_texts == 1)
354 tmp_text.back() = delimiter;
363 if (number_of_texts == 1)
366 tmp_text.back() = delimiter;
374 sdsl::construct_im(
index, tmp_text, 0);
392 template <
typename bi_fm_index_t>
395 template <
typename fm_index_t>
398 template <
typename fm_index_t>
452 template <std::ranges::b
idirectional_range text_t>
521 return !(*
this == rhs);
550 template <cereal_archive archive_t>
560 auto sigma = alphabet_size<alphabet_t>;
562 if (sigma != alphabet_size<alphabet_t>)
565 +
" but it is being read into an fm_index with an alphabet of size "
574 + (tmp ?
"text collection" :
"single text")
575 +
" but it is being read into an fm_index expecting a "
586template <std::ranges::range text_t>
614 template <std::ranges::range text_t>
619 auto reverse_text = text | std::views::reverse;
624 auto reverse_text = text |
views::deep{std::views::reverse} | std::views::reverse;
633 template <std::ranges::b
idirectional_range text_t>
The SeqAn Bidirectional FM Index Cursor.
Definition bi_fm_index_cursor.hpp:52
An FM Index specialisation that handles reversing the given text.
Definition fm_index.hpp:611
void construct_(text_t &&text)
Constructs the index given a range. The range cannot be an rvalue (i.e. a temporary object) and has t...
Definition fm_index.hpp:615
reverse_fm_index(text_t &&text)
Constructor that immediately constructs the index given a range. The range cannot be empty.
Definition fm_index.hpp:634
The SeqAn FM Index Cursor.
Definition fm_index_cursor.hpp:84
The SeqAn FM Index.
Definition fm_index.hpp:186
sdsl_index_type_ sdsl_index_type
The type of the underlying SDSL index.
Definition fm_index.hpp:192
fm_index & operator=(fm_index rhs)
When copy/move assigning, also update internal data structures.
Definition fm_index.hpp:429
fm_index()=default
Defaulted.
static constexpr text_layout text_layout_mode
Indicates whether index is built over a collection.
Definition fm_index.hpp:379
void construct(text_t &&text, bool reverse=false)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition fm_index.hpp:288
cursor_type cursor() const noexcept
Returns a seqan3::fm_index_cursor on the index that can be used for searching. Cursor is pointing to...
Definition fm_index.hpp:538
size_type size() const noexcept
Returns the length of the indexed text including sentinel characters.
Definition fm_index.hpp:470
sdsl::rank_support_sd< 1 > text_begin_rs
Rank support for text_begin.
Definition fm_index.hpp:211
sdsl_index_type index
Underlying index from the SDSL.
Definition fm_index.hpp:204
bool operator!=(fm_index const &rhs) const noexcept
Compares two indices.
Definition fm_index.hpp:519
typename sdsl_index_type::alphabet_type::sigma_type sdsl_sigma_type
The type of the alphabet size of the underlying SDSL index.
Definition fm_index.hpp:198
bool empty() const noexcept
Checks whether the index is empty.
Definition fm_index.hpp:486
bool operator==(fm_index const &rhs) const noexcept
Compares two indices.
Definition fm_index.hpp:502
typename sdsl_index_type::alphabet_type::char_type sdsl_char_type
The type of the reduced alphabet type. (The reduced alphabet might be smaller than the original alpha...
Definition fm_index.hpp:196
~fm_index()=default
Defaulted.
void construct(text_t &&text)
Constructs the index given a range. The range cannot be an rvalue (i.e. a temporary object) and has t...
Definition fm_index.hpp:263
alphabet_t alphabet_type
The type of the underlying character of the indexed text.
Definition fm_index.hpp:385
fm_index(fm_index &&rhs)
When move constructing, also update internal data structures.
Definition fm_index.hpp:418
typename sdsl_index_type::size_type size_type
Type for representing positions in the indexed text.
Definition fm_index.hpp:387
static output_it_t copy_sequence_ranks_shifted_by_one(output_it_t output_it, sequence_t &&sequence)
Eagerly convert sequence into ranks, shift by one and copy them into output_it.
Definition fm_index.hpp:215
sdsl::sd_vector text_begin
Bitvector storing begin positions for collections.
Definition fm_index.hpp:207
sdsl::select_support_sd< 1 > text_begin_ss
Select support for text_begin.
Definition fm_index.hpp:209
fm_index(text_t &&text)
Constructor that immediately constructs the index given a range. The range cannot be empty.
Definition fm_index.hpp:453
fm_index(fm_index const &rhs)
When copy constructing, also update internal data structures.
Definition fm_index.hpp:407
A wrapper type around an existing view adaptor that enables "deep view" behaviour for that view.
Definition deep.hpp:101
Provides various transformation traits used by the range module.
Provides the internal representation of a node of the seqan3::fm_index_cursor.
Provides the seqan3::fm_index_cursor for searching in the unidirectional seqan3::fm_index.
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition alphabet/concept.hpp:152
text_layout
The possible text layouts (single, collection) the seqan3::fm_index and seqan3::bi_fm_index can suppo...
Definition search/fm_index/concept.hpp:88
sdsl_wt_index_type default_sdsl_index_type
The default FM Index Configuration.
Definition fm_index.hpp:142
sdsl::csa_wt< sdsl::wt_blcd< sdsl::bit_vector, sdsl::rank_support_v<>, sdsl::select_support_scan<>, sdsl::select_support_scan< 0 > >, 16, 10 '000 '000, sdsl::sa_order_sa_sampling<>, sdsl::isa_sampling<>, sdsl::plain_byte_alphabet > sdsl_wt_index_type
The FM Index Configuration using a Wavelet Tree.
Definition fm_index.hpp:135
@ single
The text is a single range.
Definition search/fm_index/concept.hpp:90
@ collection
The text is a range of ranges.
Definition search/fm_index/concept.hpp:92
The basis for seqan3::alphabet, but requires only rank interface (not char).
The generic concept for a (biological) sequence.
The internal SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
The main SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
SeqAn specific customisations in the standard namespace.
Provides the concept for seqan3::detail::sdsl_index.
Internal representation of the node of an FM index cursor.
Definition detail/fm_index_cursor.hpp:27
Class used to validate the requirements on the input text of the fm_index.
Definition fm_index.hpp:30
static void validate(text_t &&text)
Validates the fm_index template parameters and text.
Definition fm_index.hpp:46
Provides seqan3::views::to_rank.