SeqAn3 3.4.0-rc.1
The Modern C++ library for sequence analysis.
|
The SeqAn FM Index Cursor. More...
#include <seqan3/search/fm_index/fm_index_cursor.hpp>
Public Member Functions | |
Constructors, destructor and assignment | |
fm_index_cursor () noexcept=default | |
Default constructor. Accessing member functions on a default constructed object is undefined behavior. | |
fm_index_cursor (fm_index_cursor const &) noexcept=default | |
Defaulted. | |
fm_index_cursor & | operator= (fm_index_cursor const &) noexcept=default |
Defaulted. | |
fm_index_cursor (fm_index_cursor &&) noexcept=default | |
Defaulted. | |
fm_index_cursor & | operator= (fm_index_cursor &&) noexcept=default |
Defaulted. | |
~fm_index_cursor ()=default | |
Defaulted. | |
fm_index_cursor (index_t const &_index) noexcept | |
Construct from given index. | |
bool | operator== (fm_index_cursor const &rhs) const noexcept |
Compares two cursors. | |
bool | operator!= (fm_index_cursor const &rhs) const noexcept |
Compares two cursors. | |
bool | extend_right () noexcept |
Tries to extend the query by the smallest possible character to the right such that the query is found in the text. Goes down the leftmost (i.e. lexicographically smallest) edge. . | |
template<typename char_t > requires std::convertible_to<char_t, index_alphabet_type> | |
bool | extend_right (char_t const c) noexcept |
Tries to extend the query by the character c to the right. | |
template<typename char_type > requires detail::is_char_adaptation_v<char_type> | |
bool | extend_right (char_type const *cstring) noexcept |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. | |
template<std::ranges::range seq_t> | |
bool | extend_right (seq_t &&seq) noexcept |
Tries to extend the query by seq to the right. | |
bool | cycle_back () noexcept |
Tries to replace the rightmost character of the query by the next lexicographically larger character such that the query is found in the text. Moves the cursor to the right sibling of the current suffix tree node. It would be equivalent to going up an edge and going down that edge with the smallest character that is larger than the previous searched character. Calling cycle_back() on an cursor pointing to the root node is undefined behaviour! . | |
size_type | last_rank () const noexcept |
Outputs the rightmost rank. | |
seqan3::suffix_array_interval | suffix_array_interval () const noexcept |
Returns the half-open suffix array interval. | |
size_type | query_length () const noexcept |
Returns the length of the searched query. Returns the depth of the cursor node in the implicit suffix tree. . | |
template<std::ranges::range text_t> requires (index_t::text_layout_mode == text_layout::single) | |
auto | path_label (text_t &&text) const noexcept |
Returns the searched query. | |
template<std::ranges::range text_t> requires (index_t::text_layout_mode == text_layout::collection) | |
auto | path_label (text_t &&text) const noexcept |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. | |
size_type | count () const noexcept |
Counts the number of occurrences of the searched query in the text. | |
locate_result_type | locate () const |
Locates the occurrences of the searched query in the text. | |
locate_result_type | locate () const |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. | |
auto | lazy_locate () const |
Locates the occurrences of the searched query in the text on demand, i.e. a std::ranges::view is returned and every position is located once it is accessed. | |
auto | lazy_locate () const |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. | |
template<cereal_archive archive_t> | |
void | serialize (archive_t &archive) |
Serialisation support function. | |
Private Member Functions | |
bool | backward_search (sdsl_index_type const &csa, sdsl_char_type const c, size_type &l, size_type &r) const noexcept |
Optimized backward search without alphabet mapping. | |
size_type | offset () const noexcept |
Helper function to recompute text positions since the indexed text is reversed. | |
Private Attributes | |
index_type const * | index {nullptr} |
Underlying FM index. | |
node_type | node {} |
Underlying index from the SDSL. | |
size_type | parent_lb {} |
Left suffix array interval of the parent node. Needed for cycle_back(). | |
size_type | parent_rb {} |
Right suffix array interval of the parent node. Needed for cycle_back(). | |
sdsl_sigma_type | sigma {} |
Alphabet size of the index without delimiters. | |
Friends | |
template<typename _index_t > | |
class | bi_fm_index_cursor |
Member types | |
using | index_type = index_t |
Type of the index. | |
using | size_type = typename index_type::size_type |
Type for representing positions in the indexed text. | |
using | node_type = detail::fm_index_cursor_node< index_t > |
Type of the representation of a suffix tree node. | |
using | sdsl_char_type = typename index_type::sdsl_char_type |
Type of the representation of characters in the underlying SDSL index. | |
using | sdsl_index_type = typename index_t::sdsl_index_type |
Type of the SDSL index. | |
using | sdsl_sigma_type = typename index_type::sdsl_sigma_type |
Type of the alphabet size in the underlying SDSL index. | |
using | index_alphabet_type = typename index_t::alphabet_type |
Alphabet type of the index. | |
using | locate_result_value_type = std::pair< size_type, size_type > |
The result value type when calling locate, a pair of reference id and reference position. | |
using | locate_result_type = std::vector< locate_result_value_type > |
The result vector type when calling locate. | |
The SeqAn FM Index Cursor.
index_t | The type of the underlying index. This is normally seqan3::fm_index. |
The cursor's interface provides searching a string from left to right in the indexed text. All methods modifying the cursor (e.g. extending by a character with extend_right()) return a bool
value whether the operation was successful or not. In case of an unsuccessful operation the cursor remains unmodified, i.e. a cursor can never be in an invalid state except default constructed cursors that are always invalid.
The behaviour is equivalent to a suffix tree with the space and time efficiency of the underlying pure FM index. The cursor traverses the implicit suffix tree beginning at the root node. The implicit suffix tree is not compacted, i.e. going down an edge using extend_right(char) will increase the query by only one character.
The asymptotic running times for using the cursor depend on the SDSL index configuration. To determine the exact running times, you have to additionally look up the running times of the used traits (configuration).
|
defaultnoexcept |
Default constructor. Accessing member functions on a default constructed object is undefined behavior.
Defaulted.
|
inlinenoexcept |
Counts the number of occurrences of the searched query in the text.
Constant.
No-throw guarantee.
|
inlinenoexcept |
Tries to replace the rightmost character of the query by the next lexicographically larger character such that the query is found in the text. Moves the cursor to the right sibling of the current suffix tree node. It would be equivalent to going up an edge and going down that edge with the smallest character that is larger than the previous searched character. Calling cycle_back() on an cursor pointing to the root node is undefined behaviour! .
true
if there exists a query in the text where the rightmost character of the query is lexicographically larger than the current rightmost character of the query.\(O(\Sigma) * O(T_{BACKWARD\_SEARCH})\)
It scans linearly over the alphabet starting from the rightmost character until it finds the query with a larger rightmost character.
No-throw guarantee.
|
inlinenoexcept |
Tries to extend the query by the smallest possible character to the right such that the query is found in the text. Goes down the leftmost (i.e. lexicographically smallest) edge. .
true
if the cursor could extend the query successfully.\(O(\Sigma) * O(T_{BACKWARD\_SEARCH})\)
It scans linearly over the alphabet until it finds the smallest character that is represented by an edge.
No-throw guarantee.
|
inlinenoexcept |
Tries to extend the query by the character c
to the right.
char_t | Type of the character needs to be convertible to the character type char_type of the index. |
[in] | c | Character to extend the query with to the right. |
true
if the cursor could extend the query successfully.\(O(T_{BACKWARD\_SEARCH})\)
No-throw guarantee.
|
inlinenoexcept |
Tries to extend the query by seq
to the right.
seq_t | The type of range of the sequence to search; must model std::ranges::forward_range. |
[in] | seq | Sequence to extend the query with to the right. |
true
if the cursor could extend the query successfully.If extending fails in the middle of the sequence, all previous computations are rewound to restore the cursor's state before calling this method.
\(|seq| * O(T_{BACKWARD\_SEARCH})\)
No-throw guarantee.
|
inlinenoexcept |
Outputs the rightmost rank.
Constant.
No-throw guarantee.
|
inline |
Locates the occurrences of the searched query in the text on demand, i.e. a std::ranges::view is returned and every position is located once it is accessed.
\(count() * O(T_{BACKWARD\_SEARCH} * SAMPLING\_RATE)\)
Strong exception guarantee (no data is modified in case an exception is thrown).
|
inline |
Locates the occurrences of the searched query in the text.
\(count() * O(T_{BACKWARD\_SEARCH} * SAMPLING\_RATE)\)
Strong exception guarantee (no data is modified in case an exception is thrown).
|
inlinenoexcept |
Compares two cursors.
[in] | rhs | Other cursor to compare it to. |
true
if the cursors are not equal, false
otherwise.Constant.
No-throw guarantee.
|
inlinenoexcept |
Compares two cursors.
[in] | rhs | Other cursor to compare it to. |
true
if both cursors are equal, false
otherwise.Constant.
No-throw guarantee.
|
inlinenoexcept |
Returns the searched query.
text_t | The type of the text used to build the index; must model std::ranges::input_range. |
[in] | text | Text that was used to build the index. |
Returns the concatenation of all edges from the root node to the cursors current node.
\(O(SAMPLING\_RATE * T_{BACKWARD\_SEARCH}) + query\_length()\)
No-throw guarantee.
|
inlinenoexcept |
Returns the length of the searched query. Returns the depth of the cursor node in the implicit suffix tree. .
Constant.
No-throw guarantee.
|
inline |
Serialisation support function.
archive_t | Type of archive ; must satisfy seqan3::cereal_archive. |
archive | The archive being serialised from/to. |
|
inlinenoexcept |
Returns the half-open suffix array interval.
Constant.
No-throw guarantee.