82 reference_type
const & reference,
83 uint32_t
const zero_based_reference_start_position,
84 sequence_type
const & query)
86 if (cigar_vector.
empty())
87 throw std::logic_error{
"An empty CIGAR is not a valid alignment representation."};
92 uint32_t reference_length{0};
93 uint32_t query_length{0};
95 for (
auto [cigar_count, cigar_operation] : cigar_vector)
97 if ((
'M'_cigar_operation == cigar_operation) || (
'='_cigar_operation == cigar_operation)
98 || (
'X'_cigar_operation == cigar_operation))
100 reference_length += cigar_count;
101 query_length += cigar_count;
103 else if (
'D'_cigar_operation == cigar_operation)
105 reference_length += cigar_count;
107 else if (
'I'_cigar_operation == cigar_operation)
109 query_length += cigar_count;
113 if (
static_cast<size_t>(zero_based_reference_start_position + reference_length) > std::ranges::size(reference))
114 throw std::logic_error{
"The CIGAR string indicates a reference length of at least "
115 +
std::to_string(zero_based_reference_start_position + reference_length)
116 +
", but the supplied reference sequence is only of size"
121 uint32_t soft_clipping_start{0};
122 uint32_t soft_clipping_end{0};
125 auto soft_clipping_at = [&cigar_vector](
size_t const index)
127 return cigar_vector[index] ==
'S'_cigar_operation;
130 auto hard_clipping_at = [&](
size_t const index)
132 return cigar_vector[index] ==
'H'_cigar_operation;
135 auto vector_size_at_least = [&](
size_t const min_size)
137 return cigar_vector.
size() >= min_size;
140 auto cigar_count_at = [&](
size_t const index)
142 return get<0>(cigar_vector[index]);
147 if (soft_clipping_at(0))
148 soft_clipping_start = cigar_count_at(0);
149 else if (vector_size_at_least(2) && hard_clipping_at(0) && soft_clipping_at(1))
150 soft_clipping_start = cigar_count_at(1);
154 auto const last_index = cigar_vector.
size() - 1;
155 auto const second_last_index = last_index - 1;
157 if (vector_size_at_least(2) && soft_clipping_at(last_index))
158 soft_clipping_end = cigar_count_at(last_index);
159 else if (vector_size_at_least(3) && hard_clipping_at(last_index) && soft_clipping_at(second_last_index))
160 soft_clipping_end = cigar_count_at(second_last_index);
162 if (soft_clipping_start + query_length + soft_clipping_end != std::ranges::size(query))
163 throw std::logic_error{
"The CIGAR string indicates a query/read sequence length of "
164 +
std::to_string(soft_clipping_start + query_length + soft_clipping_end)
165 +
", but the supplied query/read sequence is of size"
179 zero_based_reference_start_position + reference_length));
181 assign_unaligned(get<1>(
alignment), query |
views::slice(soft_clipping_start, soft_clipping_start + query_length));
189 for (
auto [cigar_count, cigar_operation] : cigar_vector)
191 if ((
'M'_cigar_operation == cigar_operation) || (
'='_cigar_operation == cigar_operation)
192 || (
'X'_cigar_operation == cigar_operation))
194 std::ranges::advance(current_ref_pos, cigar_count);
195 std::ranges::advance(current_read_pos, cigar_count);
197 else if ((
'D'_cigar_operation == cigar_operation) || (
'N'_cigar_operation == cigar_operation))
200 current_read_pos = get<1>(
alignment).insert_gap(current_read_pos, cigar_count);
202 std::ranges::advance(current_ref_pos, cigar_count);
204 else if ((
'I'_cigar_operation == cigar_operation))
206 current_ref_pos = get<0>(
alignment).insert_gap(current_ref_pos, cigar_count);
208 std::ranges::advance(current_read_pos, cigar_count);
210 else if ((
'P'_cigar_operation == cigar_operation))
212 current_ref_pos = get<0>(
alignment).insert_gap(current_ref_pos, cigar_count);
215 current_read_pos = get<1>(
alignment).insert_gap(current_read_pos, cigar_count);
auto alignment_from_cigar(std::vector< cigar > const &cigar_vector, reference_type const &reference, uint32_t const zero_based_reference_start_position, sequence_type const &query)
Construct an alignment from a CIGAR string and the corresponding sequences.
Definition alignment_from_cigar.hpp:81