# Source code for ogilo.utils

"""Miscellaneous utilities used in ogilo."""

import csv
import os
from typing import Iterator, Sequence

import streq as sq

from .types import Oligo, PCRHandle, Seq

PCR_HANDLE_SETS = ('sanson2018', 'illumina')

def _load_pcr_handles(filename: str) -> Sequence[PCRHandle]:
    """Read PCR handle pairs from a comma-delimited CSV file.

    The file must have a header row providing the columns
    `pcr_handle_id`, `pcr_handle_f` and `pcr_handle_r`.

    Parameters
    ----------
    filename : str
        Path to the CSV file.

    Returns
    -------
    tuple of PCRHandle
        One `PCRHandle` per data row, in file order. Its `f` and `r` fields
        are `Seq` objects named `<pcr_handle_id>_f` and `<pcr_handle_id>_r`,
        each created with `type='handle'`, `group=None` and `reverse=False`.

    Raises
    ------
    OSError
        If the file cannot be opened.
    KeyError
        If one of the required columns is missing from the header.
    """

    def _handle_seq(seq: str, name: str) -> Seq:
        # Both directions of a handle share every attribute except the
        # sequence itself and the name suffix.
        return Seq(group=None,
                   seq=seq,
                   name=name,
                   type='handle',
                   reverse=False)

    # newline='' is what the csv module asks for, so that it performs its
    # own line-ending handling (including newlines inside quoted fields).
    with open(filename, 'r', newline='') as f:

        # tuple() consumes the reader eagerly, while the file is still open.
        return tuple(
            PCRHandle(name=row['pcr_handle_id'],
                      f=_handle_seq(row['pcr_handle_f'],
                                    row['pcr_handle_id'] + '_f'),
                      r=_handle_seq(row['pcr_handle_r'],
                                    row['pcr_handle_id'] + '_r'))
            for row in csv.DictReader(f, delimiter=',')
        )


def _get_pcr_handles(handle_set: str) -> Sequence[PCRHandle]:
    """Resolve a handle set name or CSV path to its PCR handles.

    Parameters
    ----------
    handle_set : str
        One of the names in `PCR_HANDLE_SETS`, the string `'all'`, or the
        path to an existing file ending in `.csv`.

    Returns
    -------
    tuple of PCRHandle
        The handles of the requested set. For `'all'`, the handles of every
        set in `PCR_HANDLE_SETS`, concatenated in that order.

    Raises
    ------
    ValueError
        If `handle_set` is neither a known name, `'all'`, nor an existing
        `.csv` file.
    """

    # Named sets are read from `<name>-pcr-handles.csv` next to this module.
    if handle_set in PCR_HANDLE_SETS:
        bundled_csv = os.path.join(os.path.dirname(__file__),
                                   f'{handle_set}-pcr-handles.csv')
        return _load_pcr_handles(bundled_csv)

    if handle_set == 'all':
        return tuple(handle
                     for set_name in PCR_HANDLE_SETS
                     for handle in _get_pcr_handles(set_name))

    # Anything else is only accepted as a path to a user-supplied CSV file.
    is_user_csv = handle_set.endswith('.csv') and os.path.exists(handle_set)
    if not is_user_csv:
        raise ValueError(f'The PCR handle set {handle_set} does not exist.')

    return _load_pcr_handles(handle_set)


def find_all(p: str, s: str) -> Iterator[int]:
    """Find all instances of pattern p in the string s.

    Matching is case-insensitive: both `p` and `s` are upper-cased before
    searching. Overlapping matches are reported, because each search
    resumes one character after the previous hit.

    Parameters
    ----------
    p : str
        Pattern to find
    s : str
        String in which to find pattern

    Yields
    ------
    int
        Position in `s` of each instance of `p`, in increasing order

    """

    # Normalise both sides; upper-casing only `s` would make any pattern
    # containing a lower-case letter unmatchable.
    p = p.upper()
    s = s.upper()

    i = s.find(p)

    while i != -1:

        yield i
        i = s.find(p, i + 1)
def n_found(p: str, s: str, with_rc: bool = True) -> int:
    """Count the occurrences of a pattern in a string.

    Parameters
    ----------
    p : str
        Pattern to find
    s : str
        String in which to find pattern
    with_rc : bool
        Whether to also count matches of the reverse complement of `p`
        (as given by `streq.reverse_complement`) in `s`

    Returns
    -------
    int
        The number of occurrences of `p` in `s`, plus the number of
        occurrences of its reverse complement when `with_rc` is set

    """

    # NOTE(review): the two searches are counted independently, so a pattern
    # that equals its own reverse complement has each site counted twice --
    # confirm this is intended.
    patterns = [p]

    if with_rc:
        patterns.append(sq.reverse_complement(p))

    return sum(1 for pattern in patterns for _ in find_all(pattern, s))
def grouping_key(x: Oligo) -> str:
    """Build a sort/group key from the group attributes of an oligo's parts.

    Use as the `key` parameter for `sort()` or `itertools.groupby()` to
    order a list of lists of `Seq` objects by their group attribute.
    Items whose group is `None` are ignored; the remaining group strings
    are joined with `'-'` in their original order.

    Parameters
    ----------
    x : list of Seq objects

    Returns
    -------
    str
        Concatenated group attributes, where the group is not `None`.
        Empty string when no item has a group.

    """

    return '-'.join(part.group for part in x if part.group is not None)