import re
from collections import namedtuple
from typing import Optional
import ngs_tools as ngs
import pysam
from typing_extensions import Literal
# Lightweight record describing one supported sequencing technology.
#   name:            identifier string (used as the key of TECHNOLOGIES_MAP)
#   chemistry:       chemistry object obtained from ``ngs.chemistry``
#   additional_args: ``None`` or a dict of extra option -> value pairs
Technology = namedtuple('Technology', ['name', 'chemistry', 'additional_args'])
# Technologies listed under the barcode/UMI group. Every entry is built the
# same way: the chemistry is looked up by the technology name, ``reorder([1, 0])``
# is applied to it, and no additional arguments are attached.
# NOTE(review): ``reorder([1, 0])`` presumably swaps the order of the two read
# files in the chemistry definition -- confirm against the ngs_tools docs.
BARCODE_UMI_TECHNOLOGIES = [
    Technology(name, ngs.chemistry.get_chemistry(name).reorder([1, 0]), None)
    for name in ('dropseq', 'scifate', '10xv2', '10xv3')
]
# Technologies listed under the plate group. The ``'smartseq'`` entry uses the
# ``'smartseq2'`` chemistry as-is (no reordering) and carries extra arguments.
# NOTE(review): the keys look like STAR command-line options; how they are
# consumed is outside this block -- confirm at the call site.
PLATE_TECHNOLOGIES = [
    Technology(
        'smartseq',
        ngs.chemistry.get_chemistry('smartseq2'),
        {
            '--soloUMIdedup': 'Exact',
            '--outSAMattributes': ['RG'],
        },
    ),
]
# Every supported technology: barcode/UMI entries first, then plate entries.
TECHNOLOGIES = BARCODE_UMI_TECHNOLOGIES + PLATE_TECHNOLOGIES
# Lookup table from a technology's ``name`` to its ``Technology`` record.
TECHNOLOGIES_MAP = {t.name: t for t in TECHNOLOGIES}
# Maps each ``ngs.chemistry.SequencingStrand`` member to a lowercase strand
# name: 'unstranded', 'forward' or 'reverse'.
STRAND_MAP = {
    ngs.chemistry.SequencingStrand.UNSTRANDED: 'unstranded',
    ngs.chemistry.SequencingStrand.FORWARD: 'forward',
    ngs.chemistry.SequencingStrand.REVERSE: 'reverse',
}
# Matches a ``--readStrand`` or ``--soloStrand`` option followed by ``=`` or one
# or more spaces and then one of ``Forward``, ``Reverse`` or ``Unstranded``
# (case-sensitive). The value is exposed as the named group ``strand``.
BAM_STRAND_PARSER = re.compile(
    r'(--readStrand|--soloStrand)(=| +)(?P<strand>Forward|Reverse|Unstranded)'
)
def detect_strand(bam_path: str) -> Optional[Literal['forward', 'reverse', 'unstranded']]:
    """Attempt to detect strandness by parsing the BAM header.

    Scans the ``PG`` header records whose ``PN`` field equals ``'STAR'`` and
    searches each record's ``CL`` field with ``BAM_STRAND_PARSER``. The first
    record that matches determines the result.

    Args:
        bam_path: Path to BAM

    Returns:
        'unstranded', 'forward', or 'reverse' if the strand was successfully
        detected. `None` otherwise.
    """
    with pysam.AlignmentFile(bam_path, 'rb') as f:
        # A header without PG records gives us nothing to scan.
        for entry in f.header.get('PG') or []:
            if entry.get('PN') != 'STAR':
                continue
            # A record without a CL field is treated as an empty command line.
            match = BAM_STRAND_PARSER.search(entry.get('CL', ''))
            if match:
                return match.group('strand').lower()
    return None