# Source code for msys.sequence

from __future__ import print_function
import msys

# Lookup table from three-letter residue names to one-letter sequence codes.
# Alternate protonation/bonding states share the code of their parent residue.
code = {
    "ALA": "A",
    "ARG": "R",
    "ASH": "D",  # Asp, neutral
    "ASN": "N",
    "ASP": "D",
    "ASX": "B",  # Asn or Asp
    "CYM": "C",  # Cys, negative
    "CYS": "C",
    "CYX": "C",  # Cys, S-S bonded
    "GLH": "E",  # Glu, neutral
    "GLN": "Q",
    "GLU": "E",
    "GLX": "Z",  # Gln or Glu
    "GLY": "G",
    "HID": "H",  # His, delta
    "HIE": "H",  # His, epsilon
    "HIP": "H",  # His, positive
    "HIS": "H",
    "HSD": "H",  # His, delta
    "HSE": "H",  # His, epsilon
    "ILE": "I",
    "LEU": "L",
    "LYN": "K",  # Lys, neutral
    "LYS": "K",
    "MET": "M",
    "PHE": "F",
    "PRO": "P",
    "SER": "S",
    "THR": "T",
    "TRP": "W",
    "TYM": "Y",  # Tyr, negative
    "TYR": "Y",
    "UNK": "X",
    "VAL": "V",
}

def Sequences(system_or_chain, distinct=True):
    ''' Return the one-letter residue sequences of a System or Chain.

    One string is built per chain by looking up each residue name in the
    module-level ``code`` table; names missing from the table become 'X'.
    Leading and trailing 'X' characters are then stripped from each
    sequence, so a chain made up entirely of unrecognized residues yields
    the empty string.  'X' characters in the interior are kept.

    Arguments:
        system_or_chain -- an msys.System (every chain is used) or an
            msys.Chain (only that chain is used, giving one sequence)
        distinct -- if True (the default), duplicate sequences are dropped
            and the result is sorted; if False, one sequence per chain is
            returned in the order the chains are iterated

    Returns: list of str

    Raises: TypeError if system_or_chain is neither a System nor a Chain.
    '''
    if isinstance(system_or_chain, msys.Chain):
        chains = [system_or_chain]
    elif isinstance(system_or_chain, msys.System):
        chains = system_or_chain.chains
    else:
        raise TypeError("Expected System or Chain, got %s"
                        % type(system_or_chain))

    seqs = []
    for chain in chains:
        seq = ''.join(code.get(res.name, 'X') for res in chain.residues)
        # strip('X') already reduces an all-'X' sequence to '', so no
        # separate check for that case is needed afterwards.
        seqs.append(seq.strip('X'))

    if distinct:
        seqs = sorted(set(seqs))
    return seqs
doc = '''
dms-sequence [--with-duplicates] [--without-duplicates] [ dms files ]

Write sequences in a dms file to stdout.
'''


def main():
    ''' Command-line entry point: print the sequences found in dms files.

    Parses the command line with optparse, loads each positional argument
    with msys.Load, and prints one sequence per line.  Duplicate sequences
    within a file are suppressed unless --with-duplicates is given.
    '''
    import optparse

    parser = optparse.OptionParser(doc)
    # Both flags store into the same destination, so whichever appears
    # last on the command line wins; the default is distinct=True.
    parser.add_option('--with-duplicates', action='store_false',
                      default=True, dest='distinct',
                      help="Print duplicate sequences")
    parser.add_option('--without-duplicates', action='store_true',
                      dest='distinct',
                      help="Don't print duplicate sequences")
    opts, args = parser.parse_args()

    for path in args:
        mol = msys.Load(path)
        # Pass the option by name: splatting opts.__dict__ would forward
        # every future option to Sequences and raise TypeError.
        for s in Sequences(mol, distinct=opts.distinct):
            print(s)