partrita · September 21, 2024 00:16
diff --git a/chem_sorting.py b/chem_sorting.py
 import re
 import glob
 import csv
 from typing import List, Dict
 from pathlib import Path

 def make_file_list(extension: str) -> List[str]:
     """Return the files in the current directory whose name ends in ``.<extension>``.

     Args:
         extension: File extension without the leading dot, e.g. ``"csv"``.

     Returns:
         The relative file names matched by the glob pattern ``*.<extension>``.
     """
     pattern = f"*.{extension}"
     matches = glob.glob(pattern)
     return matches

 def parse_chemical_formula(formula: str) -> Dict[str, str]:
     """Count the atoms of each element in a flat chemical formula.

     The formula is scanned for element symbols (one capital letter followed
     by optional lowercase letters), each optionally followed by a count.
     A symbol without a count contributes 1.  A symbol that occurs more than
     once (``CH3COOH``) has its counts summed instead of the last occurrence
     overwriting the earlier ones.

     Parentheses, charges and hydrates are not interpreted; characters that
     are not part of a symbol/count pair are ignored.

     Args:
         formula: Formula text such as ``"C6H12O6"``.

     Returns:
         Mapping from element symbol to its total count, as a decimal string.
     """
     totals: Dict[str, int] = {}
     # One pass with two capture groups replaces the per-segment re-matching.
     for symbol, digits in re.findall(r'([A-Z][a-z]*)([0-9]*)', formula):
         totals[symbol] = totals.get(symbol, 0) + (int(digits) if digits else 1)
     return {symbol: str(total) for symbol, total in totals.items()}

 def process_csv_file(input_file: str, chemical_elements: List[str], formula_column: int = 3) -> None:
     """Write a copy of a CSV file with one extra column per chemical element.

     The output path is ``input_file`` with its last suffix replaced by
     ``.CHNOSPSi.csv``.  For every row the cell at ``formula_column`` is read:
     if it equals ``'Formula'`` the row is treated as the header and the
     element symbols are appended; otherwise the cell is parsed with
     ``parse_chemical_formula`` and the per-element counts are appended
     (``'0'`` for elements absent from the formula).

     Rows too short to contain ``formula_column`` (for example blank lines)
     are copied unchanged instead of raising ``IndexError``.

     Args:
         input_file: Path of the CSV file to read.
         chemical_elements: Element symbols, in output column order.
         formula_column: Zero-based index of the formula column.
     """
     output_file = Path(input_file).with_suffix('.CHNOSPSi.csv')

     with open(input_file, 'r', newline='') as infile, open(output_file, 'w', newline='') as outfile:
         reader = csv.reader(infile)
         writer = csv.writer(outfile)

         for row in reader:
             if len(row) <= formula_column:
                 # No formula cell to inspect; keep the row as it is.
                 writer.writerow(row)
                 continue

             formula = row[formula_column]
             if formula == 'Formula':
                 writer.writerow(row + chemical_elements)
             else:
                 counts = parse_chemical_formula(formula)
                 writer.writerow(row + [counts.get(element, '0') for element in chemical_elements])

 def main():
     """Add C/H/N/O/S/P/Si count columns to every CSV file in the current directory.

     Files ending in ``.CHNOSPSi.csv`` are skipped: they are what
     ``process_csv_file`` produces and they also match ``*.csv``, so without
     the check a second run would process its own earlier output.
     """
     chemical_elements = ['C', 'H', 'N', 'O', 'S', 'P', 'Si']
     csv_files = make_file_list('csv')

     for file in csv_files:
         if file.endswith('.CHNOSPSi.csv'):
             continue
         process_csv_file(file, chemical_elements)

 if __name__ == '__main__':
     main()
diff --git a/PDB_fetcher.py b/PDB_fetcher.py
 import urllib.request
 import xml.etree.ElementTree as ET
 import os
 import gzip
 import shutil
 from typing import List, Optional

 def fetch_pdb_list(sequence: str, e_cutoff: str, resolution: str) -> List[str]:
     """POST a composite XML query to the RCSB search endpoint and return the hits.

     The query combines three refinements: a BLAST sequence search with the
     given E-value cut-off, a resolution filter with the given maximum, and a
     title filter excluding structures whose title contains ``arp``.

     Args:
         sequence: Protein sequence; any whitespace (line wraps, indentation)
             is removed before it is embedded in the query.
         e_cutoff: E-value cut-off, inserted verbatim into ``<eCutOff>``.
         resolution: Upper resolution bound, inserted verbatim into the query.

     Returns:
         The whitespace-separated tokens of the response body (an empty list
         for an empty body).

     Raises:
         urllib.error.URLError: If the request fails.
     """
     # NOTE(review): this is the legacy www.rcsb.org/pdb/rest service; RCSB has
     # announced its replacement by the search.rcsb.org API -- confirm that the
     # endpoint still answers before relying on this function.
     url: str = 'http://www.rcsb.org/pdb/rest/search'
     residues: str = "".join(sequence.split())
     # .strip(): an XML declaration is only valid at the very start of the
     # document, so the newline/indentation of the literal must not precede it.
     query_xml: str = f"""
     <?xml version="1.0" encoding="UTF-8"?>
     <orgPdbCompositeQuery version="1.0">
         <queryRefinement>
             <queryRefinementLevel>0</queryRefinementLevel>
             <orgPdbQuery>
                 <queryType>org.pdb.query.simple.SequenceQuery</queryType>
                 <sequence>{residues}</sequence>
                 <eCutOff>{e_cutoff}</eCutOff>
                 <searchTool>blast</searchTool>
             </orgPdbQuery>
         </queryRefinement>
         <queryRefinement>
             <queryRefinementLevel>1</queryRefinementLevel>
             <orgPdbQuery>
                 <queryType>org.pdb.query.simple.ResolutionQuery</queryType>
                 <description>ResolutionQuery </description>
                 <refine.ls_d_res_high.comparator>between</refine.ls_d_res_high.comparator>
                 <refine.ls_d_res_high.max>{resolution}</refine.ls_d_res_high.max>
             </orgPdbQuery>
         </queryRefinement>
         <queryRefinement>
             <queryRefinementLevel>2</queryRefinementLevel>
             <orgPdbQuery>
                 <queryType>org.pdb.query.simple.StructTitleQuery</queryType>
                 <struct.title.comparator>!contains</struct.title.comparator>
                 <struct.title.value>arp</struct.title.value>
             </orgPdbQuery>
         </queryRefinement>
     </orgPdbCompositeQuery>
     """.strip()

     print("Querying PDB...\n")
     req: urllib.request.Request = urllib.request.Request(url, data=query_xml.encode())
     with urllib.request.urlopen(req) as response:
         result: str = response.read().decode()

     # str.split() already yields [] for an empty body.
     return result.split()

 def download_pdb_files(pdb_list: List[str], biological_unit: bool = True) -> None:
     """Download and decompress one coordinate file per PDB ID into the current directory.

     Each ID is lower-cased and substituted into the URL template; the
     archive is saved as ``<id>.pdb.gz``, decompressed to ``<id>.pdb`` and
     the archive is then deleted.  The archive is removed even when the
     download or decompression raises, so no partial ``.gz`` is left behind;
     the exception itself still propagates.

     Args:
         pdb_list: PDB identifiers to fetch.
         biological_unit: When true, use the biounit (``.pdb1.gz``) URL
             template; otherwise the ``.pdb.gz`` template.
     """
     pdb_url: str = (
         "ftp://ftp.wwpdb.org/pub/pdb/data/biounit/coordinates/all/{0}.pdb1.gz"
         if biological_unit
         else "http://www.rcsb.org/pdb/files/{0}.pdb.gz"
     )

     for pdb in pdb_list:
         print(f"Downloading {pdb}")
         url: str = pdb_url.format(pdb.lower())
         filename: str = f"{pdb}.pdb.gz"
         try:
             urllib.request.urlretrieve(url, filename)
             with gzip.open(filename, 'rb') as f_in, open(f"{pdb}.pdb", 'wb') as f_out:
                 shutil.copyfileobj(f_in, f_out)
         finally:
             if os.path.exists(filename):
                 os.remove(filename)

 def generate_pymol_script(pdb_list: List[str]) -> None:
     """Write ``load.pml``, a PyMOL script that loads and aligns the structures.

     The script loads ``<cwd>/<id>.pdb`` for every ID, hides everything,
     shows cartoons, and emits one ``cealign`` line pairing the first ID with
     each of the remaining IDs.

     Args:
         pdb_list: PDB identifiers; the first one is the alignment reference.
     """
     script_lines = [f"load {os.getcwd()}/{pdb}.pdb\n" for pdb in pdb_list]
     script_lines += ["hide all\n", "show cartoon\n"]

     if pdb_list:
         reference = pdb_list[0]
         script_lines.extend(f"cealign {reference}, {mobile}\n" for mobile in pdb_list[1:])

     with open('load.pml', 'w') as f:
         f.writelines(script_lines)

 def fetch_pdb_info(pdb_list: List[str]) -> None:
     """Fetch a custom report for the given PDB IDs and save it to ``list.txt``.

     The XML response is parsed and every ``record`` element becomes one
     tab-separated line holding the text of its child elements.  Children
     without text are written as empty cells rather than dropped, so every
     line keeps the same column positions.

     Args:
         pdb_list: PDB identifiers, joined with commas into the request URL.

     Raises:
         urllib.error.URLError: If the request fails.
         xml.etree.ElementTree.ParseError: If the response is not valid XML.
     """
     report_url: str = "http://www.rcsb.org/pdb/rest/customReport?pdbids={0}&customReportColumns=structureId,structureTitle,resolution,experimentalTechnique,depositionDate,structureAuthor,classification,structureMolecularWeight&service=wsdisplay&format=xml"

     req: urllib.request.Request = urllib.request.Request(report_url.format(",".join(pdb_list)))
     with urllib.request.urlopen(req) as response:
         result: str = response.read().decode()

     if not result:
         print("Failed to retrieve PDB info")
         return

     root: ET.Element = ET.fromstring(result)
     with open('list.txt', 'w') as f:
         for record in root.iter('record'):
             f.write("\t".join(field.text or "" for field in record) + "\n")

 def main() -> None:
     """Search for structures similar to a fixed sequence, then download and summarise them.

     When the search returns hits they are downloaded, a PyMOL script is
     generated and a report is fetched; otherwise a message is printed.
     """
     e_cutoff: str = "1e-30"
     resolution: str = "3"
     sequence: str = """
    MSMILKEIRMNNFKSHVNSRIKFEKGIVAIIGENGSGKSSIFEAVFFALFGAGSNFNYDT
    IITKGKKSVYVELDFEVNGNNYKIIREYDSGRGGAKLYKNGKPYATTISAVNKAVNEILG
    VDRNMFLNSIYIKQGEIAKFLSLKPSEKLETVAKLLG
    """

     hits: List[str] = fetch_pdb_list(sequence, e_cutoff, resolution)
     if not hits:
         print("No PDB structures found")
         return

     download_pdb_files(hits)
     generate_pymol_script(hits)
     fetch_pdb_info(hits)

 if __name__ == "__main__":
     main()
diff --git a/pomodoro_timer.py b/pomodoro_timer.py
 import sys
 import time
 import argparse
 from tqdm import tqdm

 def parse_arguments() -> argparse.Namespace:
     """Read the timer settings from the command line.

     Returns:
         Namespace with integer attributes ``time`` (default 25),
         ``break_time`` (default 5) and ``repeat`` (default 3).
     """
     parser = argparse.ArgumentParser(description='Set time of timer. It should be in minutes.')
     options = (
         ('-t', '--time', 25, 'Set pomodoro interval in minutes.'),
         ('-b', '--break_time', 5, 'Set break interval in minutes.'),
         ('-r', '--repeat', 3, 'Set number of pomodoro rounds.'),
     )
     for short_flag, long_flag, default_value, help_text in options:
         parser.add_argument(short_flag, long_flag, type=int, default=default_value, help=help_text)
     return parser.parse_args()

 def run_pomodoro(pomodoro_time: int, break_time: int, rounds: int) -> None:
     """Run ``rounds`` work intervals, with a break after every round but the last.

     Args:
         pomodoro_time: Value passed to ``countdown`` for each work interval.
         break_time: Value passed to ``countdown`` for each break.
         rounds: Number of work intervals.
     """
     for round_number in range(1, rounds + 1):
         print(f'Starting Pomodoro {round_number}. May the focus be with you.')
         countdown(pomodoro_time, "Working")

         is_final_round = round_number >= rounds
         if is_final_round:
             continue

         print('Take a break!')
         countdown(break_time, "Break")

     print('Pomodoro timer is over. Great job!')

 def countdown(duration: int, label: str) -> None:
     """Show a tqdm progress bar that advances once per one-second sleep.

     Args:
         duration: Number of one-second steps.
         label: Prefix of the bar description (``"<label> Time"``).
     """
     progress = tqdm(range(duration), desc=f"{label} Time")
     for _tick in progress:
         time.sleep(1)

 def main() -> None:
     """Parse the command line and run the timer, converting minutes to seconds."""
     seconds_per_minute = 60
     args = parse_arguments()

     run_pomodoro(
         args.time * seconds_per_minute,
         args.break_time * seconds_per_minute,
         args.repeat,
     )

 if __name__ == "__main__":
     main()
diff --git a/smi2srt.py b/smi2srt.py
 import os
 import sys
 import re
 import chardet
 from typing import List, Optional

 class SMIConverter:
     """Convert SAMI (``.smi``) subtitle files into SubRip (``.srt``) files.

     ``srt_list`` holds the cues parsed from the file currently being
     converted; it is cleared at the start of every conversion so that one
     instance can be reused for several files.
     """

     def __init__(self):
         self.srt_list: List[SMIItem] = []

     @staticmethod
     def usage(msg: Optional[str] = None, exit_code: int = 1) -> None:
         """Print the usage text, followed by ``msg`` if given, and exit.

         Args:
             msg: Optional extra line appended to the usage text.
             exit_code: Process exit status passed to ``sys.exit``.
         """
         print_msg = f"""
 usage {os.path.basename(sys.argv[0])} smifile.smi [...]
    convert smi into srt subtitle file with same filename.
    By MoonChang Chae <[email protected]>
 """
         if msg:
             print_msg += f'{msg}\n'
         print(print_msg)
         sys.exit(exit_code)

     @staticmethod
     def ms2ts(ms: int) -> str:
         """Format a millisecond offset as an SRT timestamp ``HH:MM:SS,mmm``."""
         hours, ms = divmod(ms, 3600000)
         minutes, ms = divmod(ms, 60000)
         seconds, ms = divmod(ms, 1000)
         return f'{hours:02d}:{minutes:02d}:{seconds:02d},{ms:03d}'

     def convert_smi(self, smi_file: str) -> bool:
         """Convert one SMI file, writing ``<same name>.srt`` beside it.

         Args:
             smi_file: Path of the SMI file.

         Returns:
             ``True`` when an SRT file was written; ``False`` when the input
             does not exist or contains no ``<SYNC`` tag.

         Raises:
             ValueError: If a line contains ``<SYNC`` but not in the
                 ``<SYNC Start=nnnn>`` form.
         """
         if not os.path.exists(smi_file):
             sys.stderr.write(f'Cannot find smi file <{smi_file}>\n')
             return False

         srt_file = f'{os.path.splitext(smi_file)[0]}.srt'

         with open(smi_file, 'rb') as ifp:
             raw: bytes = ifp.read()

         # chardet reports encoding None when it cannot guess; fall back to
         # UTF-8.  Undecodable bytes are replaced instead of aborting the file.
         encoding = chardet.detect(raw)['encoding'] or 'utf-8'
         text = raw.decode(encoding, errors='replace')

         # Case-insensitive, like the per-line parser, so <Sync ...> files work.
         first_sync = re.search(r'<sync', text, flags=re.IGNORECASE)
         if first_sync is None:
             return False
         lines: List[str] = text[first_sync.start():].split('\n')

         # Drop cues left over from a previously converted file.
         self.srt_list = []
         self._parse_smi_lines(lines)
         self._write_srt_file(srt_file)
         return True

     def _parse_smi_lines(self, lines: List[str]) -> None:
         """Turn SMI body lines into ``SMIItem`` cues appended to ``srt_list``.

         Every ``<SYNC Start=n>`` tag opens a new cue and closes the previous
         one, whose end time becomes ``n``.  Text on lines without a tag is
         appended to the open cue.  The cue opened by the final tag is never
         appended, because no later tag supplies its end time.
         """
         sync_cont: str = ''
         si: Optional[SMIItem] = None
         for linecnt, line in enumerate(lines, 1):
             # A regex position is used rather than line.upper().find(): upper()
             # can change the string length, which would shift the index.
             tag = re.search(r'<sync', line, flags=re.IGNORECASE)
             if tag is None:
                 sync_cont += line
                 continue

             m = re.search(r'<sync\s+start\s*=\s*(\d+)>(.*)$', line, flags=re.IGNORECASE)
             if not m:
                 raise ValueError(f'Invalid format tag of <Sync start=nnnn> with "{line}"')

             sync_cont += line[:tag.start()]
             if si is not None:
                 si.end_ms = int(m.group(1))
                 si.contents = sync_cont
                 si.linecount = linecnt
                 self.srt_list.append(si)
             sync_cont = m.group(2)
             si = SMIItem(start_ms=int(m.group(1)))

     def _write_srt_file(self, srt_file: str) -> None:
         """Write the non-empty cues of ``srt_list`` to ``srt_file`` as UTF-8 SRT.

         Cues whose text is empty after conversion are skipped; the written
         cues are numbered consecutively from 1.
         """
         with open(srt_file, 'w', encoding='utf-8') as ofp:
             index = 0
             for si in self.srt_list:
                 si.convert_srt()
                 if not si.contents:
                     continue
                 index += 1
                 ofp.write(f'{index}\n{si.start_ts} --> {si.end_ts}\n{si.contents}\n\n')

 class SMIItem:
     """One subtitle cue: start/end times, raw text and its SRT rendering."""

     def __init__(self, start_ms: int = 0):
         self.start_ms: int = start_ms  # cue start, milliseconds
         self.start_ts: str = '00:00:00,000'  # filled in by convert_srt()
         self.end_ms: int = 0  # cue end, milliseconds
         self.end_ts: str = '00:00:00,000'  # filled in by convert_srt()
         self.contents: Optional[str] = None  # cue text (raw markup, then cleaned)
         self.linecount: int = 0  # set by the parser when the cue is closed

     def convert_srt(self) -> None:
         """Compute the SRT timestamps and clean ``contents`` in place.

         The end timestamp is 10 ms before ``end_ms`` but never earlier than
         the start.  In the text, whitespace runs collapse to one space,
         lowercase named entities of 2-5 letters (``&nbsp;`` etc.) are
         removed, ``<br>`` / ``<br/>`` runs become a newline, and tags are
         filtered by ``_process_tags``.
         """
         self.start_ts = SMIConverter.ms2ts(self.start_ms)
         self.end_ts = SMIConverter.ms2ts(max(self.start_ms, self.end_ms - 10))
         if self.contents:
             self.contents = re.sub(r'\s+', ' ', self.contents)
             self.contents = re.sub(r'&[a-z]{2,5};', '', self.contents)
             self.contents = re.sub(r'(<br\s*/?>)+', '\n', self.contents, flags=re.IGNORECASE)
             self._process_tags()
             self.contents = self.contents.strip()

     def _process_tags(self) -> None:
         """Remove every markup tag from ``contents`` except ``<b>``, ``<i>``, ``<u>``.

         Opening and closing forms of the three kept tags are preserved
         verbatim.  Text that merely contains a stray ``<`` or ``>`` is left
         intact instead of cutting the cue short at that character.
         """
         if not self.contents:
             return

         def keep_or_drop(tag):
             return tag.group(0) if tag.group(1).lower() in ('b', 'i', 'u') else ''

         self.contents = re.sub(r'</?([a-z]+)[^>]*>', keep_or_drop, self.contents, flags=re.IGNORECASE)

     def __repr__(self) -> str:
         return f'{self.start_ms}:{self.end_ms}:<{self.contents}>:{self.linecount}'

 def main() -> None:
     """Convert every SMI file named on the command line and report each result.

     With no arguments the usage text is printed and the process exits.
     A new ``SMIConverter`` is created for each file, because the converter
     collects parsed cues in ``srt_list`` and cues from one file must not end
     up in the next file's output.  A file that cannot be parsed or read is
     reported as a failure and the remaining files are still processed.
     """
     if len(sys.argv) <= 1:
         SMIConverter.usage()

     for smi_file in sys.argv[1:]:
         converter = SMIConverter()
         try:
             succeeded = converter.convert_smi(smi_file)
         except (ValueError, LookupError, OSError) as err:
             # ValueError also covers UnicodeDecodeError; LookupError an unknown codec.
             sys.stderr.write(f'{smi_file}: {err}\n')
             succeeded = False

         if succeeded:
             print(f"Converting <{smi_file}> OK!")
         else:
             print(f"Converting <{smi_file}> Failure!")

 if __name__ == '__main__':
     main()
	import re
	import glob
	import csv
	from typing import List, Dict
	from pathlib import Path

	def make_file_list(extension: str) -> List[str]:
	    """Return the files in the current directory whose name ends in ``.<extension>``.

	    Args:
	        extension: File extension without the leading dot, e.g. ``"csv"``.

	    Returns:
	        The relative file names matched by the glob pattern ``*.<extension>``.
	    """
	    # The body had lost its indentation; it is restored here.
	    return glob.glob(f"*.{extension}")

	def parse_chemical_formula(formula: str) -> Dict[str, str]:
	    """Count the atoms of each element in a flat chemical formula.

	    The formula is scanned for element symbols (one capital letter followed
	    by optional lowercase letters), each optionally followed by a count.
	    A symbol without a count contributes 1, and a symbol that occurs more
	    than once has its counts summed.

	    Parentheses, charges and hydrates are not interpreted; characters that
	    are not part of a symbol/count pair are ignored.

	    Args:
	        formula: Formula text such as ``"C6H12O6"``.

	    Returns:
	        Mapping from element symbol to its total count, as a decimal string.
	    """
	    totals: Dict[str, int] = {}
	    # The quantifiers matter: without '*' the pattern only matches a
	    # two-letter symbol followed by exactly one digit, so "C6" is missed.
	    for symbol, digits in re.findall(r'([A-Z][a-z]*)([0-9]*)', formula):
	        totals[symbol] = totals.get(symbol, 0) + (int(digits) if digits else 1)
	    return {symbol: str(total) for symbol, total in totals.items()}

	def process_csv_file(input_file: str, chemical_elements: List[str], formula_column: int = 3) -> None:
	    """Write a copy of a CSV file with one extra column per chemical element.

	    The output path is ``input_file`` with its last suffix replaced by
	    ``.CHNOSPSi.csv``.  For every row the cell at ``formula_column`` is read:
	    if it equals ``'Formula'`` the row is treated as the header and the
	    element symbols are appended; otherwise the cell is parsed with
	    ``parse_chemical_formula`` and the per-element counts are appended
	    (``'0'`` for elements absent from the formula).

	    Rows too short to contain ``formula_column`` (for example blank lines)
	    are copied unchanged instead of raising ``IndexError``.

	    Args:
	        input_file: Path of the CSV file to read.
	        chemical_elements: Element symbols, in output column order.
	        formula_column: Zero-based index of the formula column.
	    """
	    output_file = Path(input_file).with_suffix('.CHNOSPSi.csv')

	    with open(input_file, 'r', newline='') as infile, open(output_file, 'w', newline='') as outfile:
	        reader = csv.reader(infile)
	        writer = csv.writer(outfile)

	        for row in reader:
	            if len(row) <= formula_column:
	                # No formula cell to inspect; keep the row as it is.
	                writer.writerow(row)
	                continue

	            formula = row[formula_column]
	            if formula == 'Formula':
	                writer.writerow(row + chemical_elements)
	            else:
	                counts = parse_chemical_formula(formula)
	                writer.writerow(row + [counts.get(element, '0') for element in chemical_elements])

	def main():
	    """Add C/H/N/O/S/P/Si count columns to every CSV file in the current directory.

	    Files ending in ``.CHNOSPSi.csv`` are skipped: they are what
	    ``process_csv_file`` produces and they also match ``*.csv``, so without
	    the check a second run would process its own earlier output.
	    """
	    chemical_elements = ['C', 'H', 'N', 'O', 'S', 'P', 'Si']
	    csv_files = make_file_list('csv')

	    for file in csv_files:
	        if file.endswith('.CHNOSPSi.csv'):
	            continue
	        process_csv_file(file, chemical_elements)

	if __name__ == '__main__':
	    main()
	import urllib.request
	import xml.etree.ElementTree as ET
	import os
	import gzip
	import shutil
	from typing import List, Optional

	def fetch_pdb_list(sequence: str, e_cutoff: str, resolution: str) -> List[str]:
	    """POST a composite XML query to the RCSB search endpoint and return the hits.

	    The query combines three refinements: a BLAST sequence search with the
	    given E-value cut-off, a resolution filter with the given maximum, and a
	    title filter excluding structures whose title contains ``arp``.

	    Args:
	        sequence: Protein sequence; any whitespace (line wraps, indentation)
	            is removed before it is embedded in the query.
	        e_cutoff: E-value cut-off, inserted verbatim into ``<eCutOff>``.
	        resolution: Upper resolution bound, inserted verbatim into the query.

	    Returns:
	        The whitespace-separated tokens of the response body (an empty list
	        for an empty body).

	    Raises:
	        urllib.error.URLError: If the request fails.
	    """
	    # NOTE(review): this is the legacy www.rcsb.org/pdb/rest service; RCSB has
	    # announced its replacement by the search.rcsb.org API -- confirm that the
	    # endpoint still answers before relying on this function.
	    url: str = 'http://www.rcsb.org/pdb/rest/search'
	    residues: str = "".join(sequence.split())
	    # .strip(): an XML declaration is only valid at the very start of the
	    # document, so the newline/indentation of the literal must not precede it.
	    query_xml: str = f"""
	    <?xml version="1.0" encoding="UTF-8"?>
	    <orgPdbCompositeQuery version="1.0">
	        <queryRefinement>
	            <queryRefinementLevel>0</queryRefinementLevel>
	            <orgPdbQuery>
	                <queryType>org.pdb.query.simple.SequenceQuery</queryType>
	                <sequence>{residues}</sequence>
	                <eCutOff>{e_cutoff}</eCutOff>
	                <searchTool>blast</searchTool>
	            </orgPdbQuery>
	        </queryRefinement>
	        <queryRefinement>
	            <queryRefinementLevel>1</queryRefinementLevel>
	            <orgPdbQuery>
	                <queryType>org.pdb.query.simple.ResolutionQuery</queryType>
	                <description>ResolutionQuery </description>
	                <refine.ls_d_res_high.comparator>between</refine.ls_d_res_high.comparator>
	                <refine.ls_d_res_high.max>{resolution}</refine.ls_d_res_high.max>
	            </orgPdbQuery>
	        </queryRefinement>
	        <queryRefinement>
	            <queryRefinementLevel>2</queryRefinementLevel>
	            <orgPdbQuery>
	                <queryType>org.pdb.query.simple.StructTitleQuery</queryType>
	                <struct.title.comparator>!contains</struct.title.comparator>
	                <struct.title.value>arp</struct.title.value>
	            </orgPdbQuery>
	        </queryRefinement>
	    </orgPdbCompositeQuery>
	    """.strip()

	    print("Querying PDB...\n")
	    req: urllib.request.Request = urllib.request.Request(url, data=query_xml.encode())
	    with urllib.request.urlopen(req) as response:
	        result: str = response.read().decode()

	    # str.split() already yields [] for an empty body.
	    return result.split()

	def download_pdb_files(pdb_list: List[str], biological_unit: bool = True) -> None:
	    """Download and decompress one coordinate file per PDB ID into the current directory.

	    Each ID is lower-cased and substituted into the URL template; the
	    archive is saved as ``<id>.pdb.gz``, decompressed to ``<id>.pdb`` and
	    the archive is then deleted.  The archive is removed even when the
	    download or decompression raises; the exception itself still propagates.

	    Args:
	        pdb_list: PDB identifiers to fetch.
	        biological_unit: When true, use the biounit (``.pdb1.gz``) URL
	            template; otherwise the ``.pdb.gz`` template.
	    """
	    pdb_url: str = (
	        "ftp://ftp.wwpdb.org/pub/pdb/data/biounit/coordinates/all/{0}.pdb1.gz"
	        if biological_unit
	        else "http://www.rcsb.org/pdb/files/{0}.pdb.gz"
	    )

	    for pdb in pdb_list:
	        print(f"Downloading {pdb}")
	        url: str = pdb_url.format(pdb.lower())
	        filename: str = f"{pdb}.pdb.gz"
	        try:
	            urllib.request.urlretrieve(url, filename)
	            with gzip.open(filename, 'rb') as f_in, open(f"{pdb}.pdb", 'wb') as f_out:
	                shutil.copyfileobj(f_in, f_out)
	        finally:
	            if os.path.exists(filename):
	                os.remove(filename)

	def generate_pymol_script(pdb_list: List[str]) -> None:
	    """Write ``load.pml``, a PyMOL script that loads and aligns the structures.

	    The script loads ``<cwd>/<id>.pdb`` for every ID, hides everything,
	    shows cartoons, and emits one ``cealign`` line pairing the first ID with
	    each of the remaining IDs.

	    Args:
	        pdb_list: PDB identifiers; the first one is the alignment reference.
	    """
	    with open('load.pml', 'w') as f:
	        for pdb in pdb_list:
	            f.write(f"load {os.getcwd()}/{pdb}.pdb\n")

	        f.write("hide all\n")
	        f.write("show cartoon\n")

	        # Every structure after the first is aligned onto the first.
	        for mobile in pdb_list[1:]:
	            f.write(f"cealign {pdb_list[0]}, {mobile}\n")

	def fetch_pdb_info(pdb_list: List[str]) -> None:
	    """Fetch a custom report for the given PDB IDs and save it to ``list.txt``.

	    The XML response is parsed and every ``record`` element becomes one
	    tab-separated line holding the text of its child elements.  Children
	    without text are written as empty cells rather than dropped, so every
	    line keeps the same column positions.

	    Args:
	        pdb_list: PDB identifiers, joined with commas into the request URL.

	    Raises:
	        urllib.error.URLError: If the request fails.
	        xml.etree.ElementTree.ParseError: If the response is not valid XML.
	    """
	    report_url: str = "http://www.rcsb.org/pdb/rest/customReport?pdbids={0}&customReportColumns=structureId,structureTitle,resolution,experimentalTechnique,depositionDate,structureAuthor,classification,structureMolecularWeight&service=wsdisplay&format=xml"

	    req: urllib.request.Request = urllib.request.Request(report_url.format(",".join(pdb_list)))
	    with urllib.request.urlopen(req) as response:
	        result: str = response.read().decode()

	    if not result:
	        print("Failed to retrieve PDB info")
	        return

	    root: ET.Element = ET.fromstring(result)
	    with open('list.txt', 'w') as f:
	        for record in root.iter('record'):
	            f.write("\t".join(field.text or "" for field in record) + "\n")

	def main() -> None:
	    """Search for structures similar to a fixed sequence, then download and summarise them.

	    When the search returns hits they are downloaded, a PyMOL script is
	    generated and a report is fetched; otherwise a message is printed.
	    """
	    # The block nesting had been flattened; it is restored here.
	    sequence: str = """
	MSMILKEIRMNNFKSHVNSRIKFEKGIVAIIGENGSGKSSIFEAVFFALFGAGSNFNYDT
	IITKGKKSVYVELDFEVNGNNYKIIREYDSGRGGAKLYKNGKPYATTISAVNKAVNEILG
	VDRNMFLNSIYIKQGEIAKFLSLKPSEKLETVAKLLG
	"""
	    resolution: str = "3"
	    e_cutoff: str = "1e-30"

	    pdb_list: List[str] = fetch_pdb_list(sequence, e_cutoff, resolution)

	    if pdb_list:
	        download_pdb_files(pdb_list)
	        generate_pymol_script(pdb_list)
	        fetch_pdb_info(pdb_list)
	    else:
	        print("No PDB structures found")

	if __name__ == "__main__":
	    main()
	import sys
	import time
	import argparse
	from tqdm import tqdm

	def parse_arguments() -> argparse.Namespace:
	    """Read the timer settings from the command line.

	    Returns:
	        Namespace with integer attributes ``time`` (default 25),
	        ``break_time`` (default 5) and ``repeat`` (default 3).
	    """
	    # The body had lost its indentation; it is restored here.
	    parser = argparse.ArgumentParser(description='Set time of timer. It should be in minutes.')
	    parser.add_argument('-t', '--time', type=int, default=25, help='Set pomodoro interval in minutes.')
	    parser.add_argument('-b', '--break_time', type=int, default=5, help='Set break interval in minutes.')
	    parser.add_argument('-r', '--repeat', type=int, default=3, help='Set number of pomodoro rounds.')
	    return parser.parse_args()

	def run_pomodoro(pomodoro_time: int, break_time: int, rounds: int) -> None:
	    """Run ``rounds`` work intervals, with a break after every round but the last.

	    Args:
	        pomodoro_time: Value passed to ``countdown`` for each work interval.
	        break_time: Value passed to ``countdown`` for each break.
	        rounds: Number of work intervals.
	    """
	    # The loop and branch nesting had been flattened; it is restored here.
	    for round_number in range(1, rounds + 1):
	        print(f'Starting Pomodoro {round_number}. May the focus be with you.')
	        countdown(pomodoro_time, "Working")

	        if round_number < rounds:
	            print('Take a break!')
	            countdown(break_time, "Break")

	    print('Pomodoro timer is over. Great job!')

	def countdown(duration: int, label: str) -> None:
	    """Show a tqdm progress bar that advances once per one-second sleep.

	    Args:
	        duration: Number of one-second steps.
	        label: Prefix of the bar description (``"<label> Time"``).
	    """
	    # The loop body had lost its indentation; it is restored here.
	    for _ in tqdm(range(duration), desc=f"{label} Time"):
	        time.sleep(1)

	def main() -> None:
	    """Parse the command line and run the timer, converting minutes to seconds."""
	    # The body had lost its indentation; it is restored here.
	    args = parse_arguments()
	    pomodoro_time = args.time * 60  # Convert minutes to seconds
	    break_time = args.break_time * 60  # Convert minutes to seconds
	    rounds = args.repeat

	    run_pomodoro(pomodoro_time, break_time, rounds)

	if __name__ == "__main__":
	    main()