Source code for transplanttoolbox_victor.conversion_functions_for_VXM

#! usr/bin/python 

from __future__ import division, print_function, absolute_import
import argparse
import sys
import logging

import os
import re
import requests   
import operator
import glob
import transplanttoolbox_victor.vxm_hla
from transplanttoolbox_victor.vxm_hla import allele_truncate, locus_string_geno_list, expand_ac, single_locus_allele_codes_genotype


from transplanttoolbox_victor import __version__

__author__ = "Gragert Lab"
__copyright__ = "Gragert Lab"
__license__ = "gpl3"

_logger = logging.getLogger(__name__)


allele_to_ag_dict = {}
population_allele_frequencies = {}
allele_frequencies = {}
b_bw_dict = {}
bw4_list = []
bw6_list = []


unosEQags = []
agbw46 = {}
### Dictionary with alleles and equivalent antigens


BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
UNOS_conversion_table_filename = os.path.join(BASE_DIR,"transplanttoolbox_victor/UNOS_conversion_table_with_rules.csv")
UNOS_conversion_table_file = open(UNOS_conversion_table_filename, 'r')
for row in UNOS_conversion_table_file:
	expression_character = ""
	if row.startswith("Allele"):
		continue 
	else:
		allele = row.split(',')[0]
		allele_4d = transplanttoolbox_victor.vxm_hla.allele_truncate(allele)
		antigen = row.split(',')[1]
		unosEQags.append(antigen)
		rule = row.split(',') [2]
		bw4_6 = row.split(',')[3]
		if bw4_6 != "NA":
			agbw46[antigen] = bw4_6
		if bw4_6 == "Bw4":
			bw4_list.append(antigen)
		if bw4_6 == "Bw6":
			bw6_list.append(antigen)

		
		
	allele_to_ag_dict[allele] = antigen, rule, bw4_6 
	allele_to_ag_dict[allele_4d] = antigen, rule, bw4_6

bw4_list = list(set(bw4_list))
bw6_list = list(set(bw6_list))

unosagslist = list(set(unosEQags))

#print(unosagslist)
#print(len(unosagslist))

b_bw_dict["Bw4"] = bw4_list
b_bw_dict["Bw6"] = bw6_list

#print(agbw46)

race_list = ["AAFA", "AFA", "CAU", "HIS", "NAM", "AFB", "AINDI", "API",
			 "AISC", "ALANAM", "AMIND", "CARB", "CARHIS", "CARIBI", 
			"EURCAU", "FILII", "HAWI", "JAPI", "KORI", "MENAFC", "MSWHIS", "NCHI", "SCAHIS", "SCAMB", "SCSEAI", "VIET"] 

#print(b_bw_dict)	

for pop in race_list:
	file = BASE_DIR + "/transplanttoolbox_victor/freqs_6loc/" + pop + ".ARS.freqs"
	freq_file = open(file, 'r')
	for line in freq_file:
		if line.startswith("Haplo"):
			continue
		else:
			line_split = line.split(",")
			allele_list = line_split[0]
			count = line_split[1]
			haplotype_frequency = line_split[2]
			allele_split = allele_list.split("~")

			for allele in allele_split:
				allele = allele.rstrip("g")
				key = pop + "%" + allele
				if key in population_allele_frequencies:
					population_allele_frequencies[key] += float(haplotype_frequency)
				else:
					population_allele_frequencies[key] = float(haplotype_frequency)



#print(population_allele_frequencies)
[docs]def convert_allele_list_to_ags(hla_allele_list): """This function can be called if a list of alleles has to be converted to antigens. Input format is a list and the corresponding antigens and rules will be printed out""" allele_list_dict = {} ag_list = [] bw4_6_list = [] for allele in hla_allele_list: allele = allele.rstrip("p P g G") if allele in allele_to_ag_dict: ag = allele_to_ag_dict[allele][0] ag_list.append(ag) bw4_6 = allele_to_ag_dict[allele][2] if bw4_6 != "NA": bw4_6_list.append(bw4_6) else: continue bw46_list = list(set(bw4_6_list)) ags = ag_list + bw46_list return ags
[docs]def gl_string_ags(gl_string, pop): gl_dict = {} gl_string = gl_string.replace("HLA-", "") locus_split = gl_string.split("^") ag_freq_1 = 0.0 ag_freq_2 = 0.0 geno_antigen_freq = {} ag_list = "" One_locus_typing = 0 Two_locus_typing = 0 Three_locus_typing = 0 Four_locus_typing = 0 Five_locus_typing = 0 Six_locus_typing = 0 if len(locus_split) == 1: One_locus_typing = 1 print("One locus typing") geno_antigen_freq = {} a_locus = locus_split[0] a_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(a_locus) a_ags = genotype_ags(a_genotype_list,pop) ag_list = a_ags if len(locus_split) == 2: Two_locus_typing = 1 print("Two locus typing") geno_antigen_freq = {} a_locus = locus_split[0] a_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(a_locus) a_ags = genotype_ags(a_genotype_list,pop) geno_antigen_freq = {} b_locus = locus_split[1] b_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(b_locus) b_ags = genotype_ags(b_genotype_list,pop) ag_list = a_ags + b_ags if len(locus_split) == 3: Three_locus_typing = 1 print("Three locus typing") geno_antigen_freq = {} a_locus = locus_split[0] a_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(a_locus) a_ags = genotype_ags(a_genotype_list,pop) geno_antigen_freq = {} b_locus = locus_split[1] b_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(b_locus) b_ags = genotype_ags(b_genotype_list,pop) geno_antigen_freq = {} c_locus = locus_split[2] c_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(c_locus) c_ags = genotype_ags(c_genotype_list,pop) ag_list = a_ags + b_ags + c_ags if len(locus_split) == 4: Four_locus_typing = 1 print("Four locus typing") geno_antigen_freq = {} a_locus = locus_split[0] a_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(a_locus) a_ags = genotype_ags(a_genotype_list,pop) geno_antigen_freq = {} b_locus = locus_split[1] b_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(b_locus) b_ags = genotype_ags(b_genotype_list,pop) geno_antigen_freq = {} c_locus = locus_split[2] c_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(c_locus) c_ags = genotype_ags(c_genotype_list,pop) geno_antigen_freq = {} dr_locus = locus_split[3] dr_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(dr_locus) dr_ags = genotype_ags(dr_genotype_list,pop) ag_list = a_ags + b_ags + c_ags + dr_ags if len(locus_split) == 5: Five_locus_typing = 1 print("Five locus typing") geno_antigen_freq = {} a_locus = locus_split[0] a_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(a_locus) a_ags = genotype_ags(a_genotype_list,pop) geno_antigen_freq = {} b_locus = locus_split[1] b_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(b_locus) b_ags = genotype_ags(b_genotype_list,pop) geno_antigen_freq = {} c_locus = locus_split[2] c_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(c_locus) c_ags = genotype_ags(c_genotype_list,pop) geno_antigen_freq = {} dr_locus = locus_split[3] dr_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(dr_locus) dr_ags = genotype_ags(dr_genotype_list,pop) geno_antigen_freq = {} dqb_locus = locus_split[4] dqb_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(dqb_locus) dqb_ags = genotype_ags(dqb_genotype_list,pop) ag_list = a_ags + b_ags + c_ags + dr_ags + dqb_ags if len(locus_split) == 6: Six_locus_typing = 1 print("Six locus typing") geno_antigen_freq = {} a_locus = locus_split[0] a_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(a_locus) a_ags = genotype_ags(a_genotype_list,pop) geno_antigen_freq = {} b_locus = locus_split[1] b_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(b_locus) b_ags = genotype_ags(b_genotype_list,pop) geno_antigen_freq = {} c_locus = locus_split[2] c_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(c_locus) c_ags = genotype_ags(c_genotype_list,pop) geno_antigen_freq = {} dr_locus = locus_split[3] dr_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(dr_locus) dr_ags = genotype_ags(dr_genotype_list,pop) geno_antigen_freq = {} dqb_locus = locus_split[4] dqb_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(dqb_locus) dqb_ags = genotype_ags(dqb_genotype_list,pop) geno_antigen_freq = {} dr345_locus = locus_split[5] dr345_genotype_list = transplanttoolbox_victor.vxm_hla.locus_string_geno_list(dr345_locus) dr345_ags = genotype_ags(dr345_genotype_list,pop) ag_list = a_ags + "," + b_ags + "," + c_ags + "," + dr_ags + "," + dqb_ags + "," + dr345_ags #print(ag_list) #ages = ag_list[0::3] #bw46_list = ag_list[1::3] #probs = ag_list[2::3] #gl_dict = {"GL_string" : gl_string, "UNOS antigens": ages, "Bw4/6 epitopes": bw46_list, "Antigen Probablities": probs } #print(ag_list) return ag_list
[docs]def genotype_ags(genotype_list, pop): ag_freq_1 = 0.0 ag_freq_2 = 0.0 geno_antigen_freq = {} for genotype in genotype_list: allele_1 = genotype.split("+")[0] allele_1 = allele_1.rstrip("g p P G") allele_1 = transplanttoolbox_victor.vxm_hla.allele_truncate(allele_1) allele_pop1 = pop + "%" + allele_1 allele_2 = genotype.split("+")[1] allele_2 = allele_2.rstrip("g p P G") allele_2 = transplanttoolbox_victor.vxm_hla.allele_truncate(allele_2) allele_pop2 = pop + "%" + allele_2 ag_1 = allele_to_ag_dict[allele_1][0] bw46_1 = allele_to_ag_dict[allele_1][2] ag_2 = allele_to_ag_dict[allele_2][0] bw46_2 = allele_to_ag_dict[allele_2][2] if allele_1 in population_allele_frequencies: ag_freq_1 = population_allele_frequencies[allele_pop1] if allele_2 in population_allele_frequencies: ag_freq_2 = population_allele_frequencies[allele_pop2] gf = 0 if (ag_1 == ag_2): gf = float(ag_freq_1) * float(ag_freq_2) else: gf = 2 * float(ag_freq_1) * float(ag_freq_2) geno_antigen = ag_1 + "+" + ag_2 if bw46_1 != "NA": geno_antigen = geno_antigen + "+" + bw46_1 if bw46_2 != "NA": geno_antigen = geno_antigen + "+" + bw46_2 #print(geno_antigen) if geno_antigen in geno_antigen_freq.keys(): geno_antigen_freq[geno_antigen] += float(gf) else: geno_antigen_freq[geno_antigen] = float(gf) TF = sum(geno_antigen_freq.values()) if TF == 0.0: TF = 1 else: TF = TF for i,j in geno_antigen_freq.items(): ag_probs = j/TF geno_antigen_freq[i] = round(ag_probs, 4) #print(geno_antigen_freq) sorted_gf = sorted(geno_antigen_freq.items(), key = operator.itemgetter(1), reverse = True) #print(sorted_gf) #if len(sorted_gf) == 1: #ag_prob = 1 #else: #antigen_list = sortef_gf[1::2] #print(sorted_gf) # top_ag_geno = sorted_gf[0][0] # top_gf = sorted_gf[0][1] #print(top_ag_geno) # ag_1 = top_ag_geno.split("+")[0] # ag_2 = top_ag_geno.split("+")[1] #print(ag_1) #print(ag_2) # ag_list = ag_1 + "," + ag_2 #bw46_list = bw46_1 + "," + bw46_2 return (sorted_gf)
[docs]def allele_freq(allele_list, pop): allele_pop_freqs = {} for i in allele_list: allele_pop_key = pop + "%" + i if allele_pop_key in population_allele_frequencies: allele_pop_freqs[i] = population_allele_frequencies[allele_pop_key] else: allele_pop_freqs[i] = 0 return allele_pop_freqs
[docs]def allele_code_ags(allele_codes_list, pop): ag_freq_1 = 0.0 ag_freq_2 = 0.0 geno_antigen_freq = {} ag_list = "" if len(allele_codes_list) == 2: print("One locus typing") One_locus_typing = 1 A_1_code = allele_codes_list[0] A_2_code = allele_codes_list[1] A_codes_pair = [A_1_code, A_2_code] geno_antigen_freq = {} acodes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(A_codes_pair) a_ags = genotype_ags(acodes_genotype, pop) ag_list = a_ags if len(allele_codes_list) == 4: print("Two locus typing") Two_locus_typing = 1 A_1_code = allele_codes_list[0] A_2_code = allele_codes_list[1] A_codes_pair = [A_1_code, A_2_code] geno_antigen_freq = {} acodes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(A_codes_pair) a_ags = genotype_ags(acodes_genotype, pop) C_1_code = allele_codes_list[2] C_2_code = allele_codes_list[3] C_codes_pair = [C_1_code, C_2_code] geno_antigen_freq = {} ccodes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(C_codes_pair) c_ags = genotype_ags(ccodes_genotype, pop) ag_list = a_ags + c_ags if len(allele_codes_list) == 6: print("Three locus typing") Three_locus_typing = 1 A_1_code = allele_codes_list[0] A_2_code = allele_codes_list[1] A_codes_pair = [A_1_code, A_2_code] geno_antigen_freq = {} acodes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(A_codes_pair) a_ags = genotype_ags(acodes_genotype, pop) C_1_code = allele_codes_list[2] C_2_code = allele_codes_list[3] C_codes_pair = [C_1_code, C_2_code] geno_antigen_freq = {} ccodes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(C_codes_pair) c_ags = genotype_ags(ccodes_genotype, pop) B_1_code = allele_codes_list[4] B_2_code = allele_codes_list[5] B_codes_pair = [B_1_code, B_2_code] geno_antigen_freq = {} bcodes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(B_codes_pair) b_ags = genotype_ags(bcodes_genotype, pop) ag_list = a_ags + b_ags + c_ags if len(allele_codes_list) == 8: print("Four locus typing") Four_locus_typing = 1 A_1_code = allele_codes_list[0] A_2_code = allele_codes_list[1] A_codes_pair = [A_1_code, A_2_code] geno_antigen_freq = {} acodes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(A_codes_pair) a_ags = genotype_ags(acodes_genotype, pop) C_1_code = allele_codes_list[2] C_2_code = allele_codes_list[3] C_codes_pair = [C_1_code, C_2_code] geno_antigen_freq = {} ccodes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(C_codes_pair) b_ags = genotype_ags(ccodes_genotype, pop) B_1_code = allele_codes_list[4] B_2_code = allele_codes_list[5] B_codes_pair = [B_1_code, B_2_code] geno_antigen_freq = {} bcodes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(B_codes_pair) c_ags = genotype_ags(bcodes_genotype, pop) dr_1_code = allele_codes_list[6] dr_2_code = allele_codes_list[7] dr_codes_pair = [dr_1_code, dr_2_code] geno_antigen_freq = {} drcodes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(dr_codes_pair) dr_ags = genotype_ags(drcodes_genotype, pop) ag_list = a_ags + b_ags + c_ags + dr_ags if len(allele_codes_list) == 10: print("Five locus typing") Five_locus_typing = 1 A_1_code = allele_codes_list[0] A_2_code = allele_codes_list[1] A_codes_pair = [A_1_code, A_2_code] geno_antigen_freq = {} acodes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(A_codes_pair) a_ags = genotype_ags(acodes_genotype, pop) C_1_code = allele_codes_list[2] C_2_code = allele_codes_list[3] C_codes_pair = [C_1_code, C_2_code] geno_antigen_freq = {} ccodes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(C_codes_pair) c_ags = genotype_ags(ccodes_genotype, pop) B_1_code = allele_codes_list[4] B_2_code = allele_codes_list[5] B_codes_pair = [B_1_code, B_2_code] geno_antigen_freq = {} bcodes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(B_codes_pair) b_ags = genotype_ags(bcodes_genotype, pop) dr_1_code = allele_codes_list[6] dr_2_code = allele_codes_list[7] dr_codes_pair = [dr_1_code, dr_2_code] geno_antigen_freq = {} drcodes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(dr_codes_pair) dr_ags = genotype_ags(drcodes_genotype, pop) dqb_1_code = allele_codes_list[8] dqb_2_code = allele_codes_list[9] dqb_codes_pair = [dqb_1_code, dqb_2_code] geno_antigen_freq = {} dqbcodes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(dqb_codes_pair) dqb_ags = genotype_ags(dqbcodes_genotype, pop) ag_list = a_ags + b_ags + c_ags + dr_ags + dqb_ags if len(allele_codes_list) == 12: print("Six locus typing") Six_locus_typing = 1 A_1_code = allele_codes_list[0] A_2_code = allele_codes_list[1] A_codes_pair = [A_1_code, A_2_code] geno_antigen_freq = {} acodes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(A_codes_pair) a_ags = genotype_ags(acodes_genotype, pop) C_1_code = allele_codes_list[2] C_2_code = allele_codes_list[3] C_codes_pair = [C_1_code, C_2_code] geno_antigen_freq = {} ccodes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(C_codes_pair) c_ags = genotype_ags(ccodes_genotype, pop) B_1_code = allele_codes_list[4] B_2_code = allele_codes_list[5] B_codes_pair = [B_1_code, B_2_code] geno_antigen_freq = {} bcodes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(B_codes_pair) b_ags = genotype_ags(bcodes_genotype, pop) dr_1_code = allele_codes_list[6] dr_2_code = allele_codes_list[7] dr_codes_pair = [dr_1_code, dr_2_code] geno_antigen_freq = {} drcodes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(dr_codes_pair) dr_ags = genotype_ags(drcodes_genotype, pop) dqb_1_code = allele_codes_list[8] dqb_2_code = allele_codes_list[9] dqb_codes_pair = [dqb_1_code, dqb_2_code] geno_antigen_freq = {} dqbcodes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(dqb_codes_pair) dqb_ags = genotype_ags(dqbcodes_genotype, pop) dr345_1_code = allele_codes_list[10] dr345_2_code = allele_codes_list[11] dr345_codes_pair = [dr345_1_code, dr345_2_code] geno_antigen_freq = {} dr345codes_genotype = transplanttoolbox_victor.vxm_hla.single_locus_allele_codes_genotype(dr345_codes_pair) dr345_ags = genotype_ags(dr345codes_genotype, pop) ag_list = a_ags + b_ags + c_ags + dr_ags + dqb_ags + dr345_ags #ages = ag_list[0::3] #bw46_list = ag_list[1::3] #probs = ag_list[2::3] #al_dict = {"Allele Codes" : allele_codes_list, "Antigens": ages, "Bw4/6 epitopes": bw46_list, "Antigen Probablities": probs} print(ag_list) return ag_list
[docs]def parse_args(args): """Parse command line parameters Args: args ([str]): command line parameters as list of strings Returns: :obj:`argparse.Namespace`: command line parameters namespace """ parser = argparse.ArgumentParser( description="Just a Fibonnaci demonstration") parser.add_argument( '--version', action='version', version='allan {ver}'.format(ver=__version__)) parser.add_argument( dest="n", help="n-th Fibonacci number", type=int, metavar="INT") parser.add_argument( '-v', '--verbose', dest="loglevel", help="set loglevel to INFO", action='store_const', const=logging.INFO) parser.add_argument( '-vv', '--very-verbose', dest="loglevel", help="set loglevel to DEBUG", action='store_const', const=logging.DEBUG) return parser.parse_args(args)
[docs]def setup_logging(loglevel): """Setup basic logging Args: loglevel (int): minimum loglevel for emitting messages """ logformat = "[%(asctime)s] %(levelname)s:%(name)s:%(message)s" logging.basicConfig(level=loglevel, stream=sys.stdout, format=logformat, datefmt="%Y-%m-%d %H:%M:%S")
[docs]def main(args): """Main entry point allowing external calls Args: args ([str]): command line parameter list """ args = parse_args(args) setup_logging(args.loglevel) _logger.debug("Starting crazy calculations...") print("The {}-th Fibonacci number is {}".format(args.n, fib(args.n))) _logger.info("Script ends here")
[docs]def run(): """Entry point for console_scripts """ main(sys.argv[1:])
if __name__ == "__main__": run()