import sys
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

# Example arguments: lectures_maj.txt structures.txt output.txt

# Minimum fuzz.partial_ratio score for a read to be reported for a structure.
SIMILARITY_THRESHOLD = 75

# Base pairing used to build the complementary RNA strand.
_COMPLEMENT = {"A": "U", "U": "A", "G": "C", "C": "G"}


def stop_err(msg):
    """Write *msg* to stderr and terminate the script with exit status 1."""
    sys.stderr.write(msg)
    # A non-zero status lets callers detect the failure from the exit code.
    sys.exit(1)


def reverse_strand(strand):
    """Return the reverse complement of the RNA string *strand*.

    Each base is replaced by its pairing partner (A<->U, G<->C) and the
    result is read in the opposite direction. Characters other than
    A, U, G and C are dropped from the result.
    """
    return "".join(
        _COMPLEMENT[base] for base in reversed(strand) if base in _COMPLEMENT
    )


def _read_reads(path):
    """Parse the reads file at *path* into a ``{name: sequence}`` dict.

    A line containing ">" starts a new record; every other non-empty line
    is stored as the sequence of the current record (the last one wins).
    """
    reads = {}
    key = None
    with open(path) as handle:
        for raw_line in handle:
            line = raw_line.rstrip("\r\n")
            if ">" in line:
                # The first five characters of the header are discarded
                # (presumably a fixed prefix such as ">xxx_" -- TODO confirm
                # against the input format).
                key = line[5:]
            elif line and key is not None:
                # Blank lines and lines before the first header are ignored
                # so they can neither crash nor erase a stored sequence.
                reads[key] = line
    return reads


def _read_structures(path):
    """Parse the structures file at *path* into a ``{name: [lines]}`` dict.

    A line containing ">" starts a new record (its name is the line with
    every ">" removed); the following non-empty lines are appended to it.
    """
    structures = {}
    key = None
    with open(path) as handle:
        for raw_line in handle:
            line = raw_line.rstrip("\r\n")
            if ">" in line:
                key = line.replace(">", "")
                structures[key] = []
            elif line and key is not None:
                structures[key].append(line)
    return structures


def __main__():
    """Report, for each hairpin structure, the reads that match it.

    Command line: ``<reads file> <structures file> <output file>``.

    Every read (and its reverse complement) is compared with the first line
    of every structure using ``fuzz.partial_ratio``; pairs whose best score
    reaches SIMILARITY_THRESHOLD are written to the output file as
    ``structure<TAB>read<TAB>score<TAB>sequence``. Summary statistics are
    written to ``informations.txt`` in the current directory.
    """
    if len(sys.argv) < 4:
        stop_err(
            "Usage: %s <reads file> <structures file> <output file>\n"
            % sys.argv[0]
        )

    # converting input files into maps
    try:
        lect_maj = _read_reads(sys.argv[1])
        structures = _read_structures(sys.argv[2])
    except (IOError, OSError):
        stop_err('Cannot open a file\n')

    # The reverse complement depends only on the read, so compute it once
    # instead of once per (structure, read) pair.
    candidates = [
        (name, lect, reverse_strand(lect)) for name, lect in lect_maj.items()
    ]

    # for each structure, it returns the reads that are included at 75% or
    # more in the structure
    scores_list = []
    with open(sys.argv[3], "w") as f_output:
        for struct_name, struct_lines in structures.items():
            if not struct_lines:
                # Header without any content line: nothing to compare with.
                continue
            struct = struct_lines[0]
            for name, lect, reversed_lect in candidates:
                similarity_score = max(
                    fuzz.partial_ratio(lect, struct),
                    fuzz.partial_ratio(reversed_lect, struct),
                )
                if similarity_score >= SIMILARITY_THRESHOLD:
                    f_output.write(
                        "%s\t%s\t%s\t%s\n"
                        % (struct_name, name, similarity_score, lect)
                    )
                    scores_list.append(similarity_score)

    with open("informations.txt", "w") as f_output_infos:
        f_output_infos.write(
            "%s miRNA reads are found to be good candidates of hairpin structures.\n"
            % len(scores_list)
        )
        f_output_infos.write(
            "Some miRNA may be selected multiple times if they are good candidates for more than one hairpin structure.\n"
            "%s miRNA were tested with %s hairpin structures.\n"
            % (len(lect_maj), len(structures))
        )
        if scores_list:
            best_score = max(scores_list)
            f_output_infos.write(
                "The minimal similarity score is %s.\n" % min(scores_list)
            )
            f_output_infos.write(
                "The maximal similarity score is %s and is found %s times.\n"
                % (best_score, scores_list.count(best_score))
            )
            f_output_infos.write(
                "The average similarity score is %s."
                % (sum(scores_list) / len(scores_list))
            )
        else:
            # min()/max() and the average are undefined without any score.
            f_output_infos.write(
                "No similarity score statistics are available because no read reached the threshold."
            )


if __name__ == "__main__":
    __main__()