#!/usr/bin/env python #David Depierre,Taris Raphael """ From a depth text file produced with samtools, create contig and give in return: chromosome name, Start and end of the contig, start and end from the most represented locus seperated by tabulation and output a text file version: 0.1.0 """ import os import re import subprocess import sys import tempfile def stop_err(msg): sys.stderr.write(msg) sys.exit() def Contig_locus_count(infile,fname): """Read a .txt file and find contig and loci. """ in_handle = open(infile)#read the file in input out_handle = open(fname, "w")#right a file in output bestNbRead=[0,0,0,0] #start,end,numberOfReads,length inContig=False inLoci=False tmpLength=0 tmpStart=0 lastPos=0 contigInf=["",0,0] #namecontig,start,end for line in in_handle: splitline=line.split("\t") if inContig==False:#Initiate the contig if int(splitline[2])>0: inContig=True bestNbRead=[int(splitline[1]),0,int(splitline[2]),1] contigInf=[splitline[0],int(splitline[1]),0] inLoci=True tmpLength=1 else:#When we enter in a contig if int(splitline[1])==lastPos+1:#Verify that the current position is equal to the last +1 if inLoci==True:#construct the best Loci if int(splitline[2])>bestNbRead[2]: bestNbRead=[int(splitline[1]),int(splitline[1]),int(splitline[2]),1] tmpLength=1 elif int(splitline[2])==bestNbRead[2]: tmpLength+=1 bestNbRead[1]=int(splitline[1]) elif int(splitline[2])bestNbRead[2]:#read in the contig if a loci is more represented. bestNbRead=[int(splitline[1]),int(splitline[1]),int(splitline[2]),1] tmpLength=1 inLoci=True elif int(splitline[2])==bestNbRead[2]: tmpLength+=1 if tmpLength==1: tmpStart=int(splitline[1]) elif tmpLength>bestNbRead[3]: bestNbRead=[tmpStart,int(splitline[1]),int(splitline[2]),tmpLength] inLoci=True tmpStart=0 elif int(splitline[2])