#!/usr/bin/env Rscript

library('getopt')

# Command-line option specification, flattened row by row for getopt():
#   long name, short flag, argument mask (0 = no argument, 1 = argument
#   required when the option is given), value type.
params <- c(
	"zero_flag", "z", 0, "logical",		# if specified eliminate rows that are all 0/blank/null or sum to zero
	"col_only_flag", "C", 0, "logical",	# if specified only remove columns not rows
	"input_file", "i", 1, "character",	# file containing matrix/data frame/table with row and column headers
	"output_file", "o", 1, "character",	# output file name to write subset of input matrix
	"column_labels", "c", 1, "character",	# file listing the column names to keep
	# File listing the row names to keep. The option itself may be omitted (the
	# column names are then reused), but when -r is given it must carry a file
	# name, hence mask 1 rather than 2 ("optional argument").
	"row_labels", "r", 1, "character"
)

# Parse the command line into a named list keyed by long option name;
# options that were not supplied are absent (NULL) in the result.
opt <- getopt(spec = matrix(params, ncol = 4, byrow = TRUE), debug = FALSE)

# Recover the script path from the "--file=" argument that Rscript hands to R.
# It is looked up by name rather than by position so that extra interpreter
# options (e.g. --vanilla) in front of it do not break the lookup.
file_arg <- grep("^--file=", commandArgs(trailingOnly = FALSE), value = TRUE)
script_name <- if (length(file_arg) > 0) {
	sub("^--file=", "", file_arg[1])
} else {
	"<script>"
}

# Help text shown when a required option is missing.
usage <- paste0(
	"\nUsage:\n\n", script_name, "\n",
	"\n",
	"\t-i <input file: tab-delimited table with row and column headers>\n",
	"\t-o <output file name>\n",
	"\t-c <file listing the column names to keep>\n",
	"\t[-r <file listing the row names to keep; default: same names as -c>]\n",
	"\t[-C (subset columns only, keep all rows)]\n",
	"\t[-z (remove all-zero rows; not currently implemented)]\n",
	"\n",
	"This script will generate the specified subset of the inputted matrix.\n",
	"\n",
	"\n")

# -i, -o and -c are all required: without -c there is no label file to read.
# `||` is the scalar, short-circuiting operator appropriate for `if`.
if (is.null(opt$input_file) || is.null(opt$output_file) || is.null(opt$column_labels)) {
	cat(usage)
	q(status = -1)
}

###############################################################################

# Resolve the input/output paths from the parsed options and echo them.
OutputFileName <- opt$output_file
InputFileName <- opt$input_file

# Widen console output so the label vectors printed below wrap less often.
options(width = 160)

cat("\nInput File Name: ", InputFileName, "\n\n")
cat("\nOutput File Name: ", OutputFileName, "\n\n")

# Column labels: whitespace-separated tokens read from the -c file.
ColsToKeep <- scan(file = opt$column_labels, what = "character")
cat("\nColumns to keep: ", "\n")
print(ColsToKeep)

# Row labels come from the -r file when given; otherwise the column labels
# are reused.
if (is.null(opt$row_labels)) {
	RowsToKeep <- ColsToKeep
} else {
	RowsToKeep <- scan(file = opt$row_labels, what = "character")
}

# The option is declared as "col_only_flag" in the getopt spec and is NULL
# unless -C was given. Rows are only reported when they will be subset.
if (is.null(opt$col_only_flag)) {
	cat("\nRows to keep: ", "\n")
	print(RowsToKeep)
}

###############################################################################

###############################################################################
# Load data.
# header=TRUE: the first line holds the column names.
# row.names=1: the first column holds the row names.
# check.names=FALSE keeps the names verbatim so they can be compared with the
# requested labels; column types are inferred by read.delim.
input_dataframe <- read.delim(InputFileName, sep="\t", header=TRUE, check.names=FALSE, comment.char="", quote="", row.names=1, strip.white=TRUE)

# Report unknown column labels by name instead of the generic
# "undefined columns selected" error raised by the subsetting itself.
missing_cols <- setdiff(ColsToKeep, colnames(input_dataframe))
if (length(missing_cols) > 0) {
	stop("Column label(s) not found in ", InputFileName, ": ",
		paste(missing_cols, collapse = ", "), call. = FALSE)
}

# -z is accepted on the command line but no filtering is implemented here;
# say so rather than silently ignoring the request.
if (!is.null(opt$zero_flag)) {
	warning("Option -z was given but zero-row removal is not implemented; ",
		"rows are not filtered.", call. = FALSE)
}

# The option is declared as "col_only_flag"; it is NULL unless -C was given.
# drop = FALSE keeps a data frame (and therefore the row/column labels in the
# output) even when only a single column is selected.
if (is.null(opt$col_only_flag)) {
	# Character row indexing on a data frame partially matches row names, so
	# resolve the labels to positions with an exact match first.
	row_index <- match(RowsToKeep, rownames(input_dataframe))
	if (anyNA(row_index)) {
		# Unmatched labels still yield all-NA rows in the output.
		warning("Row label(s) not found in ", InputFileName, ": ",
			paste(RowsToKeep[is.na(row_index)], collapse = ", "), call. = FALSE)
	}
	sub_dataframe <- input_dataframe[row_index, ColsToKeep, drop = FALSE]
} else {
	sub_dataframe <- input_dataframe[, ColsToKeep, drop = FALSE]
}

# Write the subset as tab-delimited text with row and column names, unquoted.
write.table(sub_dataframe, file=OutputFileName, sep="\t", quote=FALSE)