#!/usr/bin/env Rscript

# Generate a group-label vector from a clustering-info table.
#
# Usage: <script> input_file output_file
#
# Reads the tab-separated table `input_file`, runs k-means (k = 3) on the
# non-negative values of its second column, and writes one label per input
# row to `output_file` (one value per line, no header, no row names):
#   1, 2, 3 - the row belongs to the cluster with the smallest, middle, or
#             largest center, respectively;
#   3       - also assigned to every row whose second-column value is
#             negative (those rows are not clustered).
argv <- commandArgs(TRUE)
if (length(argv) != 2) {
  cat("Usage: rsem-for-ebseq-generate-ngvector-from-clustering-info input_file output_file\n")
  q(status = 1)
}

clustering_info <- read.table(file = argv[1], stringsAsFactors = FALSE, sep = "\t")
scores <- clustering_info[, 2]

# Only non-negative values take part in the clustering.
# NOTE(review): negative values are presumably a "no value" sentinel written
# by the producer of the input file - confirm.
is_clustered <- scores >= 0
kmr <- kmeans(scores[is_clustered], 3)

# kmeans() numbers its clusters arbitrarily, so translate each cluster id into
# the rank of its center. rank() is required here, not order(): order() gives
# the sorting permutation (position -> cluster id), whereas we need the
# inverse lookup (cluster id -> position). The two differ whenever the
# permutation is not its own inverse, e.g. centers c(5, 1, 3).
center_rank <- rank(as.vector(kmr$centers), ties.method = "first")

ngvec <- rep(0, length(is_clustered))
ngvec[is_clustered] <- center_rank[kmr$cluster]
ngvec[!is_clustered] <- 3

write.table(ngvec, file = argv[2], row.names = FALSE, col.names = FALSE)
