In your case, the data has a long tail, which is expected for gene expression data (lognormal).
# Download the expression table. The header row gives the column names and
# the first column of the file is used as row names.
data <- read.table(
  file = "http://pastebin.com/raw.php?i=ZaGkPTGm",
  header = TRUE,
  row.names = 1
)
# Drop the first remaining column and keep the rest as a matrix
# (presumably that column is the Gene.symbol annotation used later for the
# row labels -- confirm against the downloaded table).
mat <- as.matrix(data[, -1])
As the quantiles show, the genes with the highest expression stretch the range from about 1.5 (the 75th percentile) to more than 300.
# Quartiles of all expression values in `mat`, pooled over rows and columns.
quantile(mat, probs = seq(0, 1, 0.25))
# Output for the example data:
# 0% 25% 50% 75% 100%
# 0.000 0.769 1.079 1.544 346.230
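When hierarchical clustering is run on unscaled data, the distances are dominated by the few genes with the highest expression. For this reason the data is usually normalised first, either by log transformation or by z-score transformation. In your data set there are many values == 0, which is a problem for log transformation, because log(0) is undefined.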
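Z-score transformation (scaling) is built into heatmap.2, but note that the function computes the distance matrix and runs the clustering before the scaling is applied. Hence the option scale='row' changes only the colours, not the clustering result (see the discussion of differences in heatmap/clustering defaults in R).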
Therefore, I suggest scaling the data yourself before calling heatmap.2:
# Row-wise z-score. scale() standardises columns, so transpose first, centre
# and scale, then transpose back to the original orientation.
z <- t(scale(t(mat), center = TRUE, scale = TRUE))
# Quartiles of the standardised values, to check the new range.
quantile(z, probs = seq(0, 1, 0.25))
# Clustering helper: complete-linkage hierarchical clustering of a
# dissimilarity object.
hclustfunc <- function(x) {
  hclust(x, method = "complete")
}

# Distance helper: pairwise "maximum" distance between the rows of x.
distfunc <- function(x) {
  dist(x, method = "maximum")
}

# Cluster the rows of the scaled matrix and cut the tree into 5 groups.
row_dist <- distfunc(z)
fit <- hclustfunc(row_dist)
clusters <- cutree(fit, k = 5)
# Draw the heatmap of the pre-scaled matrix `z` into heatmap.pdf.
# heatmap.2() and greenred() come from gplots, so the package has to be
# attached; library() fails loudly if it is missing.
library(gplots)

# A tall page (50 x 10 inches) is used for the output PDF.
pdf(file = "heatmap.pdf", height = 50, width = 10)
# `z` is already row-scaled, hence scale = "none". The same distance and
# clustering helpers that produced `clusters` are passed in, using the full
# argument names (hclustfun, symbreaks) instead of partial matching.
# lmat / lwid / lhei set the panel layout and relative panel sizes; see the
# heatmap.2 documentation for the meaning of the panel numbers.
heatmap.2(z, trace = "none", dendrogram = "row", Colv = FALSE, scale = "none",
          hclustfun = hclustfunc, distfun = distfunc, col = greenred(256),
          symbreaks = TRUE, margins = c(10, 20), keysize = 0.5,
          labRow = data$Gene.symbol,
          lwid = c(1, 0.05, 1), lhei = c(0.03, 1),
          lmat = rbind(c(5, 0, 4), c(3, 1, 2)),
          RowSideColors = as.character(clusters))
# Close the PDF device so the file is flushed to disk.
dev.off()
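A few notes. To get a readable figure, you will probably need to adjust the page size and the layout parameters lmat, lwid and lhei.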
The result (for reference only):
