#######################################################
#################                     #################
#################     UI function     #################
#######################################################

# Top-level UI for the Metabolomics Statistical Analysis Shiny app.
# NOTE: tabPanel()/navbarMenu() titles double as tab *values* that server
# code may reference, so titles are left byte-identical even where they
# contain typos; only pure display strings (helpText, h3/h4/h5 labels,
# choice display names) have been corrected.
ui <- navbarPage("Metabolomics Statistical Analysis R Shiny App 0.1",
  tabPanel("Load all Packages",
    helpText("All packages must be initialised before uploading the data."),
    actionButton("loadPack", "Load All Packages"),
    helpText("After clicking on the Load All Packages button. Please wait until the application displays below \"TRUE\" for all the packages"),
    tableOutput("pacL")
  ),
  tabPanel("Upload Data",
    sidebarLayout(
      sidebarPanel(
        radioButtons("radioButtons1", label = h3("Data Type"),
                     choices = list("Concentration" = 1, "Spectral Bins" = 2, "Intensity Table" = 3),
                     selected = 1),
        selectInput("select", label = h3("Format"),
                    choices = list("Samples in rows (unpaired)" = 1, "Samples in columns (unpaired)" = 2,
                                   "Samples in rows (paired)" = 3, "Samples in columns (paired)" = 4),
                    selected = 1),
        fileInput('file1', 'Choose CSV File',
                  accept = c('text/csv', 'text/comma-separated-values,text/plain', '.csv'))
      ),
      mainPanel(textOutput('contents'))
    )
  ),
  navbarMenu("Data Processing",
    tabPanel("Missing value estimation",
      helpText(" Too many missing values will cause difficulties for downstream analysis. There are several different methods for this purpose. The default method replaces all the missing values with a small value (the half of the minimum positive values in the original data) assuming to be the detection limit. Move onto Normalization if you want to use the default method. The assumption of this approach is that most missing values are caused by low abundance metabolites (i.e. below the detection limit)."),
      helpText("The functions in MetaboAnalyst also offer other methods, such as replace by mean/median, k-nearest neighbour (KNN), probabilistic PCA (PPCA), Bayesian PCA (BPCA) method, Singular Value Decomposition (SVD) method to impute the missing values. Please choose the one that is the most appropriate for your data."),
      sidebarPanel(h4("Step 1. Remove features with too many missing values"),
        checkboxInput("missValue1", label = "Remove features with > x % missing values", value = TRUE),
        numericInput("missValue2", label = NULL, value = 50)
      ),
      sidebarPanel(h4("Step 2. Estimate the remaining missing values (Select only 1 of the following 4 options)"),
        checkboxInput("missValue3", label = "Replace by a small value (half of the minimum positive value in the original data)", value = TRUE),
        checkboxInput("missValue4", label = "Exclude variables with missing values", value = FALSE),
        selectInput("missValue5", label = h5("Replace by column (feature)"),
                    choices = list("None" = 1, "Mean" = 2, "Median" = 3, "Min" = 4), selected = 1),
        selectInput("missValue6", label = h5("Estimate missing values using"),
                    choices = list("None" = 1, "KNN" = 2, "PPCA" = 3, "BPCA" = 4, "SVD Impute" = 5), selected = 1)
      ),
      actionButton("calc1", "Process"),
      h3(textOutput("MVtext1"))
    )
  ),
  tabPanel("Normailsation",
    sidebarLayout(
      sidebarPanel(h3("Sample Normalization"),
        radioButtons("radioButtons2", label = h4("Sample normalization"),
                     choices = list("None" = 1, "Normalization by sum" = 2, "Normalization by median" = 3,
                                    "Normalization by a specific reference sample" = 4,
                                    "Normalization by a pooled sample from group" = 5,
                                    "Normalization by reference feature" = 6),
                     selected = 1),
        uiOutput("refsample"), uiOutput("poolsample"), uiOutput("refFeat"),
        radioButtons("radioButtons3", label = h4("Data transform"),
                     choices = list("None" = 1, "log transform" = 2, "cube root transform" = 3),
                     selected = 1),
        radioButtons("radioButtons4", label = h4("Data scaling"),
                     choices = list("None" = 1, "Mean Centering" = 2, "Auto scaling" = 3,
                                    "Pareto scaling" = 4, "Range scaling" = 5, "Vast scaling" = 6),
                     selected = 1)
      ),
      mainPanel(
        actionButton("go", "Update"),
        plotOutput("normPlot", width = "600", height = "800")
      )
    )
  ),
  navbarMenu("Univariate Analysis",
    tabPanel("Fold Change Analysis",
      h3("Fold Change Analysis"),
      helpText("Fold change (FC) analysis is to compare the absolute value change between two group means. Since column-wise normalization (i.e. log transformation, mean-centering) will significantly change the absolute values, FC is calculated as the ratio between two group means using data before column-wise normalization was applied."),
      helpText("For paired analysis, the program first counts the number of pairs with consistent change above the given FC threshold. If this number exceeds a given count threshold, the variable will be reported as significant."),
      uiOutput("FCAnalT"),
      numericInput("FCThresh", label = h5("Fold Change Threshold"), value = 2),
      uiOutput("SigCountT"),
      uiOutput("ComparType"),
      actionButton("go4", "Update"),
      plotOutput("FCPlot"),
      dataTableOutput('FCtable1')
    ),
    tabPanel("T-Test",
      h3("T-Test"),
      helpText("Note, for large data set (> 1000 variables), both the paired information and the group variance will be ignored, and the default parameters will be used for t-tests to save computational time. If you choose non-parametric tests (Wilcoxon rank-sum test), the group variance will be ignored."),
      uiOutput("TTAnalType"),
      numericInput("TTestP", label = h5("P-Value"), value = 0.05),
      selectInput("grpVar", label = h5("Group variance"),
                  choices = list("Equal" = 1, "Unequal" = 2), selected = 1),
      checkboxInput("NPT1", label = h5("Non-parametric tests:"), value = FALSE),
      # sidebarPanel(h5("Color input for the T-test graph"),
      textInput("Tcolo1", label = h5("Color 1:"), value = "red"),
      textInput("Tcolo2", label = h5("Color 2:"), value = "green"),
      helpText("This application supports the following colors: "),
      helpText(" \"black\",\"blue\",\"brown\",\"cyan\",\"darkblue\",\"darkred\",\"green\",\"grey\",\"gray\", \"lightblue\", \"limegreen\",\"magenta\", \"orange\",\"pink\", \"purple\", \"violet\", \"yellow\""),
      # ),
      actionButton("go2", "Update/Plot"),
      plotOutput("PlotTT", click = "plot_click1"),
      helpText("Click the points on the graph to display an interval plot and boxplot plot for that compound"),
      selectInput("tIntCalc", label = h5("Statistic Shown for the Interval Plot"),
                  choices = list("se" = 1, "sd" = 2), selected = 1),
      uiOutput("color1"),
      splitLayout(cellWidths = c("50%", "50%"),
                  plotOutput("PlotTT2", width = "500", height = "500"),
                  plotOutput("PlotBox1", width = "400", height = "500")),
      verbatimTextOutput("info1"),
      dataTableOutput('TTtable1')
    ),
    tabPanel("One-way Analysis of Variance (ANOVA)",
      h3("One-way ANOVA & post-hoc Tests "),
      helpText("You can choose to perform one-way ANOVA or its non-parametric version (Kruskal Wallis Test). Note, the post-hoc tests have only been implemented for parametric version."),
      checkboxInput("NPT2", label = h5("Non-parametric tests:"), value = FALSE),
      numericInput("ANOVAP", label = h5("Significant Level (alpha): raw p-value < "), value = 0.05),
      selectInput("PHA", label = h5("Post-hoc analysis:"),
                  choices = list("Fisher's LSD" = 1, "Tukey's HSD" = 2), selected = 1),
      textInput("Acolo1", label = h5("Color 1:"), value = "red"),
      textInput("Acolo2", label = h5("Color 2:"), value = "green"),
      helpText("This application supports the following colors: "),
      helpText(" \"black\",\"blue\",\"brown\",\"cyan\",\"darkblue\",\"darkred\",\"green\",\"grey\",\"gray\", \"lightblue\", \"limegreen\",\"magenta\", \"orange\",\"pink\", \"purple\", \"violet\", \"yellow\""),
      actionButton("go3", "Update/Plot"),
      plotOutput("PlotAOV", click = "plot_click2"),
      helpText("Click the points on the graph to display an interval plot and boxplot plot for that compound"),
      selectInput("aIntCalc", label = h5("Statistic Shown for the Interval Plot"),
                  choices = list("se" = 1, "sd" = 2), selected = 1),
      uiOutput("color2"),
      splitLayout(cellWidths = c("50%", "50%"),
                  plotOutput("PlotAOV2", width = "500", height = "500"),
                  plotOutput("PlotBox2", width = "400", height = "500")),
      dataTableOutput('AOVtable1')
    ),
    tabPanel("Volcano plot",
      h3("Volcano plot"),
      helpText("The volcano plot is a combination of fold change and t-tests. Note, for unpaired samples, the x-axis is log (FC). For paired analysis, the x-axis is number of significant counts. Y-axis is -log10(p.value) for both cases."),
      uiOutput("VolAnalT"),
      sidebarPanel(h3("X-axis:"),
        numericInput("VolThresh", label = h5("Fold Change Threshold"), value = 2),
        uiOutput("VolComparType"),
        uiOutput("VolSigCountT")
      ),
      sidebarPanel(h3("Y-axis:"),
        checkboxInput("VolNPT", label = h5("Non-parametric tests:"), value = FALSE),
        numericInput("VolP", label = h5("P value threshold:"), value = 0.1),
        selectInput("VolgrpVar", label = h5("Group variance"),
                    choices = list("Equal" = 1, "Unequal" = 2), selected = 1)
      ),
      actionButton("go5", "Update/Plot"),
      selectInput("vIntCalc", label = h5("Statistic Shown for the Interval Plot"),
                  choices = list("se" = 1, "sd" = 2), selected = 1),
      uiOutput("color3"),
      helpText("This application supports the following colors: "),
      helpText(" \"black\",\"blue\",\"brown\",\"cyan\",\"darkblue\",\"darkred\",\"green\", \"grey\", \"gray\", \"lightblue\", \"limegreen\", \"magenta\", \"orange\", \"pink\", \"purple\", \"violet\", \"yellow\""),
      plotOutput("PlotVol", click = "plot_click3"),
      splitLayout(cellWidths = c("50%", "50%"),
                  plotOutput("PlotVol2", width = "500", height = "500"),
                  plotOutput("PlotVol3", width = "400", height = "500")),
      verbatimTextOutput("info2"),
      dataTableOutput('VOLtable1')
    ),
    tabPanel("Correlation Analysis",
      h3("Correlation Analysis"),
      helpText("Note, the heatmap will only show correlations for a maximum of 1000 features. For larger datasets, only top 1000 features will be selected based on their interquartile range (IQR). When color distribution is fixed, you can potentially compare the correlation patterns among different data sets. In this case, you can choose \"do not perform clustering\" for all data set, or only to perform clustering on a single reference data set, then manually re-arrange other data sets according to the clustering pattern of the reference data set. "),
      selectInput("distM", label = h5("Distance Measure"),
                  choices = list("Pearson r" = 1, "Spearman rank correlation" = 2, "Kendall rank correlation" = 3),
                  selected = 1),
      radioButtons("viewM", label = h5("View Mode"),
                   choices = list("Overview" = 1, "Detailed View" = 2), selected = 1),
      checkboxInput("fixColD", label = h5("Fix color distribution[-1,1]:"), value = FALSE),
      selectInput("colorCon", label = h5("Color Contrast"),
                  choices = list("Default" = 1, "Red/Green" = 2, "Heat Color" = 3, "Topo Color" = 4,
                                 "Gray Scale" = 5, "Red/White/Blue" = 6, "Red/White/Green" = 7,
                                 "White/Navy/Blue" = 8)),
      checkboxInput("performClus", label = h5("Do not perform clustering:"), value = FALSE),
      sidebarPanel(
        sliderInput("CorrHeatWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
        sliderInput("CorrHeatHeight", "Plot Height (px)", min = 0, max = 1500, value = 500)
      ),
      actionButton("go6", "Update"),
      uiOutput("corrHeat")
    ),
    tabPanel("Pattern Searching",
      h3("Pattern Searching"),
      helpText("Correlation analysis can be performed either against a given feature or against a given pattern. The pattern is specified as a series of numbers separated by \"-\". Each number corresponds to the expected expression pattern in the corresponding group. For example, a 1-2-3-4 pattern is used to search for features that increase linearly with time in a time-series data with four time points (or four groups). The order of the groups is given as the first item in the predefined patterns. "),
      radioButtons("pattern", label = h5("Define a pattern using:"),
                   choices = list("a feature of interest:" = 1, "a predefined profile:" = 2, "a custom profile:" = 3),
                   selected = 1),
      uiOutput("interestFt"),
      uiOutput("profile"),
      textInput("customPro", h5("Custom Profile input")),
      selectInput("distM2", label = h5("Distance Measure"),
                  choices = list("Pearson r" = 1, "Spearman rank correlation" = 2, "Kendall rank correlation" = 3)),
      actionButton("go7", "Update"),
      plotOutput("patternGraph"),
      dataTableOutput("CORRtable1")
    )
  ),
  navbarMenu("PCA",
    tabPanel("Overview",
      uiOutput("pcNum"),
      actionButton("update1", "Update/Plot"),
      sidebarPanel(
        sliderInput("OverPWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
        sliderInput("OverPHeight", "Plot Height (px)", min = 0, max = 800, value = 500)
      ),
      uiOutput("overPlot")
    ),
    tabPanel("Scree Plot",
      uiOutput("pcNum2"),
      helpText("The green line on top shows the accumulated variance explained; the blue line underneath shows the variance explained by individual PC"),
      actionButton("update2", "Update/Plot"),
      sidebarPanel(
        sliderInput("ScreePWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
        sliderInput("ScreePHeight", "Plot Height (px)", min = 0, max = 800, value = 500)
      ),
      uiOutput("screePlot")
    ),
    tabPanel("2D Score Plot",
      numericInput("pcX", label = h5("Specify PC on x-axis:"), value = 1),
      numericInput("pcY", label = h5("Specify PC on y-axis:"), value = 2),
      checkboxInput("pcaConf", label = h5("Display 95% confidence regions:"), value = TRUE),
      checkboxInput("disSmplName", label = h5("Display sample names:"), value = TRUE),
      checkboxInput("gscale", label = h5("Use grey-scale colors:"), value = FALSE),
      actionButton("update3", "Update/Plot"),
      sidebarPanel(
        sliderInput("PCA2DWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
        sliderInput("PCA2DHeight", "Plot Height (px)", min = 0, max = 800, value = 500)
      ),
      uiOutput("pca2dP")
    ),
    tabPanel("PCA trajectory plot",
      numericInput("pcaTraX", label = h5("Specify PC on x-axis:"), value = 1),
      numericInput("pcaTraY", label = h5("Specify PC on y-axis:"), value = 2),
      textInput("PCATraTitle", label = h5("Title"), value = "PCA Trajectory Plot"),
      uiOutput("PCATraCol"),
      numericInput("errBarWidth", label = h5("Error Bar Width"), value = 0.03),
      numericInput("traPtS", label = h5("Point Size"), value = 2),
      numericInput("traLimit", label = h5("Increase The range of the scale by (%)"), value = 20),
      actionButton("update16", "Update/Plot Graph"),
      sidebarPanel(
        sliderInput("PCATrAWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
        sliderInput("PCATRAHeight", "Plot Height (px)", min = 0, max = 800, value = 500)
      ),
      uiOutput("pcaTrajPlot")
    ),
    tabPanel("3D Score Plot",
      sidebarPanel(h3("3D PCA plot using raw rgl"),
        numericInput("pSize", label = h5("Point Size"), value = 0.7),
        numericInput("transparency", label = h5("Transparency for ellipses"), value = 0.1),
        checkboxInput("ell", label = h5("Add ellipses"), value = TRUE),
        checkboxInput("grid", label = h5("add grid to plot"), value = FALSE),
        textInput("PcaTiltle", label = h5("Title"), value = ""),
        uiOutput("pcaCol"),
        actionButton("plot3dpca1", "plot pca 1"),
        # plotOutput("PCA3D"),
        actionButton("snapShot1", "Snap Shot"),
        helpText("Note: the SnapShot will only capture latest plotted graph.")
      ),
      sidebarPanel(h3("3D pca plot using PCA3d package"),
        checkboxInput("dC", label = h5("Data Scaling"), value = TRUE),
        checkboxInput("dS", label = h5("data Centering"), value = FALSE),
        checkboxInput("showScale", label = h5("show scale"), value = FALSE),
        checkboxInput("showlabels", label = h5("show labels"), value = FALSE),
        checkboxInput("showP", label = h5("Show Plane"), value = FALSE),
        checkboxInput("shadow", label = h5("show Shadow"), value = FALSE),
        checkboxInput("ell2", label = h5("Add ellipses"), value = TRUE),
        checkboxInput("showGrpLab", label = h5("show group labels"), value = FALSE),
        actionButton("plot3dpca2", "plot pca 2"),
        actionButton("snapShot2", "Snap Shot"),
        # plotOutput("PCA3D2")
        helpText("Note: the SnapShot will only capture latest plotted graph.")
      )
    ),
    tabPanel("Loading Plot",
      numericInput("loadingX", label = h5("Specify PC on x-axis:"), value = 1),
      numericInput("loadingY", label = h5("Specify PC on y-axis:"), value = 2),
      checkboxInput("loadingFeat", label = h5("Display feature names:"), value = FALSE),
      actionButton("update4", "Update/Plot"),
      splitLayout(cellWidths = c("50%", "50%"),
                  plotOutput("loadingPCA1", click = "plot_click4"),
                  plotOutput("loadingPCA2", width = "1000")),
      selectInput("lIntCalc", label = h5("Statistic Shown for the Interval Plot"),
                  choices = list("se" = 1, "sd" = 2), selected = 1),
      uiOutput("color4"),
      helpText("This application supports the following colors: "),
      helpText(" \"black\",\"blue\",\"brown\",\"cyan\",\"darkblue\",\"darkred\",\"green\", \"grey\", \"gray\", \"lightblue\", \"limegreen\", \"magenta\", \"orange\", \"pink\", \"purple\", \"violet\", \"yellow\""),
      splitLayout(cellWidths = c("50%", "50%"),
                  plotOutput("loadingCmp1", width = "500", height = "500"),
                  plotOutput("loadingCmp2", width = "400", height = "500")),
      verbatimTextOutput("info3"),
      dataTableOutput('Loadingtable1')
    ),
    tabPanel("Biplot",
      numericInput("biplotPCX", label = h5("Enter PC for X-axis:"), value = 1),
      numericInput("biplotPCY", label = h5("Enter PC for Y-axis:"), value = 2),
      actionButton("update5", "Update/Plot"),
      plotOutput("biplot")
    )
  ),
  navbarMenu("PLSDA",
    tabPanel("OverviewPLS",
      numericInput("plsNum", label = h5("Display pairwise score plot for top PCs:"), value = 5),
      actionButton("update6", "Update/Plot"),
      sidebarPanel(
        sliderInput("PLSOWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
        sliderInput("PLSOHeight", "Plot Height (px)", min = 0, max = 800, value = 500)
      ),
      uiOutput("overPlotPls")
    ),
    tabPanel("2D Score Plot",
      numericInput("plsX", label = h5("Specify PC on x-axis:"), value = 1),
      numericInput("plsY", label = h5("Specify PC on y-axis:"), value = 2),
      checkboxInput("plsConf", label = h5("Display 95% confidence regions:"), value = TRUE),
      checkboxInput("plsName", label = h5("Display sample names:"), value = TRUE),
      checkboxInput("plsgscale", label = h5("Use grey-scale colors:"), value = FALSE),
      actionButton("update7", "Update/Plot"),
      sidebarPanel(
        sliderInput("dplsWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
        sliderInput("dplsHeight", "Plot Height (px)", min = 0, max = 800, value = 500)
      ),
      uiOutput("pls2dP")
    ),
    tabPanel("PLSDA trajectory plot",
      numericInput("plsdaTraX", label = h5("Specify PC on x-axis:"), value = 1),
      numericInput("plsdaTraY", label = h5("Specify PC on y-axis:"), value = 2),
      textInput("PlsdaTraTitle", label = h5("Title"), value = "PLS-DA Trajectory Plot"),
      uiOutput("PLSDATraCol"),
      numericInput("plserrBarWidth", label = h5("Error Bar Width"), value = 0.03),
      numericInput("plsdatraPtS", label = h5("Point Size"), value = 2),
      numericInput("plsdatraLimit", label = h5("Increase The range of the scale by (%)"), value = 20),
      actionButton("update17", "Update/Plot Graph"),
      sidebarPanel(
        sliderInput("plsTRAWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
        sliderInput("plsTRAHeight", "Plot Height (px)", min = 0, max = 800, value = 500)
      ),
      uiOutput("plsdaTrajPlot")
    ),
    tabPanel("3D Score Plot",
      sidebarPanel(h3("3D PLSDA plot using raw rgl"),
        numericInput("plsPSize", label = h5("Point Size"), value = 0.7),
        numericInput("plsTransparency", label = h5("Transparency for ellipses"), value = 0.1),
        checkboxInput("plsEll", label = h5("Add ellipses"), value = TRUE),
        checkboxInput("plsGrid", label = h5("add grid to plot"), value = FALSE),
        textInput("PlsTiltle", label = h5("Title"), value = ""),
        uiOutput("plsCol"),
        actionButton("plot3dpls1", "Plot 3D PLSDA 1"),
        actionButton("snapShot3", "Snap Shot"),
        # plotOutput("PCA3D2")
        helpText("Note: the SnapShot will only capture latest plotted graph.")
        # plotOutput("PLS3D")
      ),
      sidebarPanel(h3("3D PLSDA plot using PCA3d package"),
        checkboxInput("showScalePls", label = h5("show scale"), value = FALSE),
        checkboxInput("showlabelsPls", label = h5("show labels"), value = FALSE),
        checkboxInput("showPPls", label = h5("Show Plane"), value = FALSE),
        checkboxInput("shadowPls", label = h5("show Shadow"), value = FALSE),
        checkboxInput("ell2Pls", label = h5("Add ellipses"), value = TRUE),
        checkboxInput("showGrpLabPls", label = h5("show group labels"), value = FALSE),
        actionButton("plot3dpls2", "Plot 3D PLSDA 2"),
        actionButton("snapShot4", "Snap Shot"),
        # plotOutput("PCA3D2")
        helpText("Note: the SnapShot will only capture latest plotted graph.")
        # plotOutput("PLS3D2")
      )
    ),
    tabPanel("Loading Plot",
      numericInput("plsloadingX", label = h5("Specify PC on x-axis:"), value = 1),
      numericInput("plsloadingY", label = h5("Specify PC on y-axis:"), value = 2),
      checkboxInput("plsloadingFeat", label = h5("Display feature names:"), value = FALSE),
      actionButton("update8", "Update/Plot"),
      splitLayout(cellWidths = c("50%", "50%"),
                  plotOutput("loadingPLS1", click = "plot_click5"),
                  plotOutput("loadingPLS2", width = "1000")),
      selectInput("l2IntCalc", label = h5("Statistic Shown for the Interval Plot"),
                  choices = list("se" = 1, "sd" = 2), selected = 1),
      uiOutput("color5"),
      helpText("This application supports the following colors: "),
      helpText(" \"black\",\"blue\",\"brown\",\"cyan\",\"darkblue\",\"darkred\",\"green\", \"grey\", \"gray\", \"lightblue\", \"limegreen\", \"magenta\", \"orange\", \"pink\", \"purple\", \"violet\", \"yellow\""),
      splitLayout(cellWidths = c("50%", "50%"),
                  plotOutput("plsloadingCmp1", width = "500", height = "500"),
                  plotOutput("plsloadingCmp2", width = "400", height = "500")),
      verbatimTextOutput("info5"),
      dataTableOutput('Loadingtable2')
    ),
    tabPanel("Cross Validation",
      h3("Select optimal number of components for classification"),
      uiOutput("CVcompNo"),
      selectInput("CVMethod", label = h5("Cross validation (CV) method:"),
                  choices = list("10-fold CV" = 1, "LOOCV" = 2), selected = 2),
      selectInput("performMea", label = h5("Performance measure:"),
                  choices = list("Q2" = 1, "Accuracy" = 2, "R2" = 3), selected = 1),
      actionButton("update9", "update"),
      sidebarPanel(
        sliderInput("plsCVWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
        sliderInput("plsCVHeight", "Plot Height (px)", min = 0, max = 800, value = 500)
      ),
      tableOutput("CVTab"),
      uiOutput("PLSDACVPlot")
    ),
    tabPanel("Imp.Features",
      helpText("There are two important measures in PLS-DA: one is variable importance in projection (VIP) and the other is weighted sum of absolute regression coefficients (coef.). The colored boxes on the right indicate the relative concentrations of the corresponding metabolite in each group under study."),
      sidebarPanel(h3("Importance measure:"),
        checkboxInput("impMeasure1", label = h5("VIP score"), value = TRUE),
        uiOutput("vip"),
        checkboxInput("impMeasure2", label = h5("Coefficient score"), value = FALSE),
        uiOutput("coef")
      ),
      numericInput("topFeatNo", label = h5("Show top feature number:"), value = 15),
      checkboxInput("BW", label = h5("Use grey scale color:"), value = FALSE),
      actionButton("update10", "Update/Plot"),
      plotOutput("PLSDAImp"),
      dataTableOutput('ImpFeatTab')
    ),
    tabPanel("Permutation",
      selectInput("permTest", label = h5("Select test statistic:"),
                  choices = list("Prediction accuracy during training" = 1, "Separation distance (B/W)" = 2),
                  selected = 1),
      selectInput("permNumber", label = h5("Set permutation numbers:"),
                  choices = list("100" = 1, "1000" = 2, "2000" = 3), selected = 1),
      actionButton("update11", "Update/Plot"),
      plotOutput("permPlot")
    )
  ),
  navbarMenu("OPLSDA",
    tabPanel("Score Plot",
      checkboxInput("opls95", label = h5("Display 95% confidence region:"), value = TRUE),
      checkboxInput("oplsSmpNam", label = h5("Display sample names:"), value = TRUE),
      checkboxInput("oplsgScale", label = h5("Use grey-scale colors:"), value = FALSE),
      actionButton("update12", "Update/Plot"),
      sidebarPanel(
        sliderInput("OPlsWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
        sliderInput("OPlsHeight", "Plot Height (px)", min = 0, max = 800, value = 500)
      ),
      uiOutput("oplsScore")
    ),
    tabPanel("S-Plot",
      actionButton("update13", "Update/Plot"),
      plotOutput("oplsSPlot", click = "plot_click6"),
      uiOutput("color6"),
      splitLayout(cellWidths = c("50%", "50%"),
                  plotOutput("oplsCmp1", width = "500", height = "500"),
                  plotOutput("oplsCmp2", width = "400", height = "500")),
      verbatimTextOutput("info6"),
      dataTableOutput('OPLSDATab')
    ),
    tabPanel("Model Overview",
      actionButton("update14", "Update/Plot"),
      plotOutput("OPLSOver")
    ),
    tabPanel("Permutation",
      selectInput("oplsdaPer", label = "Set permutation numbers:",
                  choices = list("100" = 1, "1000" = 2, "2000" = 3), selected = 1),
      actionButton("update15", "Update/Plot"),
      plotOutput("OPLSDAPerm")
    )
  ),
  navbarMenu("Feature Identification",
    tabPanel("Significance Analysis if Microarry (and Metabolites) (SAM)",
      uiOutput("SAMPara"),
      uiOutput("SAMAnlTyp"),
      uiOutput("SAMVar"),
      uiOutput("SAMtxt"),
      plotOutput("emptyPlot", width = "1", height = "1"),
      actionButton("compute1", "Calculate"),
      uiOutput("SAMFDR"),
      # actionButton("updateFDR","Update Plot"),
      plotOutput("SAMFDRPLOT"),
      plotOutput("SAMResultPlot"),
      dataTableOutput("SAMTab")
    )
  ),
  navbarMenu("Cluster Analysis: Hiearchical Clustering",
    tabPanel("Dendrogram",
      selectInput("DendroDist", label = "Distance Measure:",
                  choices = list("Euclidean" = 1, "Spearman" = 2, "Pearson" = 3), selected = 1),
      selectInput("DendroAlgor", label = "Clustering Algorithm:",
                  choices = list("Ward" = 1, "Average" = 2, "Complete" = 3, "Single" = 4), selected = 1),
      actionButton("update18", "Update/Plot"),
      plotOutput("DendroPlot")
    ),
    tabPanel("HeatMap",
      selectInput("CAHeatDist", label = "Distance Measure:",
                  choices = list("Euclidean" = 1, "Pearson" = 2, "Minkowski" = 3), selected = 1),
      selectInput("CAHeatAlgor", label = "Clustering Algorithm:",
                  choices = list("Ward" = 1, "Average" = 2, "Complete" = 3, "Single" = 4), selected = 1),
      selectInput("CAHeatCC", label = "Color Contrast:",
                  choices = list("Default" = 1, "Red/Green" = 2, "Heat Color" = 3, "Topo Color" = 4,
                                 "Gray Scale" = 5, "Red/White/Blue" = 6, "Red/White/Green" = 7,
                                 "White/Navy/Blue" = 8),
                  selected = 1),
      sidebarPanel(h4("View Mode:"),
        radioButtons("CAHeatView", label = NULL,
                     choices = list("Overview" = 1, "Detail View (< 2000 features)" = 2), selected = 1)
      ),
      sidebarPanel(h4("View Options"),
        checkboxInput("CAHeatView01", label = "Do not reorganize:", value = FALSE),
        selectInput("CAHeatView02", label = NULL,
                    choices = list("Samples" = 1, "Features" = 2, "Both" = 3), selected = 1),
        checkboxInput("CAHeatView03", label = "use top:", value = FALSE),
        numericInput("CAHeatView04", label = NULL, value = 25),
        selectInput("CAHeatView05", label = NULL,
                    choices = list("T-test/ANOVA" = 1, "PLS-DA VIP" = 2, "Random Forest" = 3), selected = 1),
        checkboxInput("CAHeatView06", label = "Show cell borders", value = TRUE)
      ),
      sidebarPanel(h4("Data Options"),
        selectInput("CAHeatData01", label = h5("Data Source:"),
                    choices = list("Normalized data" = 1, "Original data" = 2), selected = 1),
        selectInput("CAHeatData02", label = h5("Standardization:"),
                    choices = list("Autoscale features" = 1, "Autoscale samples" = 2, "None" = 3), selected = 1)
      ),
      sidebarPanel(
        sliderInput("HeatWidth", "Plot Width (%)", min = 0, max = 100, value = 100),
        sliderInput("HeatHeight", "Plot Height (px)", min = 0, max = 1500, value = 500)
      ),
      actionButton("update19", "Update/Plot"),
      uiOutput("plot.ui")
    )
  ),
  navbarMenu("Cluster Analysis: Partitional Clustering",
    tabPanel("K-means",
      helpText("Please note: due to space limit, only the cluster members will be calculated if the specified cluster number > 20. The blue lines represent the median intensities of each cluster"),
      numericInput("KMclstNm", label = "Specify the cluster number:", value = 3),
      actionButton("update20", "Plot/update"),
      plotOutput("KMPlot"),
      tableOutput('mytable1')
    ),
    tabPanel("Self Organising Map (SOM)",
      helpText("Please note: only cluster members will be calculated if the total cluster number (xdim*ydim) > 20. The blue lines represent the median intensities of each cluster. "),
      numericInput("SOMXD", label = "X dimension:", value = 1),
      numericInput("SOMYD", label = "Y dimension:", value = 3),
      selectInput("SOMInit", label = "Initialization:",
                  choices = list("Linear" = 1, "Random" = 2, "Sample" = 3), selected = 1),
      selectInput("SOMNeigh", label = "Neighbourhood:",
                  choices = list("Gaussian" = 1, "Bubble" = 2), selected = 1),
      actionButton("update21", "Plot/update"),
      plotOutput("SOMPlot"),
      tableOutput('SOMTab')
    )
  ),
  navbarMenu("Classification & Feature Selection - Random Forest",
    tabPanel("Classification",
      selectInput("RFTreesNu", label = "Number of trees to grow:",
                  choices = list("500" = 1, "1000" = 2, "2000" = 3, "5000" = 4), selected = 1),
      numericInput("RFPredNu", label = "Number of predictors to try for each node:", value = 7),
      actionButton("update22", "Plot/update"),
      textOutput("RFOOB"),
      tableOutput('RFTab'),
      plotOutput("RFPlot")
    ),
    tabPanel("Var.Importance",
      actionButton("update23", "Plot/update"),
      textOutput("RFVipHelp"),
      plotOutput("RFVipPlot"),
      dataTableOutput("RFDaTTab")
    ),
    tabPanel("Outlier Detection",
      actionButton("update24", "Plot/update"),
      textOutput("RFOutTxt"),
      plotOutput("RFOutPlot")
    )
  ),
  navbarMenu("Classification & Feature Selection - SVM",
    tabPanel("Classification",
      helpText("R-SVM uses SVM (with linear kernel) to perform classification recursively using different feature subsets. Features are selected based on their relative contribution in the classification using cross validation error rates. The least important features are eliminated in the subsequent steps. This process creates a series of SVM models (levels). The features used by the best model are plotted. LOOCV: leave one out cross-validation. "),
      selectInput("SVMVMet", label = "Validation method:",
                  choices = list("10-fold CV" = 1, "LOOCV" = 2, "Bootstrap" = 3), selected = 1),
      actionButton("update25", "Plot/update"),
      plotOutput("SVMPlot")
    ),
    tabPanel("Var.Importance",
      helpText("Please note : features are ranked by their frequencies being selected in the best classifiers (only top 15 will be shown) "),
      actionButton("update26", "Plot/update"),
      plotOutput("SVMImportPlot")
    )
  ),
  navbarMenu("PLSR",
    tabPanel("Upload Data",
      sidebarLayout(
        sidebarPanel(
          radioButtons("plsrRadioButton1", label = h3("Data Type"),
                       choices = list("Concentration" = 1, "Spectral Bins" = 2, "Intensity Table" = 3),
                       selected = 1),
          selectInput("plsrSelect1", label = h3("Format"),
                      choices = list("Samples in rows (unpaired)" = 1, "Samples in columns (unpaired)" = 2,
                                     "Samples in rows (paired)" = 3, "Samples in columns (paired)" = 4),
                      selected = 1),
          fileInput('file2', 'Choose CSV File',
                    accept = c('text/csv', 'text/comma-separated-values,text/plain', '.csv'))
        ),
        mainPanel(textOutput('Pcontents'))
      )
    ),
    tabPanel("Data Processing",
      helpText(" Too many missing values will cause difficulties for downstream analysis. There are several different methods for this purpose. The default method replaces all the missing values with a small value (the half of the minimum positive values in the original data) assuming to be the detection limit. Move onto Normalization if you want to use the default method. The assumption of this approach is that most missing values are caused by low abundance metabolites (i.e. below the detection limit)."),
      helpText("The functions in MetaboAnalyst also offer other methods, such as replace by mean/median, k-nearest neighbour (KNN), probabilistic PCA (PPCA), Bayesian PCA (BPCA) method, Singular Value Decomposition (SVD) method to impute the missing values. Please choose the one that is the most appropriate for your data."),
      sidebarPanel(h4("Step 1. Remove features with too many missing values"),
        checkboxInput("PmissValue1", label = "Remove features with > x % missing values", value = TRUE),
        numericInput("PmissValue2", label = NULL, value = 50)
      ),
      sidebarPanel(h4("Step 2. Estimate the remaining missing values (Select only 1 of the following 4 options)"),
        checkboxInput("PmissValue3", label = "Replace by a small value (half of the minimum positive value in the original data)", value = TRUE),
        checkboxInput("PmissValue4", label = "Exclude variables with missing values", value = FALSE),
        selectInput("PmissValue5", label = h5("Replace by column (feature)"),
                    choices = list("None" = 1, "Mean" = 2, "Median" = 3, "Min" = 4), selected = 1),
        selectInput("PmissValue6", label = h5("Estimate missing values using"),
                    choices = list("None" = 1, "KNN" = 2, "PPCA" = 3, "BPCA" = 4, "SVD Impute" = 5), selected = 1)
      ),
      actionButton("Pcalc1", "Process"),
      h3(textOutput("PMVtext1"))
    ),
    tabPanel("Normailsation",
      sidebarLayout(
        sidebarPanel(h3("Sample Normalization"),
          radioButtons("PradioButtons2", label = h4("Sample normalization"),
                       choices = list("None" = 1, "Normalization by sum" = 2, "Normalization by median" = 3,
                                      "Normalization by a specific reference sample" = 4,
                                      "Normalization by a pooled sample from group" = 5,
                                      "Normalization by reference feature" = 6),
                       selected = 1),
          uiOutput("Prefsample"), uiOutput("Ppoolsample"), uiOutput("PrefFeat"),
          radioButtons("PradioButtons3", label = h4("Data transform"),
                       choices = list("None" = 1, "log transform" = 2, "cube root transform" = 3),
                       selected = 1),
          radioButtons("PradioButtons4", label = h4("Data scaling"),
                       choices = list("None" = 1, "Mean Centering" = 2, "Auto scaling" = 3,
                                      "Pareto scaling" = 4, "Range scaling" = 5, "Vast scaling" = 6),
                       selected = 1)
        ),
        mainPanel(
          actionButton("Pgo", "Update"),
          plotOutput("PnormPlot", width = "600", height = "800")
        )
      )
    ),
    tabPanel("PLSR",
      uiOutput("plsrNum"),
      actionButton("PLSRButton", "Calculate/Plot"),
      uiOutput("PLSRcolor1"),
      uiOutput("PLSRcolor2"),
      uiOutput("PLSRptSize"),
      splitLayout(cellWidths = c("50%", "50%"),
                  plotOutput("plsrModel", width = "500", height = "500"),
                  plotOutput("plsrCV", width = "500", height = "500")),
      plotOutput("plsrOverLay"),
      tableOutput('PLSRTab')
    )
  ),
  navbarMenu("More",
    tabPanel("Sub-Component A"),
    tabPanel("Sub-Component B")
  )
)

#######################################################
#################                     #################
#################   Server function   #################
#######################################################
server <- function(input, output, session) {

  # "Load all Packages" tab: each click re-renders the package status table.
  # LoadAllPackages() is defined elsewhere in the project.
  observeEvent(input$loadPack,
    output$pacL <- renderTable({
      pakCheckTable()
    })
  )

  # Build a two-column table (package name, load status) from the vector
  # returned by LoadAllPackages(); the status column gets a blank header.
  pakCheckTable <- function() {
    status <- LoadAllPackages()
    status.df <- data.frame(status)
    tbl <- cbind(row.names(status.df), status.df)
    colnames(tbl)[2] <- " "
    return(tbl)
  }

  options(shiny.usecairo = TRUE)

  # Colours accepted by the free-text colour inputs in the UI.
  colorList <- c("black", "blue", "brown", "cyan", "darkblue", "darkred",
                 "green", "grey", "gray", "lightblue", "limegreen", "magenta",
                 "orange", "pink", "purple", "violet", "yellow")

  ############################################################################
  ############################ Utility functions #############################
  ############################################################################

  # Number of class levels in the processed data.  `dataSet` is the global
  # state object maintained by MetaboAnalyst (populated by the upload handler
  # below); `input$file1` is referenced only to re-run after a new upload.
  dataLvls <- reactive({
    input$file1
    lvls <- length(levels(dataSet$proc.cls))
    return(lvls)
  })

  # TRUE when the data contains more than two classes (multi-group analysis).
  numOfcls <- reactive({
    input$file1
    if (dataSet$cls.num > 2) {
      return(TRUE)
    } else {
      return(FALSE)
    }
  })

  # TRUE when the uploaded data is paired.
  ispaired <- reactive({
    input$file1
    return(dataSet$paired)
  })

  ############################################################################
  ############################## Reading file ################################
  ############################################################################

  # Upload handler: initialises the MetaboAnalyst data objects from the
  # selected CSV and shows the sanity-check message.  InitDataObjects(),
  # Read.TextData(), SanityCheckData() and ReplaceMin() come from
  # MetaboAnalyst and mutate the global `dataSet`.
  output$contents <- renderText({
    inFile <- input$file1
    if (is.null(inFile)) {
      return(NULL)
    }
    # Map the UI selections onto MetaboAnalyst's data type / format codes.
    datatype <- switch(as.character(input$radioButtons1),
                       "1" = "conc",
                       "2" = "specbin",
                       "pktable")
    format1 <- switch(as.character(input$select),
                      "1" = "rowu",
                      "2" = "colu",
                      "3" = "rowp",
                      "colp")
    format2 <- "disc"                        # labels are always discrete here
    pairedData <- format1 %in% c("rowp", "colp")
    InitDataObjects(datatype, "stat", paired = pairedData)
    Read.TextData(inFile$datapath, format = format1, lbl.type = format2)
    SanityCheckData()
    ReplaceMin()
    dataSet$check.msg
  })

  ############################################################################
  ###################### Missing values calculations #########################
  ############################################################################

  # Snapshot the missing-value options only when "Process" is clicked.
  MissVPara1 <- eventReactive(input$calc1, {
    return(input$missValue1)
  })
  MissVPara2 <- eventReactive(input$calc1, {
    return(input$missValue2 / 100)        # UI is a percentage; API wants a fraction
  })
  MissVPara3 <- eventReactive(input$calc1, {
    return(input$missValue3)
  })
  MissVPara4 <- eventReactive(input$calc1, {
    return(input$missValue4)
  })

  # Column-wise replacement method.  The original code tested
  # `input$missValue5 == 2|3|4`, which is always TRUE due to operator
  # precedence; the inner exact comparisons and the "min" default meant the
  # returned value was still correct, so this switch() is behaviour-identical.
  MissVPara5 <- eventReactive(input$calc1, {
    switch(as.character(input$missValue5),
           "2" = "mean",
           "3" = "median",
           "4" = "min",
           "min")
  })

  # Model-based estimation method; same always-TRUE-condition fix as above
  # (default remains "knn", matching the original behaviour).
  MissVPara6 <- eventReactive(input$calc1, {
    switch(as.character(input$missValue6),
           "2" = "knn",
           "3" = "ppca",
           "4" = "bpca",
           "5" = "svdImpute",
           "knn")
  })

  output$MVtext1 <- renderText({
    if(MissVPara1()==TRUE){
      RemoveMissingPercent(int.mat=dataSet$preproc, percent=MissVPara2())
    }
    # NOTE(review): the MissVPara5()/MissVPara6() branches below look swapped
    # ("knn"/"ppca"/"bpca" can only come from MissVPara6(), while
    # "mean"/"median"/"min" only from MissVPara5()) -- confirm against the
    # continuation of this function before changing anything.
    if(MissVPara3()==TRUE){
      ImputeVar(int.mat=dataSet$preproc, method="colmin")
    }else if(MissVPara4()==TRUE){
      ImputeVar(int.mat=dataSet$preproc, method="exclude")
    } else if (MissVPara5()=="knn"|MissVPara5()=="ppca"|MissVPara5()=="bpca"){
      ImputeVar(int.mat=dataSet$preproc, method=MissVPara5())
    } else if (MissVPara6()=="mean"| MissVPara6()=="median"| MissVPara6()=="min"| MissVPara6()=="svdImpute"){
ImputeVar(int.mat=dataSet$preproc, method=MissVPara6()) } ReplaceMin() paste("Missing value calculations complete") }) ########################################################################################## ################################ Normlization ############################################ ########################################################################################## sampleGroups <-reactive({ input$file1 list=rownames(dataSet$proc) return(list) }) sampleFeatures <-reactive({ input$file1 list2=colnames(dataSet$proc) return(list2) }) pooledSamples <-reactive({ input$file1 numOfLvl=length(levels(dataSet$proc.cls)) groups=c() for(i in 1:numOfLvl){ groups[i]=levels(dataSet$proc.cls)[i] } return(groups) }) output$refsample <- renderUI({ selectInput("rsmpl", label = h4("Specific reference samples"), choices = sampleGroups()) }) output$poolsample <-renderUI({ selectInput("psmpl", label = h4("Pooled samples from group"), choices=pooledSamples()) }) output$refFeat <- renderUI({ selectInput("rf", label=h4("Reference features"), choices= sampleFeatures()) }) normMethod1 <- eventReactive(input$go,{ rowNorm='Nothing' if(input$radioButtons2==1){ rowNorm = 'Nothing' } else if (input$radioButtons2==2){ rowNorm = 'SumNorm' } else if(input$radioButtons2==3){ rowNorm = 'MedianNorm' } else if(input$radioButtons2==4){ rowNorm = "ProbNorm" } else if (input$radioButtons2==5){ rowNorm = "ProbNorm2" } else { rownNorm="CompNorm" } }) normMethod2 <- eventReactive(input$go, { transNorm='Nothing' if (input$radioButtons3==1){ transNorm='Nothing' } else if (input$radioButtons3==2){ transNorm='LogNorm' } else { transNorm='CrNorm' } }) normMethod3 <- eventReactive(input$go, { scaleNorm='Nothing' if(input$radioButtons4==1){ scaleNorm='Nothing' } else if (input$radioButtons4==2){ scaleNorm='MeanCenter' } else if (input$radioButtons4==3){ scaleNorm= 'AutoNorm' } else if (input$radioButtons4==4){ scaleNorm='ParetoNorm' } else if (input$radioButtons4==5){ scaleNorm='RangeNorm' } 
else { scaleNorm='VastNorm' } }) normMethod4 <- eventReactive(input$go,{ return(toString(input$rsmpl)) }) normMethod5 <- eventReactive(input$go,{ return(toString(input$psmpl)) }) normMethod6 <- eventReactive(input$go,{ return(toString(input$rf)) }) output$normPlot <- renderPlot({ if(normMethod1()=="ProbNorm"){ Normalization( rowNorm="ProbNormF", transNorm=normMethod2(), scaleNorm=normMethod3(), ref=normMethod4(), ratio=FALSE, ratioNum=20) } else if (normMethod1()=="ProbNorm2"){ Normalization( rowNorm="ProbNormT", transNorm=normMethod2(), scaleNorm=normMethod3(), ref=normMethod5(), ratio=FALSE, ratioNum=20) } else if (normMethod1()=="CompNorm"){ Normalization( rowNorm="CompNorm", transNorm=normMethod2(), scaleNorm=normMethod3(), ref=normMethod6(), ratio=FALSE, ratioNum=20) } else { Normalization( rowNorm=normMethod1(), transNorm=normMethod2(), scaleNorm=normMethod3(), ref=NULL, ratio=FALSE, ratioNum=20) } PlotNormSum() }) ############################################################################## ############################## Fold Change Analysis ########################## ############################################################################## ####add message here telling the user if their data is paired or not ispaired2 <-reactive({ input$file1 return(dataSet$paired) }) output$FCAnalT<-renderUI({ if (ispaired2()==TRUE){ selectInput("FCAnalType", label = h5("Analysis Type"), choices = list("Unpaired" = 1, "Paired" = 2), selected = 1) } else { selectInput("FCAnalType", label = h5("Analysis Type"), choices = list("Unpaired" = 1), selected = 1) } }) output$SigCountT <-renderUI({ if (ispaired()==FALSE){ return() } else { numericInput("SigCountThresh", label=h5("Significant count threshold % (paired only):"), value=75) } }) FCpara4 <- eventReactive(input$go4,{ if (input$FCAnalType==1){ return(TRUE) }else{ return(FALSE) } }) name<-reactive({ input$file1 groups=c() numOfLvl=length(levels(dataSet$proc.cls)) for(i in 1:numOfLvl){ 
groups[i]=levels(dataSet$proc.cls)[i] } group1=groups[1] group2=groups[2] group12=paste(group1,"/",group2) group21=paste(group2,"/",group1) inputOp<- list(group12,group21) return(inputOp) }) output$ComparType <- renderUI({ selectInput("ComparTyp", label=h5("Comparison Type"), choices=name()) }) FCpara1 <- eventReactive(input$go4,{ FCTvalue=input$FCThresh }) FCpara2 <-eventReactive(input$go4,{ groups=c() numOfLvl = length(levels(dataSet$proc.cls)) for(i in 1:numOfLvl){ groups[i]=levels(dataSet$proc.cls)[i] } group1=groups[1] group2=groups[2] group12=paste(group1,"/",group2) group21=paste(group2,"/",group1) ComparT=0 if(input$ComparTyp==group12){ ComparT=0 } else { ComparT=1 } }) FCpara3 <- eventReactive(input$go4,{ SigCT=input$SigCountThresh/100 }) output$FCPlot <- renderPlot({ if (FCpara4()==TRUE){ FC.Anal.unpaired(fc.thresh= FCpara1(), cmp.type = FCpara2()) }else{ FC.Anal.paired(fc.thresh=FCpara1(),percent.thresh=FCpara3(),cmp.type=FCpara2()) } MyPlotFC() }) output$FCtable1 <- renderDataTable({ if (FCpara4()==TRUE){ FC.Anal.unpaired(fc.thresh= FCpara1(), cmp.type = FCpara2()) }else{ FC.Anal.paired(fc.thresh=FCpara1(),percent.thresh=FCpara3(),cmp.type=FCpara2()) } MyFCTable() }) ############################################################################### ############################### T test ######################################## ############################################################################### ispaired <- reactive({ input$file1 return(dataSet$paired) }) output$TTAnalType<-renderUI({ if (ispaired()==TRUE){ selectInput("TTAnalT", label = h5("Analysis Type:"), choices = list("Unpaired" = 1, "Paired" = 2), selected = 1) } else { selectInput("TTAnalT", label = h5("Analysis Type:"), choices = list("Unpaired" = 1), selected = 1) } }) ttestpara4 <- eventReactive(input$go2,{ if (input$TTAnalT==1){ return(FALSE) }else{ return(TRUE) } }) ttestpara1 <- eventReactive(input$go2,{ PValue=input$TTestP }) ttestpara2 <- eventReactive(input$go2,{ grpVar=TRUE 
if(input$grpVar==1){ grpVar=TRUE } else { grpVar=FALSE } }) ttestpara3 <- eventReactive(input$go2,{ nonpar=input$NPT1 }) ttespara5 <- eventReactive(input$go2,{ return(toString(input$Tcolo1)) }) ttespara6 <- eventReactive(input$go2,{ return(toString(input$Tcolo2)) }) output$PlotTT <- renderPlot({ Ttests.Anal(nonpar=ttestpara3(), threshp=ttestpara1(), paired=ttestpara4(), equal.var=ttestpara2()) MyPlotTT(c1=ttespara5(), c2=ttespara6()) }) output$TTtable1 <- renderDataTable({ Ttests.Anal(nonpar=ttestpara3(), threshp=ttestpara1(), paired=ttestpara4(), equal.var=ttestpara2()) MyTTTable() },options = list(lengthMenu = c(10, 20,50,100), pageLength = 20) ) click1 <- reactive({ validate( need(input$plot_click1$x != "", "Click a point for a interval/box plot") ) n=c() b=c() for(i in 1:length(analSet$tt$p.log)){ n[i]=i } for(i in 1:length(analSet$tt$p.log)){ b[i]=as.numeric(analSet$tt$p.log)[i] } x=as.numeric(nearPoints(data.frame(n,b), input$plot_click1, xvar="n", yvar="b",maxpoints=1)[[1]]) return(x) }) output$color1 <- renderUI({ colorV= sample(colorList, dataLvls()) textInput("colorVector", label=h5("Colors for the interval and box plot"), value = toString(colorV)) }) observeEvent(input$plot_click1, output$PlotTT2 <-renderPlot({ colString=input$colorVector col=strsplit(colString,",")[[1]] stat=" " if(input$tIntCalc==1){ stat="se" }else if (input$tIntCalc==2){ stat="sd" } IntervalPlot(cmpdNm = click1(), dpi=200, colors=col,calc=stat) })) observeEvent(input$plot_click1, output$PlotBox1 <-renderPlot({ colString=input$colorVector colr=strsplit(colString,",")[[1]] PlotCmpdBoxView(cmpdNm=click1(), dpi=200, col=colr) })) output$info1 <- renderText({ paste0("x=", input$plot_click1$x, "\ny=", input$plot_click1$y) }) ########################################################################################### ########################################## ANOVA ########################################## 
# ------------------------------------------------------------------------
# ANOVA
# ------------------------------------------------------------------------
# All parameters below are snapshotted only when the "go3" button fires,
# so tweaking a widget does not rerun the analysis by itself.

# Significance (p-value) threshold.
anovapara1 <- eventReactive(input$go3, {
  input$ANOVAP
})

# Post-hoc test: 1 = Fisher's LSD, anything else = Tukey's HSD.
anovapara2 <- eventReactive(input$go3, {
  if (input$PHA == 1) "fisher" else "tukey"
})

# Non-parametric flag.
anovapara3 <- eventReactive(input$go3, {
  input$NPT2
})

# Plot colours for the significant / non-significant points.
anovapara4 <- eventReactive(input$go3, {
  toString(input$Acolo1)
})
anovapara5 <- eventReactive(input$go3, {
  toString(input$Acolo2)
})

# ANOVA overview plot.
output$PlotAOV <- renderPlot({
  ANOVA.Anal(nonpar = anovapara3(), thresh = anovapara1(),
             post.hoc = anovapara2())
  PlotLiveANOVA(c1 = anovapara4(), c2 = anovapara5())
})

# Table of ANOVA results.
output$AOVtable1 <- renderDataTable({
  ANOVA.Anal(nonpar = anovapara3(), thresh = anovapara1(),
             post.hoc = anovapara2())
  MyANOVATable()
}, options = list(lengthMenu = c(10, 20, 50, 100), pageLength = 20))

# Index of the feature closest to the user's click on the ANOVA plot.
# Column names n/b must match the xvar/yvar arguments of nearPoints().
click2 <- reactive({
  validate(
    need(input$plot_click2$x != "", "Click a point for a interval/box plot")
  )
  # Vectorised replacement for the original pair of growing for-loops.
  b <- as.numeric(analSet$aov$p.log)
  n <- seq_along(b)
  pt <- nearPoints(data.frame(n, b), input$plot_click2,
                   xvar = "n", yvar = "b", maxpoints = 1)
  # Coerce explicitly, for consistency with the t-test handler (click1).
  as.numeric(pt[[1]])
})

# Colour text box pre-filled with one random colour per class level.
output$color2 <- renderUI({
  colorV <- sample(colorList, dataLvls())
  textInput("colorVector2", h5("Colors for the interval and box plot graph"),
            value = toString(colorV))
})

# Interval plot of the clicked compound (SE or SD bars).
observeEvent(input$plot_click2, output$PlotAOV2 <- renderPlot({
  col <- strsplit(input$colorVector2, ",")[[1]]
  stat <- " "
  if (input$aIntCalc == 1) {
    stat <- "se"
  } else if (input$aIntCalc == 2) {
    stat <- "sd"
  }
  IntervalPlot(cmpdNm = click2(), dpi = 200, colors = col, calc = stat)
}))

# Box plot of the clicked compound.
observeEvent(input$plot_click2, output$PlotBox2 <- renderPlot({
  colr <- strsplit(input$colorVector2, ",")[[1]]
  PlotCmpdBoxView(cmpdNm = click2(), dpi = 200, col = colr)
}))

# ------------------------------------------------------------------------
# Volcano Plot
# ------------------------------------------------------------------------

# Paired-data flags; re-read whenever a new file is uploaded.
ispaired3 <- reactive({
  input$file1
  dataSet$paired
})
ispaired4 <- reactive({
  input$file1
  dataSet$paired
})

# Analysis-type selector: "Paired" is only offered for paired data.
output$VolAnalT <- renderUI({
  if (ispaired3() == TRUE) {
    selectInput("VAnalT", label = h5("Analysis Type"),
                choices = list("Unpaired" = 1, "Paired" = 2), selected = 1)
  } else {
    selectInput("VAnalT", label = h5("Analysis Type"),
                choices = list("Unpaired" = 1), selected = 1)
  }
})

# Significant-count threshold input (shown for paired data only).
output$VolSigCountT <- renderUI({
  if (ispaired4() == FALSE) {
    return()
  }
  numericInput("VolSigCountThresh",
               label = h5("Significant count threshold % (paired only):"),
               value = 75)
})

# "group1 / group2" and "group2 / group1" comparison labels.
name2 <- reactive({
  input$file1
  grp <- levels(dataSet$proc.cls)
  list(paste(grp[1], "/", grp[2]), paste(grp[2], "/", grp[1]))
})

output$VolComparType <- renderUI({
  selectInput("VolComparT", label = h5("Comparison Type"), choices = name2())
})

# Parameters snapshotted when the "go5" button fires.
volpara1 <- eventReactive(input$go5, {
  # TRUE = paired analysis.
  input$VAnalT != 1
})
volpara2 <- eventReactive(input$go5, {
  # Fold-change threshold.
  input$VolThresh
})
volpara3 <- eventReactive(input$go5, {
  # 0 if the selected comparison is group1/group2, 1 otherwise.
  grp <- levels(dataSet$proc.cls)
  group12 <- paste(grp[1], "/", grp[2])
  if (input$VolComparT == group12) 0 else 1
})
volpara4 <- eventReactive(input$go5, {
  # Significant-count threshold as a fraction.
  input$VolSigCountThresh / 100
})
volpara5 <- eventReactive(input$go5, {
  input$VolNPT
})
volpara6 <- eventReactive(input$go5, {
  input$VolP
})
volpara7 <- eventReactive(input$go5, {
  # TRUE = assume equal group variances.
  input$VolgrpVar == 1
})

# Shared driver so the plot and the table run the identical analysis.
runVolcano <- function() {
  Volcano.Anal(paired = volpara1(), fcthresh = volpara2(),
               cmpType = volpara3(), percent.thresh = volpara4(),
               nonpar = volpara5(), threshp = volpara6(),
               equal.var = volpara7())
}

output$PlotVol <- renderPlot({
  runVolcano()
  MyPlotVolcano2()
})

output$VOLtable1 <- renderDataTable({
  runVolcano()
  MyVOLTable()
}, options = list(lengthMenu = c(10, 20, 50, 100), pageLength = 20))

# Feature nearest the click on the volcano plot.  Column names n/b/c
# must match the xvar/yvar arguments of nearPoints().
click3 <- reactive({
  validate(
    need(input$plot_click3$x != "", "Click a point for a interval/box plot")
  )
  vcn <- analSet$volcano
  # Vectorised replacement for the original three growing for-loops.
  b <- as.numeric(vcn$fc.log)
  c <- as.numeric(vcn$p.log)
  n <- seq_along(b)
  pt <- nearPoints(data.frame(n, b, c), input$plot_click3,
                   xvar = "b", yvar = "c", maxpoints = 1)
  # [[1]] (not [1]) extracts the index column as a plain vector, matching
  # the t-test / ANOVA click handlers.
  as.numeric(pt[[1]])
})

output$color3 <- renderUI({
  colorV <- sample(colorList, dataLvls())
  textInput("colorVector3", h5("Colors for the interval plot and boxplot"),
            value = toString(colorV))
})

observeEvent(input$plot_click3, output$PlotVol2 <- renderPlot({
  col <- strsplit(input$colorVector3, ",")[[1]]
  # BUG FIX: `stat` was used uninitialised when vIntCalc was neither 1
  # nor 2 ("object 'stat' not found"); default to " " like the t-test
  # and ANOVA handlers do.
  stat <- " "
  if (input$vIntCalc == 1) {
    stat <- "se"
  } else if (input$vIntCalc == 2) {
    stat <- "sd"
  }
  IntervalPlot(cmpdNm = click3(), dpi = 200, colors = col, calc = stat)
}))

observeEvent(input$plot_click3, output$PlotVol3 <- renderPlot({
  colr <- strsplit(input$colorVector3, ",")[[1]]
  PlotCmpdBoxView(cmpdNm = click3(), dpi = 200, col = colr)
}))

output$info2 <- renderText({
  paste0("x=", input$plot_click3$x, "\ny=", input$plot_click3$y)
})

# ------------------------------------------------------------------------
# Correlation Analysis
# ------------------------------------------------------------------------

corrPara1 <- eventReactive(input$go6, {
  # Correlation coefficient.
  if (input$distM == 1) {
    "pearson"
  } else if (input$distM == 2) {
    "spearman"
  } else {
    "kendall"
  }
})

corrPara2 <- eventReactive(input$go6, {
  # BUG FIX: the original wrote `viewMethod=="overview"` (a comparison,
  # not an assignment) in the first branch; it only worked by accident
  # because viewMethod was pre-initialised to "overview".
  if (input$viewM == 1) "overview" else "detailed"
})

corrPara3 <- eventReactive(input$go6, {
  isTRUE(input$fixColD)
})

corrPara4 <- eventReactive(input$go6, {
  # Colour contrast scheme; falls back to "default".
  switch(as.character(input$colorCon),
         "2" = "gbr", "3" = "heat", "4" = "topo", "5" = "gray",
         "6" = "rwb", "7" = "rwg", "8" = "wnvyb",
         "default")
})

corrPara5 <- eventReactive(input$go6, {
  isTRUE(input$performClus)
})

# Heat-map container sized from the user's width/height inputs.
output$corrHeat <- renderUI({
  plotOutput("CorrHeatMap", width = paste0(input$CorrHeatWidth, "%"),
             height = input$CorrHeatHeight)
})

output$CorrHeatMap <- renderPlot({
  # topNum = NA: the original passed a bare, undefined `topNum` variable
  # which only avoided an error through lazy evaluation (with top = FALSE
  # it is never used).
  MyPlotCorrHeatMap("correlation HeatMap", format = "png", dpi = 200,
                    width = NA, cor.method = corrPara1(),
                    colors = corrPara4(), viewOpt = corrPara2(),
                    fix.col = corrPara3(), no.clst = corrPara5(),
                    top = FALSE, topNum = NA)
})

# ------------------------------------------------------------------------
# Pattern searching
# ------------------------------------------------------------------------

interestingFeatures <- reactive({
  input$file1
  colnames(dataSet$proc)
})

# Predefined pattern templates, split out of the comma-joined string
# returned by GenerateTemplates().
profiles <- reactive({
  input$file1
  strsplit(toString(GenerateTemplates()), ",")[[1]]
})

output$interestFt <- renderUI({
  selectInput("iFT", label = h5("features of interest"),
              choices = interestingFeatures())
})

output$profile <- renderUI({
  selectInput("templateProfiles", label = h5("predefined profile"),
              choices = profiles())
})

# Pattern mode: 1 = correlate against a feature, 2 = predefined template,
# 3 = user-defined template.
patternPara1 <- eventReactive(input$go7, {
  if (input$pattern == 1) {
    1
  } else if (input$pattern == 2) {
    2
  } else {
    3
  }
})

patternPara2 <- eventReactive(input$go7, {
  if (input$distM2 == 1) {
    "pearson"
  } else if (input$distM2 == 2) {
    "spearman"
  } else {
    "kendall"
  }
})

patternPara3 <- eventReactive(input$go7, {
  toString(input$iFT)
})
patternPara4 <- eventReactive(input$go7, {
  toString(input$templateProfiles)
})
patternPara5 <- eventReactive(input$go7, {
  input$customPro
})

# Shared driver so the plot and the table run the identical search.
runPatternSearch <- function() {
  if (patternPara1() == 1) {
    FeatureCorrelation(patternPara2(), patternPara3())
  } else if (patternPara1() == 2) {
    Match.Pattern(dist.name = patternPara2(), pattern = patternPara4())
  } else {
    Match.Pattern(dist.name = patternPara2(), pattern = patternPara5())
  }
}

output$patternGraph <- renderPlot({
  runPatternSearch()
  MyPlotCorr()
})

output$CORRtable1 <- renderDataTable({
  runPatternSearch()
  MyCORRTable()
}, options = list(lengthMenu = c(10, 20, 50, 100), pageLength = 20))

# ------------------------------------------------------------------------
# PCA pairwise summary plot
# ------------------------------------------------------------------------

# Choices 2..max number of PCs.  The original loop read
# `for (i in 1:GetMaxPCAComp()-1)`, which by precedence is (1:max)-1 and
# silently started at i = 0 (n[0] <- ... is a no-op in R); the resulting
# choices were 2..max, which this reproduces directly.
pcNumbers <- reactive({
  input$file1
  PCA.Anal()
  seq(2, GetMaxPCAComp())
})

output$pcNum <- renderUI({
  selectInput("pcNo", label = h5("Display pairwise score plot for top PCs:"),
              choices = pcNumbers())
})

pcSummPara1 <- eventReactive(input$update1, {
  input$pcNo
})

output$overPlot <- renderUI({
  plotOutput("overplt", width = paste0(input$OverPWidth, "%"),
             height = input$OverPHeight)
})

output$overplt <- renderPlot({
  MyPlotPCAPairSummary(pc.num = pcSummPara1())
})

# ------------------------------------------------------------------------
# Scree plot
# ------------------------------------------------------------------------

# Same choices as pcNumbers (see precedence note there).
pcNumbers2 <- reactive({
  input$file1
  PCA.Anal()
  seq(2, GetMaxPCAComp())
})

output$pcNum2 <- renderUI({
  selectInput("pcNo2", label = h5("Display pairwise score plot for top PCs:"),
              choices = pcNumbers2())
})

screePlotPara1 <- eventReactive(input$update2, {
  input$pcNo2
})

output$screePlot <- renderUI({
  plotOutput("ScreePlt", width = paste0(input$ScreePWidth, "%"),
             height = input$ScreePHeight)
})

output$ScreePlt <- renderPlot({
  MyPlotPCAScree(scree.num = screePlotPara1())
})

# ------------------------------------------------------------------------
# 2D PCA score plot
# ------------------------------------------------------------------------

pca2dPara1 <- eventReactive(input$update3, {
  input$pcX
})
pca2dPara2 <- eventReactive(input$update3, {
  input$pcY
})
pca2dPara3 <- eventReactive(input$update3, {
  # 0.95 draws the confidence ellipse, 0 disables it.
  if (isTRUE(input$pcaConf)) 0.95 else 0
})
pca2dPara4 <- eventReactive(input$update3, {
  # 1 shows sample names on the plot.
  if (isTRUE(input$disSmplName)) 1 else 0
})
pca2dPara5 <- eventReactive(input$update3, {
  # 1 renders in grey scale.
  if (isTRUE(input$gscale)) 1 else 0
})

output$pca2dP <- renderUI({
  plotOutput("pca2dPlt", width = paste0(input$PCA2DWidth, "%"),
             height = input$PCA2DHeight)
})

output$pca2dPlt <- renderPlot({
  PCA.Anal()
  MyPlotPCA2DScore(pcx = pca2dPara1(), pcy = pca2dPara2(),
                   reg = pca2dPara3(), show = pca2dPara4(),
                   grey.scale = pca2dPara5())
})
# ------------------------------------------------------------------------
# PCA Trajectory Plot
# ------------------------------------------------------------------------
# Every parameter is captured when the "update16" button is pressed.

pcaTraPara1 <- eventReactive(input$update16, {
  input$pcaTraX
})
pcaTraPara2 <- eventReactive(input$update16, {
  input$pcaTraY
})
pcaTraPara3 <- eventReactive(input$update16, {
  input$PCATraTitle
})
pcaTraPara4 <- eventReactive(input$update16, {
  input$traPtS
})
pcaTraPara5 <- eventReactive(input$update16, {
  # Axis-extension percentage expressed as a fraction.
  input$traLimit / 100
})
pcaTraPara7 <- eventReactive(input$update16, {
  input$errBarWidth
})

# Colour text box seeded with one random colour per class level.
output$PCATraCol <- renderUI({
  seedCols <- sample(colorList, dataLvls())
  textInput("traColor", h5("Colors for the graph"),
            value = toString(seedCols))
})

pcaTraPara6 <- eventReactive(input$update16, {
  strsplit(input$traColor, ",")[[1]]
})

output$pcaTrajPlot <- renderUI({
  plotOutput("pcaTrajP", width = paste0(input$PCATrAWidth, "%"),
             height = input$PCATRAHeight)
})

output$pcaTrajP <- renderPlot({
  PCA.Anal()
  PlotTraPCA(pc1 = pcaTraPara1(), pc2 = pcaTraPara2(),
             title = pcaTraPara3(), ptsSize = pcaTraPara4(),
             extPer = pcaTraPara5(), colors = pcaTraPara6(),
             errW = pcaTraPara7())
})

# ------------------------------------------------------------------------
# 3D PCA plot (rgl)
# ------------------------------------------------------------------------

PCA3DPara1 <- eventReactive(input$plot3dpca1, {
  input$pSize
})
PCA3DPara2 <- eventReactive(input$plot3dpca1, {
  input$transparency
})
PCA3DPara3 <- eventReactive(input$plot3dpca1, {
  input$ell
})
PCA3DPara4 <- eventReactive(input$plot3dpca1, {
  input$grid
})
PCA3DPara5 <- eventReactive(input$plot3dpca1, {
  input$PcaTiltle
})

output$pcaCol <- renderUI({
  nGroups <- length(levels(dataSet$proc.cls))
  textInput("pcaColor", h5("Colors for PCA"),
            value = toString(sample(colorList, nGroups)))
})

PCA3DPara6 <- eventReactive(input$plot3dpca1, {
  strsplit(input$pcaColor, ",")[[1]]
})

# Opens an rgl device with a fixed window geometry and draws the 3D
# score plot.
observeEvent(input$plot3dpca1, {
  PCA.Anal()
  open3d()
  par3d(windowRect = c(216, 30, 958, 695))
  Graphs3DPCA(pointSize = PCA3DPara1(),
              transparency = PCA3DPara2(),
              ell = PCA3DPara3(),
              grd = PCA3DPara4(),
              Title = PCA3DPara5(),
              group.col = PCA3DPara6())
})

# Saves the current rgl scene to disk.
observeEvent(input$snapShot1, {
  snapshot3d(filename = "3D Plot.png", fmt = "png", top = TRUE)
})

# --- interactive pca3d() variant ----------------------------------------

PCA3D2para1 <- eventReactive(input$plot3dpca2, {
  input$dC
})
PCA3D2para2 <- eventReactive(input$plot3dpca2, {
  input$dS
})
PCA3D2para3 <- eventReactive(input$plot3dpca2, {
  input$showScale
})
PCA3D2para4 <- eventReactive(input$plot3dpca2, {
  # TRUE when the checkbox is ticked, otherwise a blank placeholder.
  if (input$showlabels == TRUE) input$showlabels else " "
})
PCA3D2para5 <- eventReactive(input$plot3dpca2, {
  input$showP
})
PCA3D2para6 <- eventReactive(input$plot3dpca2, {
  input$shadow
})
PCA3D2para7 <- eventReactive(input$plot3dpca2, {
  input$ell2
})
PCA3D2para8 <- eventReactive(input$plot3dpca2, {
  input$showGrpLab
})

observeEvent(input$plot3dpca2, {
  pca3d(pca = prcomp(dataSet$norm, center = PCA3D2para1(),
                     scale = PCA3D2para2()),
        group = dataSet$cls,
        show.scale = PCA3D2para3(),
        show.labels = PCA3D2para4(),
        show.plane = PCA3D2para5(),
        show.shadows = PCA3D2para6(),
        show.ellipses = PCA3D2para7(),
        show.group.labels = PCA3D2para8(),
        new = TRUE)
})

observeEvent(input$snapShot2, {
  snapshotPCA3d("3D Plot 2.png")
})
# ------------------------------------------------------------------------
# PCA Loading Plot
# ------------------------------------------------------------------------

loadingPara1 <- eventReactive(input$update4, {
  input$loadingX
})
loadingPara2 <- eventReactive(input$update4, {
  input$loadingY
})
loadingPara3 <- eventReactive(input$update4, {
  # Whether to label the features on the plot.
  input$loadingFeat
})

output$color4 <- renderUI({
  colorV <- sample(colorList, dataLvls())
  textInput("colorVector4", h5("Colors for the interval/box plot graph"),
            value = toString(colorV))
})

# Scatter-style loading plot.
output$loadingPCA1 <- renderPlot({
  PCA.Anal()
  MyPlotPCALoading(imgName = "loading", format = "png", dpi = 72, width = NA,
                   inx1 = loadingPara1(), inx = loadingPara2(),
                   plotType = "scatter", lbl.feat = loadingPara3())
})

# Bar-style loading plot.
output$loadingPCA2 <- renderPlot({
  PCA.Anal()
  MyPlotPCALoading(imgName = "loading", format = "png", dpi = 72, width = NA,
                   inx1 = loadingPara1(), inx = loadingPara2(),
                   plotType = "bar", lbl.feat = loadingPara3())
})

output$Loadingtable1 <- renderDataTable({
  PCA.Anal()
  MyLOADTable(x = loadingPara1(), y = loadingPara2())
}, options = list(lengthMenu = c(10, 20, 50, 100), pageLength = 20))

# Feature nearest the click on the loading scatter plot.  Column names
# n/b/c must match the xvar/yvar arguments of nearPoints().
click4 <- reactive({
  validate(
    need(input$plot_click4$x != "", "Click a point for a interval/box plot")
  )
  PCA.Anal()
  # Vectorised replacement for the original three growing for-loops.
  b <- signif(as.numeric(analSet$pca$rotation[, loadingPara1()]), 5)
  c <- signif(as.numeric(analSet$pca$rotation[, loadingPara2()]), 5)
  n <- seq_along(b)
  pt <- nearPoints(data.frame(n, b, c), input$plot_click4,
                   xvar = "b", yvar = "c", maxpoints = 1)
  # [[1]] (not [1]) extracts the index column as a plain vector, matching
  # the t-test / ANOVA click handlers.
  as.numeric(pt[[1]])
})

observeEvent(input$plot_click4, output$loadingCmp1 <- renderPlot({
  col <- strsplit(input$colorVector4, ",")[[1]]
  # BUG FIX: the second branch tested input$tIntCalc (copy-pasted from
  # the t-test tab); it must read this tab's own input$lIntCalc.
  stat <- " "
  if (input$lIntCalc == 1) {
    stat <- "se"
  } else if (input$lIntCalc == 2) {
    stat <- "sd"
  }
  IntervalPlot(cmpdNm = click4(), dpi = 200, colors = col, calc = stat)
}))

observeEvent(input$plot_click4, output$loadingCmp2 <- renderPlot({
  colr <- strsplit(input$colorVector4, ",")[[1]]
  PlotCmpdBoxView(cmpdNm = click4(), dpi = 200, col = colr)
}))

output$info3 <- renderText({
  paste0("x=", input$plot_click4$x, "\ny=", input$plot_click4$y)
})

# ------------------------------------------------------------------------
# BiPlot
# ------------------------------------------------------------------------

biplotPara1 <- eventReactive(input$update5, {
  input$biplotPCX
})
biplotPara2 <- eventReactive(input$update5, {
  input$biplotPCY
})

output$biplot <- renderPlot({
  PCA.Anal()
  MyPlotPCABiplot(inx1 = biplotPara1(), inx2 = biplotPara2())
})

# ------------------------------------------------------------------------
# PLS pairwise summary plot
# ------------------------------------------------------------------------

plsSummPara1 <- eventReactive(input$update6, {
  input$plsNum
})

output$overPlotPls <- renderUI({
  plotOutput("overPPls", width = paste0(input$PLSOWidth, "%"),
             height = input$PLSOHeight)
})

output$overPPls <- renderPlot({
  PLSR.Anal()
  MyPlotPLSPairSummary(pc.num = plsSummPara1())
})

# ------------------------------------------------------------------------
# 2D PLS-DA score plot
# ------------------------------------------------------------------------

pls2dPara1 <- eventReactive(input$update7, {
  input$plsX
})
pls2dPara2 <- eventReactive(input$update7, {
  input$plsY
})
pls2dPara3 <- eventReactive(input$update7, {
  # 0.95 draws the confidence ellipse, 0 disables it.
  if (isTRUE(input$plsConf)) 0.95 else 0
})
pls2dPara4 <- eventReactive(input$update7, {
  # 1 shows sample names.
  if (isTRUE(input$plsName)) 1 else 0
})
pls2dPara5 <- eventReactive(input$update7, {
  # 1 renders in grey scale.
  if (isTRUE(input$plsgscale)) 1 else 0
})

output$pls2dP <- renderUI({
  plotOutput("pls2dPlot", width = paste0(input$dplsWidth, "%"),
             height = input$dplsHeight)
})

output$pls2dPlot <- renderPlot({
  PLSR.Anal()
  MyPlotPLS2DScore(inx1 = pls2dPara1(), inx2 = pls2dPara2(),
                   reg = pls2dPara3(), show = pls2dPara4(),
                   grey.scale = pls2dPara5())
})

# ------------------------------------------------------------------------
# PLS-DA trajectory plot
# ------------------------------------------------------------------------

plsTraPara1 <- eventReactive(input$update17, {
  input$plsdaTraX
})
plsTraPara2 <- eventReactive(input$update17, {
  input$plsdaTraY
})
plsTraPara3 <- eventReactive(input$update17, {
  input$PlsdaTraTitle
})
plsTraPara4 <- eventReactive(input$update17, {
  input$plsdatraPtS
})
plsTraPara5 <- eventReactive(input$update17, {
  # Axis-extension percentage expressed as a fraction.
  input$plsdatraLimit / 100
})
plsTraPara7 <- eventReactive(input$update17, {
  input$plserrBarWidth
})

output$PLSDATraCol <- renderUI({
  colorV <- sample(colorList, dataLvls())
  textInput("PLStraColor", h5("Colors for the graph"),
            value = toString(colorV))
})

plsTraPara6 <- eventReactive(input$update17, {
  strsplit(input$PLStraColor, ",")[[1]]
})

output$plsdaTrajPlot <- renderUI({
  plotOutput("plsTRAP", width = paste0(input$plsTRAWidth, "%"),
             height = input$plsTRAHeight)
})

output$plsTRAP <- renderPlot({
  PLSR.Anal()
  PlotTraPLSDA(inx1 = plsTraPara1(), inx2 = plsTraPara2(),
               title = plsTraPara3(), ptsSize = plsTraPara4(),
               extPer = plsTraPara5(), colors = plsTraPara6(),
               errW = plsTraPara7())
})

# ------------------------------------------------------------------------
# 3D PLS-DA plot (rgl)
# ------------------------------------------------------------------------

PLS3DPara1 <- eventReactive(input$plot3dpls1, {
  input$plsPSize
})
PLS3DPara2 <- eventReactive(input$plot3dpls1, {
  input$plsTransparency
})
PLS3DPara3 <- eventReactive(input$plot3dpls1, {
  input$plsEll
})
PLS3DPara4 <- eventReactive(input$plot3dpls1, {
  input$plsGrid
})
PLS3DPara5 <- eventReactive(input$plot3dpls1, {
  input$PlsTiltle
})

output$plsCol <- renderUI({
  textInput("plsColor", h5("Colors for PLSDA"),
            value = toString(sample(colorList,
                                    length(levels(dataSet$proc.cls)))))
})

PLS3DPara6 <- eventReactive(input$plot3dpls1, {
  strsplit(input$plsColor, ",")[[1]]
})

observeEvent(input$plot3dpls1, {
  PLSR.Anal()
  open3d()
  par3d(windowRect = c(216, 30, 958, 695))
  Graphs3DPLSDA(pointSize = PLS3DPara1(),
                transparency = PLS3DPara2(),
                ell = PLS3DPara3(),
                grd = PLS3DPara4(),
                Title = PLS3DPara5(),
                group.col = PLS3DPara6())
})

observeEvent(input$snapShot3, {
  snapshot3d(filename = "3D Plot.png", fmt = "png", top = TRUE)
})

# --- interactive pca3d() variant on the PLS scores ----------------------

PLS3D2para1 <- eventReactive(input$plot3dpls2, {
  input$showScalePls
})
PLS3D2para2 <- eventReactive(input$plot3dpls2, {
  # TRUE when the checkbox is ticked, otherwise a blank placeholder.
  if (input$showlabelsPls == TRUE) input$showlabelsPls else " "
})
PLS3D2para3 <- eventReactive(input$plot3dpls2, {
  input$showPPls
})
PLS3D2para4 <- eventReactive(input$plot3dpls2, {
  input$shadowPls
})
PLS3D2para5 <- eventReactive(input$plot3dpls2, {
  input$ell2Pls
})
PLS3D2para6 <- eventReactive(input$plot3dpls2, {
  input$showGrpLabPls
})

observeEvent(input$plot3dpls2, {
  # Matrix of the first three PLS components, with sample names as row
  # names, passed to pca3d() in place of a prcomp object.
  scores <- analSet$plsr$scores
  m <- cbind(PC1 = scores[, 1], PC2 = scores[, 2], PC3 = scores[, 3])
  row.names(m) <- row.names(dataSet$norm)
  pca3d(pca = m,
        group = dataSet$cls,
        show.scale = PLS3D2para1(),
        show.labels = PLS3D2para2(),
        show.plane = PLS3D2para3(),
        show.shadows = PLS3D2para4(),
        show.ellipses = PLS3D2para5(),
        show.group.labels = PLS3D2para6(),
        new = TRUE)
})

observeEvent(input$snapShot4, {
  snapshotPCA3d("3D Plot 2.png")
})
PLSDA Loading ############################################## ################################################################################################ plsloadingPara1 <- eventReactive(input$update8,{ return(input$plsloadingX) }) plsloadingPara2 <- eventReactive(input$update8,{ return(input$plsloadingY) }) plsloadingPara3 <- eventReactive(input$update8,{ return(input$plsloadingFeat) }) output$color5 <- renderUI({ colorV= sample(colorList, dataLvls()) textInput("colorVector5", h5("Colors for the interval plot graph"), value = toString(colorV)) }) output$loadingPLS1 <- renderPlot({ PLSR.Anal() MyPlotPLSLoading(imgName="PLSDA loading", format="png", dpi=72, width=NA, inx1=plsloadingPara1(), inx=plsloadingPara2(), plotType="scatter", lbl.feat=plsloadingPara3()) }) output$loadingPLS2 <- renderPlot({ PLSR.Anal() MyPlotPLSLoading(imgName="PLSDA loading", format="png", dpi=72, width=NA, inx1=plsloadingPara1(), inx=plsloadingPara2(), plotType="bar", lbl.feat=plsloadingPara3()) }) output$Loadingtable2 <- renderDataTable({ PLSR.Anal() MyLOADTable2(inx=plsloadingPara1(), iny=plsloadingPara2()) },options = list(lengthMenu = c(10, 20,50,100), pageLength = 20) ) click5 <- reactive({ validate( need(input$plot_click5$x != "", "Click a point for a interval plot") ) PLSR.Anal() n=c() b=c() c=c() for(i in 1:length(analSet$plsr$loadings[,plsloadingPara1()])){ n[i]=i } for(i in 1:length(analSet$plsr$loadings[,plsloadingPara1()])){ b[i]=signif(as.numeric(analSet$plsr$loadings[,plsloadingPara1()])[i],5) } for(i in 1:length(analSet$plsr$loadings[,plsloadingPara2()])){ c[i]=signif(as.numeric(analSet$plsr$loadings[,plsloadingPara2()])[i],5) } x=as.numeric(nearPoints(data.frame(n,b,c), input$plot_click5, xvar="b", yvar="c", maxpoints=1)[1]) return(x) }) observeEvent(input$plot_click5, output$plsloadingCmp1 <-renderPlot({ colString=input$colorVector5 col=strsplit(colString,",")[[1]] stat=" " if(input$l2IntCalc==1){ stat="se" }else if (input$tIntCalc==2){ stat="sd" } IntervalPlot(cmpdNm = 
click5(), dpi=200, colors=col,calc=stat) })) observeEvent(input$plot_click5, output$plsloadingCmp2<-renderPlot({ colString=input$colorVector5 colr=strsplit(colString,",")[[1]] PlotCmpdBoxView(cmpdNm=click5(), dpi=200, col=colr) })) output$info5 <- renderText({ paste0("x=", input$plot_click5$x, "\ny=", input$plot_click5$y) }) ################################################################################################ ################################### PLSDA CV ################################################### ################################################################################################ defultCompNo <- reactive({ PLSR.Anal() input$file1 no=GetDefaultPLSCVComp() return(no) }) output$CVcompNo <- renderUI({ numericInput("crossVNo", label=h5("Maximum components to search:"), value=defultCompNo()) }) CVpara1 <- eventReactive(input$update9,{ return(input$crossVNo) }) CVpara2 <- eventReactive(input$update9,{ meth='L' if(input$CVMethod==1){ meth='T' } else { meth='L' } return(meth) }) CVpara3 <- eventReactive(input$update9,{ per="" if(input$performMea==1){ per="Q2" } else if (input$performMea==3){ per="R2" } else { per=" " } return(per) }) output$PLSDACVPlot <- renderUI({ plotOutput("PLSDACVP", width = paste0(input$plsCVWidth, "%"), height = input$plsCVHeight) }) output$PLSDACVP <- renderPlot({ PLSDA.CV(methodName= CVpara2(), compNum=CVpara1(), choice=CVpara3()) MyPlotPLS.Classification() }) output$CVTab <- renderTable({ PLSDA.CV(methodName= CVpara2(), compNum=CVpara1(), choice=CVpara3()) MyCVTable() },options = list(lengthMenu = c(10, 20,50,100), pageLength = 20) ) ################################################################################################ ################################### PLSDA IMP ################################################## ################################################################################################ vips <- reactive({ input$file1 vipNames=GetPLSSigColNames("vip") return(vipNames) }) coefN <- 
reactive({ input$file1 coefNames=GetPLSSigColNames("coef") return(coefNames) }) output$vip <- renderUI({ selectInput("impVIP", label=NULL, choices=vips()) }) output$coef <- renderUI({ selectInput("impCoef", label=NULL, choices=coefN()) }) impPara1 <- eventReactive(input$update10,{ m="" if(input$impMeasure1==TRUE){ m="vip" } else if (input$impMeasure2==TRUE) { m="coef" } return(m) }) impPara2 <- eventReactive(input$update10,{ impNm=" " if(impPara1()=='vip'){ impNm = input$impVIP } else if (impPara1()=="coef"){ impNm = input$impCoef } return(impNm) }) impPara3 <- eventReactive(input$update10,{ return(input$topFeatNo) }) impPara4 <- eventReactive(input$update10,{ return(input$BW) }) output$PLSDAImp <- renderPlot({ MyPlotPLS.Imp(type=impPara1(), feat.nm=impPara2(), feat.num=impPara3(), color.BW=impPara4()) }) output$ImpFeatTab <- renderDataTable({ if(impPara1()=="vip"){ VIPTab() }else if(impPara1()=="coef"){ COEFTab() } },options = list(lengthMenu = c(10, 20,50,100), pageLength = 20) ) ################################################################################################ ################################### PLSDA Perm ################################################# ################################################################################################ permuPara1 <- eventReactive(input$update11,{ testst=" " if(input$permTest==1){ testst="accu" } else { testst=" " } return(testst) }) permuPara2 <- eventReactive(input$update11,{ reps=100 if(input$permNumber==1){ reps=100 } else if (input$permNumber==2){ reps=1000 } else { reps=2000 } return(reps) }) output$permPlot <- renderPlot({ options(warn=-1) PLSDA.Permut(num=permuPara2(), type=permuPara1()) MyPlotPLS.Permutation() options(warn=0) }) ############################################################################### ############################### OPLSDA 2D Score ############################### ############################################################################### oplsSPara1 <- 
eventReactive(input$update12,{ conf=0.95 if(input$opls95==TRUE){ conf=0.95 } else { conf=0 } return(conf) }) oplsSPara2 <- eventReactive(input$update12,{ na=1 if(input$oplsSmpNam==TRUE){ na=1 } else { na=0 } return(na) }) oplsSPara3 <- eventReactive(input$update12,{ sca=0 if(input$oplsgScale==TRUE){ sca=1 } else { sca=0 } return(sca) }) output$oplsScore <- renderUI({ plotOutput("oplsSc", width = paste0(input$OPlsWidth, "%"), height = input$OPlsHeight) }) output$oplsSc <- renderPlot({ OPLSR.Anal() MyPlotOPLS2DScore(reg=oplsSPara1(), show=oplsSPara2(), grey.scale=oplsSPara3()) }) ############################################################################### ############################### OPLSDA S-plot ################################# ############################################################################### output$color6 <- renderUI({ colorV= sample(colorList, dataLvls()) textInput("colorVector6", h5("Colors for the interval plot graph"), value = toString(colorV)) }) observeEvent(input$update13, output$oplsSPlot <- renderPlot({ OPLSR.Anal() MyPlotOPLS.Splot(plotType="custom") })) click6 <- reactive({ validate( need(input$plot_click6$x != "", "Click a point for a interval plot") ) OPLSR.Anal() s <- as.matrix(dataSet$norm); T <- as.matrix(analSet$oplsda$scoreMN) n <- c() b <- c() c <- c() for(i in 1:ncol(s)){ n[i]=i } for (i in 1:ncol(s)) { scov <- cov(s[,i], T) b <- matrix(c(b, scov), ncol=1) } for (i in 1:nrow(b)) { den <- apply(T, 2, sd)*sd(s[,i]) corr1 <- b[i,]/den c <- matrix(c(c, corr1), ncol=1) } x=as.numeric(nearPoints(data.frame(n,b,c), input$plot_click6, xvar="b", yvar="c", maxpoints=1)[1]) return(x) }) observeEvent(input$plot_click6, output$oplsCmp1 <-renderPlot({ colString=input$colorVector6 col=strsplit(colString,",")[[1]] IntervalPlot(cmpdNm = click6(), dpi=200, colors=col) })) observeEvent(input$plot_click6, output$oplsCmp2 <-renderPlot({ colString=input$colorVector6 colr=strsplit(colString,",")[[1]] PlotCmpdBoxView(cmpdNm=click6(), 
dpi=200,col=colr) })) output$info6 <- renderText({ paste0("x=", input$plot_click6$x, "\ny=", input$plot_click6$y) }) output$OPLSDATab <- renderDataTable({ OPLSR.Anal() MyPlotOPLS.Splot(plotType="custom") OPLSTab() },options = list(lengthMenu = c(10, 20,50,100), pageLength = 20) ) ############################################################################### ############################### OPLSDA Overview Plot ########################## ############################################################################### observeEvent(input$update14, output$OPLSOver <- renderPlot({ MyPlotOPLS.MDL() })) ############################################################################### ############################### OPLSDA Permutation plot ####################### ############################################################################### oplsdaPermPara1 <- eventReactive(input$update15,{ return(as.numeric(input$oplsdaPer)) }) output$OPLSDAPerm <- renderPlot({ MyPlotOPLS.Permutation(num=oplsdaPermPara1()) }) ############################################################################### ########################### Feature Identification SAM ######################## ############################################################################### ispaired5 <- reactive({ input$file1 return(dataSet$paired) }) output$SAMtxt<-renderUI({ if (numOfcls()==TRUE){ textOutput("SAMinfo") } }) output$SAMinfo <- renderText({ "Perform multi-class SAM based on F-statistic" }) output$SAMPara <- renderUI({ if (numOfcls()==FALSE){ checkboxInput("sampara", label = h5("Non-parametric tests:"), value = FALSE) } }) output$SAMAnlTyp <- renderUI({ if (numOfcls()==FALSE){ if(ispaired5()==TRUE){ selectInput("samanltyp", label = h5("Analysis Type"), choices = list("Unpaired" = 1, "Paired" = 2), selected = 1) } else { selectInput("samanltyp", label = h5("Analysis Type"), choices = list("Unpaired" = 1), selected = 1) } } }) output$SAMVar <- renderUI({ if(numOfcls()==FALSE){ selectInput("samvar", label 
=h5("Group variance"), choices = list("Equal"=1, "Unequal"=2), selected =1) } }) SAMPara1 <- eventReactive(input$compute1,{ method="d.stat" if(input$sampara==FALSE){ method="d.stat" } else { method="non-parametric" } return(method) }) SAMPara2 <- eventReactive(input$compute1,{ paired=FALSE if(input$samanltyp==1){ paired=FALSE } else { paired= TRUE } return(paired) }) SAMPara3 <- eventReactive(input$compute1,{ grpVar=TRUE if(input$samvar==1){ grpVar=TRUE } else { grpVar=FALSE } return(grpVar) }) output$emptyPlot<-renderPlot({ if(numOfcls()==TRUE){ SAM.Anal() #MyPlotSAM.FDR(delta=input$samfdr) } else { SAM.Anal(method=SAMPara1(), paired=SAMPara2(), varequal=SAMPara3()) } # MyPlotSAM.FDR(delta=input$samfdr) }) observeEvent(input$compute1,{ output$SAMFDR <- renderUI({ numericInput("samfdr", label = h5("Update the delta to control FDR:"), value = GetSuggestedSAMDelta()) }) }) #FDRPara <- eventReactive(input$compute1,{ # return(dv) #}) #output$SAMFDRPLOT<-renderPlot({ # MyPlotSAM.FDR(delta=input$samfdr) #}) observeEvent(input$compute1,{ output$SAMFDRPLOT<-renderPlot({ MyPlotSAM.FDR(delta=input$samfdr) }) }) observeEvent(input$compute1,{ output$SAMResultPlot<-renderPlot({ SAMResPlot(delta=input$samfdr) }) }) observeEvent(input$compute1,{ output$SAMTab<-renderDataTable({ SAMTable(del=input$samfdr) }) }) ########################################################################## ################################# Dendrogram ############################# ########################################################################## dendroPara1 <- eventReactive(input$update18,{ smplDist='euclidean' if(input$DendroDist==1){ smplDist='euclidean' } else if (input$DendroDist==2){ smplDist='spearman' } else { smplDist='pearson' } return(smplDist) }) dendroPara2 <- eventReactive(input$update18,{ clstDist="ward" if(input$DendroAlgor==1){ clstDist="ward" } else if (input$DendroAlgor==2){ clstDist="average" } else if (input$DendroAlgor==3){ clstDist="complete" } else { clstDist="single" } 
return(clstDist) }) output$DendroPlot <- renderPlot({ MyPlotHCTree(smplDist=dendroPara1(), clstDist=dendroPara2()) }) ############################################################################## ################################# Cluster Heatmap ############################ ############################################################################## CAHeatPara1 <- eventReactive(input$update19,{ DistMeasure='euclidean' if(input$CAHeatDist==1){ DistMeasure='euclidean' } else if (input$CAHeatDist==2){ DistMeasure='pearson' } else { DistMeasure='minkowski' } return(DistMeasure) }) CAHeatPara2 <- eventReactive(input$update19,{ clstMethod = 'ward' if(input$CAHeatAlgor==1){ clstMethod='ward' } else if (input$CAHeatAlgor==2){ clstMethod='average' } else if (input$CAHeatAlgor==3){ clstMethod='complete' } else { clstMethod='single' } return(clstMethod) }) CAHeatPara3 <- eventReactive(input$update19,{ colorContrast='default' if(input$CAHeatCC==1){ colorContrast='default' } else if (input$CAHeatCC==2){ colorContrast ='gbr' } else if (input$CAHeatCC==3){ colorContrast = 'heat' } else if (input$CAHeatCC==4){ colorContrast ='topo' } else if (input$CAHeatCC==5){ colorContrast ='gray' } else if (input$CAHeatCC==6){ colorContrast ='rwb' } else if (input$CAHeatCC==7){ colorContrast ='rwg' } else if (input$CAHeatCC==8){ colorContrast ='wnvyb' } return(colorContrast) }) CAHeatPara4 <- eventReactive(input$update19,{ ovrVOpt="overview" if(input$CAHeatView==1){ ovrVOpt="overview" } else { ovrVOpt="detail" } return(ovrVOpt) }) CAHeatPara5 <- eventReactive(input$update19,{ return(input$CAHeatView01) }) CAHeatPara6 <- eventReactive(input$update19,{ organiseBy1=TRUE if(CAHeatPara5()==TRUE){ if(input$CAHeatView02==1){ organiseBy1=FALSE } else if (input$CAHeatView02==3) { organiseBy1=FALSE } else { organiseBy1=TRUE } return(organiseBy1) } else { return(organiseBy1) } }) CAHeatPara7 <- eventReactive(input$update19,{ organiseBy2=TRUE if(CAHeatPara5()==TRUE){ if(input$CAHeatView02==2){ 
organiseBy2=FALSE } else if (input$CAHeatView02==3){ organiseBy2=FALSE } else { organiseBy2=TRUE } return(organiseBy2) } else { return(organiseBy2) } }) CAHeatPara8 <- eventReactive(input$update19,{ return(input$CAHeatView03) }) CAHeatPara9 <- eventReactive(input$update19,{ return(input$CAHeatView04) }) CAHeatPara10 <- eventReactive(input$update19,{ methodnm="tanova" if(input$CAHeatView05==1){ methodnm="tanova" } else if (input$CAHeatView05==2){ methodnm="vip" } else { methodnm="rf" } return(methodnm) }) CAHeatPara11<-eventReactive(input$update19,{ return(input$CAHeatView06) }) CAHeatPara12 <- eventReactive(input$update19,{ dataOpt="norm" if(input$CAHeatData01==1){ dataOpt="norm" } else { dataOpt="org" } return(dataOpt) }) CAHeatPara13 <-eventReactive(input$update19,{ scaleOpt="column" if(input$CAHeatData02==1){ scaleOpt="row" } else if (input$CAHeatData02==2){ scaleOpt="column" } else { scaleOpt="none" } return(scaleOpt) }) output$plot.ui <- renderUI({ plotOutput("CAHeatPlot", width = paste0(input$HeatWidth, "%"), height = input$HeatHeight) }) output$CAHeatPlot <-renderPlot({ if(CAHeatPara8()==TRUE){ MyPlotSubHeatMap(dataOpt=CAHeatPara12(), scaleOpt=CAHeatPara13(), smplDist=CAHeatPara1(), clstDist= CAHeatPara2(), palette=CAHeatPara3(), method.nm=CAHeatPara10(), top.num=CAHeatPara9(), viewOpt=CAHeatPara4(), rowV=CAHeatPara6(), colV=CAHeatPara7(), border=CAHeatPara11()) } else { MyPlotHeatMap(dataOpt=CAHeatPara12(), scaleOpt=CAHeatPara13(), smplDist=CAHeatPara1(), clstDist=CAHeatPara2(), palette=CAHeatPara3(), viewOpt=CAHeatPara4(), rowV=CAHeatPara6(), colV=CAHeatPara7(), var.inx=NA, border=CAHeatPara11()) } }) ############################################################################# ############################## K-means ###################################### ############################################################################# KMPara1 <- eventReactive(input$update20,{ return(input$KMclstNm) }) output$KMPlot <- renderPlot({ 
Kmeans.Anal(clust.num=KMPara1()) MyPlotKmeans() }) output$mytable1 <- renderTable({ Kmeans.Anal(clust.num=KMPara1()) MyGetAllKMClusterMembers() }) ############################################################################# ############################## SOM ########################################## ############################################################################# SOMPara1 <- eventReactive(input$update21,{ return(as.numeric(input$SOMXD)) }) SOMPara2 <- eventReactive(input$update21,{ return(as.numeric(input$SOMYD)) }) SOMPara3 <- eventReactive(input$update21,{ intMet='linear' if(input$SOMInit==1){ intMet='linear' } else if (input$SOMInit==2){ intMet='random' } else { intMet='sample' } return(intMet) }) SOMPara4 <- eventReactive(input$update21,{ SOMN='gaussian' if(input$SOMNeigh==1){ SOMN='gaussian' } else { SOMN='bubble' } return(SOMN) }) output$SOMPlot <- renderPlot({ SOM.Anal(x.dim=SOMPara1(), y.dim=SOMPara2(), initMethod=SOMPara3(), neigb = SOMPara4()) MyPlotSOM() }) output$SOMTab <- renderTable({ SOM.Anal(x.dim=SOMPara1(), y.dim=SOMPara2(), initMethod=SOMPara3(), neigb = SOMPara4()) MyGetAllSOMClusterMembers() }) ############################################################################# ############################## Random Forest ################################ ############################################################################# RFPara1 <- eventReactive(input$update22,{ Nu=500 if(input$RFTreesNu==1){ Nu=500 } else if (input$RFTreesNu==2){ Nu=1000 } else if (input$RFTreesNu==3){ Nu=2000 } else { Nu=5000 } return(Nu) }) RFPara2 <- eventReactive(input$update22,{ return(as.numeric(input$RFPredNu)) }) output$RFPlot <- renderPlot({ RF.Anal(treeNum=RFPara1(), tryNum=RFPara2()) MyPlotRF.Classify() }) observeEvent(input$update22,{ output$RFOOB<-renderText({ paste("The OOB Error is: ",toString(GetRFOOB())) }) }) output$RFTab <- renderTable({ RF.Anal(treeNum=RFPara1(), tryNum=RFPara2()) MyGetRFConf.Table() }) 
#################################################################################
######################### Random Forest VIP Plot ################################
#################################################################################
# Variable-importance outputs for the fitted random forest; each renders
# only after the "update23" action button has been pressed.
observeEvent(input$update23,{
  output$RFVipPlot <- renderPlot({ MyPlotRF.VIP() })
})
observeEvent(input$update23,{
  output$RFVipHelp<-renderText({
    # Fixed typo in the displayed help text: "Dicrease" -> "Decrease".
    paste("Features ranked by their contributions to classification accuracy (Mean Decrease Accuracy) ")
  })
})
observeEvent(input$update23,{
  output$RFDaTTab = renderDataTable({ MyRFVipTab() })
})
#################################################################################
######################### Random Forest Outlier detection #######################
#################################################################################
observeEvent(input$update24,{
  output$RFOutTxt<-renderText({ paste("Only top 5 potential outliers are labeled ") })
})
observeEvent(input$update24,{
  output$RFOutPlot = renderPlot({ MyPlotRF.Outlier() })
})
########################################################################################################
################################## SVM Plot ############################################################
########################################################################################################
# Validation scheme for recursive SVM: 10-fold CV, leave-one-out, or
# bootstrap.  NOTE(review): "bootstrape" is the exact (misspelled) keyword
# the upstream MetaboAnalyst RSVM.Anal() matches on, so it is kept as-is.
SVMPara1 <- eventReactive(input$update25,{
  cvType=10
  if(input$SVMVMet==1){
    cvType=10
  } else if (input$SVMVMet==2){
    cvType="LOO"
  } else if (input$SVMVMet==3){
    cvType="bootstrape"
  }
  return(cvType)
})
output$SVMPlot <- renderPlot({
  RSVM.Anal(cvType=SVMPara1())
  MyPlotRSVM.Classification()
})
############################################################################################
####################################### SVM Importance plot ################################
############################################################################################
observeEvent(input$update26,{
  output$SVMImportPlot = renderPlot({ MyPlotRSVM.Cmpd() })
})
############################################################################################
####################################### PLSR: data upload ##################################
############################################################################################
# Number of class levels in the processed data (re-evaluated on upload).
PLSRdataLvls <- reactive({
  input$file2
  return(length(levels(dataSet$proc.cls)))
})
# TRUE when the data has more than two classes.
PLSRnumOfcls <- reactive({
  input$file2
  return(dataSet$cls.num > 2)
})
PLSRispaired <- reactive({
  input$file2
  return(dataSet$paired)
})
# Read the PLSR input file and run the MetaboAnalyst sanity checks; the
# resulting check message is shown to the user.  Labels are read as
# continuous ("cont") because PLSR regresses on a numeric response.
output$Pcontents <- renderText({
  inFile2 <- input$file2
  if (is.null(inFile2)){
    return(NULL)
  }
  # Map the data-type radio button onto MetaboAnalyst type codes.
  if(input$plsrRadioButton1==1){
    datatype <- "conc"
  } else if (input$plsrRadioButton1== 2){
    datatype <- "specbin"
  } else {
    datatype <- "pktable"
  }
  # Map the layout selector onto format codes (row/col x unpaired/paired).
  if (input$plsrSelect1==1){
    format1 <- "rowu"
  } else if (input$plsrSelect1==2){
    format1 <- "colu"
  } else if (input$plsrSelect1==3){
    format1 <- "rowp"
  } else {
    format1 <- "colp"
  }
  ispaired <- format1 %in% c("rowp", "colp")
  InitDataObjects(datatype, "stat", paired=ispaired)
  Read.TextData(inFile2$datapath, format=format1, lbl.type="cont")
  SanityCheckData()
  ReplaceMin()
  dataSet$check.msg
})
############################################################################################
####################################### PLSR: missing values ###############################
############################################################################################
# All options below are frozen when the "Pcalc1" (Process) button fires.
PLSRMissVPara1 <- eventReactive(input$Pcalc1,{ return(input$PmissValue1) })
# Percent threshold entered as 0-100, used downstream as a fraction.
PLSRMissVPara2 <- eventReactive(input$Pcalc1,{ return(input$PmissValue2/100) })
PLSRMissVPara3 <- eventReactive(input$Pcalc1,{ return(input$PmissValue3) })
PLSRMissVPara4 <- eventReactive(input$Pcalc1,{ return(input$PmissValue4) })
# Column-wise replacement method ("PmissValue5"): 1=None, 2=mean, 3=median,
# 4=min.  Returns "none" when nothing was requested -- the original
# `input$PmissValue5==2|3|4` condition was always TRUE (it parses as
# `(x==2)|3|4`), which silently made "min" the fall-through answer even for
# the "None" selection.
PLSRMissVPara5 <- eventReactive(input$Pcalc1,{
  if(input$PmissValue5==2){
    MVmet="mean"
  } else if(input$PmissValue5==3){
    MVmet="median"
  } else if(input$PmissValue5==4){
    MVmet="min"
  } else {
    MVmet="none"
  }
  return(MVmet)
})
# Model-based estimation method ("PmissValue6"): 1=None, 2=KNN, 3=PPCA,
# 4=BPCA, 5=SVD impute.  Same always-true-condition fix as above; the
# original defaulted to "knn" even when the user selected "None".
PLSRMissVPara6 <- eventReactive(input$Pcalc1,{
  if(input$PmissValue6==2){
    MVmet="knn"
  } else if(input$PmissValue6==3){
    MVmet="ppca"
  } else if(input$PmissValue6==4){
    MVmet="bpca"
  } else if(input$PmissValue6==5){
    MVmet="svdImpute"
  } else {
    MVmet="none"
  }
  return(MVmet)
})
# Run the selected missing-value treatment and report completion.
output$PMVtext1<-renderText({
  # Step 1: optionally drop features with too many missing values.
  if(PLSRMissVPara1()==TRUE){
    RemoveMissingPercent(int.mat=dataSet$preproc, percent=PLSRMissVPara2())
  }
  # Step 2: impute what remains (first matching option wins).
  # NOTE(review): the original compared PLSRMissVPara5() (mean/median/min)
  # against the KNN/PPCA/BPCA names and vice versa, so those two option
  # groups were swapped and largely dead code; each branch below now uses
  # the reactive that actually produces those method names.
  if(PLSRMissVPara3()==TRUE){
    # "Replace by a small value" checkbox.  NOTE(review): "colmin" replaces
    # by the column minimum -- confirm this is the intended keyword for the
    # "half of the minimum positive value" option.
    ImputeVar(int.mat=dataSet$preproc, method="colmin")
  } else if(PLSRMissVPara4()==TRUE){
    ImputeVar(int.mat=dataSet$preproc, method="exclude")
  } else if(PLSRMissVPara5() %in% c("mean", "median", "min")){
    ImputeVar(int.mat=dataSet$preproc, method=PLSRMissVPara5())
  } else if(PLSRMissVPara6() %in% c("knn", "ppca", "bpca", "svdImpute")){
    ImputeVar(int.mat=dataSet$preproc, method=PLSRMissVPara6())
  }
  ReplaceMin()
  paste("Missing value calculations complete")
})
############################################################################################
####################################### PLSR: normalisation ################################
############################################################################################
PLSRsampleGroups <-reactive({
  input$file2
  return(rownames(dataSet$proc))
})
PLSRsampleFeatures <-reactive({
  input$file2
  return(colnames(dataSet$proc))
})
# One entry per class level (used for pooled-sample normalisation).
PLSRpooledSamples <-reactive({
  input$file2
  return(levels(dataSet$proc.cls))
})
output$Prefsample <- renderUI({
  selectInput("Prsmpl", label = h4("Specific reference samples"), choices = PLSRsampleGroups())
})
output$Ppoolsample <-renderUI({
  selectInput("Ppsmpl", label = h4("Pooled samples from group"), choices=PLSRpooledSamples())
})
output$PrefFeat <- renderUI({
  selectInput("Prf", label=h4("Reference features"), choices= PLSRsampleFeatures())
})
# Row (sample) normalisation keyword for Normalization().  Fixed a typo in
# the final branch (`rownNorm` -> `rowNorm`); the original returned
# "CompNorm" only by accident, via the value of the if/else expression.
PLSRnormMethod1 <- eventReactive(input$Pgo,{
  if(input$PradioButtons2==1){
    rowNorm = 'Nothing'
  } else if (input$PradioButtons2==2){
    rowNorm = 'SumNorm'
  } else if(input$PradioButtons2==3){
    rowNorm = 'MedianNorm'
  } else if(input$PradioButtons2==4){
    rowNorm = "ProbNorm"
  } else if (input$PradioButtons2==5){
    rowNorm = "ProbNorm2"
  } else {
    rowNorm = "CompNorm"
  }
  return(rowNorm)
})
# Data transformation keyword.
PLSRnormMethod2 <- eventReactive(input$Pgo, {
  if (input$PradioButtons3==1){
    transNorm='Nothing'
  } else if (input$PradioButtons3==2){
    transNorm='LogNorm'
  } else {
    transNorm='CrNorm'
  }
  return(transNorm)
})
# Data scaling keyword.
PLSRnormMethod3 <- eventReactive(input$Pgo, {
  if(input$PradioButtons4==1){
    scaleNorm='Nothing'
  } else if (input$PradioButtons4==2){
    scaleNorm='MeanCenter'
  } else if (input$PradioButtons4==3){
    scaleNorm= 'AutoNorm'
  } else if (input$PradioButtons4==4){
    scaleNorm='ParetoNorm'
  } else if (input$PradioButtons4==5){
    scaleNorm='RangeNorm'
  } else {
    scaleNorm='VastNorm'
  }
  return(scaleNorm)
})
PLSRnormMethod4 <- eventReactive(input$Pgo,{ return(toString(input$Prsmpl)) })
PLSRnormMethod5 <- eventReactive(input$Pgo,{ return(toString(input$Ppsmpl)) })
PLSRnormMethod6 <- eventReactive(input$Pgo,{ return(toString(input$Prf)) })
# Run Normalization() with the reference argument matching the chosen row
# normalisation ("ProbNorm" -> reference sample, "ProbNorm2" -> pooled
# group, "CompNorm" -> reference feature), then plot the summary.
output$PnormPlot <- renderPlot({
  if(PLSRnormMethod1()=="ProbNorm"){
    Normalization( rowNorm="ProbNormF", transNorm=PLSRnormMethod2(), scaleNorm=PLSRnormMethod3(), ref=PLSRnormMethod4(), ratio=FALSE, ratioNum=20)
  } else if (PLSRnormMethod1()=="ProbNorm2"){
    Normalization( rowNorm="ProbNormT", transNorm=PLSRnormMethod2(), scaleNorm=PLSRnormMethod3(), ref=PLSRnormMethod5(), ratio=FALSE, ratioNum=20)
  } else if (PLSRnormMethod1()=="CompNorm"){
    Normalization( rowNorm="CompNorm", transNorm=PLSRnormMethod2(), scaleNorm=PLSRnormMethod3(), ref=PLSRnormMethod6(), ratio=FALSE, ratioNum=20)
  } else {
    Normalization( rowNorm=PLSRnormMethod1(), transNorm=PLSRnormMethod2(), scaleNorm=PLSRnormMethod3(), ref=NULL, ratio=FALSE, ratioNum=20)
  }
  PlotNormSum()
})
############################################################################################
####################################### PLSR model #########################################
############################################################################################
# Candidate component counts for the PLSR model: 2 .. min(nrow-1, 8).
plrNumbers1<- reactive({
  input$file2
  comp.num <- dim(dataSet$norm)[1]-1;
  if(comp.num > 8) {
    comp.num <- 8;
  }
  # The original `for(i in 1:max-1)` parsed as `(1:max)-1` and so started
  # at i=0 (a silent no-op assignment in R); seq_len() expresses the intent
  # and is also safe when fewer than two components are available.
  n=c()
  for(i in seq_len(max(comp.num-1, 0))){
    n[i]=i+1
  }
  n
})
plrNumbers2<- reactive({
  input$file2
  return(GetDefaultPLSPairComp())
})
output$plsrNum <- renderUI({
  selectInput("plsrCNo", label = h5("Select the number of components to be taken into account by the PLSR model"), selected =plrNumbers2(), choices=plrNumbers1())
})
# Plot-appearance controls appear once the PLSR button has been pressed.
observeEvent(input$PLSRButton,output$PLSRcolor1 <- renderUI({
  textInput("PLSRC1", label=h5("Color for regression plot:"), value="green" )
}))
observeEvent(input$PLSRButton,output$PLSRcolor2 <- renderUI({
  textInput("PLSRC2", label=h5("Color for CV plot:"), value="red" )
}))
observeEvent(input$PLSRButton,output$PLSRptSize <- renderUI({
  numericInput("PLSRpts", label=h5("Point Size"), value=0.5 )
}))
# Current values of the PLSR plotting controls.
PLSRPARA1<-reactive({ return(as.numeric(input$plsrCNo)) })
PLSRPARA2<-reactive({ return(input$PLSRC1) })
PLSRPARA3<-reactive({ return(input$PLSRC2) })
PLSRPARA4<-reactive({ return(as.numeric(input$PLSRpts)) })
# Regression, cross-validation, and overlay plots plus the summary table.
observeEvent(input$PLSRButton,output$plsrModel <- renderPlot({
  PlsRegPlot(no=PLSRPARA1(), color=PLSRPARA2(), ptsize=PLSRPARA4())
}))
observeEvent(input$PLSRButton,output$plsrCV <- renderPlot({
  plsRegPlotCV(no=PLSRPARA1(), color=PLSRPARA3(), ptsize=PLSRPARA4())
}))
observeEvent(input$PLSRButton,output$plsrOverLay <- renderPlot({
  predOvrlyPlt(no=PLSRPARA1(), color1=PLSRPARA2(), color2=PLSRPARA3(), ptsize=PLSRPARA4())
}))
# Summary table with row names promoted into a first (blank-named) column.
observeEvent(input$PLSRButton,output$PLSRTab <- renderTable({
  m=PLSR.Table(no=PLSRPARA1())
  k=cbind(row.names(m),m)
  colnames(k)[1] <- c(" ")
  return(k)
}))
#################################################################################################
#################################################################################################
#################################################################################################
###################           ORIGINAL METABOANALYST R SCRIPTS              ####################
#################################################################################################
#################################################################################################
#################################################################################################
##################################################
## R script for MetaboAnalyst
## Description: data I/O
##
## Author: Jeff Xia, jeff.xia@mcgill.ca
## McGill University, Canada
##
## License: GNU GPL (>= 2)
###################################################
# create objects for storing data
# data type: list, conc, specbin, pktable, nmrpeak, mspeak, msspec
# anal type: stat, pathora, pathqea, msetora, msetssp, msetqea, ts, cmpdmap, smpmap
# Initialise the global state used by every downstream analysis step.
# dataSet/analSet/imgSet/msg.vec etc. are created in the global environment
# via <<- because the other MetaboAnalyst scripts read them as globals.
#   dataType : kind of uploaded data (see list above)
#   analType : analysis module to run (see list above)
#   paired   : TRUE for a paired design
InitDataObjects <- function(dataType, analType, paired=F){
dataSet <<- list();
dataSet$type <<- dataType;
dataSet$design.type <<- "regular"; # one factor to two factor
dataSet$cls.type <<- "disc"; # default until specified otherwise
dataSet$format <<- "rowu";
dataSet$paired <<- paired;
analSet <<- list();
analSet$type <<- analType;
imgSet <<- list();
msg.vec <<- vector(mode="character");
current.msetlib <<- NULL;
conc.db <<- NULL;
cmpd.db <<- NULL;
# record the current name(s) to be transferred to client
require('Cairo'); # plotting required by all
# fix Mac font issue
CairoFonts("Arial:style=Regular","Arial:style=Bold","Arial:style=Italic","Helvetica","Symbol")
print("R objects intialized ...");
}
# for two factor time series only
SetDesignType <-function(design){
dataSet$design.type <<- tolower(design);
}
# Read in the user uploaded CSV or TXT data,
# format: rowp, rowu, colp, colu
# label type: disc (for discrete) or cont (for continuous)
# Populates the global dataSet from the uploaded file.  Returns 0 after
# queueing error text via AddErrMsg() on any validation failure.
# (Definition continues beyond this chunk.)
Read.TextData<-function(filePath, format="rowu", lbl.type="disc"){
dataSet$cls.type <<- lbl.type;
dataSet$format <<- format;
# File type is inferred from the last three characters of the path.
formatStr <- substr(filePath, nchar(filePath)-2, nchar(filePath))
if(formatStr == "txt"){
dat <-try(read.table(filePath,header=TRUE,check.names=F, as.is=T));
}else{
# note, read.csv is more than read.table with sep=","
dat <-try(read.csv(filePath,header=TRUE,check.names=F, as.is=T));
}
# try to guess column numers and class labels (starts with #) from the top 20 rows
if(class(dat) == "try-error") {
AddErrMsg("Data format error. 
Failed to read in the data!");
AddErrMsg("Please check the followings: ");
AddErrMsg("Either sample or feature names must in UTF-8 encoding; Latin, Greek letters are not allowed.");
AddErrMsg("We recommend using a combination of English letters, underscore, and numbers for naming purpose");
AddErrMsg("Make sure sample names and feature (peak, compound) names are unique;");
AddErrMsg("Missing values should be blank or NA without quote.");
return(0);
}
# A single-column result almost always means the separator was wrong.
if(ncol(dat) == 1){
AddErrMsg("Error: Make sure the data table is saved as comma separated values (.csv) format!");
AddErrMsg("Please also check the followings: ");
AddErrMsg("Either sample or feature names must in UTF-8 encoding; Latin, Greek letters are not allowed.");
AddErrMsg("We recommend to use a combination of English letters, underscore, and numbers for naming purpose.");
AddErrMsg("Make sure sample names and feature (peak, compound) names are unique.");
AddErrMsg("Missing values should be blank or NA without quote.");
return(0);
}
msg <- NULL;
if(substring(format,4,5)=="ts"){
# two factor time series data
if(substring(format,1,3)=="row"){ # sample in row
msg<-c(msg, "Samples are in rows and features in columns");
smpl.nms <-dat[,1];
all.nms <- colnames(dat);
facA.lbl <- all.nms[2];
cls.lbl<-facA <- dat[,2]; # default assign facA to cls.lbl in order for one-factor analysis
facB.lbl <- all.nms[3];
facB <- dat[,3];
conc <- dat[,-c(1:3)];
var.nms <- colnames(conc);
}else{ # sample in col
msg<-c(msg, "Samples are in columns and features in rows.");
all.nms <- dat[,1];
facA.lbl <- all.nms[1];
cls.lbl <- facA <- dat[1,-1];
facB.lbl <- all.nms[2];
facB <- dat[2,-1];
var.nms <- dat[-c(1:2),1];
conc<-t(dat[-c(1:2),-1]);
smpl.nms <- rownames(conc);
}
facA <- as.factor(as.character(facA));
facB <- as.factor(as.character(facB));
if(dataSet$design.type =="time" | dataSet$design.type =="time0"){
# determine time factor: one of the two factors must be named "Time"
if(!(tolower(facA.lbl) == "time" | tolower(facB.lbl) == "time")){
AddErrMsg("No time points found in your 
data");
AddErrMsg("The time points group must be labeled as Time");
return(0);
}
}
}else{
# one-factor (non time-series) data
if(substring(format,1,3)=="row"){ # sample in row
msg<-c(msg, "Samples are in rows and features in columns");
smpl.nms <-dat[,1];
dat[,1] <- NULL;
if(lbl.type == "qc"){
# QC data: no class labels, store and return immediately
rownames(dat) <- smpl.nms;
dataSet$orig<<-dat;
dataSet$cmpd<<-colnames(dat);
return(1);
}
cls.lbl <- dat[,1];
conc <- dat[,-1];
var.nms <- colnames(conc);
}else{ # sample in col
msg<-c(msg, "Samples are in columns and features in rows.");
var.nms <- dat[-1,1];
dat[,1] <- NULL;
smpl.nms <- colnames(dat);
cls.lbl <- dat[1,];
conc<-t(dat[-1,]);
}
}
# free memory
dat <- NULL;
msg<-c(msg, "The uploaded file is in comma separated values (.csv) format.");
# try to remove empty line if present
# identified if no sample names provided
empty.inx <- is.na(smpl.nms) | smpl.nms == ""
if(sum(empty.inx) > 0){
msg<-c(msg, paste("", sum(empty.inx), "empty rows were detected and excluded from your data."));
smpl.nms <- smpl.nms[!empty.inx];
cls.lbl <- cls.lbl[!empty.inx];
conc <- conc[!empty.inx, ];
}
# try to check & remove empty lines if class label is empty
# Added by B. Han
empty.inx <- is.na(cls.lbl) | cls.lbl == ""
if(sum(empty.inx) > 0){
if(analSet$type != "roc"){
msg<-c(msg, paste("", sum(empty.inx), "empty labels were detected and excluded from your data."));
smpl.nms <- smpl.nms[!empty.inx];
cls.lbl <- cls.lbl[!empty.inx];
conc <- conc[!empty.inx, ];
}else{
# force all NA to empty string, otherwise NA will become "NA" class label
cls.lbl[is.na(cls.lbl)] <- "";
msg<-c(msg, paste("", sum(empty.inx), "new samples were detected from your data."));
}
}
# ROC analysis is restricted to exactly two groups.
if(analSet$type == "roc"){
if(length(unique(cls.lbl[!empty.inx])) > 2){
AddErrMsg("ROC analysis is only defined for two-group comparisions!");
return(0);
}
}
# try to remove check & remove empty line if sample name is empty
empty.inx <- is.na(smpl.nms) | smpl.nms == "";
if(sum(empty.inx) > 0){
msg<-c(msg,paste("", sum(empty.inx), "empty samples were detected and excluded from your data."));
smpl.nms <- smpl.nms[!empty.inx];
cls.lbl <- cls.lbl[!empty.inx];
conc <- conc[!empty.inx, ];
}
# check for uniqueness of dimension name
if(length(unique(smpl.nms))!=length(smpl.nms)){
dup.nm <- paste(smpl.nms[duplicated(smpl.nms)], collapse=" ");;
AddErrMsg("Duplicate sample names are not allowed!");
AddErrMsg(dup.nm);
return(0);
}
# try to remove check & remove empty line if feature name is empty
empty.inx <- is.na(var.nms) | var.nms == "";
if(sum(empty.inx) > 0){
msg<-c(msg,paste("", sum(empty.inx), "empty features were detected and excluded from your data."));
var.nms <- var.nms[!empty.inx];
conc <- conc[,!empty.inx];
}
if(length(unique(var.nms))!=length(var.nms)){
dup.nm <- paste(var.nms[duplicated(var.nms)], collapse=" ");
AddErrMsg("Duplicate feature names are not allowed!");
AddErrMsg(dup.nm);
return(0);
}
# now check for special characters in the data labels
# (iconv() returns NA for names that cannot be converted to the native encoding)
if(sum(is.na(iconv(smpl.nms)))>0){
na.inx <- is.na(iconv(smpl.nms));
nms <- paste(smpl.nms[na.inx], collapse="; ");
AddErrMsg(paste("No special letters (i.e. 
Latin, Greek) are allowed in sample names!", nms, collapse=" ")); return(0); } if(sum(is.na(iconv(var.nms)))>0){ na.inx <- is.na(iconv(var.nms)); nms <- paste(var.nms[na.inx], collapse="; "); AddErrMsg(paste("No special letters (i.e. Latin, Greek) are allowed in feature names!", nms, collapse=" ")); return(0); } # only keep alphabets, numbers, ",", "." "_", "-" "/" smpl.nms <- gsub("[^[:alnum:]./_-]", "", smpl.nms); var.nms <- gsub("[^[:alnum:][:space:],'./_-]", "", var.nms); # allow space, comma and period cls.lbl <- ClearStrings(as.vector(cls.lbl)); # now assgin the dimension names rownames(conc) <- smpl.nms; colnames(conc) <- var.nms; # check if paired or not if(dataSet$paired){ label<-as.numeric(cls.lbl); dataSet$orig.cls<<-as.factor(ifelse(label>0,1,0)); dataSet$pairs<<-label; }else{ if(lbl.type == "disc"){ # check for class labels at least two replicates per class if(min(table(cls.lbl)) < 3){ AddErrMsg(paste ("A total of", length(levels(as.factor(cls.lbl))), "groups found with", length(smpl.nms), "samples.")); AddErrMsg("At least three replicates are required in each group!"); AddErrMsg("Or maybe you forgot to specify the data format?"); return(0); } dataSet$orig.cls <<-dataSet$cls <<-as.factor(as.character(cls.lbl)); if(substring(format,4,5)=="ts"){ dataSet$orig.facA <<-dataSet$facA <<- as.factor(as.character(facA)); dataSet$facA.lbl <<- facA.lbl; dataSet$orig.facB <<-dataSet$facB <<- as.factor(as.character(facB)); dataSet$facB.lbl <<- facB.lbl; } }else{ # continuous dataSet$orig.cls <<- dataSet$cls <<- as.numeric(cls.lbl); } } # for the current being to support MSEA and MetPA if(dataSet$type == "conc"){ dataSet$cmpd <<- var.nms; } dataSet$orig<<-conc; # copy to be processed in the downstream dataSet$read.msg<<-c(msg, paste("The uploaded data file contains ", nrow(conc), " (samples) by ", ncol(conc), " (", tolower(GetVariableLabel()), ") data matrix.", sep="")); return(1); } # Read peak list files # NMR peak list input should be two-column numeric value 
# Read peak list files (NMR or MS).
#   NMR peak lists are two-column numeric (ppm, int): ppm is treated as mz
#   and a dummy 'rt' is inserted.
#   MS peak lists can be 2-col (mz, int; dummy 'rt' added) or 3-col (mz, rt, int).
# The upload folder must contain one subfolder per group; each subfolder holds
# that group's sample .csv files.
# Populates dataSet$peakSet and dataSet$read.msg; returns 1 on success, 0 on error.
Read.PeakList<-function(foldername){
    suppressMessages(require(xcms));
    msg <- c("The uploaded files are peak lists and intensities data.");
    files <- dir(foldername, pattern=".[Cc][Ss][Vv]$", recursive=TRUE, full.names=TRUE);
    if(length(files) == 0){
        AddErrMsg("No peak list files (.csv) were found.");
        return(0);
    }
    # sample names are the file names without extension
    snames <- gsub("\\.[^.]*$", "", basename(files));
    msg <- c(msg, paste("A total of ", length(files), "samples were found."));
    # class label = directory name, with the common path prefix trimmed off
    sclass <- gsub("^\\.$", "sample", dirname(files));
    scomp <- strsplit(substr(sclass, 1, min(nchar(sclass))), "");
    scomp <- matrix(c(scomp, recursive = TRUE), ncol = length(scomp));
    i <- 1;
    while(all(scomp[i,1] == scomp[i,-1]) && i < nrow(scomp)){
        i <- i + 1;
    }
    # back off to just past the last path separator within the shared prefix
    i <- min(i, tail(c(0, which(scomp[1:i,1] == .Platform$file.sep)), n = 1) + 1);
    if(i > 1 && i <= nrow(scomp)){
        sclass <- substr(sclass, i, max(nchar(sclass)));
    }
    # sanity checks before proceeding
    sclass <- as.factor(sclass);
    if(length(levels(sclass)) < 2){
        AddErrMsg("You must provide classes labels (at least two classes)!");
        return(0);
    }
    # at least three replicates are required in each class
    if(min(table(sclass)) < 3){
        AddErrMsg("At least three replicates are required in each group!");
        return(0);
    }
    # sample names must be unique
    if(length(unique(snames)) != length(snames)){
        dup.nm <- paste(snames[duplicated(snames)], collapse=" ");
        AddErrMsg("Duplicate sample names are not allowed!");
        AddErrMsg(dup.nm);
        return(0);
    }
    # map sample names to numeric ids (matching xcmsSet@peaks "sample" column)
    samp.num <- seq_along(snames);
    names(samp.num) <- snames;
    # build a matrix compatible with xcmsSet@peaks ("mz","rt","int","sample")
    # so the xcms grouping algorithm can be used directly; use the first file
    # to decide whether a dummy rt column is needed (2-col) or not (3-col)
    pks <- try(as.matrix(read.csv(files[1], header=TRUE)));
    if(inherits(pks, "try-error")){
        AddErrMsg("The CSV file is not formatted correctly!");
        return(0);
    }
    n.col <- ncol(pks);
    if(n.col == 2){
        add <- TRUE;    # insert dummy rt
    }else if(n.col == 3){
        add <- FALSE;
    }else{
        AddErrMsg("Peak list file can only be 2 or 3 columns.");
        return(0);
    }
    # collect per-file matrices, then bind once (avoids O(n^2) rbind growth)
    pk.list <- vector("list", length(files));
    for(i in seq_along(files)){
        print(files[i]);
        pks <- as.matrix(read.csv(files[i], header=TRUE));
        if(ncol(pks) != n.col){
            AddErrMsg("Columns in each file are not the same!");
            return(0);
        }
        if(add){ # NMR ppm+int or MS mz+int: dummy rt of 1000
            pks <- cbind(pks[,1], 1000, pks[,2], samp.num[i]);
        }else{
            pks <- cbind(pks, samp.num[i]);
        }
        pk.list[[i]] <- pks;
    }
    all.peaks <- do.call(rbind, pk.list);
    msg <- c(msg, paste("These samples contain a total of ", dim(all.peaks)[1], "peaks." ));
    msg <- c(msg, paste("with an average of ", round(dim(all.peaks)[1]/length(files), 1), "peaks per sample" ));
    colnames(all.peaks) <- c("mz","rt","int","sample");
    peakSet <- list(
        peaks = all.peaks,
        ncol = n.col,
        sampclass = sclass,
        sampnames = snames
    );
    dataSet$peakSet <<- peakSet;
    dataSet$read.msg <<- msg;
    return(1);
}

# Read LC/GC-MS spectra (.netCDF, .mzXML, .mzData) using the xcms package.
# folderName must contain one subfolder per class; the subfolder name is the
# class label. Populates dataSet$xset.orig / dataSet$read.msg.
# Returns 1 on success, 0 on failure (failure reason in dataSet$read.msg).
Read.MSspec<-function(folderName, profmethod='bin', fwhm=30, bw=30){
    suppressMessages(require(xcms));
    msfiles <- list.files(folderName, recursive=TRUE, full.names=TRUE);
    # sanity check before spending time on peak picking: the last path level
    # is the file name, the one before it is the class label
    dir.table <- t(data.frame(strsplit(msfiles, "/")));
    cls.all <- dir.table[, ncol(dir.table)-1];
    smpl.all <- dir.table[, ncol(dir.table)];
    # at least two groups required
    if(length(levels(as.factor(cls.all))) < 2){
        dataSet$read.msg <<- "At least two groups are required!";
        return(0);
    }
    # at least three replicates per group
    if(min(table(cls.all)) < 3){
        dataSet$read.msg <<- "At least three replicates are required in each group!";
        return(0);
    }
    # sample names must be unique
    if(length(unique(smpl.all)) != length(smpl.all)){
        dataSet$read.msg <<- "Duplicate sample names are not allowed!";
        return(0);
    }
    xset <- xcmsSet(msfiles, profmethod = profmethod, fwhm=fwhm);
    msg <- c(paste("In total,", length(xset@filepaths), "sample files were detected. "),
             paste("They are divided into ", length(levels(xset@phenoData[,1])), " classes: ",
                   paste(levels(xset@phenoData[,1]), collapse=', '), ".", sep=""));
    xset <- group(xset, bw=bw);
    dataSet$xset.orig <<- xset;
    dataSet$read.msg <<- msg;
    return(1);
}

# Peak list or spectra files can be paired; pair information is stored in a
# text file where each whitespace-separated entry is "name1:name2".
# Returns a named numeric vector: pair i is labelled i (first member)
# and -i (second member), names being the sample names.
ReadPairFile<-function(filePath="pairs.txt"){
    all.pairs <- scan(filePath, what='character', strip.white = TRUE);
    labels <- as.vector(rbind(seq_along(all.pairs), -seq_along(all.pairs)));
    names(labels) <- unlist(strsplit(all.pairs, ":"), use.names=FALSE);
    labels;
}

# Save the original / processed / normalized data tables (with class labels)
# as CSV files in the working directory. Tables wider than 200 columns are
# transposed before writing so they stay readable in spreadsheet software.
SaveTransformedData<-function(){
    if(is.null(dataSet$orig)){
        return(invisible(NULL));
    }
    tsFormat <- substring(dataSet$format,4,5)=="ts";
    # ---- original data ----
    if(tsFormat){
        lbls <- cbind(as.character(dataSet$orig.facA), as.character(dataSet$orig.facB));
        colnames(lbls) <- c(dataSet$facA.lbl, dataSet$facB.lbl);
    }else{
        lbls <- cbind("Label"= as.character(dataSet$orig.cls));
    }
    orig.data <- cbind(lbls, dataSet$orig);
    if(dim(orig.data)[2] > 200){
        orig.data <- t(orig.data);
    }
    write.csv(orig.data, file="data_original.csv");
    # ---- processed data ----
    if(is.null(dataSet$proc)){
        return(invisible(NULL));
    }
    if(tsFormat){
        lbls <- cbind(as.character(dataSet$proc.facA), as.character(dataSet$proc.facB));
        colnames(lbls) <- c(dataSet$facA.lbl, dataSet$facB.lbl);
    }else{
        lbls <- cbind("Label"= as.character(dataSet$proc.cls));
    }
    proc.data <- cbind(lbls, dataSet$proc);
    if(dim(proc.data)[2] > 200){
        proc.data <- t(proc.data);
    }
    write.csv(proc.data, file="data_processed.csv");
    # ---- normalized data ----
    if(is.null(dataSet$norm)){
        return(invisible(NULL));
    }
    if(tsFormat){
        lbls <- cbind(as.character(dataSet$facA), as.character(dataSet$facB));
        colnames(lbls) <- c(dataSet$facA.lbl, dataSet$facB.lbl);
    }else{
        lbls <- cbind("Label"= as.character(dataSet$cls));
    }
    # for MS peaks with rt and mz, also write a table with mz/rt id columns
    # (no labels); note: in memory, features are in columns
    if(!is.null(dataSet$three.col)){
        ids <- matrix(unlist(strsplit(colnames(dataSet$norm), "/")), ncol=2, byrow=TRUE);
        colnames(ids) <- c("mz", "rt");
        new.data <- data.frame(ids, t(dataSet$norm));
        write.csv(new.data, file="peak_normalized_rt_mz.csv");
    }
    norm.data <- cbind(lbls, dataSet$norm);
    if(dim(norm.data)[2] > 200){
        norm.data <- t(norm.data);
    }
    write.csv(norm.data, file="data_normalized.csv");
}

# Append an error message to the global msg.vec (created on first use).
AddErrMsg<-function(msg){
    if(!exists('msg.vec')){
        msg.vec <<- vector(mode="character"); # store error messages
    }
    msg.vec <<- c(msg.vec, msg);
}

# Return all accumulated error messages.
GetErrMsg<-function(){
    return(msg.vec);
}

GetKEGG.PathNames<-function(){
    return(names(metpa$path.ids));
}

# given a vector of KEGG IDs, return a vector of KEGG compound names
KEGGID2Name<-function(ids){
    hit.inx <- match(ids, cmpd.db$kegg);
    return(cmpd.db[hit.inx, 3]);
}

# given a vector of KEGG pathway IDs, return a vector of SMPDB IDs (hsa only)
KEGGPATHID2SMPDBIDs<-function(ids){
    hit.inx <- match(ids, path.map[,1]);
    return(path.map[hit.inx, 3]);
}

# given a vector of HMDB IDs, return a vector of HMDB compound names
HMDBID2Name<-function(ids){
    hit.inx <- match(ids, cmpd.db$hmdb);
    return(cmpd.db[hit.inx, "name"]);
}

# given a vector of KEGG IDs, return a vector of HMDB IDs
KEGGID2HMDBID<-function(ids){
    hit.inx <- match(ids, cmpd.db$kegg);
    return(cmpd.db[hit.inx, "hmdb_id"]);
}

# given a vector of HMDB IDs, return a vector of KEGG IDs
HMDBID2KEGGID<-function(ids){
    hit.inx <- match(ids, cmpd.db$hmdb);
    return(cmpd.db[hit.inx, "kegg_id"]);
}

# save compound names for mapping
Setup.MapData<-function(qvec){
    dataSet$cmpd <<- qvec;
}

# save concentration data
Setup.ConcData<-function(conc){
    dataSet$norm <<- conc;
}

# save biofluid type for SSP
# save biofluid type for SSP analysis
Setup.BiofluidType<-function(type){
    dataSet$biofluid <<- type;
}

GetLiteralGroupNames <- function(){
    as.character(dataSet$proc.cls);
}

# all groups (for ROC analysis, empty labels are dropped before levels are taken)
GetGroupNames <- function(){
    cls.lbl <- dataSet$proc.cls;
    if(analSet$type=="roc"){
        empty.inx <- is.na(cls.lbl) | cls.lbl == "";
        # re-factor to drop unused levels
        lvls <- levels(factor(cls.lbl[!empty.inx]));
    }else{
        lvls <- levels(cls.lbl);
    }
    return(lvls);
}

# groups entering analysis
GetNormGroupNames <- function(){
    levels(dataSet$cls);
}

SetOrganism <- function(org){
    inmex.org <<- org;
}

##################################################
## R script for MetaboAnalyst
## Description: processing raw data types
##
## Author: Jeff Xia, jeff.xia@mcgill.ca
## McGill University, Canada
##
## License: GNU GPL (>= 2)
###################################################

# Basic sanity check of the uploaded content (class labels, pairing,
# numeric matrix conversion, constant columns, missing values).
# Returns 1 if the data passed, 0 otherwise (reasons via AddErrMsg);
# on success fills dataSet$preproc, dataSet$proc.cls and dataSet$check.msg.
SanityCheckData<-function(){
    msg <- NULL;
    cls <- dataSet$orig.cls;
    dataSet$small.smpl.size <<- 0;
    # check class info
    if(dataSet$cls.type == "disc"){
        if(substring(dataSet$format,4,5)=="ts"){ # two-factor / time-series
            if(dataSet$design.type =="time"){
                msg <- c(msg, "The data is time-series data.");
            }else{
                msg <- c(msg, "The data is not time-series data.");
            }
            clsA.num <- length(levels(dataSet$facA));
            clsB.num <- length(levels(dataSet$facB));
            msg <- c(msg, paste(clsA.num, "groups were detected in samples for factor", dataSet$facA.lbl));
            msg <- c(msg, paste(clsB.num, "groups were detected in samples for factor", dataSet$facB.lbl));
        }else{
            # check if there are too many groups for the number of replicates
            cls.lbl <- dataSet$orig.cls;
            min.grp.size <- min(table(cls.lbl));
            cls.num <- length(levels(cls.lbl));
            if(cls.num/min.grp.size > 3){
                dataSet$small.smpl.size <<- 1;
                msg <- c(msg, "Too many groups with very small number of replicates!");
                msg <- c(msg, "Only a subset of methods will be available for analysis!");
            }
            msg <- c(msg, paste(cls.num, "groups were detected in samples."));
            dataSet$cls.num <<- cls.num;
            dataSet$min.grp.size <<- min.grp.size;
            if(dataSet$paired){
                msg <- c(msg,"Samples are paired.");
                # pair information comes from a separate file unless the
                # upload was already a csv table type
                if(!(dataSet$type=="conc" | dataSet$type=="specbin" | dataSet$type=="pktable" )){
                    pairs <- ReadPairFile();
                    # pair list must cover every sample
                    if(length(pairs)!=nrow(dataSet$orig)){
                        AddErrMsg("Error: the total paired names are not equal to sample names.");
                        return(0);
                    }else{
                        # match pair names against sample names
                        inx <- match(rownames(dataSet$orig), names(pairs));
                        if(sum(is.na(inx)) > 0){
                            AddErrMsg("Error: some paired names not match the sample names.");
                            return(0);
                        }else{
                            dataSet$pairs <<- pairs[inx];
                        }
                    }
                }
                pairs <- dataSet$pairs;
                lev <- unique(pairs);
                uni.cl <- length(lev);
                uni.cl.abs <- uni.cl/2;
                sorted.pairs <- sort(pairs, index.return=TRUE);
                # labels must be exactly -n/2..-1 and 1..n/2
                if(!all(sorted.pairs$x==c(-uni.cl.abs:-1, 1:uni.cl.abs))){
                    AddErrMsg("There are some problems in paired sample labels! ");
                    if(uni.cl.abs != round(uni.cl.abs)){
                        AddErrMsg("The total samples must be of even number!");
                    }else{
                        AddErrMsg(paste("And class labels between ", -uni.cl.abs, " and 1, and between 1 and ", uni.cl.abs, ".", sep=""));
                    }
                    return(0);
                }else{
                    msg <- c(msg,"The labels of paired samples passed sanity check.");
                    msg <- c(msg, paste("A total of", uni.cl.abs, "pairs were detected."));
                    # reorder so paired samples are interleaved 1:n/2 with -1:-n/2
                    x <- sorted.pairs$ix[(uni.cl.abs+1):uni.cl];
                    y <- sorted.pairs$ix[uni.cl.abs:1];
                    index <- as.vector(cbind(x, y));
                    dataSet$pairs <<- pairs[index];
                    dataSet$orig.cls <<- cls[index];
                    dataSet$orig <<- dataSet$orig[index,];
                }
            }else{
                msg <- c(msg,"Samples are not paired.");
            }
        }
    }
    msg <- c(msg,"Only English letters, numbers, underscore, hyphen and forward slash (/) are allowed.");
    msg <- c(msg,"Other special characters or punctuations (if any) will be stripped off.");
    int.mat <- dataSet$orig;
    # coerce to a numeric matrix, preserving dim names
    rowNms <- rownames(int.mat);
    colNms <- colnames(int.mat);
    naNms <- sum(is.na(int.mat));
    num.mat <- apply(int.mat, 2, as.numeric);
    if(sum(is.na(num.mat)) > naNms){
        # retry after removing "," (thousand separator) in case that is the cause
        num.mat <- apply(int.mat, 2, function(x) as.numeric(gsub(",", "", x)));
        if(sum(is.na(num.mat)) > naNms){
            msg <- c(msg,"Non-numeric values were found and replaced by NA.");
        }else{
            msg <- c(msg,"All data values are numeric.");
        }
    }else{
        msg <- c(msg,"All data values are numeric.");
    }
    int.mat <- num.mat;
    rownames(int.mat) <- rowNms;
    colnames(int.mat) <- colNms;
    # drop columns that are constant (var == 0) or all-NA
    varCol <- apply(int.mat, 2, var, na.rm=TRUE);
    constCol <- (varCol == 0 | is.na(varCol));
    constNum <- sum(constCol, na.rm=TRUE);
    if(constNum > 0){
        msg <- c(msg, paste("", constNum, "columns with constant or a single value were found and deleted."));
        int.mat <- int.mat[,!constCol];
    }
    # report zero / NA values
    totalCount <- nrow(int.mat)*ncol(int.mat);
    naCount <- sum(is.na(int.mat));
    naPercent <- round(100*naCount/totalCount, 1);
    msg <- c(msg, paste("A total of ", naCount, " (", naPercent, "%) missing values were detected.", sep=""));
    msg <- c(msg, " By default, these values will be replaced by a small value. ");
    # half of the minimal positive value serves as the detection-limit estimate
    minConc <- min(int.mat[int.mat>0], na.rm=TRUE)/2;
    dataSet$minConc <<- minConc;
    dataSet$preproc <<- as.data.frame(int.mat);
    dataSet$proc.cls <<- dataSet$orig.cls;
    if(substring(dataSet$format,4,5)=="ts"){
        dataSet$proc.facA <<- dataSet$orig.facA;
        dataSet$proc.facB <<- dataSet$orig.facB;
    }
    dataSet$check.msg <<- c(dataSet$read.msg, msg);
    return(1);
}

GetGroupNumber<-function(){
    return(dataSet$cls.num);
}

IsSmallSmplSize<-function(){
    return(dataSet$small.smpl.size);
}

GetMinGroupSize<-function(){
    return(dataSet$min.grp.size);
}

IsDataContainsNegative<-function(){
    return(dataSet$containsNegative);
}

################################################################
# Note: the following steps directly modify dataSet$proc
################################################################

# Replace zero/missing values by half of the minimum positive value (the
# default strategy); also called as the final step after any imputation.
ReplaceMin<-function(int.mat=as.matrix(dataSet$preproc)){
    minConc <- dataSet$minConc;
    # replace zero and missing values; negative values are left unchanged
    # (questionable — kept for backward compatibility with the original)
    int.mat[int.mat==0 | is.na(int.mat)] <- minConc;
    # last step of processing: save result to dataSet$proc
    dataSet$proc <<- as.data.frame(int.mat);
    dataSet$replace.msg <<- paste("Zero or missing variables were replaced with a small value:", minConc);
    rm(int.mat);
    gc();
}

# Remove features whose fraction of missing values exceeds 'percent'.
# NOTE(review): this function and the header of ImputeVar below were
# truncated in the received source (text between "nrow(int.mat)" and the
# first imputation branch was lost, apparently by "<"-stripping); they are
# reconstructed from the surrounding code and the standard MetaboAnalyst
# implementation — verify against upstream before release.
RemoveMissingPercent<-function(int.mat=dataSet$preproc, percent=perct){
    good.inx <- apply(is.na(int.mat), 2, sum)/nrow(int.mat) < percent;
    dataSet$preproc <<- as.data.frame(int.mat[,good.inx]);
    dataSet$replace.msg <<- paste(sum(!good.inx), "variables were removed for threshold", round(100*percent, 2), "percent.");
}

# Impute remaining missing values.
# method: "min" (half of per-column minimum), "mean", "median",
#         "knn", "bpca", "ppca", "svdImpute"
ImputeVar<-function(int.mat=dataSet$preproc, method="min"){
    msg <- NULL;
    if(method=="min"){
        new.mat <- apply(int.mat, 2, function(x){
            if(sum(is.na(x)) > 0){
                x[is.na(x)] <- min(x, na.rm=TRUE)/2;
            }
            x;
        });
        msg <- c(msg,"Missing variables were replaced with the half of minimum values for each feature column.");
    }else if (method=="mean"){
        new.mat <- apply(int.mat, 2, function(x){
            if(sum(is.na(x)) > 0){
                x[is.na(x)] <- mean(x, na.rm=TRUE);
            }
            x;
        });
        msg <- c(msg,"Missing variables were replaced with mean.");
    }else if (method == "median"){
        new.mat <- apply(int.mat, 2, function(x){
            if(sum(is.na(x)) > 0){
                x[is.na(x)] <- median(x, na.rm=TRUE);
            }
            x;
        });
        msg <- c(msg,"Missing variables were replaced with median.");
    }else{
        if(method == "knn"){
            suppressMessages(require(impute));
            new.mat <- t(impute.knn(t(int.mat))$data);
        }else{
            suppressMessages(require(pcaMethods));
            if(method == "bpca"){
                new.mat <- pca(int.mat, nPcs=5, method="bpca", center=TRUE)@completeObs;
            }else if(method == "ppca"){
                new.mat <- pca(int.mat, nPcs=5, method="ppca", center=TRUE)@completeObs;
            }else if(method == "svdImpute"){
                new.mat <- pca(int.mat, nPcs=5, method="svdImpute", center=TRUE)@completeObs;
            }
        }
        msg <- c(msg, paste("Missing variables were imputated using", toupper(method)));
    }
    dataSet$proc <<- as.data.frame(new.mat);
    dataSet$replace.msg <<- msg;
}

# Deal with negative values (operates on dataSet$proc, i.e. after the
# missing-value steps above).
# method: "min" (replace by minConc), "abs" (absolute value),
#         anything else = exclude columns containing negatives.
ClearNegatives <- function(int.mat=as.matrix(dataSet$proc), method="abs"){
    if(dataSet$containsNegative){
        if(method == "min"){
            int.mat[int.mat < 0] <- dataSet$minConc;
            msg <- paste("Negative variables were replaced with a small value:", dataSet$minConc);
        }else if(method =="abs"){
            int.mat <- abs(int.mat);
            msg <- paste("Negative variables were replaced with their absolute values");
        }else{ # exclude
            good.inx <- apply(int.mat < 0, 2, sum)==0;
            # bug fix: the filtered matrix was previously assigned to an
            # unused local (new.mat), so exclusion silently did nothing
            int.mat <- int.mat[, good.inx];
            msg <- paste("Columns contains negative variables were excluded");
        }
        dataSet$containsNegative <<- 0;
        dataSet$replace.msg <<- c(dataSet$replace.msg, msg);
        dataSet$proc <<- as.data.frame(int.mat);
    }
}

# Group peak lists based on position using the xcms algorithm
# (align peaks wrt rt and mz).
# NMR peaks: ppm -> mz plus dummy rt; 2-col MS: dummy rt added; 3-col MS: as-is.
# Defaults: mzwid 0.25 m/z for MS, 0.03 ppm for NMR; bw 30 for LC-MS, 5 for GC-MS.
GroupPeakList<-function(mzwid = 0.25, bw = 30, minfrac = 0.5, minsamp = 1, max = 50) {
    peakSet <- dataSet$peakSet;
    samples <- peakSet$sampnames;
    classlabel <- peakSet$sampclass;
    classnames <- levels(classlabel);
    classlabel <- as.vector(unclass(classlabel));
    classnum <- integer(max(classlabel));
    for (i in seq_along(classnum)){
        classnum[i] <- sum(classlabel == i);
    }
    peakmat <- peakSet$peaks;
    porder <- order(peakmat[,"mz"]);
    peakmat <- peakmat[porder,,drop=FALSE];
    rownames(peakmat) <- NULL;
    retrange <- range(peakmat[,"rt"]);
    minpeakmat <- min(classnum)/2;
    # overlapping mass bins of width mzwid, stepped by mzwid/2
    mass <- seq(peakmat[1,"mz"], peakmat[nrow(peakmat),"mz"] + mzwid, by = mzwid/2);
    masspos <- findEqualGreaterM(peakmat[,"mz"], mass);
    groupmat <- matrix(nrow = 512, ncol = 7 + length(classnum));
    groupindex <- vector("list", 512);
    endidx <- 0;
    num <- 0;
    gcount <- integer(length(classnum));
    for (i in seq_len(length(mass)-2)) {
        startidx <- masspos[i];
        endidx <- masspos[i+2]-1;
        if (endidx - startidx + 1 < minpeakmat) next;
        speakmat <- peakmat[startidx:endidx,,drop=FALSE];
        # kernel density of retention times within this mass slice;
        # peaks under each density mode form one candidate group
        den <- density(speakmat[,"rt"], bw, from = retrange[1]-3*bw, to = retrange[2]+3*bw);
        maxden <- max(den$y);
        deny <- den$y;
        snum <- 0;
        while (deny[maxy <- which.max(deny)] > maxden/20 && snum < max) {
            grange <- descendMin(deny, maxy);
            deny[grange[1]:grange[2]] <- 0;
            gidx <- which(speakmat[,"rt"] >= den$x[grange[1]] & speakmat[,"rt"] <= den$x[grange[2]]);
            gnum <- classlabel[unique(speakmat[gidx,"sample"])];
            for (j in seq_along(gcount)) gcount[j] <- sum(gnum == j);
            if (! any(gcount >= classnum*minfrac & gcount >= minsamp)) next;
            snum <- snum + 1;
            num <- num + 1;
            # double the size of the output containers if they're full
            if (num > nrow(groupmat)) {
                groupmat <- rbind(groupmat, matrix(nrow = nrow(groupmat), ncol = ncol(groupmat)));
                groupindex <- c(groupindex, vector("list", length(groupindex)));
            }
            groupmat[num, 1] <- median(speakmat[gidx, "mz"]);
            groupmat[num, 2:3] <- range(speakmat[gidx, "mz"]);
            groupmat[num, 4] <- median(speakmat[gidx, "rt"]);
            groupmat[num, 5:6] <- range(speakmat[gidx, "rt"]);
            groupmat[num, 7] <- length(gidx);
            groupmat[num, 7+seq_along(gcount)] <- gcount;
            groupindex[[num]] <- sort(porder[(startidx:endidx)[gidx]]);
        }
    }
    colnames(groupmat) <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin", "rtmax", "npeaks", classnames);
    groupmat <- groupmat[seq_len(num),];
    groupindex <- groupindex[seq_len(num)];
    # remove groups that overlap with more "well-behaved" groups
    numsamp <- rowSums(groupmat[,(match("npeaks", colnames(groupmat))+1):ncol(groupmat),drop=FALSE]);
    uorder <- order(-numsamp, groupmat[,"npeaks"]);
    uindex <- rectUnique(groupmat[,c("mzmin","mzmax","rtmin","rtmax"),drop=FALSE], uorder);
    peakSet$groups <- groupmat[uindex,];
    peakSet$groupidx <- groupindex[uindex];
    dataSet$peakSet <<- peakSet;
}

# Build the sample-by-group intensity matrix from grouped peak lists
# (peakSet is an nmr.xcmsSet-like object).
SetPeakList.GroupValues<-function() {
    peakSet <- dataSet$peakSet;
    msg <- dataSet$peakMsg;
    peakmat <- peakSet$peaks;
    groupmat <- peakSet$groups;
    groupindex <- peakSet$groupidx;
    sampnum <- seq_len(length(peakSet$sampnames));
    intcol <- match("int", colnames(peakmat));
    sampcol <- match("sample", colnames(peakmat));
    # row is peak group, col is sample
    values <- matrix(nrow = length(groupindex), ncol = length(sampnum));
    for (i in seq_along(groupindex)) {
        # within a group, multiple peaks from the same sample are summed
        for(m in sampnum){
            samp.inx <- which(peakmat[groupindex[[i]], sampcol]==m);
            if(length(samp.inx) > 0){
                values[i, m] <- sum(peakmat[groupindex[[i]][samp.inx], intcol]);
            }else{
                values[i, m] <- NA;
            }
        }
    }
    msg <- c(msg, paste("A total of", length(groupindex), "peak groups were formed. "));
    msg <- c(msg, paste("Peaks of the same group were summed if they are from one sample. "));
    msg <- c(msg, paste("Peaks appear in less than half of samples in each group were ignored."));
    colnames(values) <- peakSet$sampnames;
    if(peakSet$ncol==2){
        # feature id = median mz only
        rownames(values) <- paste(round(groupmat[,paste("mz", "med", sep="")],5));
    }else{
        # feature id = "mz/rt"
        rownames(values) <- paste(round(groupmat[,paste("mz", "med", sep="")],5), "/", round(groupmat[,paste("rt", "med", sep="")],2), sep="");
        dataSet$three.col <<- TRUE;
    }
    dataSet$orig <<- t(values);
    dataSet$proc.msg <<- msg;
    dataSet$orig.cls <<- as.factor(peakSet$sampclass);
}

# retention time correction for LC/GC-MS spectra
MSspec.rtCorrection<-function(bw=30){
    xset2 <- retcor(dataSet$xset.orig);
    # re-group peaks after retention time correction
    xset2 <- group(xset2, bw=bw);
    dataSet$xset.rt <<- xset2;
}

# plot retention-time corrected spectra
PlotMS.RT<-function(imgName, format="png", dpi=72, width=NA){
    imgName <- paste(imgName, "dpi", dpi, ".", format, sep="");
    if(is.na(width)){
        w <- 9;
    }else if(width == 0){
        w <- 9;
        imgSet$msrt <<- imgName;
    }else{
        w <- width;
    }
    h <- w*7/9;
    Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
    plotrt(dataSet$xset.rt);
    #dev.off();
}

# fill in missing peaks
MSspec.fillPeaks<-function(){
    xset3 <- fillPeaks(dataSet$xset.rt);
    dataSet$xset.fill <<- xset3;
    msg <- paste("A total of", dim(xset3@peaks)[1], "peaks were detected from these samples");
    msg <- c(msg, paste("with an average of", round(dim(xset3@peaks)[1]/dim(xset3@phenoData)[1], 2), "peaks per spectrum."));
    dataSet$xset.msg <<- msg;
}

# into: integrated area of original (raw) peak
# intf: integrated area of filtered peak
# maxo:
# maxf: maximum intensity of filtered peak
# (column meanings: into = integrated area of raw peak, intf = integrated area
#  of filtered peak, maxo = maximum intensity of raw peak)
# Build the sample-by-feature matrix from the filled xcmsSet.
SetupMSdataMatrix<-function(intvalue = c("into","maxo","intb")){
    values <- groupval(dataSet$xset.fill, "medret", value = intvalue);
    msg <- dataSet$xset.msg;
    # transpose so that rows are samples
    orig <- as.data.frame(t(values));
    msg <- c(msg, paste("These peaks were aligned", dim(orig)[2], "groups according to their mass and retention time."));
    msg <- c(msg, paste("Please note, some peaks were excluded if they appear in only a few samples."));
    dataSet$xset.msg <<- msg;
    dataSet$orig <<- orig;
    dataSet$orig.cls <<- as.factor(sampclass(dataSet$xset.fill));
}

# Check that all spectra-processing stages completed; returns 1/0 and
# records the failing stage in dataSet$xset.msg.
IsSpectraProcessingOK<-function(){
    msg <- dataSet$xset.msg;
    if(is.null(dataSet$xset.orig)){
        dataSet$xset.msg <<- c(msg, "Failed to read in and process the spectra.");
        return(0);
    }
    if(is.null(dataSet$xset.rt)){
        dataSet$xset.msg <<- c(msg, "Failed in retention time correction, spectra problem?");
        return(0);
    }
    if(is.null(dataSet$xset.fill)){
        dataSet$xset.msg <<- c(msg, "Failed in filling missing peaks, spectra problem?");
        return(0);
    }
    return(1);
}

####################################################################
### ========= methods for non-specific filtering of variables ====###
####################################################################

# Non-specific variable filtering; the final variable count should stay below
# 5000 for effective computing. Sets dataSet$remain (logical keep-mask over
# columns of dataSet$proc) and dataSet$filter.msg.
FilterVariable <- function(filter){
    int.mat <- as.matrix(dataSet$proc);
    feat.num <- ncol(int.mat);
    nm <- NULL;
    if(filter == "none" && feat.num <= 2000){ # "none" only allowed under 2000 features
        remain <- rep(TRUE, feat.num);
        msg <- "No data filtering was applied";
    }else{
        # compute the per-feature filter statistic
        if (filter == "rsd" ){
            sds <- apply(int.mat, 2, sd, na.rm=TRUE);
            mns <- apply(int.mat, 2, mean, na.rm=TRUE);
            filter.val <- abs(sds/mns);
            nm <- "Relative standard deviation";
        }else if (filter == "nrsd" ){
            mads <- apply(int.mat, 2, mad, na.rm=TRUE);
            meds <- apply(int.mat, 2, median, na.rm=TRUE);
            filter.val <- abs(mads/meds);
            nm <- "Non-paramatric relative standard deviation";
        }else if (filter == "mean"){
            filter.val <- apply(int.mat, 2, mean, na.rm=TRUE);
            nm <- "mean";
        }else if (filter == "sd"){
            filter.val <- apply(int.mat, 2, sd, na.rm=TRUE);
            nm <- "standard deviation";
        }else if (filter == "mad"){
            filter.val <- apply(int.mat, 2, mad, na.rm=TRUE);
            nm <- "Median absolute deviation";
        }else if (filter == "median"){
            filter.val <- apply(int.mat, 2, median, na.rm=TRUE);
            nm <- "median";
        }else{ # iqr
            filter.val <- apply(int.mat, 2, IQR, na.rm=TRUE);
            nm <- "Interquantile Range";
        }
        # rank features by the statistic (low rank = keep)
        rk <- rank(-filter.val, ties.method='random');
        var.num <- ncol(int.mat);
        if(var.num < 250){ # reduce 5%
            remain <- rk < var.num*0.95;
            msg <- paste("Reduce 5\\% features (", sum(!remain), ") based on", nm);
        }else if(var.num < 500){ # reduce 10%
            remain <- rk < var.num*0.9;
            msg <- paste("Reduce 10\\% features (", sum(!remain), ") based on", nm);
        }else if(var.num < 1000){ # reduce 25%
            remain <- rk < var.num*0.75;
            msg <- paste("Reduce 25\\% features (", sum(!remain), ") based on", nm);
        }else{ # reduce 40%; if still over 5000, keep only the top 5000
            remain <- rk < var.num*0.6;
            msg <- paste("Reduce 40\\% features (", sum(!remain), ") based on", nm);
            if(sum(remain) > 5000){
                remain <- rk < 5000;
                msg <- paste("Reduced to 5000 features based on", nm);
            }
        }
    }
    dataSet$remain <<- remain;
    dataSet$filter.msg <<- msg;
    print(msg);
}

# Create a per-sample summary table (xtable) for each type of uploaded data:
# feature/peak counts, missing/zero counts and processed counts.
CreateSummaryTable<-function(){
    suppressMessages(require(xtable));
    sum.dat <- NULL;
    plenth <- dim(dataSet$proc)[2];
    if(dataSet$type=='conc' | dataSet$type=='pktable' | dataSet$type=='specbin'){
        for(i in seq_len(nrow(dataSet$orig))){
            srow <- dataSet$orig[i,];
            newrow <- c(sum(srow[!is.na(srow)] > 0), (sum(is.na(srow)) + sum(srow[!is.na(srow)] <= 0)), plenth);
            sum.dat <- rbind(sum.dat, newrow);
        }
        colnames(sum.dat) <- c("Features (positive)","Missing/Zero","Features (processed)");
        rownames(sum.dat) <- row.names(dataSet$orig);
    }else if(dataSet$type=="nmrpeak" | dataSet$type=="mspeak"){ # peak lists
        pkSet <- dataSet$peakSet;
        snames <- pkSet$sampnames;
        for(i in seq_along(snames)){
            samp.inx <- pkSet$peaks[,"sample"]==i;
            srow <- dataSet$orig[i,];
            newrow <- c(sum(samp.inx), (sum(is.na(srow)) + sum(srow[!is.na(srow)] <= 0)), plenth);
            sum.dat <- rbind(sum.dat, newrow);
        }
        colnames(sum.dat) <- c("Peaks (raw)","Missing/Zero", "Peaks (processed)");
        rownames(sum.dat) <- row.names(dataSet$orig);
    }else{ # spectra
        rawxset <- dataSet$xset.orig;
        fillxset <- dataSet$xset.fill;
        snames <- row.names(rawxset@phenoData);
        for(i in seq_along(snames)){
            rawno <- sum(rawxset@peaks[,"sample"]==i);
            fillno <- sum(fillxset@peaks[,"sample"]==i);
            newrow <- c(rawno, fillno, plenth);
            sum.dat <- rbind(sum.dat, newrow);
        }
        colnames(sum.dat) <- c("Peaks (raw)","Peaks (fill)", "Peaks(processed)");
        rownames(sum.dat) <- row.names(dataSet$orig);
    }
    print(xtable(sum.dat, caption="Summary of data processing results"), caption.placement="top", size="\\scriptsize");
}

# mat is log-normalized, so column differences correspond to ratios;
# returns all pairwise column differences, named "colA/colB", rounded to 5 dp.
CalculatePairwiseDiff <- function(mat){
    f <- function(i, mat) {
        z <- mat[, i-1] - mat[, i:ncol(mat), drop = FALSE];
        colnames(z) <- paste(colnames(mat)[i-1], colnames(z), sep = "/");
        z;
    }
    # lapply (not sapply): each element is a matrix of a different width,
    # so simplification must never be attempted before cbind
    res <- do.call("cbind", lapply(2:ncol(mat), f, mat));
    round(res, 5);
}

##############################################################################################
##############################################################################################
##############################################################################################
##############################################################################################
##################################################
## R script for MetaboAnalyst
## Description: perform various normalization
##
## Author: Jeff Xia, jeff.xia@mcgill.ca
## McGill University, Canada
##
## License: GNU GPL (>= 2)
###################################################

###############################################################
# remove the sample or feature from data
# Note: this should happen after processing and before normalization
# dataSet$proc dataSet$proc.cls (make a copy of this pair for restore)
########################################################

# Subset samples to the groups listed in the global `grp.nm.vec`, storing the
# result in dataSet$prenorm / dataSet$prenorm.cls (and the two-factor copies
# for time-series formats). Returns 1 on success, 0 if the vector is missing;
# status text goes to the global `current.msg`.
UpdateGroupItems<-function(){
  if(!exists("grp.nm.vec")){
    current.msg <<- "Cannot find the current group names!";
    return (0);
  }
  hit.inx <- dataSet$proc.cls %in% grp.nm.vec;
  dataSet$prenorm <<- dataSet$proc[hit.inx,];
  # re-level so dropped groups disappear from the factor
  dataSet$prenorm.cls <<- factor(dataSet$proc.cls[hit.inx], levels=grp.nm.vec);
  if(substring(dataSet$format,4,5)=="ts"){ # two-factor / time-series format
    dataSet$prenorm.facA <<- factor(dataSet$proc.facA[hit.inx],levels=grp.nm.vec);
    dataSet$prenorm.facB <<- factor(dataSet$proc.facB[hit.inx],levels=grp.nm.vec);
  }
  current.msg <<- "Successfully updated the group items!";
  return (1);
}

# Subset samples to the names listed in the global `smpl.nm.vec`; same
# output slots and return convention as UpdateGroupItems.
UpdateSampleItems<-function(){
  if(!exists("smpl.nm.vec")){
    current.msg <<- "Cannot find the current sample names!";
    return (0);
  }
  hit.inx <- rownames(dataSet$proc) %in% smpl.nm.vec;
  dataSet$prenorm <<- dataSet$proc[hit.inx,];
  dataSet$prenorm.cls <<- dataSet$proc.cls[hit.inx];
  if(substring(dataSet$format,4,5)=="ts"){
    dataSet$prenorm.facA <<- dataSet$proc.facA[hit.inx];
    dataSet$prenorm.facB <<- dataSet$proc.facB[hit.inx];
  }
  current.msg <<- "Successfully updated the sample items!";
  return (1);
}

# Subset features (columns) to the names listed in the global
# `feature.nm.vec`; class labels are unchanged since samples are untouched.
UpdateFeatureItems<-function(){
  if(!exists("feature.nm.vec")){
    current.msg <<- "Cannot find the selected feature names!";
    return (0);
  }
  hit.inx <- colnames(dataSet$proc) %in% feature.nm.vec;
  dataSet$prenorm <<- dataSet$proc[,hit.inx];
  dataSet$prenorm.cls <<- dataSet$proc.cls; # this is the same
  current.msg <<- "Successfully updated the sample items!";
  return (1);
}

# Main normalization pipeline. Applies, in order:
#   1. feature filtering (dataSet$remain mask, if set)
#   2. sample ordering by group/time factors
#   3. row-wise (per-sample) normalization chosen by `rowNorm`
#      (SpecNorm / ProbNormT / ProbNormF / CompNorm / SumNorm / MedianNorm)
#   4. either pairwise-ratio augmentation (ratio=TRUE, biomarker mode) or a
#      column transform chosen by `transNorm` (LogNorm / CrNorm)
#   5. column scaling chosen by `scaleNorm`
#      (MeanCenter / AutoNorm / ParetoNorm / RangeNorm / VastNorm)
# Results are written into the globals dataSet$norm, dataSet$row.norm,
# dataSet$cls and the *.method bookkeeping slots; returns 1.
#   ref      - reference sample/feature name, used by ProbNorm*/CompNorm
#   ratio    - if TRUE, append top `ratioNum` pairwise log-ratio features
#              (ranked by Get.Fstat) instead of applying `transNorm`
# NOTE(review): order of statements matters throughout (data/cls/facA/facB are
# reordered together); do not reorder assignments.
Normalization<-function(rowNorm, transNorm, scaleNorm, ref=NULL, ratio=FALSE, ratioNum=20){
  # now do actual filter if indicated
  if(!is.null(dataSet$remain)){
    remain <- dataSet$remain;
    if(rowNorm == "CompNorm"){ # make sure the ref is there, not filtered out
      hit.inx <- match(ref, colnames(dataSet$proc));
      remain[hit.inx] <- TRUE;
    }
    proc <- dataSet$proc[,remain];
  }else{
    proc <- dataSet$proc;
  }
  # start from the prenorm subset when one exists, else the processed data
  if(is.null(dataSet$prenorm)){
    data<- proc;
    cls <- dataSet$proc.cls;
    if(substring(dataSet$format,4,5)=="ts"){
      dataSet$facA <- dataSet$proc.facA;
      dataSet$facB <- dataSet$proc.facB;
      cls <- dataSet$facA;
    }
  }else{
    data<- dataSet$prenorm;
    cls <- dataSet$prenorm.cls;
    if(substring(dataSet$format,4,5)=="ts"){
      dataSet$facA <- dataSet$prenorm.facA;
      dataSet$facB <- dataSet$prenorm.facB;
      cls <- dataSet$facA;
    }
  }
  # note, samples may not be sorted by group labels
  if(substring(dataSet$format,4,5)=="ts"){
    nfacA <- dataSet$facA;
    nfacB <- dataSet$facB;
    if(dataSet$design.type =="time" | dataSet$design.type =="time0"){
      # determine time factor and should order first by subject then by each time points
      if(tolower(dataSet$facA.lbl) == "time"){
        time.fac <- nfacA;
        exp.fac <- nfacB;
      }else{
        time.fac <- nfacB;
        exp.fac <- nfacA;
      }
      # update with new index
      ord.inx <- order(exp.fac);
      dataSet$time.fac <<- time.fac[ord.inx];
      dataSet$exp.fac <<- exp.fac[ord.inx];
    }else{
      ord.inx <- order(cls);
    }
    data<-data[ord.inx, ];
    cls <-cls[ord.inx];
    dataSet$facA <<- dataSet$facA[ord.inx];
    dataSet$facB <<- dataSet$facB[ord.inx];
  }else{
    ord.inx <- order(cls);
    data<-data[ord.inx, ];
    cls <-cls[ord.inx];
  }
  colNames <- colnames(data);
  rowNames <- rownames(data);
  # row-wise normalization
  if(rowNorm=="SpecNorm"){
    if(!exists("norm.vec")){
      norm.vec <- rep(1,nrow(data)); # default all same weight vec to prevent error
      print("No sample specific information were given, all set to 1.0");
    }
    rownm<-"Normalization by sample-specific factor";
    data<-data/norm.vec;
  }else if(rowNorm=="ProbNormT"){
    # PQN against the mean profile of the reference group `ref`
    grp.inx <- cls == ref;
    ref.smpl <- apply(proc[grp.inx, ], 2, mean);
    data<-t(apply(data, 1, ProbNorm, ref.smpl));
    rownm<-"Probabilistic Quotient Normalization";
  }else if(rowNorm=="ProbNormF"){
    # PQN against a single reference sample named `ref`
    ref.smpl <- proc[ref,];
    data<-t(apply(data, 1, ProbNorm, ref.smpl));
    rownm<-"Probabilistic Quotient Normalization";
  }else if(rowNorm=="CompNorm"){
    data<-t(apply(data, 1, CompNorm, ref));
    rownm<-"Normalization by a reference feature";
  }else if(rowNorm=="SumNorm"){
    data<-t(apply(data, 1, SumNorm));
    rownm<-"Normalization to constant sum";
  }else if(rowNorm=="MedianNorm"){
    data<-t(apply(data, 1, MedianNorm));
    rownm<-"Normalization to sample median";
  }else{ # nothing to do
    rownm<-"N/A";
  }
  # use apply will lose dimesion info (i.e. row names and colnames)
  rownames(data)<-rowNames;
  colnames(data)<-colNames;
  # note: row-normed data is based on biological knowledge, since the previous
  # replacing zero/missing values by half of the min positive (a constant)
  # now may become different due to different norm factor, which is artificial
  # variance and should be corrected again
  #
  # stopped, this step cause troubles
  # minConc<-round(min(data)/2, 5);
  # data[dataSet$fill.inx]<-minConc;

  # if the reference by feature, the feature column should be removed, since it is all 1
  if(rowNorm=="CompNorm" && !is.null(ref)){
    inx<-match(ref, colnames(data));
    data<-data[,-inx];
    colNames <- colNames[-inx];
  }
  # record row-normed data for fold change analysis (b/c not applicable for mean-centered data)
  dataSet$row.norm<<-as.data.frame(CleanData(data));
  # this is for biomarker analysis only (for compound concentraion data)
  if(ratio){
    min.val <- min(abs(data[data!=0]))/2;
    # glog-style transform tolerant of zeros/negatives
    norm.data <- log2((data + sqrt(data^2 + min.val))/2);
    transnm<-"Log Normalization";
    ratio.mat <- CalculatePairwiseDiff(norm.data);
    # keep only the top `ratioNum` ratio features by F statistic
    fstats <- Get.Fstat(ratio.mat, dataSet$proc.cls);
    hit.inx <- rank(-fstats) < ratioNum; # get top n
    ratio.mat <- ratio.mat[, hit.inx];
    data <- cbind(norm.data, ratio.mat);
    colNames <- colnames(data);
    rowNames <- rownames(data);
  }
  if(!ratio){ # transformation
    if(transNorm=='LogNorm'){
      min.val <- min(abs(data[data!=0]))/10;
      data<-apply(data, 2, LogNorm, min.val);
      transnm<-"Log Normalization";
    }else if(transNorm=='CrNorm'){
      # signed cube root: preserves sign of negative entries
      norm.data <- abs(data)^(1/3);
      norm.data[data<0] <- - norm.data[data<0];
      data <- norm.data;
      transnm<-"Cubic Root Transformation";
    }else{
      transnm<-"N/A";
    }
  }
  # scaling
  if(scaleNorm=='MeanCenter'){
    data<-apply(data, 2, MeanCenter);
    scalenm<-"MeanCenter";
  }else if (scaleNorm=='AutoNorm'){
    data<-apply(data, 2, AutoNorm);
    scalenm<-"Autoscaling";
  }else if(scaleNorm=='ParetoNorm'){
    data<-apply(data, 2, ParetoNorm);
    scalenm<-"Pareto Scaling";
  }else if(scaleNorm=='RangeNorm'){
    data<-apply(data, 2, RangeNorm);
    scalenm<-"Range Scaling";
  }else if(scaleNorm=='VastNorm'){
    data<-apply(data, 2, VastNorm);
    scalenm<-'Vast Scaling';
  }else{
    scalenm<-"N/A";
  }
  # need to do some sanity check, for log there may be Inf values introduced
  data <- CleanData(data, T, F);
  # note after using "apply" function, all the attribute lost, need to add back
  rownames(data)<-rowNames;
  colnames(data)<-colNames;
  # publish results to the session-global dataSet
  dataSet$norm <<- as.data.frame(data);
  dataSet$cls <<- cls;
  dataSet$rownorm.method<<-rownm;
  dataSet$trans.method<<-transnm;
  dataSet$scale.method<<-scalenm;
  dataSet$combined.method<<-FALSE;
  dataSet$norm.all <<- NULL; # this is only for biomarker ROC analysis
  return(1);
}

########################################
###row-wise norm methods, x is a row ###
########################################

# normalize by a sum of each sample, assume constant sum (1000)
# return: normalized data
SumNorm<-function(x){
  1000*x/sum(x, na.rm=T);
}

# normalize by median
MedianNorm<-function(x){
  x/median(x, na.rm=T);
}

# normalize by a reference sample (probability quotient normalization)
# ref.smpl is the reference profile; divides x by the median quotient x/ref
ProbNorm<-function(x, ref.smpl){
  x/median(as.numeric(x/ref.smpl), na.rm=T)
}

# normalize by a reference feature (i.e.
# creatinine); ref should be the name of the cmpd (reference feature/column)
# Scales each row value by 1000 relative to the reference feature's value.
CompNorm<-function(x, ref){
  1000*x/x[ref];
}

##############################################
###column-wise norm methods, x is a column ###
##############################################

# generalize log, tolerant to 0 and negative values
LogNorm<-function(x,min.val){
  log2((x + sqrt(x^2 + min.val^2))/2)
}

# normalize to zero mean and unit variance (autoscaling)
AutoNorm<-function(x){
  (x - mean(x))/sd(x, na.rm=T);
}

# Pareto scaling: zero mean, divided by the square root of the SD
# (original comment said "varaince/SE"; the code divides by sqrt(sd))
ParetoNorm<-function(x){
  (x - mean(x))/sqrt(sd(x, na.rm=T));
}

# mean centering only (no scaling)
MeanCenter<-function(x){
  x - mean(x);
}

# range scaling: zero mean divided by (max - min); constant columns returned unchanged
RangeNorm<-function(x){
  if(max(x) == min(x)){
    x;
  }else{
    (x - mean(x))/(max(x)-min(x));
  }
}

# Vast scaling: autoscaled value additionally weighted by mean/sd
VastNorm<-function(x){
  ((x-mean(x))/sd(x,na.rm=T))*((mean(x))/sd(x,na.rm=T))
}

#######################################
####### Combined approach #############
#######################################

# Quantile normalization across all samples after a glog transform.
# Reads dataSet$proc / dataSet$proc.cls, writes dataSet$norm and the method
# bookkeeping slots. Requires the Bioconductor `preprocessCore` package and a
# `glog` function defined elsewhere in this file/project.
QuantileNormalize <- function(){
  data<-dataSet$proc;
  cls <- dataSet$proc.cls;
  cls.lvl <- levels(cls);
  # first log normalize
  data <- glog(data);
  require('preprocessCore');
  # normalize within replicates
  #for (lv in cls.lvl){
  #  sub.inx <- dataSet$proc.cls == lv;
  #  data[sub.inx, ] <- t(normalize.quantiles(t(data[sub.inx, ]), copy=FALSE));
  #}
  data <- t(normalize.quantiles(t(data), copy=FALSE));
  dataSet$norm <<- as.data.frame(data);
  dataSet$cls <<- cls;
  dataSet$rownorm.method<<-NULL;
  dataSet$colnorm.method<<-NULL;
  dataSet$combined.method<<-TRUE;
}

##############################################
################## Summary plot ##############
##############################################
# plot two summary plot, one b4 normalization, one after
# for each plot top is box plot, bottom is a density plot
#
# Renders a 4-panel figure to `imgName` via the Cairo device:
#   density + boxplot of dataSet$proc (before) and dataSet$norm (after).
# Only a random subset of up to 50 features is boxplotted; density plots use
# all features. width==0 additionally records the image name in imgSet$norm.
PlotNormSummary<-function(imgName, format="png", dpi=72, width=NA){
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 10.5; h <- 12;
  }else if(width == 0){
    w <- 7.2;h <- 9;
    imgSet$norm<<-imgName;
  }else{
    w <- 7.2; h <- 9;
  }
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  layout(matrix(c(1,2,2,2,3,4,4,4), 4, 2, byrow = FALSE))
  # since there may be too many compounds, only plot a subsets (50) in box plot
  # but density plot will use all the data
  pre.inx<-GetRandomSubsetIndex(ncol(dataSet$proc), sub.num=50);
  namesVec <- colnames(dataSet$proc[,pre.inx]);
  # only get common ones (features may have been filtered out of norm)
  nm.inx <- namesVec %in% colnames(dataSet$norm)
  namesVec <- namesVec[nm.inx];
  pre.inx <- pre.inx[nm.inx];
  norm.inx<-match(namesVec, colnames(dataSet$norm));
  namesVec <- substr(namesVec, 1, 12); # use abbreviated name
  rangex.pre <- range(dataSet$proc[, pre.inx], na.rm=T);
  rangex.norm <- range(dataSet$norm[, norm.inx], na.rm=T);
  x.label<-GetValueLabel();
  y.label<-GetVariableLabel();
  # fig 1: density of per-feature means, before normalization
  op<-par(mar=c(4,7,4,0), xaxt="s");
  plot(density(apply(dataSet$proc, 2, mean, na.rm=TRUE)), col='darkblue', las =2, lwd=2, main="", xlab="", ylab="");
  mtext("Density", 2, 5);
  mtext("Before Normalization",3, 1)
  # fig 2: horizontal boxplot of subset features, before normalization
  op<-par(mar=c(7,7,0,0), xaxt="s");
  boxplot(dataSet$proc[,pre.inx], names= namesVec, ylim=rangex.pre, las = 2, col="lightgreen", horizontal=T);
  mtext(x.label, 1, 5);
  # fig 3: density after normalization
  op<-par(mar=c(4,7,4,2), xaxt="s");
  plot(density(apply(dataSet$norm, 2, mean, na.rm=TRUE)), col='darkblue', las=2, lwd =2, main="", xlab="", ylab="");
  mtext("After Normalization",3, 1);
  # fig 4: boxplot after normalization
  op<-par(mar=c(7,7,0,2), xaxt="s");
  boxplot(dataSet$norm[,norm.inx], names=namesVec, ylim=rangex.norm, las = 2, col="lightgreen", horizontal=T);
  mtext(paste("Normalized",x.label),1, 5);
  #dev.off();
}
###################################################################################################
###################################################################################################
###################################################################################################
###################################################################################################
#########################################################
## R script for MetaboAnalyst
## Description: perform fold change, t-tests, volcano plot
##
## Author: Jeff Xia, jeff.xia@mcgill.ca
## McGill University, Canada
##
## License: GNU GPL (>= 2)
###################################################

#####################################
########### Fold Change #############
#####################################

# fold change analysis, method can be mean or median
# note: since the interface allow user to change all parameters
# the fold change has to be re-calculated each time
#
# Unpaired FC: flags features whose FC exceeds fc.thresh (or falls below its
# reciprocal), writes fold_change.csv, and stores results in analSet$fc.
#   cmp.type - 0 = group1 vs group2, else group2 vs group1 (see GetFC)
FC.Anal.unpaired<-function(fc.thresh=2, cmp.type = 0){
  # make sure threshold is above 1
  fc.thresh = ifelse(fc.thresh>1, fc.thresh, 1/fc.thresh);
  max.thresh = fc.thresh;
  min.thresh = 1/fc.thresh;
  res <-GetFC(F, cmp.type);
  fc.all <- res$fc.all;
  fc.log <- res$fc.log;
  imp.inx <- fc.all > max.thresh | fc.all < min.thresh;
  sig.mat <- cbind(fc.all[imp.inx, drop=F], fc.log[imp.inx, drop=F]);
  colnames(sig.mat)<-c("Fold Change", "log2(FC)");
  # order by absolute log value (since symmetrical in pos and neg)
  inx.ord <- order(abs(sig.mat[,2]), decreasing=T);
  sig.mat <- sig.mat[inx.ord,,drop=F];
  fileName <- "fold_change.csv";
  write.csv(sig.mat,file=fileName);
  # create a list object to store fc
  fc<-list (
    paired = FALSE,
    raw.thresh = fc.thresh,
    max.thresh = max.thresh,
    min.thresh = min.thresh,
    fc.all = fc.all, # note a vector
    fc.log = fc.log,
    inx.imp = imp.inx,
    sig.mat = sig.mat
  );
  analSet$fc<<-fc;
}

# Paired FC: per feature, counts sample pairs whose log2 difference exceeds
# the threshold in each direction; a feature is significant when at least
# `percent.thresh` of pairs agree. Writes fold_change.csv, stores analSet$fc.
FC.Anal.paired<-function(fc.thresh=2, percent.thresh=0.75, cmp.type=0){
  # make sure threshold is above 1
  fc.thresh = ifelse(fc.thresh>1, fc.thresh, 1/fc.thresh);
  max.thresh = fc.thresh;
  min.thresh = 1/fc.thresh;
  fc.mat <-GetFC(T, cmp.type);
  # required number of agreeing pairs (half the samples = number of pairs)
  count.thresh<-round(nrow(dataSet$norm)/2*percent.thresh);
  mat.up <- fc.mat >= log(max.thresh,2);
  mat.down <- fc.mat <= log(min.thresh,2);
  count.up<-apply(mat.up, 2, sum);
  count.down<-apply(mat.down, 2, sum);
  fc.all<-rbind(count.up, count.down);
  inx.up <- count.up>=count.thresh;
  inx.down <- count.down>=count.thresh;
  colnames(fc.all)<-colnames(dataSet$norm);
  rownames(fc.all)<-c("Count (up)", "Count (down)");
  sig.var <- t(fc.all[,(inx.up|inx.down), drop=F]);
  # sort sig.var using absolute difference between count(up)-count(down)
  sig.dff<-abs(sig.var[,1]-sig.var[,2])
  inx<-order(sig.dff, decreasing=T);
  sig.var<-sig.var[inx,,drop=F];
  fileName <- "fold_change.csv";
  write.csv(signif(sig.var,5),file=fileName);
  # create a list object to store fc
  fc<-list (
    paired = TRUE,
    fc.mat = fc.mat,
    raw.thresh = fc.thresh,
    max.thresh = count.thresh,
    min.thresh = -count.thresh,
    fc.all = fc.all, # note: a 2-row matrix!
    inx.up = inx.up,
    inx.down = inx.down,
    sig.mat = sig.var
  );
  analSet$fc<<-fc;
}

# Render the fold-change plot (paired: up/down pair counts per feature;
# unpaired: log2 FC scatter, or group-mean comparison when raw.thresh <= 0)
# to `imgName` via Cairo. Reads analSet$fc.
PlotFC<-function(imgName, format="png", dpi=72, width=NA){
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 8;
  }else if(width == 0){
    w <- 7;
    imgSet$fc<<-imgName;
  }else{
    w <- width;
  }
  h <- w*6/8;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mar=c(5,5,2,3));
  fc = analSet$fc;
  if(fc$paired){
    ylim<-c(-nrow(dataSet$norm)/2, nrow(dataSet$norm)/2);
    xlim<-c(0, ncol(dataSet$norm));
    plot(NULL, xlim=xlim, ylim=ylim, xlab = GetVariableLabel(), ylab=paste("Count with FC >=", fc$max.thresh, "or <=", fc$min.thresh));
    for(i in 1:ncol(fc$fc.all)){
      # up counts drawn above zero, down counts below; significant in magenta
      segments(i,0, i, fc$fc.all[1,i], col= ifelse(fc$inx.up[i],"magenta", "darkgrey"), lwd= ifelse(fc$inx.up[i], 2, 1));
      segments(i,0, i, -fc$fc.all[2,i], col= ifelse(fc$inx.down[i], "magenta", "darkgrey"), lwd= ifelse(fc$inx.down[i], 2, 1));
    }
    abline(h=fc$max.thresh, lty=3);
    abline(h=fc$min.thresh, lty=3);
    abline(h=0, lwd=1);
  }else{
    if(fc$raw.thresh > 0){
      # be symmetrical
      topVal <- max(abs(fc$fc.log));
      ylim <- c(-topVal, topVal);
      plot(fc$fc.log, ylab="Log2 (FC)", ylim = ylim, xlab = GetVariableLabel(), pch=19, axes=F, col= ifelse(fc$inx.imp, "magenta", "darkgrey"));
      axis(2);
      axis(4); # added by Beomsoo
      abline(h=log(fc$max.thresh,2), lty=3);
      abline(h=log(fc$min.thresh,2), lty=3);
      abline(h=0, lwd=1);
    }else{ # plot side by side
      dat1 <- dataSet$norm[as.numeric(dataSet$cls) == 1, ];
      dat2 <- dataSet$norm[as.numeric(dataSet$cls) == 2, ];
      mns1 <- apply(dat1, 2, mean);
      mn1 <- mean(mns1);
      sd1 <- sd(mns1);
      msd1.top <- mn1 + 2*sd1;
      msd1.low <- mn1 - 2*sd1;
      mns2 <- apply(dat2, 2, mean);
      mn2 <- mean(mns2);
      sd2 <- sd(mns2);
      msd2.top <- mn2 + 2*sd2;
      msd2.low <- mn2 - 2*sd2;
      ylims <- range(c(mns1, mns2, msd1.top, msd2.top, msd1.low, msd2.low));
      # 5 NA values create a visual gap between the two groups
      new.mns <- c(mns1, rep(NA, 5), mns2);
      cols <- c(rep("magenta", length(mns1)), rep(NA, 5), rep("blue", length(mns2)));
      pchs <- c(rep(15, length(mns1)), rep(NA, 5), rep(19, length(mns2)));
      plot(new.mns, ylim=ylims, pch = pchs, col = cols, cex = 1.25, axes=F, ylab="");
      axis(2);
      axis(4); # added by Beomsoo
      abline(h=mn1, col="magenta", lty=3, lwd=2);
      abline(h=msd1.low, col="magenta", lty=3, lwd=1);
      abline(h=msd1.top, col="magenta", lty=3, lwd=1);
      abline(h=mn2, col="blue", lty=3, lwd=2);
      abline(h=msd2.low, col="blue", lty=3, lwd=1);
      abline(h=msd2.top, col="blue", lty=3, lwd=1);
      # abline(h=mean(all.mns), col="darkgrey", lty=3);
      axis(1, at=1:length(new.mns), labels=c(1:length(mns1),rep(NA, 5),1:length(mns2)));
    }
  }
  #dev.off();
}

# accessors over analSet$fc$sig.mat
GetSigTable.FC<-function(){
  GetSigTable(analSet$fc$sig.mat, "fold change analysis");
}
GetFCSigMat<-function(){
  return(CleanNumber(analSet$fc$sig.mat));
}
GetFCSigRowNames<-function(){
  rownames(analSet$fc$sig.mat);
}
GetFCSigColNames<-function(){
  colnames(analSet$fc$sig.mat);
}

# utility method to calculate FC
# paired=TRUE  -> returns a matrix of per-pair log2 differences (G1-G2 or G2-G1)
# paired=FALSE -> returns list(fc.all = ratio/2^logFC, fc.log = log2 FC)
# Uses dataSet$norm when combined.method (already log scale), else
# dataSet$row.norm (raw scale; log taken here / ratio of column means).
GetFC <- function(paired=FALSE, cmpType){
  if(paired){
    if(dataSet$combined.method){
      data <- dataSet$norm;
    }else{
      data <- log(dataSet$row.norm,2);
    }
    G1 <- data[which(dataSet$cls==levels(dataSet$cls)[1]), ]
    G2 <- data[which(dataSet$cls==levels(dataSet$cls)[2]), ]
    if(cmpType == 0){
      fc.mat <- G1-G2;
    }else{
      fc.mat <- G2-G1;
    }
    return (fc.mat);
  }else{
    if(dataSet$combined.method){
      data <- dataSet$norm;
      m1 <- colMeans(data[which(dataSet$cls==levels(dataSet$cls)[1]), ]);
      m2 <- colMeans(data[which(dataSet$cls==levels(dataSet$cls)[2]), ]);
      # create a named matrix of sig vars for display
      if(cmpType == 0){
        fc.log <- signif (m1-m2, 5);
      }else{
        fc.log <- signif (m2-m1, 5);
      }
      fc.all <- signif(2^fc.log, 5);
    }else{
      data <- dataSet$row.norm;
      m1 <- colMeans(data[which(dataSet$cls==levels(dataSet$cls)[1]), ]);
      m2 <- colMeans(data[which(dataSet$cls==levels(dataSet$cls)[2]), ]);
      # create a named matrix of sig vars for display
      if(cmpType == 0){
        ratio <- m1/m2;
      }else{
        ratio <- m2/m1;
      }
      fc.all <- signif(ratio, 5);
      fc.log <- signif(log2(ratio), 5);
    }
    names(fc.all)<-names(fc.log)<-colnames(dataSet$norm);
    return(list(fc.all = fc.all, fc.log = fc.log));
  }
}

#####################################
########### t-Tests ################
####################################

# Runs t-tests (or Wilcoxon when nonpar=T) on every feature, keeps those with
# p <= threshp, writes t_test.csv, and stores results in analSet$tt.
Ttests.Anal<-function(nonpar=F, threshp=0.05, paired=FALSE, equal.var=TRUE){
  res <- GetTtestRes(paired, equal.var, nonpar);
  t.stat <- res[,1];
  p.value <- res[,2];
  names(t.stat) <- names(p.value)<-colnames(dataSet$norm);
  p.log <- -log10(p.value);
  fdr.p <- p.adjust(p.value, "fdr");
  inx.imp <- p.value <= threshp;
  sig.t <- t.stat[inx.imp];
  sig.p <- p.value[inx.imp];
  lod<- -log10(sig.p);
  sig.q <-fdr.p[inx.imp];
  sig.mat <- cbind(sig.t, sig.p, lod, sig.q);
  colnames(sig.mat)<-c("t.stat", "p.value", "-log10(p)", "FDR");
  ord.inx <- order(sig.p);
  sig.mat <- sig.mat[ord.inx,];
  sig.mat <- signif(sig.mat, 5);
  if(nonpar){
    tt.nm = "Wilcoxon Rank Test";
  }else{
    tt.nm = "T-Tests";
  }
  write.csv(sig.mat,file="t_test.csv");
  tt<-list (
    tt.nm = tt.nm,
    paired = paired,
    raw.thresh = threshp,
    p.value = sort(p.value),
    p.log = p.log,
    thresh = -log10(threshp), # only used for plot threshold line
    inx.imp = inx.imp,
    sig.mat = sig.mat
  );
  analSet$tt<<-tt;
}

GetSigTable.TT<-function(){
  GetSigTable(analSet$tt$sig.mat, "t-tests");
}

# return a double matrix with 2 columns - p values and lod
GetTTSigMat<-function(){
  return(CleanNumber(analSet$tt$sig.mat));
}
GetTTSigRowNames<-function(){
  rownames(analSet$tt$sig.mat);
}
GetTTSigColNames<-function(){
  colnames(analSet$tt$sig.mat);
}

# plot-support accessors: (index, -log10 p) matrices for significant (Up),
# non-significant (Dn) points and the threshold line (Ln)
GetTtUpMat<-function(){
  lod <- analSet$tt$p.log;
  red.inx<- which(analSet$tt$inx.imp);
  as.matrix(cbind(red.inx, lod[red.inx]));
}
GetTtDnMat<-function(){
  lod <- analSet$tt$p.log;
  blue.inx <- which(!analSet$tt$inx.imp);
  as.matrix(cbind(blue.inx, lod[blue.inx]));
}
GetTtLnMat<-function(){
  lod <- analSet$tt$p.log;
  as.matrix(rbind(c(0, analSet$tt$thresh), c(length(lod)+1,analSet$tt$thresh)));
}
GetTtCmpds<-function(){
  names(analSet$tt$p.log);
}
GetMaxTtInx <- function(){
  which.max(analSet$tt$p.log);
}

# utility method to get p values
# Returns a matrix with one row per feature: column 1 = statistic, column 2 = p.
# For >= 1000 features (parametric case) uses genefilter::rowttests for speed.
GetTtestRes<- function(paired=FALSE, equal.var=TRUE, nonpar=F){
  if(nonpar){
    inx1 <- which(dataSet$cls==levels(dataSet$cls)[1]);
    inx2 <- which(dataSet$cls==levels(dataSet$cls)[2]);
    res <- apply(as.matrix(dataSet$norm), 2, function(x) {
      tmp <- try(wilcox.test(x[inx1], x[inx2], paired = paired));
      if(class(tmp) == "try-error") {
        return(c(NA, NA));
      }else{
        return(c(tmp$statistic, tmp$p.value));
      }
    })
  }else{
    if(ncol(dataSet$norm) < 1000){
      inx1 <- which(dataSet$cls==levels(dataSet$cls)[1]);
      inx2 <- which(dataSet$cls==levels(dataSet$cls)[2]);
      res <- apply(as.matrix(dataSet$norm), 2, function(x) {
        tmp <- try(t.test(x[inx1], x[inx2], paired = paired, var.equal = equal.var));
        if(class(tmp) == "try-error") {
          return(c(NA, NA));
        }else{
          return(c(tmp$statistic, tmp$p.value));
        }
      })
    }else{ # use fast version
      require(genefilter);
      res <- try(rowttests(t(as.matrix(dataSet$norm)), dataSet$cls));
      if(class(res) == "try-error") {
        res <- c(NA, NA);
      }else{
        res <- t(cbind(res$statistic, res$p.value));
      }
    }
  }
  return(t(res));
}

# utility method to perform the univariate analysis automatically
# Jeff note:
# The approach is computationally expensive,and fails more often
# get around: make it lazy unless users request, otherwise the default t-test will also be affected
#
# Per feature: Shapiro-Wilk normality per group decides Wilcoxon "(W)" vs
# t-test (with F-test-based variance check); outputs mean(SD)/p/q/FC/Up-Down
# per feature into univAnalReport.csv plus significant-feature data subsets.
# Reads analSet$tt (must run Ttests.Anal first) and dataSet$norm/orig/cls.
GetUnivReport <- function(){
  paired <- analSet$tt$paired;
  threshp <- analSet$tt$raw.thresh;
  inx1 <- which(dataSet$cls==levels(dataSet$cls)[1]);
  inx2 <- which(dataSet$cls==levels(dataSet$cls)[2]);
  # output list (mean(sd), mean(sd), p-value, FoldChange, Up/Down)
  univStat.mat <- apply(as.matrix(dataSet$norm), 2, function(x) {
    # normality test for each group
    # ks <- ks.test(x[inx1], x[inx2]);
    sw.g1 <- shapiro.test(x[inx1]);
    sw.g2 <- shapiro.test(x[inx2]);
    method <- ifelse( ((sw.g1$p.value <= 0.05) | (sw.g2$p.value <= 0.05)), "(W)","")
    if (method == "(W)") {
      # wilcoxon test
      tmp <- try(wilcox.test(x[inx1], x[inx2], paired = paired));
    } else {
      # t-test; choose equal/unequal variance via F test unless variance is 0
      equal.var <- TRUE;
      if(var(cbind(x[inx1], x[inx2]), na.rm=TRUE) != 0) {
        anal.var <- var.test(x[inx1], x[inx2]);
        equal.var <- ifelse(anal.var$p.value <= 0.05, FALSE, TRUE);
      }
      tmp <- try(t.test(x[inx1], x[inx2], paired = paired, var.equal = equal.var));
    }
    if(class(tmp) == "try-error") {
      return(NA);
    }else{
      mean1 <- mean(x[inx1]);
      mean2 <- mean(x[inx2]);
      sd1 <- sd(x[inx1]);
      sd2 <- sd(x[inx2]);
      p.value <- paste(ifelse(tmp$p.value < 0.0001, "< 0.0001", sprintf("%.4f", tmp$p.value,4))," ", method, sep="");
      p.value.origin <- tmp$p.value;
      # signed fold change: values < 1 reported as negative reciprocal
      foldChange <- mean1 / mean2;
      foldChange <- round(ifelse( foldChange >= 1, foldChange, (-1/foldChange) ), 2);
      upDown <- ifelse(mean1 > mean2, "Up","Down");
      univStat <- c(
        meanSD1 = sprintf("%.3f (%.3f)", mean1, sd1),
        meanSD2 = sprintf("%.3f (%.3f)", mean2, sd2),
        p.value = p.value,
        foldChange = foldChange,
        upDown = upDown,
        p.value.origin = sprintf("%.5f", p.value.origin)
      );
      return(univStat);
    }
  })
  univStat.mat <- as.data.frame(t(univStat.mat));
  # add FDR/q-value (p.value.origin column is a factor here, hence levels()[...])
  q.value <- sprintf("%.4f", p.adjust(p=as.numeric(levels(univStat.mat$p.value.origin))[univStat.mat$p.value.origin], method='fdr'));
  univStat.mat <- cbind(univStat.mat[, c(1,2,3)], q.value, univStat.mat[, c(4,5)], univStat.mat[,6]);
  names(univStat.mat)[1] <- paste("Mean (SD) of ", levels(dataSet$cls)[1], sep='');
  names(univStat.mat)[2] <- paste("Mean (SD) of ", levels(dataSet$cls)[2], sep='');
  names(univStat.mat)[3] <- "p-value";
  names(univStat.mat)[4] <- "q-value (FDR)";
  names(univStat.mat)[5] <- "Fold Change";
  names(univStat.mat)[6] <- paste(levels(dataSet$cls)[1],"/", levels(dataSet$cls)[2], sep='');
  names(univStat.mat)[7] <- "p.value.origin";
  univStat.mat <- cbind(Name=rownames(univStat.mat), univStat.mat);
  rownames(univStat.mat) <- NULL
  ## generate univariate report file (univAnalReport.csv).
  ## mixed with t-test and wilcoxon test depend on each metabolite's distribution
  univAnal.mat <- univStat.mat;
  note.str <- paste("\n Univariate Analysis Result for each variable/metabolite\n\n", "[NOTE]\n", " p-value is calculated with t-test as a default.\n", " p-value with (W) is calculated by the Wilcoxon Mann Whitney test\n\n\n", sep='');
  cat(note.str, file="univAnalReport.csv", append=FALSE);
  write.table(univAnal.mat, file="univAnalReport.csv", append=TRUE, sep=",", row.names=FALSE);
  ## generate subset with the threshold (p-value)
  sigones <- which(as.numeric(as.character(univAnal.mat$p.value.origin)) <= threshp);
  sigDataSet.orig <- cbind(SampleID=rownames(dataSet$orig), Label=dataSet$cls, dataSet$orig[,c(sigones)])
  sigDataSet.norm <- cbind(SampleID=rownames(dataSet$orig), Label=dataSet$cls, dataSet$norm[,c(sigones)])
  write.table(sigDataSet.orig, file=paste("data_subset_orig_p", threshp, ".csv", sep=''), append=FALSE, sep=",", row.names=FALSE);
  write.table(sigDataSet.norm, file=paste("data_subset_norm_p", threshp, ".csv", sep=''), append=FALSE, sep=",", row.names=FALSE);
}

# "true"/"false" string flag: does the significant t-test table contain Inf/NaN?
ContainInfiniteTT<-function(){
  if(sum(!is.finite(analSet$tt$sig.mat))>0){
    return("true");
  }
  return("false");
}

#####################################
########### Volcano ################
####################################

# Volcano analysis: combines t-test significance (y axis) with fold-change
# thresholds (x axis). Paired mode works on pair counts instead of log FC.
# Writes volcano.csv and stores everything needed for plotting in
# analSet$volcano (fc.log.uniq is jittered to break ties for display).
Volcano.Anal<-function(paired=FALSE,fcthresh,cmpType, percent.thresh, nonpar=F, threshp, equal.var=TRUE){
  #### t-tests
  t.res <- GetTtestRes(paired, equal.var, nonpar);
  p.value <- t.res[,2];
  inx.p <- p.value <= threshp;
  p.log <- -log10(p.value);
  ### fold change analysis
  # make sure threshold is above 1
  fcthresh = ifelse(fcthresh>1, fcthresh, 1/fcthresh);
  max.xthresh <- log(fcthresh,2);
  min.xthresh <- log(1/fcthresh,2);
  if(paired){
    fc.mat <- GetFC(T, cmpType);
    count.thresh<-round(nrow(dataSet$norm)/2*percent.thresh);
    mat.up <- fc.mat >= max.xthresh;
    mat.down <- fc.mat <= min.xthresh;
    count.up<-apply(mat.up, 2, sum);
    count.down<-apply(mat.down, 2, sum);
    fc.all<-rbind(count.up, count.down);
    inx.up <- count.up>=count.thresh;
    inx.down <- count.down>=count.thresh;
    colnames(fc.all)<-colnames(dataSet$norm);
    rownames(fc.all)<-c("Count (up)", "Count (down)");
    fc.log <- NULL; # dummy, not applicable for counts
    # replace the count.thresh for plot
    max.xthresh <- count.thresh;
    min.xthresh <- -count.thresh;
  }else{
    res <- GetFC(F, cmpType);
    # create a named matrix of sig vars for display
    fc.log <- res$fc.log;
    fc.all <- res$fc.all;
    inx.up = fc.log > max.xthresh;
    inx.down = fc.log < min.xthresh;
  }
  # create named sig table for display
  inx.imp<-(inx.up | inx.down) & inx.p;
  if(paired){
    sig.var<-cbind(fc.all[1,][inx.imp,drop=F], fc.all[2,][inx.imp, drop=F], p.value[inx.imp, drop=F], p.log[inx.imp, drop=F]);
    colnames(sig.var)<-c("Counts (up)","Counts (down)", "p.value", "-log10(p)");
    # first order by count difference, then by log(p)
    dif.count<-abs(sig.var[,1]-sig.var[,2]);
    ord.inx<-order(dif.count, sig.var[,4], decreasing=T);
    sig.var<-sig.var[ord.inx,,drop=F];
    sig.var[,c(3,4)]<-signif(sig.var[,c(3,4)],5);
  }else{
    sig.var<-cbind(fc.all[inx.imp,drop=F], fc.log[inx.imp,drop=F], p.value[inx.imp,drop=F], p.log[inx.imp,drop=F]);
    colnames(sig.var)<-c("FC", "log2(FC)", "p.value", "-log10(p)");
    # first order by log(p), then by log(FC)
    ord.inx<-order(sig.var[,4], abs(sig.var[,2]), decreasing=T);
    sig.var<-sig.var[ord.inx,,drop=F];
    sig.var<-signif(sig.var,5);
  }
  fileName <- "volcano.csv";
  write.csv(signif(sig.var,5),file=fileName);
  volcano<-list (
    raw.threshx = fcthresh,
    raw.threshy = threshp,
    paired = paired,
    max.xthresh = max.xthresh,
    min.xthresh = min.xthresh,
    thresh.y = -log10(threshp),
    fc.all = fc.all,
    fc.log = fc.log,
    fc.log.uniq = jitter(fc.log),
    inx.up = inx.up,
    inx.down = inx.down,
    p.log = p.log,
    inx.p = inx.p,
    sig.mat = sig.var
  );
  analSet$volcano<<-volcano;
}

# now try to label the interesting points
# it is defined by the following rules
# need to be signficant (sig.inx) and
# or 2. top 5 p
# or 2. top 5 left
# or 3. top 5 right
#
# Renders the volcano plot to `imgName` via Cairo, highlighting significant
# points and labelling the top candidates per the rules above.
PlotVolcano<-function(imgName, format="png", dpi=72, width=NA){
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 10;
  }else if(width == 0){
    w <- 8;
    imgSet$volcano<<-imgName;
  }else{
    w <- width;
  }
  h <- w*6/10;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mar=c(5,5,3,4));
  vcn<-analSet$volcano;
  MyGray <- rgb(t(col2rgb("black")), alpha=40, maxColorValue=255);
  MyHighlight <- rgb(t(col2rgb("magenta")), alpha=80, maxColorValue=255);
  if(vcn$paired){
    xlim<-c(-nrow(dataSet$norm)/2, nrow(dataSet$norm)/2)*1.2;
    # merge fc.all two rows into one, bigger one win
    fc.all <- apply(vcn$fc.all, 2, function(x){ if(x[1] > x[2]){return(x[1])}else{return(-x[2])}})
    hit.inx <- vcn$inx.p & (vcn$inx.up | vcn$inx.down);
    plot(fc.all, vcn$p.log, xlim=xlim, pch=20, cex=ifelse(hit.inx, 1.2, 0.8), col = ifelse(hit.inx, MyHighlight, MyGray), xlab="Count of Significant Pairs", ylab="-log10(p)");
    sig.upInx <- vcn$inx.p & vcn$inx.up;
    p.topInx <- GetTopInx(vcn$p.log, 5, T) & vcn$inx.up;
    fc.rtInx <- GetTopInx(vcn$fc.all[1,], 5, T);
    lblInx <- p.topInx & sig.upInx & fc.rtInx;
    if(sum(lblInx, na.rm=T) > 0){
      text.lbls<-substr(colnames(dataSet$norm)[lblInx],1,14) # some names may be too long
      text(vcn$fc.all[1,lblInx], vcn$p.log[lblInx],labels=text.lbls, pos=4, col="blue", srt=30, xpd=T, cex=0.8);
    }
    sig.dnInx <- vcn$inx.p & vcn$inx.down;
    p.topInx <- GetTopInx(vcn$p.log, 5, T) & vcn$inx.down;
    fc.leftInx <- GetTopInx(vcn$fc.all[2,], 5, T) & vcn$inx.down;
    lblInx <-p.topInx & sig.dnInx & fc.leftInx;
    if(sum(lblInx, na.rm=T) > 0){
      text.lbls<-substr(colnames(dataSet$norm)[lblInx],1,14) # some names may be too long
      text(-vcn$fc.all[2,lblInx], vcn$p.log[lblInx],labels=text.lbls, pos=2, col="blue", srt=-30, xpd=T, cex=0.8);
    }
  }else{
    imp.inx<-(vcn$inx.up | vcn$inx.down) & vcn$inx.p;
    plot(vcn$fc.log, vcn$p.log, pch=20, cex=ifelse(imp.inx, 1.2, 0.7), col = ifelse(imp.inx, MyHighlight, MyGray), xlab="log2 (FC)", ylab="-log10(p)");
    sig.inx <- imp.inx;
    p.topInx <- GetTopInx(vcn$p.log, 5, T) & (vcn$inx.down);
    fc.leftInx <- GetTopInx(vcn$fc.log, 5, F);
    lblInx <- sig.inx & (p.topInx | fc.leftInx);
    if(sum(lblInx, na.rm=T) > 0){
      text.lbls<-substr(colnames(dataSet$norm)[lblInx],1,14) # some names may be too long
      text(vcn$fc.log[lblInx], vcn$p.log[lblInx],labels=text.lbls, pos=2, col="blue", srt=-30, xpd=T, cex=0.8);
    }
    p.topInx <- GetTopInx(vcn$p.log, 5, T) & (vcn$inx.up);
    fc.rtInx <- GetTopInx(vcn$fc.log, 5, T);
    lblInx <- sig.inx & (p.topInx | fc.rtInx);
    if(sum(lblInx, na.rm=T) > 0){
      text.lbls<-substr(colnames(dataSet$norm)[lblInx],1,14) # some names may be too long
      text(vcn$fc.log[lblInx], vcn$p.log[lblInx],labels=text.lbls, pos=4, col="blue", srt=30, xpd=T, cex=0.8);
    }
  }
  abline (v = vcn$max.xthresh, lty=3);
  abline (v = vcn$min.xthresh, lty=3);
  abline (h = vcn$thresh.y, lty=3);
  axis(4); # added by Beomsoo
  #dev.off();
}

# plot-support accessors: (x, y) coordinate matrices for non-significant (Dn)
# and significant (Up) points; uses jittered fc.log.uniq so points are untied
GetVolcanoDnMat<- function(){
  vcn<-analSet$volcano;
  imp.inx<-(vcn$inx.up | vcn$inx.down) & vcn$inx.p;
  blue.inx<- which(!imp.inx);
  # make sure they are not tied
  xs <- vcn$fc.log.uniq[blue.inx]
  ys <- vcn$p.log[blue.inx];
  as.matrix(cbind(xs, ys));
}
GetVolcanoUpMat<- function(){
  vcn<-analSet$volcano;
  imp.inx<-(vcn$inx.up | vcn$inx.down) & vcn$inx.p;
  red.inx<- which(imp.inx);
  # make sure they are not tied
  xs <- vcn$fc.log.uniq[red.inx]
  ys <- vcn$p.log[red.inx];
  as.matrix(cbind(xs, ys));
}
# vertical threshold lines (left/right) and horizontal p threshold line
GetVolcanoVlMat<- function(){
  vcn<-analSet$volcano;
  limy <- GetExtendRange(vcn$fc.log);
  as.matrix(rbind(c(vcn$min.xthresh, limy[1]), c(vcn$min.xthresh,limy[2])));
}
GetVolcanoVrMat<- function(){
  vcn<-analSet$volcano;
  limy <- GetExtendRange(vcn$fc.log);
  as.matrix(rbind(c(vcn$max.xthresh, limy[1]), c(vcn$max.xthresh,limy[2])));
}
GetVolcanoHlMat<- function(){
  vcn<-analSet$volcano;
  limx <- GetExtendRange(vcn$fc.log);
  as.matrix(rbind(c(limx[1], vcn$thresh.y), c(limx[2],vcn$thresh.y)));
}
GetVolcanoRangeX<- function(){
  range(analSet$volcano$fc.log.uniq);
}
GetVolcanoCmpds<- function(){
  names(analSet$volcano$fc.log);
}
GetVolcanoCmpdInxs<-function(){
  analSet$volcano$fc.log.uniq
}

# get indices of top n largest/smallest number
# Returns a logical vector marking the top-n entries of `vec`
# (largest when dec=T, smallest when dec=F).
GetTopInx <- function(vec, n, dec=T){
  inx <- order(vec, decreasing = dec)[1:n];
  # convert to T/F vec
  vec<-rep(F, length=length(vec));
  vec[inx] <- T;
  return (vec);
}

# accessors over analSet$volcano$sig.mat
GetSigTable.Volcano<-function(){
  GetSigTable(analSet$volcano$sig.mat, "volcano plot");
}
GetVolcanoSigMat<-function(){
  return(CleanNumber(analSet$volcano$sig.mat));
}
GetVolcanoSigRowNames<-function(){
  rownames(analSet$volcano$sig.mat);
}
GetVolcanoSigColNames<-function(){
  colnames(analSet$volcano$sig.mat);
}
ContainInfiniteVolcano<-function(){
  if(sum(!is.finite(analSet$volcano$sig.mat))>0){
    return("true");
  }
  return("false");
}

#################################################################
################ One-way ANOVA ##################################
#################################################################

# perform anova and only return p values and MSres (for Fisher's LSD)
aof <- function(x, cls = dataSet$cls) {
  aov(x ~ cls);
}

# perform Kruskal Wallis Test
kwtest <- function(x, cls = dataSet$cls) {
  kruskal.test(x ~ cls);
}

# Fisher's LSD post-hoc test on a fitted aov object
# (LSD.test presumably comes from the agricolae package — TODO confirm imports)
FisherLSD<-function(aov.obj, thresh){
  LSD.test(aov.obj,"cls", alpha=thresh)
}

# return only the signicant comparison names
parseTukey <- function(tukey, cut.off){
  inx <- tukey$cls[,"p adj"] <= cut.off;
  paste(rownames(tukey$cls)[inx], collapse="; ");
}

# return only the signicant comparison names
parseFisher <- function(fisher, cut.off){
  inx <- fisher[,"pvalue"] <= cut.off;
  paste(rownames(fisher)[inx], collapse="; ");
}
# One-way ANOVA (or Kruskal-Wallis when nonpar=TRUE) across all features.
# Stores the result in analSet$aov and writes "anova_posthoc.csv".
#   nonpar   - use the non-parametric Kruskal-Wallis test instead of ANOVA
#   thresh   - raw p-value cut-off for calling a feature significant
#   post.hoc - "fisher" (Fisher's LSD) or "tukey" (Tukey's HSD); parametric only
# If nothing passes the cut-off, the threshold is relaxed to keep the top
# 20% of features (capped at 50) so downstream tables are never empty.
ANOVA.Anal<-function(nonpar=F, thresh=0.05, post.hoc="fisher"){
  if(nonpar){
    aov.nm <- "Kruskal Wallis Test";
    anova.res<-apply(as.matrix(dataSet$norm), 2, kwtest);
    #extract all p values
    res <- unlist(lapply(anova.res, function(x) {c(x$statistic, x$p.value)}));
    res <- data.frame(matrix(res, nrow=length(anova.res), byrow=T), stringsAsFactors=FALSE);
    fstat <- res[,1];
    p.value <- res[,2];
    names(fstat) <- names(p.value)<-colnames(dataSet$norm);
    fdr.p <- p.adjust(p.value, "fdr");
    inx.imp <- p.value <= thresh;
    if(sum(inx.imp) == 0){ # no sig features!
      # relax the threshold to the top 20% (at most 50) of features
      cutpt <- round(0.2*length(p.value));
      cutpt <- ifelse(cutpt>50, 50, cutpt);
      inx <- which(rank(p.value) == cutpt);
      thresh <- p.value[inx];
      inx.imp <- p.value <= thresh;
    }
    sig.f <- fstat[inx.imp];
    sig.p <- p.value[inx.imp];
    fdr.p <- fdr.p[inx.imp];
    # no post-hoc comparison for the non-parametric test, hence the 'NA' column
    sig.mat <- data.frame(signif(sig.f,5), signif(sig.p,5), signif(-log10(sig.p),5), signif(fdr.p,5), 'NA');
    rownames(sig.mat) <- names(sig.p);
    colnames(sig.mat) <- c("chi.squared", "p.value", "-log10(p)", "FDR", "Post-Hoc");
    # order the result simultaneously
    ord.inx <- order(sig.p, decreasing = FALSE);
    sig.mat <- sig.mat[ord.inx,];
    fileName <- "anova_posthoc.csv";
    my.mat <- sig.mat[,1:4];
    colnames(my.mat) <- c("chi_squared", "pval_KW", "-log10(p)", "FDR");
    write.csv(my.mat,file=fileName);
  }else{
    aov.nm <- "One-way ANOVA";
    aov.res<-apply(as.matrix(dataSet$norm), 2, aof);
    anova.res<-lapply(aov.res, anova);
    #extract all p values
    res<-unlist(lapply(anova.res, function(x) { c(x["F value"][1,], x["Pr(>F)"][1,])}));
    res <- data.frame(matrix(res, nrow=length(aov.res), byrow=T), stringsAsFactors=FALSE);
    fstat <- res[,1];
    p.value <- res[,2];
    names(fstat) <- names(p.value)<-colnames(dataSet$norm);
    fdr.p <- p.adjust(p.value, "fdr");
    # do post-hoc only for significant entries
    inx.imp <- p.value <= thresh;
    if(sum(inx.imp) == 0){ # no sig features with default thresh
      # readjust threshold to top 20% or top 50
      cutpt <- round(0.2*length(p.value));
      cutpt <- ifelse(cutpt>50, 50, cutpt);
      inx <- which(rank(p.value) == cutpt);
      thresh <- p.value[inx];
      inx.imp <- p.value <= thresh;
    }
    aov.imp <- aov.res[inx.imp];
    sig.f <- fstat[inx.imp];
    sig.p <- p.value[inx.imp];
    fdr.p <- fdr.p[inx.imp];
    cmp.res <- NULL;
    post.nm <- NULL;
    if(post.hoc=="tukey"){
      tukey.res<-lapply(aov.imp, TukeyHSD, conf.level=1-thresh);
      cmp.res <- unlist(lapply(tukey.res, parseTukey, cut.off=thresh));
      post.nm = "Tukey's HSD";
    }else{
      fisher.res<-lapply(aov.imp, FisherLSD, thresh);
      cmp.res <- unlist(lapply(fisher.res, parseFisher, cut.off=thresh));
      post.nm = "Fisher's LSD";
    }
    # create the result dataframe,
    # note, the last column is string, not double
    sig.mat <- data.frame(signif(sig.f,5), signif(sig.p,5), signif(-log10(sig.p),5), signif(fdr.p,5), cmp.res);
    rownames(sig.mat) <- names(sig.p);
    colnames(sig.mat) <- c("f.value", "p.value", "-log10(p)", "FDR", post.nm);
    # order the result simultaneously
    ord.inx <- order(sig.p, decreasing = FALSE);
    sig.mat <- sig.mat[ord.inx,];
    fileName <- "anova_posthoc.csv";
    write.csv(sig.mat,file=fileName);
  }
  # note: this local 'aov' shadows stats::aov inside this function only
  aov<-list (
    aov.nm = aov.nm,
    raw.thresh = thresh,
    thresh = -log10(thresh), # only used for plot threshold line
    p.value = p.value,
    p.log = -log10(p.value),
    inx.imp = inx.imp,
    post.hoc = post.hoc,
    sig.mat = sig.mat
  );
  analSet$aov<<-aov;
  return(1);
}

# Plot -log10(p) per feature for the ANOVA/KW result: significant features
# red, the rest green, with a dotted line at the threshold.
PlotANOVA<-function(imgName, format="png", dpi=72, width=NA){
  lod <- analSet$aov$p.log;
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    w <- 7;
    imgSet$anova<<-imgName; # width==0 marks the default image for the report
  }else{
    w <- width;
  }
  h <- w*6/9;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  plot(lod, ylab="-log10(p)", xlab = GetVariableLabel(), main=analSet$aov$aov.nm, type="n");
  red.inx<- which(analSet$aov$inx.imp);
  blue.inx <- which(!analSet$aov$inx.imp);
  points(red.inx, lod[red.inx], bg="red", cex=1.2, pch=21);
  points(blue.inx, lod[blue.inx], bg="green", pch=21);
  abline (h=analSet$aov$thresh, lty=3);
  #dev.off();
}

# Significance matrix (numeric columns only) for the web layer.
GetAovSigMat<-function(){
return(CleanNumber(as.matrix(analSet$aov$sig.mat[, 1:4])));
}

GetAovSigRowNames<-function(){
  rownames(analSet$aov$sig.mat);
}

GetAovSigColNames<-function(){
  colnames(analSet$aov$sig.mat[, 1:4]);
}

# Post-hoc comparison strings (5th column of the significance matrix).
GetAovPostHocSig<-function(){
  analSet$aov$sig.mat[,5];
}

GetSigTable.Anova<-function(){
  GetSigTable(analSet$aov$sig.mat, "One-way ANOVA and post-hoc analysis");
}

# (index, -log10 p) pairs for the significant features (red points).
GetAnovaUpMat<-function(){
  lod <- analSet$aov$p.log;
  red.inx<- which(analSet$aov$inx.imp);
  as.matrix(cbind(red.inx, lod[red.inx]));
}

# (index, -log10 p) pairs for the non-significant features.
GetAnovaDnMat<-function(){
  lod <- analSet$aov$p.log;
  blue.inx <- which(!analSet$aov$inx.imp);
  as.matrix(cbind(blue.inx, lod[blue.inx]));
}

# End points of the horizontal threshold line spanning the whole x-range.
GetAnovaLnMat<-function(){
  lod <- analSet$aov$p.log;
  as.matrix(rbind(c(0, analSet$aov$thresh), c(length(lod)+1,analSet$aov$thresh)));
}

GetAnovaCmpds<-function(){
  names(analSet$aov$p.log);
}

# Index of the most significant feature (largest -log10 p).
GetMaxAnovaInx <- function(){
  which.max(analSet$aov$p.log);
}

# Boxplot of the normalized values of one compound across classes;
# returns the generated image file name.
PlotCmpdView<-function(cmpdNm, format="png", dpi=72, width=NA){
  imgName <- gsub("\\/", "_", cmpdNm); # sanitize "/" so the name is a valid file name
  imgName <- paste(imgName, "_dpi", dpi, ".", format, sep="");
  Cairo(file = imgName, dpi=dpi, width=240, height=240, type=format, bg="transparent");
  par(mar=c(4,3,1,2), oma=c(0,0,1,0));
  boxplot(dataSet$norm[, cmpdNm]~dataSet$cls,las=2, col= unique(GetColorSchema()));
  title(main=cmpdNm, out=T);
  #dev.off();
  return(imgName);
}

# change to use dataSet$proc instead of dataSet$orig in
# case of too many NAs
# Summary plot for one compound: original (mean +/- SD barplot) next to the
# normalized (boxplot) values for plain designs, or per-level boxplots for
# time-series / two-factor designs. Returns the image file name.
PlotCmpd<-function(cmpdNm, format="png", dpi=72, width=NA){
  imgName <- gsub("\\/", "_", cmpdNm);
  imgName <- paste(imgName, "_dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else{
    w <- width;
  }
  if(substring(dataSet$format,4,5)!="ts"){
    # non time-series design
    Cairo(file = imgName, unit="in", dpi=dpi, width=w, height= w*5/9, type=format, bg="white");
    par(mar=c(4,4,2,2), mfrow = c(1,2), oma=c(0,0,2,0));
    mns <- by(as.numeric(dataSet$proc[, cmpdNm]), dataSet$proc.cls, mean, na.rm=T);
    sds <- by(as.numeric(dataSet$proc[, cmpdNm]), dataSet$proc.cls, sd, na.rm=T);
    ups <- mns + sds;
    dns <- mns - sds;
    # all concentration need start from 0
    y <- c(0, dns, mns, ups);
    rg <- range(y) + 0.05 * diff(range(y)) * c(-1, 1)
    pt <- pretty(y)
    axp=c(min(pt), max(pt[pt <= max(rg)]),length(pt[pt <= max(rg)]) - 1);
    # ymk <- pretty(c(0,ymax));
    x <- barplot(mns, col= unique(GetColorSchema()), las=2, yaxp=axp, ylim=range(pt));
    arrows(x, dns, x, ups, code=3, angle=90, length=.1); # +/- SD whiskers
    axis(1, at=x, col="white", col.tick="black", labels=F);
    box();
    mtext("Original Conc.", line=1);
    boxplot(dataSet$norm[, cmpdNm]~dataSet$cls,las=2, col= unique(GetColorSchema()));
    mtext("Normalized Conc.", line=1);
    title(main=cmpdNm, out=T);
    #dev.off();
  }else if(dataSet$design.type =="time0"){
    Cairo(file = imgName, unit="in", dpi=dpi, width=8, height= 6, type=format, bg="white");
    plotProfile(cmpdNm);
    #dev.off();
  }else{
    if(dataSet$design.type =="time"){ # time trend within phenotype
      out.fac <- dataSet$exp.fac;
      in.fac <- dataSet$time.fac;
      xlab="Time";
    }else{ # factor a split within factor b
      out.fac <- dataSet$facB;
      in.fac <- dataSet$facA;
      xlab=dataSet$facA.lbl;
    }
    # two images per row
    img.num <- length(levels(out.fac));
    row.num <- ceiling(img.num/2)
    if(row.num == 1){
      h <- w*5/9;
    }else{
      h <- w*0.5*row.num;
    }
    Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
    par(mar=c(3,4,4,2), mfrow=c(row.num, 2));
    # make sure all at the same range
    ylim.ext <- GetExtendRange (dataSet$norm[, cmpdNm], 12);
    for(lv in levels(out.fac)){
      inx <- out.fac == lv;
      dat <- dataSet$norm[inx, cmpdNm];
      cls <- in.fac[inx];
      boxplot(dat ~ cls, col="#0000ff22", ylim=ylim.ext, outline=FALSE, boxwex=c(0.5, 0.5), xlab=xlab, ylab="Abundance", main=lv);
      stripchart(dat ~ cls, method = "jitter", ylim=ylim.ext, vertical=T, add = T, pch=19, cex=0.7, names = c("",""));
    }
    #dev.off();
  }
  return(imgName);
}

############################################################################################
############################################################################################
############################################################################################
############################################################################################

##################################################
## R script for MetaboAnalyst
## Description: perform PCA/PLS-DA/OPLS-DA
##
## Author: Jeff Xia, jeff.xia@mcgill.ca
## McGill University, Canada
##
## License: GNU GPL (>= 2)
###################################################

############################
########### PCA #############
#############################

# perform PCA analysis on the normalized data; results are stored in the
# analSet$pca global and written to pca_score.csv / pca_loadings.csv
PCA.Anal<-function(){
  pca<-prcomp(dataSet$norm, center=T, scale=F);
  # obtain variance explained
  sum.pca<-summary(pca);
  imp.pca<-sum.pca$importance;
  std.pca<-imp.pca[1,]; # standard deviation
  var.pca<-imp.pca[2,]; # variance explained by each PC
  cum.pca<-imp.pca[3,]; # cumulative variance explained
  # store the item to the pca object
  analSet$pca<<-append(pca, list(std=std.pca, variance=var.pca, cum.var=cum.pca));
  write.csv(signif(analSet$pca$x,5), file="pca_score.csv");
  write.csv(signif(analSet$pca$rotation,5), file="pca_loadings.csv");
}

# Pairwise scatter-plot matrix of the first pc.num PCs.
# format: png, tiff, pdf, ps, svg
PlotPCAPairSummary<-function(imgName, format="png", dpi=72, width=NA, pc.num){
  pclabels <- paste("PC", 1:pc.num, "\n", round(100*analSet$pca$variance[1:pc.num],1), "%");
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 10;
  }else if(width == 0){
    w <- 8;
    imgSet$pca.pair <<- imgName; # width==0 marks the default report image
  }else{
    w <- width;
  }
  h <- w;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  if(dataSet$cls.type == "disc"){
    pairs(analSet$pca$x[,1:pc.num], col=GetColorSchema(), pch=as.numeric(dataSet$cls)+1, labels=pclabels);
  }else{
    pairs(analSet$pca$x[,1:pc.num], labels=pclabels);
  }
  #dev.off();
}

# scree plot: per-PC variance explained (blue) and cumulative variance
# (green) for the first scree.num components
PlotPCAScree<-function(imgName, format="png", dpi=72, width=NA, scree.num){
  stds <-analSet$pca$std[1:scree.num];
  pcvars<-analSet$pca$variance[1:scree.num];
  cumvars<-analSet$pca$cum.var[1:scree.num];
  # pad the y-range by 10%, clamped to [0, 1]
  ylims <- range(c(pcvars,cumvars));
  extd<-(ylims[2]-ylims[1])/10
  miny<- ifelse(ylims[1]-extd>0, ylims[1]-extd, 0);
  maxy<- ifelse(ylims[2]+extd>1, 1.0, ylims[2]+extd);
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 10;
  }else if(width == 0){
    w <- 8;
    imgSet$pca.scree<<-imgName;
  }else{
    w <- width;
  }
  h <- w*2/3;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mar=c(5,5,6,3));
  plot(pcvars, type='l', col='blue', main='Scree plot', xlab='PC index', ylab='Variance explained', ylim=c(miny, maxy), axes=F)
  text(pcvars, labels =paste(100*round(pcvars,3),'%'), adj=c(-0.3, -0.5), srt=45, xpd=T)
  points(pcvars, col='red');
  lines(cumvars, type='l', col='green')
  text(cumvars, labels =paste(100*round(cumvars,3),'%'), adj=c(-0.3, -0.5), srt=45, xpd=T)
  points(cumvars, col='red');
  abline(v=1:scree.num, lty=3);
  axis(2);
  axis(1, 1:length(pcvars), 1:length(pcvars));
  #dev.off();
}

# 2D score plot for PCs pcx vs pcy with per-class confidence ellipses at
# level 'reg'; show=1 adds sample labels, grey.scale=1 renders in B/W.
PlotPCA2DScore <- function(imgName, format="png", dpi=72, width=NA, pcx, pcy, reg = 0.95, show=1, grey.scale = 0){
  xlabel = paste("PC",pcx, "(", round(100*analSet$pca$variance[pcx],1), "%)");
  ylabel = paste("PC",pcy, "(", round(100*analSet$pca$variance[pcy],1), "%)");
  pc1 = analSet$pca$x[, pcx];
  pc2 = analSet$pca$x[, pcy];
  text.lbls<-substr(names(pc1),1,14) # some names may be too long
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    imgSet$pca.score2d<<-imgName;
    w <- 7.2;
  }else{
    w <- width;
  }
  h <- w;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  suppressMessages(require('ellipse'));
  op<-par(mar=c(5,5,3,3));
  if(dataSet$cls.type == "disc"){
    # obtain ellipse points to the scatter plot for each category
    lvs <- levels(dataSet$cls);
    pts.array <- array(0, dim=c(100,2,length(lvs)));
    for(i in 1:length(lvs)){
      inx <-dataSet$cls == lvs[i];
      groupVar<-var(cbind(pc1[inx],pc2[inx]), na.rm=T);
groupMean<-cbind(mean(pc1[inx], na.rm=T),mean(pc2[inx], na.rm=T));
      pts.array[,,i] <- ellipse(groupVar, centre = groupMean, level = reg, npoints=100);
    }
    # pad the axis ranges so the ellipses fit inside the plot region
    xrg <- range (pc1, pts.array[,1,]);
    yrg <- range (pc2, pts.array[,2,]);
    x.ext<-(xrg[2]-xrg[1])/12;
    y.ext<-(yrg[2]-yrg[1])/12;
    xlims<-c(xrg[1]-x.ext, xrg[2]+x.ext);
    ylims<-c(yrg[1]-y.ext, yrg[2]+y.ext);
    cols <- GetColorSchema(grey.scale==1);
    uniq.cols <- unique(cols);
    # NOTE(review): 'color=' is not a graphical parameter (presumably meant
    # 'col='); harmless here because type='n' draws nothing -- confirm.
    plot(pc1, pc2, xlab=xlabel, xlim=xlims, ylim=ylims, ylab=ylabel, type='n', main="Scores Plot", color=cols, pch=as.numeric(dataSet$cls)+1);
    ## added
    grid(col = "lightgray", lty = "dotted", lwd = 1);
    # make sure name and number of the same order DO NOT USE levels, which may be different
    legend.nm <- unique(as.character(dataSet$cls));
    ## uniq.cols <- unique(cols);
    ## BHAN: when same color is choosen; it makes an error
    if ( length(uniq.cols) > 1 ) {
      names(uniq.cols) <- legend.nm;
    }
    # draw ellipse
    for(i in 1:length(lvs)){
      if (length(uniq.cols) > 1) {
        polygon(pts.array[,,i], col=adjustcolor(uniq.cols[lvs[i]], alpha=0.25), border=NA);
      } else {
        polygon(pts.array[,,i], col=adjustcolor(uniq.cols, alpha=0.25), border=NA);
      }
      if(grey.scale) {
        lines(pts.array[,,i], col=adjustcolor("black", alpha=0.5), lty=2);
      }
    }
    pchs <- GetShapeSchema(show, grey.scale);
    if(grey.scale) {
      cols <- rep("black", length(cols));
    }
    if(show == 1){
      # labelled points
      text(pc1, pc2, label=text.lbls, pos=4, xpd=T, cex=0.75);
      points(pc1, pc2, pch=pchs, col=cols);
    }else{
      if(length(uniq.cols) == 1){
        points(pc1, pc2, pch=pchs, col=cols, cex=1.0);
      }else{
        if(grey.scale == 1 | (exists("shapeVec") && all(shapeVec>0))){
          points(pc1, pc2, pch=pchs, col=cols, cex=1.8);
        }else{
          points(pc1, pc2, pch=21, bg=cols, cex=2);
        }
      }
    }
    uniq.pchs <- unique(pchs);
    if(grey.scale) {
      uniq.cols <- "black";
    }
    legend("topright", legend = legend.nm, pch=uniq.pchs, col=uniq.cols);
  }else{
    # continuous class labels: plain scatter with sample names
    plot(pc1, pc2, xlab=xlabel, ylab=ylabel, type='n', main="Scores Plot");
    points(pc1, pc2, pch=15, col="magenta");
    text(pc1, pc2, label=text.lbls, pos=4, col ="blue", xpd=T, cex=0.8);
  }
  par(op);
  #dev.off();
}

# Axis limits (par("usr")) of the last PCA loading plot.
GetPCALoadAxesSpec<-function(){
  pca.axis.lims;
}

GetPCALoadCmpds<- function(){
  names(analSet$pca$load.x.uniq);
}

GetPCALoadCmpdInxs<-function(){
  analSet$pca$load.x.uniq;
}

GetPCALoadMat <- function(){
  as.matrix(cbind(analSet$pca$load.x.uniq, analSet$pca$imp.loads[,2]));
}

# plot PCA loadings and also set up the matrix for display
PlotPCALoading<-function(imgName, format="png", dpi=72, width=NA, inx1, inx2, plotType, lbl.feat=1){
  loadings<-signif(as.matrix(cbind(analSet$pca$rotation[,inx1],analSet$pca$rotation[,inx2])),5);
  ldName1<-paste("Loadings", inx1);
  ldName2<-paste("Loadings", inx2);
  colnames(loadings)<-c(ldName1, ldName2);
  # jitter x so the web display gets a unique position per compound
  load.x.uniq <- jitter(loadings[,1]);
  names(load.x.uniq) <- rownames(loadings);
  analSet$pca$load.x.uniq <<- load.x.uniq;
  analSet$pca$imp.loads<<-loadings; # set up the loading matrix
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    w <- 7.2;
    imgSet$pca.loading<<-imgName;
  }else{
    w <- width;
  }
  h <- w;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  if(plotType=="scatter"){
    par(mar=c(6,5,2,6));
    plot(loadings[,1],loadings[,2], las=2, xlab=ldName1, ylab=ldName2);
    pca.axis.lims <<- par("usr"); # x1, x2, y1 ,y2
    grid(col = "lightgray", lty = "dotted", lwd = 1);
    points(loadings[,1],loadings[,2], pch=19, col="magenta");
    if(lbl.feat > 0){
      text(loadings[,1],loadings[,2], labels=substr(rownames(loadings), 1, 12), pos=4, col="blue", xpd=T);
    }
  }else{ # barplot
    layout(matrix(c(1,1,2,2,2), nrow=5, byrow=T), respect = FALSE)
    cmpd.nms <- substr(rownames(loadings), 1, 14);
    hlims <- c(min(loadings[,1], loadings[,2]), max(loadings[,1], loadings[,2]));
    par(mar=c(1,4,4,1));
    barplot(loadings[,1], names.arg=NA, las=2, ylim=hlims, main =ldName1);
    par(mar=c(10,4,3,1));
    barplot(loadings[,2], names.arg=cmpd.nms, las=2, cex.names=1.0, ylim=hlims, main =ldName2);
  }
  #dev.off();
}

# Biplot, set xpd = T to plot outside margin
# Biplot of scores and loadings for PCs inx1/inx2; scores are divided by,
# and loadings multiplied by, sdev * sqrt(n) (same scaling as
# stats::biplot.prcomp with default scale = 1).
PlotPCABiplot<-function(imgName, format="png", dpi=72, width=NA, inx1, inx2){
  choices = c(inx1, inx2);
  scores<-analSet$pca$x;
  lam <- analSet$pca$sdev[choices]
  n <- NROW(scores)
  lam <- lam * sqrt(n);
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    w <- 7.2;
    imgSet$pca.biplot<<-imgName;
  }else{
    w <- width;
  }
  h <- w;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  biplot(t(t(scores[, choices]) / lam), t(t(analSet$pca$rotation[, choices]) * lam), xpd =T, cex=0.9);
  #dev.off();
}

# for plotting, max top 9
GetMaxPCAComp<-function(){
  return (min(9, dim(dataSet$norm)[1]-1, dim(dataSet$norm)[2]));
}

###############################
########### PLS-DA #############
################################

# pls analysis using oscorespls so that VIP can be calculated
# note: the VIP is calculated only after PLSDA-CV is performed
# to determine the best # of comp. used for VIP
PLSR.Anal<-function(){
  # at most 8 components, bounded by samples - 1
  comp.num <- dim(dataSet$norm)[1]-1;
  if(comp.num > 8) {
    comp.num <- 8;
  }
  suppressMessages(require('pls'));
  # note, standardize the cls, to minimize the impact of categorical to numerical impact
  cls<-scale(as.numeric(dataSet$cls))[,1];
  datmat<-as.matrix(dataSet$norm);
  analSet$plsr<<-plsr(cls~datmat,method='oscorespls', ncomp=comp.num);
  write.csv(signif(analSet$plsr$scores,5), row.names=rownames(dataSet$norm), file="plsda_score.csv");
  write.csv(signif(analSet$plsr$loadings,5), file="plsda_loadings.csv");
}

# plot pairwise summary of the first pc.num PLS components
PlotPLSPairSummary<-function(imgName, format="png", dpi=72, width=NA, pc.num){
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    w <- 7.2;
    imgSet$pls.pair <<- imgName;
  }else{
    w <- width;
  }
  h <- w;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  pclabels <- paste("Component", 1:pc.num, "\n", round(100*analSet$plsr$Xvar[1:pc.num]/analSet$plsr$Xtotvar,1), "%");
  # fix: this first pairs() call was a commented-out variant that lost its
  # leading "#" when the file was reflowed; executing it would draw a second,
  # conflicting plot. Restored as the comment it originally was.
  # pairs(analSet$plsr$scores[,1:pc.num], col=as.numeric(dataSet$cls)+1, pch=as.numeric(dataSet$cls)+1, labels=pclabels)
  pairs(analSet$plsr$scores[,1:pc.num], col=GetColorSchema(), pch=as.numeric(dataSet$cls)+1, labels=pclabels)
  #dev.off();
}

# score plot of PLS components inx1 vs inx2 with per-class confidence
# ellipses at level 'reg'; mirrors PlotPCA2DScore.
PlotPLS2DScore<-function(imgName, format="png", dpi=72, width=NA, inx1, inx2, reg=0.95, show=1, grey.scale=0){
  suppressMessages(require('ellipse'));
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    w <- 7.2;
    imgSet$pls.score2d<<-imgName;
  }else{
    w <- width;
  }
  h <- w;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mar=c(5,5,3,3));
  lv1 <- analSet$plsr$scores[,inx1];
  lv2 <- analSet$plsr$scores[,inx2];
  xlabel <- paste("Component", inx1, "(", round(100*analSet$plsr$Xvar[inx1]/analSet$plsr$Xtotvar,1), "%)");
  ylabel <- paste("Component", inx2, "(", round(100*analSet$plsr$Xvar[inx2]/analSet$plsr$Xtotvar,1), "%)");
  text.lbls<-substr(rownames(dataSet$norm),1,12) # some names may be too long
  # obtain ellipse points to the scatter plot for each category
  lvs <- levels(dataSet$cls);
  pts.array <- array(0, dim=c(100,2,length(lvs)));
  for(i in 1:length(lvs)){
    inx <-dataSet$cls == lvs[i];
    groupVar<-var(cbind(lv1[inx],lv2[inx]), na.rm=T);
    groupMean<-cbind(mean(lv1[inx], na.rm=T),mean(lv2[inx], na.rm=T));
    pts.array[,,i] <- ellipse(groupVar, centre = groupMean, level = reg, npoints=100);
  }
  # pad the axis ranges so the ellipses fit inside the plot region
  xrg <- range (lv1, pts.array[,1,]);
  yrg <- range (lv2, pts.array[,2,]);
  x.ext<-(xrg[2]-xrg[1])/12;
  y.ext<-(yrg[2]-yrg[1])/12;
  xlims<-c(xrg[1]-x.ext, xrg[2]+x.ext);
  ylims<-c(yrg[1]-y.ext, yrg[2]+y.ext);
  ## cols = as.numeric(dataSet$cls)+1;
  cols <- GetColorSchema(grey.scale==1);
  uniq.cols <- unique(cols);
  plot(lv1, lv2, xlab=xlabel, xlim=xlims, ylim=ylims, ylab=ylabel, type='n', main="Scores Plot");
  grid(col = "lightgray", lty = "dotted", lwd = 1);
  # make sure name and number of the same order DO NOT USE levels, which may be different
  legend.nm <-
unique(as.character(dataSet$cls));
  ## uniq.cols <- unique(cols);
  ## BHAN: when same color is choosen for black/white; it makes an error
  # names(uniq.cols) <- legend.nm;
  if ( length(uniq.cols) > 1 ) {
    names(uniq.cols) <- legend.nm;
  }
  # draw ellipse
  for(i in 1:length(lvs)){
    if ( length(uniq.cols) > 1) {
      polygon(pts.array[,,i], col=adjustcolor(uniq.cols[lvs[i]], alpha=0.25), border=NA);
    } else {
      polygon(pts.array[,,i], col=adjustcolor(uniq.cols, alpha=0.25), border=NA);
    }
    if(grey.scale) {
      lines(pts.array[,,i], col=adjustcolor("black", alpha=0.5), lty=2);
    }
  }
  pchs <- GetShapeSchema(show, grey.scale);
  if(grey.scale) {
    cols <- rep("black", length(cols));
  }
  if(show==1){ # display sample name set on
    text(lv1, lv2, label=text.lbls, pos=4, xpd=T, cex=0.75);
    points(lv1, lv2, pch=pchs, col=cols);
  }else{
    if (length(uniq.cols) == 1) {
      points(lv1, lv2, pch=pchs, col=cols, cex=1.0);
    } else {
      if(grey.scale == 1 | (exists("shapeVec") && all(shapeVec>0))){
        points(lv1, lv2, pch=pchs, col=cols, cex=1.8);
      }else{
        points(lv1, lv2, pch=21, bg=cols, cex=2);
      }
    }
  }
  uniq.pchs <- unique(pchs);
  if(grey.scale) {
    uniq.cols <- "black";
  }
  legend("topright", legend = legend.nm, pch=uniq.pchs, col=uniq.cols);
  #dev.off();
}

# Axis limits (par("usr")) of the last PLS loading plot.
GetPLSLoadAxesSpec<-function(){
  pls.axis.lims;
}

GetPLSLoadCmpds<- function(){
  names(analSet$plsr$load.x.uniq);
}

GetPLSLoadCmpdInxs<-function(){
  analSet$plsr$load.x.uniq;
}

GetPLSLoadMat <- function(){
  as.matrix(cbind(analSet$plsr$load.x.uniq, analSet$plsr$imp.loads[,2]));
}

# plot loading plot, also set the loading matrix for display
PlotPLSLoading<-function(imgName, format="png", dpi=72, width=NA, inx1, inx2, plotType, lbl.feat=1){
  # named vector
  load1<-analSet$plsr$loadings[,inx1];
  load2<-analSet$plsr$loadings[,inx2];
  loadings = signif(as.matrix(cbind(load1, load2)),5);
  ldName1<-paste("Loadings", inx1);
  ldName2<-paste("Loadings", inx2)
  colnames(loadings)<-c(ldName1, ldName2);
  # jitter x so the web display gets a unique position per compound
  load.x.uniq <- jitter(loadings[,1]);
  names(load.x.uniq) <- rownames(loadings);
  analSet$plsr$load.x.uniq <<- load.x.uniq;
  analSet$plsr$imp.loads<<-loadings; # set up loading matrix
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    w <- 7.2;
    imgSet$pls.loading<<-imgName;
  }else{
    w <- width;
  }
  h <- w;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  if(plotType == "scatter"){
    par(mar=c(6,4,4,5));
    plot(loadings[,1],loadings[,2], las=2, xlab=ldName1, ylab=ldName2);
    pls.axis.lims <<- par("usr"); # x1, x2, y1 ,y2
    grid(col = "lightgray", lty = "dotted", lwd = 1);
    points(loadings[,1],loadings[,2], pch=19, col="magenta");
    if(lbl.feat > 0){
      text(loadings[,1],loadings[,2], labels=substr(rownames(loadings), 1, 12), pos=4, col="blue", xpd=T);
    }
  }else{ # barplot
    cmpd.nms <- substr(rownames(loadings), 1, 14);
    hlims <- c(min(loadings[,1], loadings[,2]), max(loadings[,1], loadings[,2]));
    layout(matrix(c(1,1,2,2,2), nrow=5, byrow=T))
    par(mar=c(1,4,4,1));
    barplot(loadings[,1], names.arg=NA, las=2, ylim=hlims, main = ldName1);
    par(mar=c(10,4,3,1));
    barplot(loadings[,2], names.arg=cmpd.nms, cex.names=1.0, las=2, ylim=hlims, main = ldName2);
  }
  #dev.off();
}

# classification and feature selection
# Cross-validated PLS-DA: classification accuracy via caret::train, R2/Q2
# via pls; the best component number (by 'choice') and the importance
# matrices are stored in analSet$plsda.
#   methodName - 'L' selects leave-one-out CV, anything else k-fold CV
PLSDA.CV<-function(methodName="T", compNum=GetDefaultPLSCVComp(), choice="Q2"){
  # get classification accuracy using caret
  suppressMessages(require('caret'));
  cls<-as.numeric(dataSet$cls)-1;
  datmat<-as.matrix(dataSet$norm);
  plsda.cls <- train(dataSet$norm, dataSet$cls, "pls", trControl=trainControl(method=ifelse(methodName == 'L', "LOOCV", 'CV')), tuneLength=compNum);
  # use the classical regression to get R2 and Q2 measure
  plsda.reg <- plsr(cls~datmat,method ='oscorespls', ncomp=compNum, validation= ifelse(methodName == 'L', "LOO", 'CV'));
  fit.info <- pls::R2(plsda.reg, estimate = "all")$val[,1,];
  # combine accuracy, R2 and Q2
  accu <- plsda.cls$results[,2]
  all.info <- rbind(accu, fit.info[,-1]);
  rownames(all.info) <- c("Accuracy", "R2", "Q2");
  # default use best number determined by Q2
  if(choice == 'Q2'){
best.num <- which(all.info[3,] == max(all.info[3,]));
  }else if(choice == "R2"){
    best.num <- which(all.info[2,] == max(all.info[2,]));
  }else{
    best.num <- which(all.info[1,] == max(all.info[1,]));
  }
  # get coef. table, this can be error when class is very unbalanced
  coef.mat <- try(varImp(plsda.cls, scale=T)$importance);
  # fix: inherits() instead of class(x) == "try-error" -- robust when the
  # object carries more than one class attribute
  if(inherits(coef.mat, "try-error")) {
    coef.mat <- NULL;
  }else{
    if(dataSet$cls.num > 2){ # add an average coef for multiple class
      coef.mean<-apply(coef.mat, 1, mean);
      coef.mat <- cbind(coef.mean = coef.mean, coef.mat);
    }
    # rearrange in decreasing order, keep as matrix, prevent dimension dropping if only 1 col
    inx.ord<- order(coef.mat[,1], decreasing=T);
    coef.mat <- data.matrix(coef.mat[inx.ord, ,drop=FALSE]);
    write.csv(signif(coef.mat,5), file="plsda_coef.csv"); # added 27 Jan 2014
  }
  # calculate VIP http://mevik.net/work/software/VIP.R
  pls<-analSet$plsr;
  b <- c(pls$Yloadings)[1:compNum];
  # fix: renamed the score matrix from 'T' to 'Tmat' -- assigning to 'T'
  # shadows the built-in TRUE shorthand for the rest of this function
  Tmat <- pls$scores[,1:compNum, drop = FALSE]
  SS <- b^2 * colSums(Tmat^2)
  W <- pls$loading.weights[,1:compNum, drop = FALSE]
  Wnorm2 <- colSums(W^2);
  SSW <- sweep(W^2, 2, SS / Wnorm2, "*")
  vips <- sqrt(nrow(SSW) * apply(SSW, 1, cumsum) / cumsum(SS));
  if(compNum > 1){
    vip.mat <- as.matrix(t(vips));
  }else{
    vip.mat <- as.matrix(vips);
  }
  colnames(vip.mat) <- paste("Comp.", 1:ncol(vip.mat));
  write.csv(signif(vip.mat,5),file="plsda_vip.csv");
  analSet$plsda<<-list(best.num=best.num, choice=choice, coef.mat=coef.mat, vip.mat=vip.mat, fit.info=all.info);
  return(1);
}

# perform permutation, using training classification accuracy (type="accu")
# or between/within-group sum of squares (otherwise) as the statistic,
# for two or multi-groups; returns a formatted p-value string.
PLSDA.Permut<-function(num=100, type="accu"){
  orig.cls<-cls<-as.numeric(dataSet$cls);
  datmat<-as.matrix(dataSet$norm);
  best.num<-analSet$plsda$best.num;
  # dummy is not used, for the purpose to maintain lapply API
  Get.pls.bw <- function(dummy){
    cls <- cls[order(runif(length(cls)))]; # random label shuffle
    pls <- plsda(datmat, as.factor(cls), ncomp=best.num);
    pred <- predict(pls, datmat);
    Get.bwss(pred, cls);
  }
  Get.pls.accu <- function(dummy){
    cls <- cls[order(runif(length(cls)))]; # random label shuffle
    pls <- plsda(datmat, as.factor(cls), ncomp=best.num);
    pred <- predict(pls, datmat);
    sum(pred == cls)/length(cls);
  }
  # first calculate the bw values with original labels
  pls <- plsda(datmat, as.factor(orig.cls), ncomp=best.num);
  pred.orig <- predict(pls, datmat);
  if(type=="accu"){
    perm.type = "prediction accuracy";
    res.orig <- sum(pred.orig == orig.cls)/length(orig.cls);
    res.perm <- Perform.permutation(num, Get.pls.accu);
  }else{
    perm.type = "separation distance";
    res.orig <- Get.bwss(pred.orig, orig.cls);
    res.perm <- Perform.permutation(num, Get.pls.bw);
  }
  # perm.vec[1] is the statistic for the original labels
  perm.vec <- c(res.orig, unlist(res.perm, use.names=FALSE));
  # check for infinite since within group variance could be zero for perfect classification
  # (fix: dropped the dead 'inf.found' flag -- it was set but never read)
  if(sum(is.finite(perm.vec)) != length(perm.vec)){
    if(sum(is.finite(perm.vec))==0){ # all are infinite, give a random number 10
      perm.vec<-rep(10, length(perm.vec));
    }else{ # if not all inf, replace with the 10 fold of non-inf values
      perm.vec[!is.finite(perm.vec)]<-10*max(perm.vec[is.finite(perm.vec)]);
    }
  }
  # calculate the significant p value as the proportion of sampled permutations better than or equal to original one
  # note, the precision is determined by the permutation number: e.g. for 100
  # permutations, if none beats the original the p value is < 0.01 -- we
  # cannot claim it is zero
  better.hits <- sum(perm.vec[-1]>=perm.vec[1]);
  if(better.hits == 0) {
    p <- paste("p < ", 1/num, " (", better.hits, "/", num, ")", sep="");
  }else{
    p <- better.hits/num;
    p <- paste("p = ", signif(p, digits=5), " (", better.hits, "/", num, ")", sep="");
  }
  analSet$plsda$permut.p<<-p;
  analSet$plsda$permut.inf<<-F;
  analSet$plsda$permut.type<<- perm.type;
  analSet$plsda$permut<<-perm.vec;
  return(p);
}

# BHan: added bgcolor parameter for B/W color
# Plot the top PLS-DA important features as VIP scores or coefficients.
PlotPLS.Imp<-function(imgName, format="png", dpi=72, width=NA, type, feat.nm, feat.num, color.BW=FALSE){
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 8;
  }else if(width == 0){
    w <- 7;
    imgSet$pls.imp<<-imgName;
  }else{
    w <- width;
  }
  h <- w;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  if(type=="vip"){
    analSet$plsda$imp.type<<-"vip";
    vips<-analSet$plsda$vip.mat[,feat.nm];
    PlotImpVar(vips, "VIP scores", feat.num, color.BW);
  }else{
    analSet$plsda$imp.type<<-"coef";
    data<-analSet$plsda$coef.mat[,feat.nm];
    PlotImpVar(data, "Coefficients", feat.num, color.BW);
  }
  #dev.off();
}

# BHan: added bgcolor parameter for B/W color
# Dot chart of the top feat.num important features, with one colored tile
# per class showing the feature's trimmed group mean, plus a High/Low key.
PlotImpVar <- function(imp.vec, xlbl, feat.num=15, color.BW=FALSE){
  cls.len <- length(levels(dataSet$cls));
  if(cls.len == 2){
    rt.mrg <- 5;
  }else if(cls.len == 3){
    rt.mrg <- 6;
  }else if(cls.len == 4){
    rt.mrg <- 7;
  }else if(cls.len == 5){
    rt.mrg <- 8;
  }else if(cls.len == 6){
    rt.mrg <- 9;
  }else{
    rt.mrg <- 11;
  }
  op <- par(mar=c(5,7,3,rt.mrg)); # set right side margin with the number of class
  if(feat.num <= 0){
    feat.num = 15;
  }
  if(feat.num > length(imp.vec)){
    feat.num <- length(imp.vec);
  }
  # first get the top subset
  imp.vec <- rev(sort(imp.vec))[1:feat.num];
  # reverse the order for display
  imp.vec <- sort(imp.vec);
  # as data should already be normalized, use mean/median should be the same
  # mns is a list contains means of all vars at each level
  # convert the list into a matrix with each row contains var averages across different lvls
  mns <- by(dataSet$norm[, names(imp.vec)], dataSet$cls,
            function(x){ # inner function note, by send a subset of dataframe
              apply(x, 2, mean, trim=0.1)
            });
  mns <- t(matrix(unlist(mns), ncol=feat.num, byrow=TRUE));
  # vip.nms <-substr(names(imp.vec), 1, 12);
  vip.nms <-substr(names(imp.vec), 1, 14);
  names(imp.vec) <- NULL;
  # modified for B/W color
  dotcolor <- ifelse(color.BW, "darkgrey", "blue");
  dotchart(imp.vec, bg=dotcolor, xlab= xlbl, cex=1.3);
  mtext(side=2, at=1:feat.num, vip.nms, las=2, line=1)
  axis.lims <- par("usr"); # x1, x2, y1 ,y2
  # get character width
  shift <- 2*par("cxy")[1];
  lgd.x <- axis.lims[2] + shift;
  x <- rep(lgd.x, feat.num);
  y <- 1:feat.num;
  par(xpd=T);
  suppressMessages(require(RColorBrewer));
  nc <- ncol(mns);
  # modified for B/W color
  colorpalette <- ifelse(color.BW, "Greys", "RdYlGn");
  # NOTE(review): brewer.pal caps n at the palette maximum and warns when
  # asked for more colors than the palette provides
  col <- colorRampPalette(brewer.pal(10, colorpalette))(nc); # set colors for each class
  if(color.BW) col <- rev(col);
  # calculate background
  bg <- matrix("", nrow(mns), nc);
  for (m in 1:nrow(mns)){
    bg[m,] <- (col[nc:1])[rank(mns[m,])];
  }
  cls.lbl <- levels(dataSet$cls);
  for (n in 1:ncol(mns)){
    points(x,y, bty="n", pch=22, bg=bg[,n], cex=3);
    # now add label
    text(x[1], axis.lims[4], cls.lbl[n], srt=45, adj=c(0.2,0.5));
    # shift x, note, this is good for current size
    x <- x + shift/1.25;
  }
  # now add color key, padding with more intermediate colors for continuous band
  col <- colorRampPalette(brewer.pal(25, colorpalette))(50)
  if(color.BW) col <- rev(col);
  nc <- length(col);
  x <- rep(x[1] + shift, nc);
  shifty <- (axis.lims[4]-axis.lims[3])/3;
  starty <- axis.lims[3] + shifty;
  endy <- axis.lims[3] + 2*shifty;
  y <- seq(from = starty, to = endy, length = nc);
  points(x,y, bty="n", pch=15, col=rev(col), cex=2);
  text(x[1], endy+shifty/8, "High");
  text(x[1], starty-shifty/8, "Low");
  par(op);
}

# Plot plsda classification performance using different components
PlotPLS.Classification<-function(imgName, format="png",
dpi=72, width=NA){ res<-analSet$plsda$fit.info; colnames(res) <- 1:ncol(res); best.num <- analSet$plsda$best.num; choice <- analSet$plsda$choice; imgName = paste(imgName, "dpi", dpi, ".", format, sep=""); if(is.na(width)){ w <- 7; }else if(width == 0){ w <- 7; imgSet$pls.class<<-imgName; }else{ w <- width; } h <- w*5/7; Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white"); par(mar=c(5,5,2,7)); # put legend on the right outside barplot(res, beside = TRUE, col = c("lightblue", "mistyrose","lightcyan"), ylim= c(0,1.05), xlab="Number of components", ylab="Performance"); if(choice == "Q2"){ text((best.num-1)*3 + best.num + 2.5, res[3,best.num]+ 0.02, labels = "*", cex=2.5, col="red"); }else if(choice == "R2"){ text((best.num-1)*3 + best.num + 1.5, res[2,best.num]+ 0.02, labels = "*", cex=2.5, col="red"); }else{ text((best.num-1)*3 + best.num + 0.5, res[1,best.num]+ 0.02, labels = "*", cex=2.5, col="red"); } # calculate the maximum y position, each bar is 1, place one space between the group xpos <- ncol(res)*3 + ncol(res) + 1; legend(xpos, 1.0, rownames(res), fill = c("lightblue", "mistyrose","lightcyan"), xpd=T); #dev.off(); } # Plot plsda classification performance using different components PlotPLS.Permutation<-function(imgName, format="png", dpi=72, width=NA){ bw.vec<-analSet$plsda$permut; len<-length(bw.vec); imgName = paste(imgName, "dpi", dpi, ".", format, sep=""); if(is.na(width)){ w <- 8; }else if(width == 0){ w <- 7; imgSet$pls.permut<<-imgName; }else{ w <- width; } h <- w*6/8; Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white"); par(mar=c(5,5,2,4)); hst <- hist(bw.vec, breaks = "FD", freq=T, ylab="Frequency", xlab= 'Permutation test statistics', col="lightblue", main=""); # add the indicator using original label h <- max(hst$counts) arrows(bw.vec[1], h/5, bw.vec[1], 0, col="red", lwd=2); text(bw.vec[1], h/3.5, paste('Observed \n statistic \n', analSet$plsda$permut.p), xpd=T); #dev.off(); } # 
# get which number of components give best performance (0 if PLS-DA not yet run)
GetPLSBestTune<-function(){
  if(is.null(analSet$plsda$best.num)){
    return (0);
  }
  analSet$plsda$best.num;
}

# obtain VIP score / coefficient / loading matrices for table display
GetPLSSigMat<-function(type){
  if(type == "vip"){
    return (CleanNumber(signif(as.matrix(analSet$plsda$vip.mat),5)));
  }else if(type == "coef"){
    return (CleanNumber(signif(as.matrix(analSet$plsda$coef.mat),5)));
  }else{
    return (CleanNumber(signif(as.matrix(analSet$plsr$imp.loads),5)));
  }
}

GetPLSSigRowNames<-function(type){
  if(type == "vip"){
    return (rownames(analSet$plsda$vip.mat));
  }else if(type == "coef"){
    return (rownames(analSet$plsda$coef.mat));
  }else{
    return (rownames(analSet$plsr$imp.loads))
  }
}

GetPLSSigColNames<-function(type){
  if(type == "vip"){
    return (colnames(analSet$plsda$vip.mat));
  }else if(type == "coef"){
    return (colnames(analSet$plsda$coef.mat));
  }else{
    return (colnames(analSet$plsr$imp.loads));
  }
}

GetPLS_CVRowNames <- function(){
  rownames(analSet$plsda$fit.info);
}

GetPLS_CVColNames <- function(){
  colnames(analSet$plsda$fit.info);
}

GetPLS_CVMat<-function(){
  return(signif(analSet$plsda$fit.info, 5));
}

# component-count limits derived from the normalized data dimensions
GetMaxPLSPairComp<-function(){
  return (min(dim(dataSet$norm)[1]-1, dim(dataSet$norm)[2]));
}

GetMaxPLSCVComp<-function(){
  return (min(dim(dataSet$norm)[1]-2, dim(dataSet$norm)[2]));
}

GetDefaultPLSPairComp<-function(){
  return (min(5, dim(dataSet$norm)[1]-1, dim(dataSet$norm)[2]));
}

GetDefaultPLSCVComp<-function(){
  return (min(5, dim(dataSet$norm)[1]-2, dim(dataSet$norm)[2], dataSet$min.grp.size));
}

##############################
####### OPLS-DA ##############
##############################

# Fit an OPLS(-DA) model on dataSet$norm and write score/loading CSVs.
OPLSR.Anal<-function(){
  # note, standardize the cls, to minimize the impact of categorical to numerical impact
  cls<-scale(as.numeric(dataSet$cls))[,1];
  datmat<-as.matrix(dataSet$norm);
  cv.num <- min(7, dim(dataSet$norm)[1]-1);
  analSet$oplsda<<-perform_opls(datmat,cls, predI=1, permI=0, orthoI=NA, crossvalI=cv.num);
  score.mat <- cbind(analSet$oplsda$scoreMN[,1], analSet$oplsda$orthoScoreMN[,1]);
  colnames(score.mat) <- c("Score (t1)","OrthoScore (to1)");
  write.csv(signif(score.mat,5), row.names=rownames(dataSet$norm), file="oplsda_score.csv");
  load.mat <- cbind(analSet$oplsda$loadingMN[,1], analSet$oplsda$orthoLoadingMN[,1]);
  colnames(load.mat) <- c("Loading (t1)","OrthoLoading (to1)");
  write.csv(signif(load.mat,5), file="oplsda_loadings.csv");
  custom.cmpds <<- c();
}

# score plot: predictive vs orthogonal T scores with per-group confidence ellipses
PlotOPLS2DScore<-function(imgName, format="png", dpi=72, width=NA, inx1, inx2, reg=0.95, show=1, grey.scale=0){
  suppressMessages(require('ellipse'));
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    w <- 7.2;
    imgSet$opls.score2d<<-imgName;
  }else{
    w <- width;
  }
  h <- w;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mar=c(5,5,3,3));
  lv1 <- analSet$oplsda$scoreMN[,1];
  lv2 <- analSet$oplsda$orthoScoreMN[,1];
  xlabel <- paste("T score [1]", "(", round(100*analSet$oplsda$modelDF["p1", "R2X"],1), "%)");
  ylabel <- paste("Orthogonal T score [1]", "(", round(100*analSet$oplsda$modelDF["o1", "R2X"],1), "%)");
  text.lbls<-substr(rownames(dataSet$norm),1,12) # some names may be too long
  # obtain ellipse points to the scatter plot for each category
  lvs <- levels(dataSet$cls);
  pts.array <- array(0, dim=c(100,2,length(lvs)));
  for(i in 1:length(lvs)){
    inx <-dataSet$cls == lvs[i];
    groupVar<-var(cbind(lv1[inx],lv2[inx]), na.rm=T);
    groupMean<-cbind(mean(lv1[inx], na.rm=T),mean(lv2[inx], na.rm=T));
    pts.array[,,i] <- ellipse(groupVar, centre = groupMean, level = reg, npoints=100);
  }
  # axis limits padded so the ellipses are fully visible
  xrg <- range (lv1, pts.array[,1,]);
  yrg <- range (lv2, pts.array[,2,]);
  x.ext<-(xrg[2]-xrg[1])/12;
  y.ext<-(yrg[2]-yrg[1])/12;
  xlims<-c(xrg[1]-x.ext, xrg[2]+x.ext);
  ylims<-c(yrg[1]-y.ext, yrg[2]+y.ext);
  ## cols = as.numeric(dataSet$cls)+1;
  cols <- GetColorSchema(grey.scale==1);
  uniq.cols <- unique(cols);
  plot(lv1, lv2, xlab=xlabel, xlim=xlims, ylim=ylims, ylab=ylabel, type='n', main="Scores Plot");
  grid(col = "lightgray", lty = "dotted", lwd = 1);
  # make sure name and number of the same order DO NOT USE levels, which may be different
  legend.nm <- unique(as.character(dataSet$cls));
  ## uniq.cols <- unique(cols);
  ## BHAN: when same color is choosen for black/white; it makes an error
  # names(uniq.cols) <- legend.nm;
  if ( length(uniq.cols) > 1 ) {
    names(uniq.cols) <- legend.nm;
  }
  # draw ellipse
  for(i in 1:length(lvs)){
    if ( length(uniq.cols) > 1) {
      polygon(pts.array[,,i], col=adjustcolor(uniq.cols[lvs[i]], alpha=0.25), border=NA);
    } else {
      polygon(pts.array[,,i], col=adjustcolor(uniq.cols, alpha=0.25), border=NA);
    }
    if(grey.scale) {
      lines(pts.array[,,i], col=adjustcolor("black", alpha=0.5), lty=2);
    }
  }
  pchs <- GetShapeSchema(show, grey.scale);
  if(grey.scale) {
    cols <- rep("black", length(cols));
  }
  if(show==1){ # display sample name set on
    text(lv1, lv2, label=text.lbls, pos=4, xpd=T, cex=0.75);
    points(lv1, lv2, pch=pchs, col=cols);
  }else{
    if (length(uniq.cols) == 1) {
      points(lv1, lv2, pch=pchs, col=cols, cex=1.0);
    } else {
      if(grey.scale == 1 | (exists("shapeVec") && all(shapeVec>0))){
        points(lv1, lv2, pch=pchs, col=cols, cex=1.8);
      }else{
        points(lv1, lv2, pch=21, bg=cols, cex=2);
      }
    }
  }
  uniq.pchs <- unique(pchs);
  if(grey.scale) {
    uniq.cols <- "black";
  }
  legend("topright", legend = legend.nm, pch=uniq.pchs, col=uniq.cols);
  #dev.off();
}

ResetCustomCmpds <- function(){
  custom.cmpds <<- c();
}

#S-plot for important features from OPLS-DA: covariance p[1] vs correlation p(corr)[1]
PlotOPLS.Splot<-function(imgName, format="png", dpi=72, width=NA, plotType){
  s <- as.matrix(dataSet$norm);
  T <- as.matrix(analSet$oplsda$scoreMN)
  p1 <- c()
  for (i in 1:ncol(s)) {
    scov <- cov(s[,i], T)
    p1 <- matrix(c(p1, scov), ncol=1)
  }
  pcorr1 <- c()
  for (i in 1:nrow(p1)) {
    den <- apply(T, 2, sd)*sd(s[,i])
    corr1 <- p1[i,]/den
    pcorr1 <- matrix(c(pcorr1, corr1), ncol=1)
  }
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- h <- 8;
  }else if(width == 0){
    # FIX: this branch previously left w/h unassigned, so the Cairo() call
    # below failed with "object 'w' not found"; use the same default size.
    w <- h <- 8;
    imgSet$opls.loading<<-imgName;
  }else{
    w <- h <- 
width; }
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mar=c(5,5,4,7))
  plot(p1, pcorr1, pch=19, xlab="p[1]", ylab ="p(corr)[1]", main = "S-plot", col="magenta");
  opls.axis.lims <<- par("usr");
  if(plotType=="all"){
    text(p1, pcorr1, labels=colnames(s), cex=0.8, pos=4, xpd=TRUE, col="blue");
  }else if(plotType == "custom"){
    # label only the compounds the user clicked (recorded in custom.cmpds)
    if(length(custom.cmpds) > 0){
      hit.inx <- colnames(dataSet$norm) %in% custom.cmpds;
      text(p1[hit.inx], pcorr1[hit.inx], labels=colnames(s)[hit.inx], pos=4, xpd=TRUE, col="blue");
    }
  }else{
    # do nothing
  }
  #dev.off();
  # jitter column is used as a stable x-index for interactive lookup
  splot.mat <- cbind(jitter(p1),p1, pcorr1);
  rownames(splot.mat) <- colnames(s);
  colnames(splot.mat) <- c("jitter", "p[1]","p(corr)[1]");
  write.csv(signif(splot.mat[,2:3],5), file="oplsda_splot.csv");
  analSet$oplsda$splot.mat <<- splot.mat;
}

# Record a clicked compound and delegate to the single-compound viewer.
PlotLoadingCmpd<-function(cmpdNm, format="png", dpi=72, width=NA){
  # need to recoed the clicked compounds
  custom.cmpds <<- c(custom.cmpds, cmpdNm);
  return(PlotCmpdView(cmpdNm, format, dpi, width));
}

# Bar plot of the OPLS-DA model overview: cumulative R2Y and Q2 per component.
PlotOPLS.MDL <- function(imgName, format="png", dpi=72, width=NA){
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 8;
  }else if(width == 0){
    w <- 8;
    imgSet$pls.class<<-imgName;
  }else{
    w <- width;
  }
  h <- w*6/8;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  # the model R2Y and Q2Y
  par(mar=c(5,5,4,7)); # put legend on the right outside
  modBarDF <- analSet$oplsda$modelDF[!(rownames(analSet$oplsda$modelDF) %in% c("rot")), ];
  mod.dat <- rbind(modBarDF[, "R2Y(cum)"], modBarDF[, "Q2(cum)"]);
  bplt <- barplot(mod.dat,beside=TRUE, names.arg = rownames(modBarDF),xlab = "");
  axis(2, lwd.ticks=1);
  barplot(mod.dat,add = TRUE, beside = TRUE, col = c("lightblue", "mistyrose"));
  text(x=bplt, y=mod.dat+max(mod.dat)/25, labels=as.character(mod.dat), xpd=TRUE)
  xpos <- nrow(modBarDF)*2 + nrow(modBarDF) + 0.5;
  ypos <- max(mod.dat)/2;
  legend(xpos, ypos, legend = c("R2Y", "Q2"), pch=15, col=c("lightblue", "mistyrose"), xpd=T, bty="n");
  #dev.off();
}

GetOPLSLoadAxesSpec<-function(){
  opls.axis.lims;
}

GetOPLSLoadCmpds<- function(){
  rownames(analSet$oplsda$splot.mat);
}

GetOPLSLoadColNames<- function(){
  return(c("p[1]","p(corr)[1]"));
}

GetOPLSLoadCmpdInxs<-function(){
  analSet$oplsda$splot.mat[,1];
}

GetOPLSLoadMat <- function(){
  as.matrix(analSet$oplsda$splot.mat[,c(1,3)]);
}

# perform permutation, using training classification accuracy as
# indicator, for two or multi-groups
PlotOPLS.Permutation<-function(imgName, format="png", dpi=72, num=100, width=NA){
  cls<-scale(as.numeric(dataSet$cls))[,1];
  datmat<-as.matrix(dataSet$norm);
  cv.num <- min(7, dim(dataSet$norm)[1]-1);
  #perm.res<-performOPLS(datmat,cls, predI=1, orthoI=NA, permI=num, crossvalI=cv.num);
  perm.res<-perform_opls(datmat,cls, predI=1, orthoI=NA, permI=num, crossvalI=cv.num);
  # first element of each vector is the observed (unpermuted) statistic
  r.vec<-perm.res$suppLs[["permMN"]][, "R2Y(cum)"];
  q.vec<-perm.res$suppLs[["permMN"]][, "Q2(cum)"];
  rng <- range(c(r.vec, q.vec, 1));
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 8;
  }else if(width == 0){
    w <- 8;
    imgSet$pls.permut<<-imgName;
  }else{
    w <- width;
  }
  h <- w*6/8;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mar=c(5,5,2,7));
  # overlay the two permutation histograms on a common bin size
  rhst <- hist(r.vec[-1], plot=FALSE);
  qhst <- hist(q.vec[-1], plot=FALSE);
  h <- max(c(rhst$counts, qhst$counts))+1;
  bin.size <- min(c(rhst$breaks[2]-rhst$breaks[1], qhst$breaks[2]-qhst$breaks[1]));
  rbins <- seq(min(rhst$breaks),max(rhst$breaks),bin.size);
  qbins <- seq(min(qhst$breaks),max(qhst$breaks),bin.size);
  hist(r.vec[-1], xlim=rng, ylim=c(0, h), breaks=rbins, border=F, ylab="Frequency", xlab= 'Permutations', col=adjustcolor("lightblue", alpha=0.6), main="");
  hist(q.vec[-1], add=TRUE,breaks=qbins, border=F, col=adjustcolor("mistyrose", alpha=0.6));
  arrows(r.vec[1], h/3, r.vec[1], 0, length=0.1,angle=30,lwd=2);
  text(r.vec[1], h/2.5, paste('Observed \n R2Y:', r.vec[1]), xpd=TRUE);
  arrows(q.vec[1], h/2, q.vec[1], 0,
length=0.1,angle=30,lwd=2);
  text(q.vec[1], h/1.8, paste('Observed \n Q2:', q.vec[1]), xpd=TRUE);
  legend(1, h/3, legend = c("Perm R2Y", "Perm Q2"), pch=15, col=c("lightblue", "mistyrose"), xpd=T, bty="n");
  #dev.off();
  # empirical p-values: fraction of permuted statistics >= the observed one
  better.rhits <- sum(r.vec[-1]>=r.vec[1]);
  if(better.rhits == 0) {
    pr <- paste("p < ", 1/num, " (", better.rhits, "/", num, ")", sep="");
  }else{
    p <- better.rhits/num;
    pr <- paste("p = ", signif(p, digits=5), " (", better.rhits, "/", num, ")", sep="");
  }
  better.qhits <- sum(q.vec[-1]>=q.vec[1]);
  if(better.qhits == 0) {
    pq <- paste("p < ", 1/num, " (", better.qhits, "/", num, ")", sep="");
  }else{
    p <- better.qhits/num;
    pq <- paste("p = ", signif(p, digits=5), " (", better.qhits, "/", num, ")", sep="");
  }
  msg <- paste0("Empirical p-values R2Y: ", pr, " and Q2: ", pq)
  return(msg);
}

#############################################################################################################
#############################################################################################################
#############################################################################################################
#############################################################################################################
############################################################################################################

#########################################################
## R script for MetaboAnalyst
## Description: perform RandomForest and SVM
##
## Author: Jeff Xia, jeff.xia@mcgill.ca
## McGill University, Canada
##
## License: GNU GPL (>= 2)
###################################################

#######################################
########### Random Forest #############
#######################################

# random forests: fit on dataSet$norm/dataSet$cls, store model and the
# features ranked by MeanDecreaseAccuracy in the analSet globals.
RF.Anal<-function(treeNum=500, tryNum=10){
  suppressMessages(require(randomForest));
  rf_out<-randomForest(dataSet$norm, dataSet$cls, ntree = treeNum, mtry = tryNum, importance = TRUE, proximity = TRUE);
  # set up named sig table for display
  impmat<-rf_out$importance;
  impmat<-impmat[rev(order(impmat[,"MeanDecreaseAccuracy"])),]
  sigmat<-impmat[,"MeanDecreaseAccuracy", drop=F];
  sigmat<-signif(sigmat, 5);
  write.csv(sigmat,file="randomforests_sigfeatures.csv");
  analSet$rf<<-rf_out;
  analSet$rf.sigmat<<-sigmat;
}

# plot the random forest error curves (overall + one per class)
PlotRF.Classify<-function(imgName, format="png", dpi=72, width=NA){
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 8;
  }else if(width == 0){
    w <- 8;
    imgSet$rf.cls<<-imgName;
  }else{
    w <- width;
  }
  h <- w*5/8;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  #par(mfrow=c(2,1));
  par(mar=c(4,4,3,2));
  cols <- rainbow(length(levels(dataSet$cls))+1);
  plot(analSet$rf, main="Random Forest classification", col=cols);
  legend("topright", legend = c("Overall", levels(dataSet$cls)), lty=2, lwd=1, col=cols);
  #PlotConfusion(analSet$rf$confusion);
  #dev.off();
}

# plot variable importance ranked by MeanDecreaseAccuracy
PlotRF.VIP<-function(imgName, format="png", dpi=72, width=NA){
  vip.score <- rev(sort(analSet$rf$importance[,"MeanDecreaseAccuracy"]));
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 8;
  }else if(width == 0){
    w <- 7;
    imgSet$rf.imp<<-imgName;
  }else{
    w <- width;
  }
  h <- w*7/8;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  PlotImpVar(vip.score,"MeanDecreaseAccuracy");
  #dev.off();
}

# plot the RF outlying measure per sample, labelling the top-5 outliers
PlotRF.Outlier<-function(imgName, format="png", dpi=72, width=NA){
  cols <- GetColorSchema();
  uniq.cols <- unique(cols);
  legend.nm <- unique(as.character(dataSet$cls));
  dist.res <- outlier(analSet$rf);
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    w <- 7.2;
    imgSet$rf.outlier<<-imgName;
  }else{
    w <- width;
  }
  h <- w*7/9;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  layout(matrix(c(1,2), 1, 2, byrow = TRUE),
width=c(4,1));
  op<-par(mar=c(5,5,4,0));
  plot(dist.res, type="h", col=cols, xlab="Samples", xaxt="n", ylab="Outlying Measures", bty="n");
  # add sample names to top 5
  rankres <- rank(-abs(dist.res), ties.method="random");
  inx.x <- which(rankres < 6);
  inx.y <- dist.res[inx.x];
  nms <- names(dist.res)[inx.x];
  text(inx.x, inx.y, nms, pos=ifelse(inx.y >= 0, 3, 1), xpd=T)
  op<-par(mar=c(5,0,4,1));
  plot.new();
  plot.window(c(0,1), c(0,1));
  legend("center", legend =legend.nm, pch=15, col=uniq.cols);
  #dev.off();
}

# get the OOB error for the last signif
GetRFOOB<-function(){
  errors = analSet$rf$err.rate;
  nrow = dim(errors)[1];
  signif(errors[nrow, 1],3);
}

GetSigTable.RF<-function(){
  GetSigTable(analSet$rf.sigmat, "Random Forest");
}

# significance measure, double[][]
GetRFSigMat<-function(){
  return(CleanNumber(analSet$rf.sigmat))
}

GetRFSigRowNames<-function(){
  rownames(analSet$rf.sigmat);
}

GetRFSigColNames<-function(){
  colnames(analSet$rf.sigmat);
}

GetRFConf.Table<-function(){
  print(xtable(analSet$rf$confusion, caption="Random Forest Classification Performance"), size="\\scriptsize");
}

# return double[][] confusion matrix
GetRFConfMat<-function(){
  signif(analSet$rf$confusion,3);
}

GetRFConfRowNames<-function(){
  rownames(analSet$rf$confusion);
}

GetRFConfColNames<-function(){
  colnames(analSet$rf$confusion);
}

#######################################
########### R-SVM #####################
#######################################

# recursive SVM for feature selection and classification; results stored in analSet$svm
RSVM.Anal<-function(cvType){
  ladder = CreateLadder(ncol(dataSet$norm));
  svm.out <- RSVM(dataSet$norm, dataSet$cls, ladder, CVtype=cvType);
  # calculate important features: level with the lowest CV error
  ERInd <- max( which(svm.out$Error == min(svm.out$Error)) )
  MinLevel <- svm.out$ladder[ERInd]
  FreqVec <- svm.out$SelFreq[, ERInd]
  SelInd <- which(rank(FreqVec) >= (svm.out$ladder[1]-MinLevel));
  FreqInd<-svm.out$SelFreq[SelInd, ERInd]
  names(FreqInd)<-names(dataSet$norm)[SelInd];
  #create a sig table for display
  sig.var<- rev(sort(FreqInd));
  sig.var<-as.matrix(sig.var); # 1-column matrix
  colnames(sig.var)<-"Freqency";
  write.csv(sig.var,file="svm_sigfeatures.csv");
  # add sorted features frequencies as importance indicator
  svm.out<-append(svm.out, list(sig.mat=sig.var, best.inx=ERInd));
  analSet$svm<<-svm.out;
}

# Plot R-SVM error rate across the ladder levels; best level highlighted in red
PlotRSVM.Classification<-function(imgName, format="png", dpi=72, width=NA){
  res<-analSet$svm$Error;
  edge<-(max(res)-min(res))/100; # expand y uplimit for text
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 8;
  }else if(width == 0){
    w <- 7;
    imgSet$svm.class<<-imgName;
  }else{
    w <- width;
  }
  h <- w*6/8;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  plot(res,type='l',xlab='Number of variables (levels)',ylab='Error Rate', ylim = c(min(res)-5*edge, max(res)+18*edge), axes=F, main="Recursive SVM classification")
  text(res,labels =paste(100*round(res,3),'%'), adj=c(-0.3, -0.5), srt=45, xpd=T)
  points(res, col=ifelse(1:length(res)==analSet$svm$best.inx,"red","blue"));
  axis(2);
  axis(1, 1:length(res), names(res));
  #dev.off();
}

# if too many, plot top 15
PlotRSVM.Cmpd<-function(imgName, format="png", dpi=72, width=NA){
  sigs<-analSet$svm$sig.mat;
  data<-sigs[,1];
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 8;
  }else if(width == 0){
    w <- 7;
    imgSet$svm<<-imgName;
  }else{
    w <- width;
  }
  h <- w*7/8;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  PlotImpVar(data,"Frequency");
  #dev.off();
}

GetSigTable.SVM<-function(){
  GetSigTable(analSet$svm$sig.mat, "Recursive SVM");
}

# significance measure, double[][]
GetSVMSigMat<-function(){
  return(CleanNumber(analSet$svm$sig.mat));
}

GetSVMSigRowNames<-function(){
  rownames(analSet$svm$sig.mat);
}

GetSVMSigColNames<-function(){
  colnames(analSet$svm$sig.mat);
}

### R-code for R-SVM
### use leave-one-out / Nfold or bootstrape to permute data for
### external CV
### build SVM model and use mean-balanced weight to sort genes on training set
### and recursive elimination of least important genes
### author: Dr. Xin Lu, Research Scientist
### Biostatistics Department, Harvard School of Public Health

## create a decreasing ladder for recursive feature elimination
## (Ntotal = starting feature count, Nmin = smallest level kept)
CreateLadder <- function(Ntotal, Nmin=5 ){
  x <- vector()
  x[1] <- Ntotal
  # note SVM is very computationally intensive, large step first
  # first descend with 0.5 -> 50 var left
  # then descend with 0.6 -> 25 var left
  # then desend with 0.75 -> 5 var
  for( i in 1:100 ){
    if(x[i]>200){
      pRatio = 0.4
    }else if(x[i]>50){
      pRatio = 0.5
    }else if(x[i]>25){
      pRatio = 0.6
    }else{
      pRatio = 0.75
    }
    pp <- round(x[i] * pRatio)
    if( pp == x[i] ){
      pp <- pp-1
    }
    if( pp >= Nmin ) {
      x[i+1] <- pp
    } else{
      break
    }
  }
  x
}

## R-SVM core code
## input:
##   x: row matrix of data
##   y: class label: 1 / -1 for 2 classes
##   CVtype:
##     integer: N fold CV
##     "LOO": leave-one-out CV
##     "bootstrape": bootstrape CV
##   CVnum: number of CVs
##     LOO: defined as sample size
##     Nfold and bootstrape: user defined, default as sample size
## output: a named list
##   Error: a vector of CV error on each level
##   SelFreq: a matrix for the frequency of each gene being selected in each level
##            with each column corresponds to a level of selection
##            and each row for a gene
##   The top important gene in each level are those high-freqent ones
RSVM <- function(x, y, ladder, CVtype, CVnum=0 ){
  suppressMessages(require(e1071));
  ## check if y is binary response
  Ytype <- names(table(y))
  if( length(Ytype) != 2) {
    print("ERROR!! RSVM can only deal with 2-class problem")
    return(0)
  }
  ## class mean difference, used to direction-balance the SVM weights below
  m1 <- apply(x[ which(y==Ytype[1]), ], 2, mean)
  m2 <- apply(x[ which(y==Ytype[2]), ], 2, mean)
  md <- m1-m2
  ## recode the two classes as +1 / -1
  yy <- vector( length=length(y))
  yy[which(y==Ytype[1])] <- 1
  yy[which(y==Ytype[2])] <- -1
  y <- yy
  ## check ladder
  if( min(diff(ladder)) >= 0 ) {
    print("ERROR!! ladder must be monotonously decreasing")
    return(0);
  }
  if( ladder[1] != ncol(x) ) {
    ladder <- c(ncol(x), ladder)
  }
  nSample <- nrow(x)
  nGene <- ncol(x)
  SampInd <- seq(1, nSample)
  if( CVtype == "LOO" ) {
    CVnum <- nSample
  } else {
    if( CVnum == 0 ) {
      CVnum <- nSample
    }
  }
  ## vector for test error and number of tests
  ErrVec <- vector( length=length(ladder))
  names(ErrVec) <- as.character(ladder);
  nTests <- 0
  SelFreq <- matrix( 0, nrow=nGene, ncol=length(ladder))
  colnames(SelFreq) <- paste("Level", ladder);
  ## for each CV
  for( i in 1:CVnum ) {
    ## split data
    if( CVtype == "LOO" ) {
      TestInd <- i
      TrainInd <- SampInd[ -TestInd]
    } else {
      if( CVtype == "bootstrape" ) {
        TrainInd <- sample(SampInd, nSample, replace=T);
        TestInd <- SampInd[ which(!(SampInd %in% TrainInd ))];
      } else {
        ## Nfold
        TrainInd <- sample(SampInd, nSample*(CVtype-1)/CVtype);
        TestInd <- SampInd[ which(!(SampInd %in% TrainInd ))];
      }
    }
    nTests <- nTests + length(TestInd)
    ## in each level, train a SVM model and record test error
    xTrain <- x[TrainInd, ]
    yTrain <- y[TrainInd]
    xTest <- x[TestInd,]
    yTest <- y[TestInd]
    ## index of the genes used in the current level
    SelInd <- seq(1, nGene)
    for( gLevel in 1:length(ladder) ) {
      ## record the genes selected in this ladder
      SelFreq[SelInd, gLevel] <- SelFreq[SelInd, gLevel] +1
      ## train SVM model and test error
      ###################################################################################
      ## note the scale is changed to T or it never returns sometime for unscaled data ###
      ## note: the classification performance is idenpendent of about scale is T/F #####
      ## for "LOO", the test data should be as.data.frame, matrxi will trigger error #####
      ###################################################################################
      svmres <- svm(xTrain[, SelInd], yTrain, scale=T, type="C-classification", kernel="linear" )
      if( CVtype == "LOO" ){
        # NOTE(review): nrow is not an as.data.frame() argument; this works only
        # because extra args are silently ignored — kept as-is for compatibility
        svmpred <- predict(svmres, as.data.frame(xTest[SelInd], nrow=1) )
      }else{
        svmpred <- predict(svmres, xTest[, SelInd] )
      }
      ErrVec[gLevel] <- ErrVec[gLevel] + sum(svmpred != yTest )
      ## weight vector, balanced by the class-mean difference direction
      W <- t(svmres$coefs*yTrain[svmres$index]) %*% svmres$SV * md[SelInd]
      rkW <- rank(W)
      ## keep only the highest-ranked features for the next (smaller) level
      if( gLevel < length(ladder) ){
        SelInd <- SelInd[which(rkW > (ladder[gLevel] - ladder[gLevel+1]))]
      }
    }
  }
  ret <- list(ladder=ladder, Error=ErrVec/nTests, SelFreq=SelFreq);
  ret;
}

# Draw a confusion matrix as a colored image with per-cell counts.
PlotConfusion <- function(clsConf){
  prior(clsConf) <- 100
  # The above rescales the confusion matrix such that columns sum to 100.
  opar <- par(mar=c(5.1, 6.1, 2, 2))
  x <- x.orig <- unclass(clsConf)
  x <- log(x + 0.5) * 2.33
  x[x < 0] <- NA
  x[x > 10] <- 10
  diag(x) <- -diag(x)
  image(1:ncol(x), 1:ncol(x), -(x[, nrow(x):1]), xlab='Actual', ylab='',
        col=colorRampPalette(c(hsv(h = 0, s = 0.9, v = 0.9, alpha = 1),
                               hsv(h = 0, s = 0, v = 0.9, alpha = 1),
                               hsv(h = 2/6, s = 0.9, v = 0.9, alpha = 1)))(41),
        xaxt='n', yaxt='n', zlim=c(-10, 10))
  axis(1, at=1:ncol(x), labels=colnames(x), cex.axis=0.8)
  axis(2, at=ncol(x):1, labels=colnames(x), las=1, cex.axis=0.8)
  title(ylab='Predicted', line=4.5)
  abline(h = 0:ncol(x) + 0.5, col = 'gray')
  abline(v = 0:ncol(x) + 0.5, col = 'gray')
  # FIX: cell labels were hard-coded for a 6x6 matrix (text(1:6, rep(6:1, each=6), ...));
  # generalized to any class count, identical placement for the 6-class case.
  nc <- ncol(x)
  text(rep(1:nc, times=nc), rep(nc:1, each=nc), labels = sub('^0$', '', round(c(x.orig), 0)))
  box(lwd=2)
  par(opar) # reset par
}

################################################################################################
################################################################################################
################################################################################################
################################################################################################
################################################################################################

#########################################################
## R script for MetaboAnalyst
## Description: perform correlation analysis
##
## Author: Jeff Xia, jeff.xia@mcgill.ca
## McGill University, Canada
##
## License: GNU GPL (>= 2)
###################################################

#######################################
##
## Pattern hunter
##########################################

# Run template on all the high region effect genes: returns
# c(correlation estimate, test statistic, p-value) from cor.test.
template.match <- function(x, template, dist.name) {
  k<-cor.test(x,template, method=dist.name);
  c(k$estimate, k$stat, k$p.value)
}

# Correlate every feature with a group-level pattern such as "1-2-3".
# Writes the sorted result table to CSV and stores it in analSet$corr.
Match.Pattern<-function(dist.name="pearson", pattern=NULL){
  if(is.null(pattern)){
    pattern <- paste(1:length(levels(dataSet$cls)), collapse="-");
  }
  templ <- as.numeric(ClearStrings(strsplit(pattern, "-")[[1]]));
  if(all(templ==templ[1])){
    AddErrMsg("Cannot calculate correlation on constant values!");
    return(0);
  }
  new.template <- vector(mode="numeric", length=length(dataSet$cls))
  # expand to match each levels in the dataSet$cls
  all.lvls <- levels(dataSet$cls);
  if(length(templ)!=length(all.lvls)){
    AddErrMsg("Wrong template - must the same length as the group number!");
    return(0);
  }
  for(i in 1:length(templ)){
    hit.inx <- dataSet$cls == all.lvls[i]
    new.template[hit.inx] = templ[i];
  }
  cbtempl.results <- apply(dataSet$norm, 2, template.match, new.template, dist.name);
  cor.res<-t(cbtempl.results);
  fdr.col <- p.adjust(cor.res[,3], "fdr");
  cor.res <- cbind(cor.res, fdr.col);
  colnames(cor.res)<-c("correlation", "t-stat", "p-value", "FDR");
  ord.inx<-order(cor.res[,3]);
  sig.mat <- signif(cor.res[ord.inx,],5);
  fileName <- "correlation_pattern.csv";
  write.csv(sig.mat,file=fileName);
  analSet$corr$sig.nm<<-fileName;
  analSet$corr$cor.mat<<-sig.mat;
  analSet$corr$pattern <<- pattern;
  return(1);
}

# Build the default pattern templates (increasing, decreasing, mid-high, mid-low)
# as "a-b-c" strings, prefixed by the group-name legend.
GenerateTemplates <- function(){
  level.len <- length(levels(dataSet$cls));
  # only specify 4: increasing, decreasing, mid high, mid low, constant
  incs <- 1:level.len;
  desc <- level.len:1;
  if(level.len > 2){
    # use ceiling, so that the peak will be right for even length
    mid.pos <- ceiling((level.len+1)/2);
    mid.high <- c(1:mid.pos, seq(mid.pos-1,by=-1,length.out=level.len-mid.pos));
    mid.low <- c(mid.pos:1, seq(2, length.out=level.len-mid.pos));
    res <- rbind(incs, desc, mid.high, mid.low); # add the constant one
  }else{
    res <- rbind(incs, desc);
  }
  # turn into string
  res <- apply(res, 1, paste, collapse="-");
  # add the ledgends
  res <- c(paste(levels(dataSet$cls), collapse="-"), res);
  return (res);
}

# calculate correlation of all other feature to a given feature name
FeatureCorrelation<-function(dist.name, varName){
  cbtempl.results <- apply(dataSet$norm, 2, template.match, dataSet$norm[,varName], dist.name);
  cor.res<-t(cbtempl.results);
  fdr.col <- p.adjust(cor.res[,3], "fdr");
  cor.res <- cbind(cor.res, fdr.col);
  colnames(cor.res)<-c("correlation", "t-stat", "p-value", "FDR");
  ord.inx<-order(cor.res[,3])
  sig.mat <-signif(cor.res[ord.inx,],5);
  fileName <- "correlation_feature.csv";
  write.csv(sig.mat,file=fileName);
  analSet$corr$sig.nm<<-fileName;
  analSet$corr$cor.mat<<-sig.mat;
  analSet$corr$pattern<<-varName;
  return(1);
}

# Horizontal bar chart of the (top 25) correlations stored in analSet$corr.
PlotCorr <- function(imgName, format="png", dpi=72, width=NA){
  cor.res <- analSet$corr$cor.mat;
  pattern <- analSet$corr$pattern;
  title <- paste(GetVariableLabel(), "correlated with the", pattern);
  if(nrow(cor.res) > 25){
    # first get most signficant ones (p value)
    ord.inx<-order(cor.res[,3]);
    cor.res <- cor.res[ord.inx, ];
    cor.res <- cor.res[1:25, ];
    # then order by their direction (correlation)
    ord.inx<-order(cor.res[,1]);
    if(sum(cor.res[,1] > 0) == 0){ # all negative correlation
      ord.inx <- rev(ord.inx);
    }
    cor.res <- cor.res[ord.inx, ];
    title <- paste("Top 25", tolower(GetVariableLabel()), "correlated with the", pattern);
  }
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- h <- 7.2;
  }else if(width == 0){
    # FIX: this branch previously set only w, leaving h undefined and
    # crashing the Cairo(height=h) call below; use the same default size.
    w <- h <- 7.2;
    imgSet$corr<<-imgName;
  }else{
    w <- h <- width;
  }
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mar=c(5,6,4,3))
  rownames(cor.res)<-substr(rownames(cor.res), 1, 18);
  cols <- ifelse(cor.res[,1] >0, "mistyrose","lightblue");
  dotchart(cor.res[,1], pch="", xlim=c(-1,1), xlab="Correlation coefficients", main=title);
  rownames(cor.res) <- NULL;
  barplot(cor.res[,1], space=c(0.5, rep(0, nrow(cor.res)-1)), xlim=c(-1,1), xaxt="n", col = cols, add=T,horiz=T);
  #dev.off();
}

GetCorrSigFileName <- function(){
  analSet$corr$sig.nm;
}

GetCorSigMat<-function(){
  as.matrix(CleanNumber(analSet$corr$cor.mat));
}

GetCorSigRowNames<-function(){
  rownames(analSet$corr$cor.mat);
}

GetCorSigColNames<-function(){
  colnames(analSet$corr$cor.mat);
}

GetSigTable.Corr<-function(){
  GetSigTable(analSet$corr$cor.mat, "Pattern search using correlation analysis");
}

# Feature-feature correlation heatmap; trims to the 1000 most variable
# features and optionally to the topNum most inter-correlated ones.
PlotCorrHeatMap<-function(imgName, format="png", dpi=72, width=NA, cor.method, colors, viewOpt, fix.col, no.clst, top, topNum){
  main <- xlab <- ylab <- NULL;
  data <- dataSet$norm;
  if(ncol(data) > 1000){
    filter.val <- apply(data.matrix(data), 2, IQR, na.rm=T);
    rk <- rank(-filter.val, ties.method='random');
    data <- as.data.frame(data[,rk <=1000]);
    print("Data is reduced to 1000 vars ..");
  }
  colnames(data)<-substr(colnames(data), 1, 18);
  corr.mat<-cor(data, method=cor.method);
  # use total abs(correlation) to select
  if(top){
    cor.sum <- apply(abs(corr.mat), 1, sum);
    cor.rk <- rank(-cor.sum);
    var.sel <- cor.rk <= topNum;
    corr.mat <- corr.mat[var.sel, var.sel];
  }
  # set up parameter for heatmap
  suppressMessages(require(RColorBrewer));
  suppressMessages(require(gplots));
  if(colors=="gbr"){
    colors <- colorRampPalette(c("green", "black", "red"), space="rgb")(256);
  }else if(colors == "heat"){
    colors <- heat.colors(256);
  }else if(colors == "topo"){
    colors <- topo.colors(256);
  }else if(colors == "gray"){
    colors <- colorRampPalette(c("grey90", "grey10"))(256);
  }else{
    colors <- rev(colorRampPalette(brewer.pal(10, "RdBu"))(256));
  }
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(viewOpt == "overview"){
    if(is.na(width)){
      w <- 9;
    }else if(width == 0){
      w <- 7.2;
      imgSet$heatmap<<-imgName;
    }else{
      w <- 7.2;
    }
    h <- w;
  }else{
    # detailed view: scale height with the number of features shown
    if(ncol(corr.mat) > 50){
      myH <- ncol(corr.mat)*12 + 40;
    }else if(ncol(corr.mat) > 20){
      myH <- ncol(corr.mat)*12 + 60;
    }else{
      myH <- ncol(corr.mat)*12 + 120;
    }
    h <- round(myH/72,2);
    if(is.na(width)){
      w <- h;
    }else if(width == 0){
      w <- h <- 7.2;
imgSet$corr.heatmap<<-imgName;
    }else{
      w <- h <- 7.2;
    }
  }
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  if(no.clst){
    rowv=FALSE;
    colv=FALSE;
    dendro= "none";
  }else{
    rowv=TRUE;
    colv=TRUE;
    dendro= "both";
  }
  require(pheatmap);
  if(fix.col){
    # fixed color scale over the full [-1, 1] correlation range
    breaks <- seq(from = -1, to = 1, length = 257);
    pheatmap(corr.mat, fontsize=8, fontsize_row=8, cluster_rows = colv, cluster_cols = rowv, color = colors, breaks = breaks );
  }else{
    pheatmap(corr.mat, fontsize=8, fontsize_row=8, cluster_rows = colv, cluster_cols = rowv, color = colors );
  }
  #dev.off();
  write.csv(signif(corr.mat,5), file="correlation_table.csv")
}

#############################################################################################
#############################################################################################
#############################################################################################
#############################################################################################

####################################################################
## R script for MetaboAnalyst
## Description: perform Dendrogram, Heatmap, Kmeans & SOM analysis
##
## Author: Jeff Xia, jeff.xia@mcgill.ca
## McGill University, Canada
##
## License: GNU GPL (>= 2)
###################################################

####################################
########### Dendrogram ##############
#####################################

# Hierarchical clustering dendrogram of samples; leaves colored by class when
# the class type is discrete.
PlotHCTree<-function(imgName, format="png", dpi=72, width=NA, smplDist, clstDist){
  # set up data set
  hc.dat<-as.matrix(dataSet$norm);
  colnames(hc.dat)<-substr(colnames(hc.dat), 1, 18) # some names are too long
  # set up distance matrix
  if(smplDist == 'euclidean'){
    dist.mat<-dist(hc.dat, method = smplDist);
  }else{
    # correlation-based distance: 1 - cor between samples
    dist.mat<-dist(1-cor(t(hc.dat), method = smplDist));
  }
  # record the paramters
  analSet$tree<<-list(dist.par=smplDist, clust.par=clstDist);
  # build the tree
  hc_tree<-hclust(dist.mat, method=clstDist);
  # plot the tree
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- minH <- 630;
    myH <- nrow(hc.dat)*10 + 150;
    if(myH < minH){
      myH <- minH;
    }
    w <- round(w/72,2);
    h <- round(myH/72,2);
  }else if(width == 0){
    w <- h <- 7.2;
    imgSet$tree<<-imgName;
  }else{
    w <- h <- 7.2;
  }
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(cex=0.8, mar=c(4,2,2,8));
  if(dataSet$cls.type == "disc"){
    clusDendro<-as.dendrogram(hc_tree);
    cols <- GetColorSchema();
    names(cols) <- rownames(hc.dat);
    labelColors <- cols[hc_tree$order];
    # recursively color each leaf label by its sample's class color
    colLab <- function(n){
      if(is.leaf(n)) {
        a <- attributes(n)
        labCol <- labelColors[a$label];
        attr(n, "nodePar") <- if(is.list(a$nodePar)) c(a$nodePar, lab.col = labCol,pch=NA) else list(lab.col = labCol,pch=NA)
      }
      n
    }
    clusDendro<-dendrapply(clusDendro, colLab)
    plot(clusDendro,horiz=T,axes=T);
    par(cex=1);
    legend.nm <- as.character(dataSet$cls);
    legend("topleft", legend = unique(legend.nm), pch=15, col=unique(cols), bty = "n");
  }else{
    plot(as.dendrogram(hc_tree), hang=-1, main=paste("Cluster with", clstDist, "method"), xlab=NULL, sub=NULL, horiz=TRUE);
  }
  #dev.off();
}

# inx has to be 1 or 2
GetClassLabel<-function(inx){
  levels(dataSet$cls)[inx]
}

############################
########### SOM #############
#############################

# SOM analysis
SOM.Anal<-function(x.dim, y.dim, initMethod, neigb = 'gaussian'){
  require(som);
  analSet$som<<-som(as.matrix(dataSet$norm), xdim=x.dim, ydim=y.dim, init=initMethod, neigh=neigb);
}

# get members for given cluster index, return a character string
GetSOMClusterMembers<-function(i, j){
  clust<-analSet$som$visual;
  xTrue<-clust$x == i;
  yTrue<-clust$y == j;
  hit.inx <- xTrue & yTrue;
  all.cols <- GetColorSchema();
  paste("", rownames(dataSet$norm)[hit.inx], "",collapse =", ");
}

# Print an xtable listing the samples in every SOM grid cell.
# SOM cell indices are 0-based, hence the while loops starting at 0.
GetAllSOMClusterMembers<-function(){
  clust<-analSet$som$visual;
  xdim<-analSet$som$xdim;
  ydim<-analSet$som$ydim;
  clust.df = data.frame();
  rowNameVec = c();
  i = 0;
  while(i < xdim){
    j = 0;
    while(j < ydim){
      xTrue<-clust$x == i;
      yTrue<-clust$y == j;
      if(i==0 & j==0){
        # bug in R, the first one need to be different
        clust.df <- rbind(paste(rownames(dataSet$norm)[xTrue & yTrue], collapse = " "));
        rowNameVec <- c(paste("Cluster(", i, ",", j,")"));
      }else{
        clust.df <- rbind(clust.df, paste(rownames(dataSet$norm)[xTrue & yTrue], collapse=" "));
        rowNameVec <- c(rowNameVec, paste("Cluster(", i, ",", j,")"));
      }
      j = j+1;
    }
    i = i+1;
  }
  row.names(clust.df)<- rowNameVec;
  colnames(clust.df)<-"Samples in each cluster";
  print(xtable(clust.df, align="l|p{8cm}", caption="Clustering result using SOM"),caption.placement="top", size="\\scriptsize");
}

# plot SOM map for less than 20 clusters
PlotSOM <- function(imgName, format="png", dpi=72, width=NA){
  xdim<-analSet$som$xdim;
  ydim<-analSet$som$ydim;
  total<-xdim*ydim;
  if(total>20) { return();}
  ylabel<-GetValueLabel();
  clust<-analSet$som$visual;
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    w <- 7;
    imgSet$som<<-imgName;
  }else{
    w <- width;
  }
  h <- w*8/9;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mfrow = GetXYCluster(total), mar=c(5,4,2,2));
  for (i in 0:(xdim-1)) {
    xTrue<-clust$x == i;
    for (j in 0:(ydim-1)) {
      yTrue<-clust$y == j;
      sel.inx<-xTrue & yTrue; # selected row
      if(sum(sel.inx)>0){ # some cluster may not contain any member
        matplot(t(dataSet$norm[sel.inx, ]), type="l", col='grey', axes=F, ylab=ylabel, main=paste("Cluster(", i, ",", j,")", ", n=", sum(sel.inx), sep=""))
        lines(apply(dataSet$norm[sel.inx, ], 2, median), type="l", col='blue', lwd=1);
      }else{ # plot a dummy
        plot(t(dataSet$norm[1, ]), type="n", axes=F, ylab=ylabel, main=paste("Cluster(", i, ",", j,")",", n=", sum(sel.inx),sep=""))
      }
      axis(2);
      axis(1, 1:ncol(dataSet$norm), substr(colnames(dataSet$norm), 1, 7), las=2);
    }
  }
  #dev.off();
}

##################################
########### K-means ##############
###################################

# functions for k-means analysis
# K-means clustering on the normalized data; the fit is stored in the global
# analSet (side effect via <<-).
Kmeans.Anal<-function(clust.num){
  analSet$kmeans<<-kmeans (dataSet$norm, clust.num, nstart=100);
}

# Plot the feature profiles of every k-means cluster (skipped for > 20 clusters).
PlotKmeans<-function(imgName, format="png", dpi=72, width=NA){
  clust.num <- max(analSet$kmeans$cluster);
  if(clust.num>20) return();
  # calculate arrangement of panel
  ylabel<-GetValueLabel();
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    w <- 7;
    imgSet$kmeans<<-imgName;  # remember the image name (global) for reporting
  }else{
    w <- width;
  }
  h <- w*8/9;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mfrow = GetXYCluster(clust.num), mar=c(5,4,2,2));
  # One panel per cluster: grey member profiles with the median in blue.
  for (loop in 1:clust.num) {
    matplot(t(dataSet$norm[analSet$kmeans$cluster==loop,]), type="l", col='grey', ylab=ylabel, axes=F, main=paste("Cluster ",loop, ", n=", analSet$kmeans$size[loop], sep=""))
    lines(apply(dataSet$norm[analSet$kmeans$cluster==loop,], 2, median), type="l", col='blue', lwd=1);
    axis(2);
    axis(1, 1:ncol(dataSet$norm), substr(colnames(dataSet$norm), 1, 7), las=2);
  }
  #dev.off();
}

# get cluster member for give index
# add HTML color to the names based on its group membership
GetKMClusterMembers<-function(i){
  all.cols <- GetColorSchema();  # NOTE(review): unused; see the commented call below
  hit.inx <- analSet$kmeans$cluster== i;
  paste("", rownames(dataSet$norm)[hit.inx], "",collapse =", ");
  # paste(all.cols[hit.inx], rownames(dataSet$norm)[hit.inx], collapse =", ");
}

# Print (as a LaTeX xtable) the sample membership of every k-means cluster.
GetAllKMClusterMembers<-function(){
  clust.df = data.frame();
  rowNameVec = c();
  i = 1;
  clust.num<-max(analSet$kmeans$cluster);
  while(i<=clust.num){
    if(i==1){
      clust.df <- rbind(paste(rownames(dataSet$norm)[analSet$kmeans$cluster== i], collapse = " "));
    }else{
      clust.df <- rbind(clust.df,paste(rownames(dataSet$norm)[analSet$kmeans$cluster== i], collapse = " "));
    }
    rowNameVec <- c(rowNameVec, paste("Cluster(", i, ")"));
    i = i+1;
  }
  row.names(clust.df)<- rowNameVec;
  colnames(clust.df)<-"Samples in each cluster";
  print(xtable(clust.df, align="l|p{8cm}", caption="Clustering result using K-means"), caption.placement="top", size="\\scriptsize");
}

# plot a sub heatmap based on results from t-tests/ANOVA, VIP or randomforest
PlotSubHeatMap <- function(imgName, format="png", dpi=72, width=NA, dataOpt, scaleOpt, smplDist, clstDist, palette, method.nm, top.num, viewOpt, rowV=T, colV=T, border=T){
  var.nms = colnames(dataSet$norm);
  # Keep only the top.num features as ranked by the chosen method; the
  # underlying analyses are computed lazily if not cached in analSet.
  if(top.num < length(var.nms)){
    if(method.nm == 'tanova'){
      # t-test for two groups, ANOVA otherwise
      if(GetGroupNumber() == 2){
        if(is.null(analSet$tt)){
          Ttests.Anal();
        }
        var.nms <- names(sort(analSet$tt$p.value))[1:top.num];
      }else{
        if(is.null(analSet$aov)){
          ANOVA.Anal();
        }
        var.nms <- names(sort(analSet$aov$p.value))[1:top.num];
      }
    }else if(method.nm == 'cor'){
      if(is.null(analSet$cor.res)){
        Match.Pattern();
      }
      # re-order for pretty view
      cor.res <- analSet$cor.res;
      ord.inx<-order(cor.res[,3]);
      cor.res <- cor.res[ord.inx, ];
      ord.inx<-order(cor.res[,1]);
      cor.res <- cor.res[ord.inx, ];
      var.nms <- rownames(cor.res)[1:top.num];
    }else if(method.nm == 'vip'){
      if(is.null(analSet$plsda)){
        PLSR.Anal();
        PLSDA.CV();
      }
      vip.vars <- analSet$plsda$vip.mat[,1];# use the first component
      var.nms <- names(rev(sort(vip.vars)))[1:top.num];
    }else if(method.nm == 'rf'){
      if(is.null(analSet$rf)){
        RF.Anal();
      }
      var.nms <- GetRFSigRowNames()[1:top.num];
    }
  }
  var.inx <- match(var.nms, colnames(dataSet$norm));
  PlotHeatMap(imgName, format, dpi, width, dataOpt, scaleOpt, smplDist, clstDist, palette, viewOpt, rowV, colV, var.inx, border);
}

# Draw the sample/feature heatmap (pheatmap for discrete class labels).
PlotHeatMap<-function(imgName, format="png", dpi=72, width=NA, dataOpt, scaleOpt, smplDist, clstDist, palette, viewOpt="detail", rowV=T, colV=T, var.inx=NA, border=T){
  # record the paramters
  analSet$htmap<<-list(dist.par=smplDist, clust.par=clstDist);
  # set up data set
  if(dataOpt=="norm"){
    my.data <- dataSet$norm;
  }else{
    my.data <- dataSet$proc;
  }
  # NOTE(review): is.na() on a vector var.inx returns a vector; this relies on
  # the default being the scalar NA to mean "all columns" -- confirm callers
  # never pass a vector containing NA.
  if(is.na(var.inx)){
    hc.dat<-as.matrix(my.data);
  }else{
    hc.dat<-as.matrix(my.data[,var.inx]);
  }
  colnames(hc.dat)<-substr(colnames(hc.dat),1,18) # some names are too long
  hc.cls <- dataSet$cls;
  # set up colors for heatmap
  if(palette=="gbr"){
    colors <- colorRampPalette(c("green", "black", "red"), space="rgb")(256);
  }else if(palette == "heat"){
    colors <- heat.colors(256);
  }else if(palette == "topo"){
    colors <- topo.colors(256);
  }else if(palette == "gray"){
    colors <- colorRampPalette(c("grey90", "grey10"), space="rgb")(256);
  }else{
    suppressMessages(require(RColorBrewer));
    colors <- rev(colorRampPalette(brewer.pal(10, "RdBu"))(256));
  }
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  # Canvas width scales with sample count unless an explicit width was given.
  if(is.na(width)){
    minW <- 630;
    myW <- nrow(hc.dat)*18 + 150;
    if(myW < minW){
      myW <- minW;
    }
    w <- round(myW/72,2);
  }else if(width == 0){
    w <- 7.2;
    imgSet$heatmap<<-imgName;
  }else{
    w <- 7.2;
  }
  myH <- ncol(hc.dat)*18 + 150;
  h <- round(myH/72,2);
  if(viewOpt == "overview"){
    # Overview mode caps the canvas to a screen-friendly size.
    if(is.na(width)){
      if(w > 9){
        w <- 9;
      }
    }else if(width == 0){
      if(w > 7.2){
        w <- 7.2;
      }
      imgSet$heatmap<<-imgName;
    }else{
      w <- 7.2;
    }
    if(h > w){
      h <- w;
    }
  }
  if(border){
    border.col<-"grey60";
  }else{
    border.col <- NA;
  }
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  if(dataSet$cls.type == "disc"){
    require(pheatmap);
    annotation <- data.frame(class= hc.cls);
    rownames(annotation) <-rownames(hc.dat);
    # set up color schema for samples
    if(palette== "gray"){
      cols <- GetColorSchema(T);
      uniq.cols <- unique(cols);
    }else{
      cols <- GetColorSchema();
      uniq.cols <- unique(cols);
    }
    names(uniq.cols) <- unique(as.character(dataSet$cls));
    ann_colors <- list(class= uniq.cols);
    pheatmap(t(hc.dat), annotation=annotation, fontsize=8, fontsize_row=8, clustering_distance_rows = smplDist, clustering_distance_cols = smplDist, clustering_method = clstDist, border_color = border.col, cluster_rows = colV, cluster_cols = rowV, scale = scaleOpt, color = colors, annotation_colors = ann_colors );
  }else{
    # NOTE(review): rowTree/colTree are not defined anywhere in this function,
    # so this branch would fail if reached -- confirm where they should come from.
    heatmap(hc.dat, Rowv = rowTree, Colv=colTree, col = colors, scale="column");
  }
  #dev.off();
}

# Two-factor heatmap; the signature continues on the next chunk.
PlotHeatMap2<-function(imgName, format="png", dpi=72, width=NA, smplDist='pearson', clstDist='average', colors="bwm", viewOpt="overview", hiRes=FALSE, sortInx = 1, useSigFeature, drawBorder,
var.inx=1:ncol(dataSet$norm)){
  # (Continuation of the PlotHeatMap2 signature opened on the previous chunk.)
  # Order samples by the two experimental factors, primary factor first.
  if(sortInx == 1){
    ordInx <- order(dataSet$facA, dataSet$facB);
  }else{
    ordInx <- order(dataSet$facB, dataSet$facA);
  }
  new.facA <- dataSet$facA[ordInx];
  new.facB <- dataSet$facB[ordInx];
  # set up data set. note, need to transpose the data for two way plotting
  data <- dataSet$norm[ordInx, ];
  # use features from ANOVA2
  if(useSigFeature){
    hits <- colnames(data) %in% rownames(analSet$aov2$sig.mat);
    data <- dataSet$norm[ordInx, hits];
  }
  hc.dat<-as.matrix(data);
  colnames(hc.dat)<-substr(colnames(data), 1, 18) # some names are too long
  # set up parameter for heatmap
  suppressMessages(require(RColorBrewer));
  if(colors=="gbr"){
    colors <- colorRampPalette(c("green", "black", "red"), space="rgb")(256);
  }else if(colors == "heat"){
    colors <- heat.colors(256);
  }else if(colors == "topo"){
    colors <- topo.colors(256);
  }else if(colors == "gray"){
    colors <- colorRampPalette(c("grey90", "grey10"), space="rgb")(256);
  }else{
    colors <- rev(colorRampPalette(brewer.pal(10, "RdBu"))(256));
  }
  if(drawBorder){
    border.col<-"grey60";
  }else{
    border.col <- NA;
  }
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  # Canvas sizing: fixed for overview, scaled by sample/feature counts otherwise.
  if(viewOpt == "overview"){
    if(is.na(width)){
      w <- 9;
    }else if(width == 0){
      w <- 7.2;
      imgSet$heatmap<<-imgName;
    }else{
      w <- 7.2;
    }
    h <- w;
  }else{
    if(is.na(width)){
      minW <- 650;
      myW <- nrow(hc.dat)*11 + 150;
      if(myW < minW){
        myW <- minW;
      }
      w <- round(myW/72,2);
    }else if(width == 0){
      w <- 7.2;
      imgSet$heatmap<<-imgName;
    }else{
      w <- 7.2;
    }
    if(ncol(hc.dat) >100){
      myH <- ncol(hc.dat)*12 + 120;
    }else if(ncol(hc.dat) > 50){
      myH <- ncol(hc.dat)*12 + 60;
    }else{
      myH <- ncol(hc.dat)*12 + 20;
    }
    h <- round(myH/72,2);
  }
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  require(pheatmap);
  # Two annotation tracks: factor B then factor A, labelled with their names.
  annotation <- data.frame(new.facB, new.facA);
  colnames(annotation) <- c(dataSet$facB.lbl, dataSet$facA.lbl);
  rownames(annotation) <-rownames(hc.dat);
  pheatmap(t(hc.dat), annotation=annotation, fontsize=8, fontsize_row=8, clustering_distance_rows = smplDist, clustering_distance_cols = smplDist, clustering_method = clstDist, border_color = border.col, cluster_rows = T, cluster_cols = F, scale = 'row', color = colors);
  #dev.off();
  # record the parameters (global) for report generation
  analSet$htmap2<<-list(dist.par=smplDist, clust.par=clstDist);
}

############################################################################################
############################################################################################
############################################################################################
############################################################################################

##################################################
## R script for MetaboAnalyst
## Description: perform SAM and EBAM for feature selection
##
## Author: Jeff Xia, jeff.xia@mcgill.ca
## McGill University, Canada
##
## License: GNU GPL (>= 2)
###################################################

##################################
########### SAM ##################
##################################

# SAM analysis (siggenes): two-class uses d.stat or wilc.stat, optionally
# paired; multi-class uses the sam() defaults. Result is stored globally.
SAM.Anal<-function(method="d.stat", paired=FALSE, varequal=TRUE){
  suppressMessages(require(siggenes));
  mat<-t(dataSet$norm); # in sam the column is sample
  cl<-as.numeric(dataSet$cls); # change to 0 and 1 for class label
  if(dataSet$cls.num==2){
    if(paired){
      cl<-as.numeric(dataSet$pairs);
    }
    if(method == "d.stat"){
      sam_out<-sam(mat, cl, method=d.stat, var.equal=varequal, R.fold=0, rand=123);
    }else{
      sam_out<-sam(mat, cl, method=wilc.stat, R.fold=0,rand=123);
    }
  }else{
    sam_out<-sam(mat, cl, rand=123);
  }
  analSet$sam<<-sam_out;
}

# Two-panel plot of the SAM delta table: FDR vs delta and number of
# significant features vs delta, with the chosen delta marked.
PlotSAM.FDR<-function(delta, imgName, format="png", dpi=72, width=NA){
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 10;
  }else if(width == 0){
    w <- 7.2;
    imgSet$sam.fdr<<-imgName;
  }
  # NOTE(review): no final else branch -- w is undefined when width is any
  # other positive value; confirm callers only pass NA or 0.
  h <- w*3/5;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mfrow=c(1,2), mar=c(5,6,4,1));
  mat.fdr<-analSet$sam@mat.fdr;
# (Continuation of PlotSAM.FDR from the previous chunk.)
# Left panel: FDR as a function of delta.
plot(mat.fdr[,"Delta"],mat.fdr[,"FDR"],xlab='Delta',ylab=NA,type="b", col='blue', las=2);
abline(v = delta, lty=3, col="magenta");
mtext("FDR", side=2, line=5);
par(mar=c(5,5,4,2))
# Right panel: significant feature count as a function of delta.
# NOTE(review): 'feaure' typo in the axis label below (runtime string, left as-is).
plot(mat.fdr[,"Delta"],mat.fdr[,"Called"],xlab='Delta',ylab="Significant feaure No.",type="b", col='blue', las=2);
abline(v = delta, lty=3, col="magenta");
hit.inx <- mat.fdr[,"Delta"] <= delta;
my.fdr <- signif(min(mat.fdr[,"FDR"][hit.inx]), 3);
my.sigs <- min(mat.fdr[,"Called"][hit.inx]);
mtext(paste("Delta:", delta, " FDR:", my.fdr, " Sig. cmpds:", my.sigs), line=-2, side = 3, outer = TRUE, font=2)
#dev.off();
}

# Summarize SAM at the given delta, drop the gene-index/fold columns, write
# the significant features to CSV, and cache the matrix and delta globally.
SetSAMSigMat<-function(delta){
  sam.sum<-summary(analSet$sam, delta);
  summary.mat<-sam.sum@mat.sig;
  sig.mat <-as.matrix(signif(summary.mat[,-c(1,6)],5));
  write.csv(signif(sig.mat,5),file="sam_sigfeatures.csv");
  analSet$sam.cmpds<<-sig.mat;
  analSet$sam.delta<<-delta;
}

# Accessors for the cached SAM significance matrix.
GetSAMSigMat<-function(){
  return(CleanNumber(analSet$sam.cmpds));
}

GetSAMSigRowNames<-function(){
  rownames(analSet$sam.cmpds);
}

GetSAMSigColNames<-function(){
  colnames(analSet$sam.cmpds);
}

GetSigTable.SAM<-function(){
  GetSigTable(analSet$sam.cmpds, "SAM");
}

# SAM compound plot at the cached delta.
PlotSAM.Cmpd<-function(imgName, format="png", dpi=72, width=NA){
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 8;
  }else if(width == 0){
    w <- 7;
    imgSet$sam.cmpd<<-imgName;
  }else{
    w <- width;
  }
  h <- w;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  plot(analSet$sam, analSet$sam.delta);
  #dev.off();
}

# obtain a default delta with reasonable number
# of sig features and decent FDR
GetSuggestedSAMDelta<-function(){
  mat.fdr<-analSet$sam@mat.fdr
  deltaVec <- mat.fdr[,"Delta"];
  fdrVec <- mat.fdr[,"FDR"];
  signumVec <- mat.fdr[,"Called"];
  for(i in 1:length(deltaVec)){
    delta = deltaVec[i];
    fdr = fdrVec[i];
    called = signumVec[i];
    if(called > 0){ # at least 1 significant cmpd
      # check fdr, default threshold 0.01
      # if too many significant compounds, tight up and vice versa
      if(fdr < 0.001){
        return (delta);
      }else if(fdr < 0.01 & called < 100){
        return (delta);
      }else if(fdr < 0.05 & called <50){
        return (delta);
      }else if(fdr < 0.1 & called < 20){
        return (delta);
      }else if(called < 10){
        return (delta);
      }
    }
  }
  return (deltaVec[1]); # if no significant found, return the first one
}

#######################################
############# EBAM ####################
#######################################

# deteriming a0, only applicable for z.ebam (default)
EBAM.A0.Init<-function(isPaired, isVarEq){
  suppressMessages(require(siggenes));
  if(isPaired){
    cl.ebam<-as.numeric(dataSet$pairs);
  }else{
    cl.ebam<-as.numeric(dataSet$cls)-1; # change to 0 and 1 for class label
  }
  conc.ebam<-t(dataSet$norm); # in sam column is sample, row is gene
  ebam_a0<-find.a0(conc.ebam, cl.ebam, var.equal=isVarEq, gene.names = names(dataSet$norm), rand=123);
  analSet$ebam.a0<<-ebam_a0;
}

# plot ebam a0 plot also return the analSet$ebam.a0 object so that the suggested a0 can be obtained
PlotEBAM.A0<-function(imgName, format="png", dpi=72, width=NA){
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 8;
  }else if(width == 0){
    w <- 7;
    imgSet$ebam.a0<<-imgName;
  }
  # NOTE(review): like PlotSAM.FDR, w is undefined for any other positive
  # width value -- confirm callers only pass NA or 0.
  h <- 3*w/4;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  plot(analSet$ebam.a0);
  #dev.off();
}

# note: if method is wilcoxon, the A0 and var equal will be ignored
EBAM.Cmpd.Init<-function(method="z.ebam", A0=0, isPaired=FALSE, isVarEq=TRUE){
  if(isPaired){
    cl.ebam<-as.numeric(dataSet$pairs);
  }else{
    cl.ebam<-as.numeric(dataSet$cls)-1;
  }
  conc.ebam<-t(dataSet$norm); # in sam column is sample, row is feature
  if(method=="z.ebam"){
    ebam_out<-ebam(conc.ebam, cl.ebam, method=z.ebam, a0=A0, var.equal=isVarEq, fast=TRUE, gene.names = names(dataSet$norm), rand=123);
  }else{
    ebam_out<-ebam(conc.ebam, cl.ebam, method=wilc.ebam, gene.names = names(dataSet$norm), rand=123);
  }
  analSet$ebam<<-ebam_out;
}

# return double matrix with 3 columns - z.value, posterior, local.fdr
# Summarize EBAM at the given delta, write the significant features to CSV,
# and cache the matrix and delta globally.
SetEBAMSigMat<-function(delta){
  ebam.sum<-summary(analSet$ebam, delta);
  summary.mat<-ebam.sum@mat.sig;
  sig.mat <-as.matrix(signif(summary.mat[,-1],5));
  write.csv(signif(sig.mat,5),file="ebam_sigfeatures.csv");
  analSet$ebam.cmpds<<-sig.mat;
  analSet$ebam.delta<<-delta;
}

# Accessors for the cached EBAM significance matrix.
GetEBAMSigMat<-function(){
  return(CleanNumber(analSet$ebam.cmpds));
}

GetEBAMSigRowNames<-function(){
  rownames(analSet$ebam.cmpds);
}

GetEBAMSigColNames<-function(){
  colnames(analSet$ebam.cmpds);
}

GetSigTable.EBAM<-function(){
  GetSigTable(analSet$ebam.cmpds, "EBAM");
}

# plot ebam
PlotEBAM.Cmpd<-function(imgName, format="png", dpi=72, width=NA){
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- h <- 7;
  }else if(width == 0){
    w <- h <- 7;
    imgSet$ebam.cmpd<<-imgName;
  }else{
    w <- h <- width;
  }
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  plot(analSet$ebam, analSet$ebam.delta);
  #dev.off();
}

############################################################################################
############################################################################################
############################################################################################
############################################################################################

##################################################
## R script for MetaboAnalyst
## Description: some miscellaneous tasks
##
## Author: Jeff Xia, jeff.xia@mcgill.ca
## McGill University, Canada
##
## License: GNU GPL (>= 2)
###################################################

# linearly transforms a vector or matrix of numbers to a new range
rescale<-function(x,newrange) {
  if(missing(x) | missing(newrange)) {
    usage.string<-paste("Usage: rescale(x,newrange)\n", "\twhere x is a numeric object and newrange is the new min and max\n", sep="",collapse="")
    stop(usage.string)
  }
  if(is.numeric(x) && is.numeric(newrange)) {
    xna<-is.na(x)
    if(all(xna)) return(x)
    if(any(xna)) xrange<-range(x[!xna]) else xrange<-range(x)
    # if x is constant, just return it
    if(xrange[1] == xrange[2]) return(x)
    mfac<-(newrange[2]-newrange[1])/(xrange[2]-xrange[1])
    return(newrange[1]+(x-xrange[1])*mfac)
  }
  else {
    warning("Only numeric objects can be rescaled")
    return(x)
  }
}

# merge duplicated columns or rows by their mean
# dim 1 => row, dim 2 => column
MergeDuplicates <- function(data, dim=2){
  if(is.null(dim(data))){ # a vector
    if(is.null(names(data))){
      print("Cannot detect duplicate data without names!!!");
      return();
    }
    nm.cls <- as.factor(names(data));
    uniq.len <- length(levels(nm.cls));
    if(uniq.len == length(data)){
      return(data);
    }
    new.data <- vector (mode="numeric",length=uniq.len);
    for(i in 1:uniq.len){
      dup.inx <- nm.cls == levels(nm.cls)[i];
      new.data[i] <- mean(data[dup.inx]);
    }
    names(new.data) <- levels(nm.cls);
    rem.len <- length(data) - length(new.data);
  }else{
    # Matrix case: work on columns; transpose first when merging rows.
    if(dim == 1){
      data <- t(data);
    }
    if(is.null(colnames(data))){
      print("Cannot detect duplicate data without var names!!!");
      return();
    }
    nm.cls <- as.factor(colnames(data));
    uniq.len <- length(levels(nm.cls));
    if(uniq.len == ncol(data)){
      if(dim == 1){
        data <- t(data);
      }
      return(data);
    }
    new.data <- matrix (nrow=nrow(data), ncol=uniq.len);
    for(i in 1:uniq.len){
      dup.inx <- which(nm.cls == levels(nm.cls)[i]);
      new.data[,i] <- apply(data[,dup.inx, drop=F], 1, mean);
    }
    rownames(new.data) <- rownames(data);
    colnames(new.data) <- levels(nm.cls);
    rem.len <- ncol(data) - ncol(new.data);
    if(dim == 1){
      new.data <- t(new.data);
    }
  }
  print(paste(rem.len, "duplicates are merged to their average"));
  new.data;
}

# given a data with duplicates, dups is the one with duplicates
# Collapse rows with duplicated row names using mean/median/max/sum
# (continues on the next chunk).
RemoveDuplicates <- function(data, lvlOpt="mean", quiet=T){
  all.nms <- rownames(data);
  colnms <- colnames(data);
  dup.inx <- duplicated(all.nms);
  dim.orig <- dim(data);
  data <- apply(data, 2, as.numeric); # force to be all numeric
  dim(data) <- dim.orig; # keep dimension (will lost when only one item)
  rownames(data) <- all.nms;
  colnames(data) <-
colnms;   # (RHS of the assignment split across the chunk boundary)
  if(sum(dup.inx) > 0){
    uniq.nms <- all.nms[!dup.inx];
    uniq.data <- data[!dup.inx,,drop=F];
    dup.nms <- all.nms[dup.inx];
    uniq.dupnms <- unique(dup.nms);
    uniq.duplen <- length(uniq.dupnms);
    for(i in 1:uniq.duplen){
      nm <- uniq.dupnms[i];
      hit.inx.all <- which(all.nms == nm);
      hit.inx.uniq <- which(uniq.nms == nm);
      # average the whole sub matrix
      if(lvlOpt == "mean"){
        uniq.data[hit.inx.uniq, ]<- apply(data[hit.inx.all,,drop=F], 2, mean, na.rm=T);
      }else if(lvlOpt == "median"){
        uniq.data[hit.inx.uniq, ]<- apply(data[hit.inx.all,,drop=F], 2, median, na.rm=T);
      }else if(lvlOpt == "max"){
        uniq.data[hit.inx.uniq, ]<- apply(data[hit.inx.all,,drop=F], 2, max, na.rm=T);
      }else{ # sum
        uniq.data[hit.inx.uniq, ]<- apply(data[hit.inx.all,,drop=F], 2, sum, na.rm=T);
      }
    }
    if(!quiet){
      current.msg <<- paste(current.msg, paste("A total of ", sum(dup.inx), " of duplicates were replaced by their ", lvlOpt, ".", sep=""), collapse="\n");
    }
    return(uniq.data);
  }else{
    if(!quiet){
      current.msg <<- paste(current.msg, "All IDs are unique.", collapse="\n");
    }
    return(data);
  }
}

# from two column input text to data matrix (single column data frame)
getDataFromTextInput <- function(txtInput, sep.type="space"){
  lines <- unlist(strsplit(txtInput, "\r|\n|\r\n")[1]);
  # drop an optional leading header line starting with '#'
  if(substring(lines[1],1,1)=="#"){
    lines <- lines[-1];
  }
  # separated by tab
  if(sep.type=="tab"){
    my.lists <- strsplit(lines, "\\t");
  }else{ # from any space
    my.lists <- strsplit(lines, "\\s+");
  }
  my.mat <- do.call(rbind, my.lists);
  if(dim(my.mat)[2] == 1){ # add 0
    my.mat <- cbind(my.mat, rep(0, nrow(my.mat)));
  }else if(dim(my.mat)[2] > 2){
    my.mat <- my.mat[,1:2];
    # NOTE(review): local `<-` here, while other message updates use the
    # global `current.msg <<-` -- confirm whether this is intentional.
    current.msg <- "More than two columns found in the list. Only first two columns will be used. 
";
  }
  # first column becomes the row names; remaining column is the value
  rownames(my.mat) <- data.matrix(my.mat[,1]);
  my.mat <- my.mat[,-1, drop=F];
  return(my.mat);
}

# use single core on the public server
Perform.permutation <- function(perm.num, fun){
  print(paste("performing", perm.num, "permutations ..."));
  #suppressMessages(require('multicore'));
  #core.num <- multicore:::detectCores();
  #if(core.num > 1){ # use two CPUs only, otherwise, the server will be unresponsive for other users
  # perm.res <- mclapply(2:perm.num, fun, mc.cores =core.num-1);
  #}else{ # just regular
  perm.res <- lapply(2:perm.num,fun);
  #}
  perm.res;
}

# Fast set membership; fmatch() presumably comes from the fastmatch package
# loaded elsewhere -- TODO confirm.
`%fin%` <- function(x, table) {
  fmatch(x, table, nomatch = 0L) > 0L
}

# create semitransparant colors for a given class label
CreateSemiTransColors <- function(cls){
  # note, the first color (red) is for QC
  col.nms <- rainbow(length(levels(cls)));
  # convert to semi-transparent
  semi.nms <- ToSemiTransParent(col.nms);
  # now expand to the one-to-one match to cls element
  col.vec <- vector(mode="character", length=length(cls));
  for (i in 1:length(levels(cls))){
    lv <- levels(cls)[i];
    col.vec[cls==lv] <- semi.nms[i];
  }
  return(col.vec);
}

# convert rgb color i.e. "#00FF00FF" to semi transparent
ToSemiTransParent <- function (col.nms, alpha=0.5){
  rgb.mat <- t(col2rgb(col.nms));
  # rgb() accepts a 3-column matrix as its first argument (r, g, b columns)
  rgb(rgb.mat/255, alpha=alpha);
}

# col.vec should already been created
# Re-key the global color/shape vectors by the current group names.
UpdateGraphSettings <- function(){
  grpnms <- GetGroupNames();
  names(colVec) <<- grpnms;
  names(shapeVec) <<- grpnms;
}

# Per-sample plotting symbols: user-defined shapeVec when set, otherwise
# group-number-based symbols (or a uniform dot).
GetShapeSchema <- function(show.name, grey.scale){
  if(exists("shapeVec") && all(shapeVec > 0)){
    sps <- rep(0, length=length(dataSet$cls));
    clsVec <- as.character(dataSet$cls)
    grpnms <- names(shapeVec);
    for(i in 1:length(grpnms)){
      sps[clsVec == grpnms[i]] <- shapeVec[i];
    }
    shapes <- sps;
  }else{
    if(show.name | grey.scale){
      shapes <- as.numeric(dataSet$cls)+1;
    }else{
      shapes <- rep(19, length(dataSet$cls));
    }
  }
  return(shapes);
}

# Per-sample colors: grayscale ramp, a 12-color ramp for >9 groups, the
# user-defined colVec when set, or default palette numbers.
GetColorSchema <- function(grayscale=F){
  # test if total group number is over 9
  grp.num <- length(levels(dataSet$cls));
  if(grayscale){
    dist.cols <- colorRampPalette(c("grey90", "grey30"))(grp.num);
    lvs <- levels(dataSet$cls);
    colors <- vector(mode="character", length=length(dataSet$cls));
    for(i in 1:length(lvs)){
      colors[dataSet$cls == lvs[i]] <- dist.cols[i];
    }
  }else if(grp.num > 9){
    pal12 = c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6", "#6A3D9A", "#FFFF99", "#B15928");
    dist.cols <- colorRampPalette(pal12)(grp.num);
    lvs <- levels(dataSet$cls);
    colors <- vector(mode="character", length=length(dataSet$cls));
    for(i in 1:length(lvs)){
      colors[dataSet$cls == lvs[i]] <- dist.cols[i];
    }
  }else{
    if(exists("colVec") && !any(colVec =="#NA") ){
      cols <- vector(mode="character", length=length(dataSet$cls));
      clsVec <- as.character(dataSet$cls)
      grpnms <- names(colVec);
      for(i in 1:length(grpnms)){
        cols[clsVec == grpnms[i]] <- colVec[i];
      }
      colors <- cols;
    }else{
      colors <- as.numeric(dataSet$cls)+1;
    }
  }
  return (colors);
}

# unzip the uploaded .zip files, remove the uploaded file, check for success
# (body continues on the next chunk)
UnzipUploadedFile<-function(inPath, outPath, rmFile=T){
  # a<-unzip(inPath, exdir=outPath);
a<-try(system(paste("unzip", "-o", inPath, "-d", outPath), intern=T));
  # (Continuation of UnzipUploadedFile opened on the previous chunk.)
  # FIX: inherits() instead of class(a) == "try-error" (class() can return a
  # vector, making == unreliable in an if()), and scalar || instead of |.
  if(inherits(a, "try-error") || length(a) == 0){
    AddErrMsg("Failed to unzip the uploaded files!");
    AddErrMsg("Possible reason: file name contains space or special characters.");
    AddErrMsg("Use only alphabets and numbers, make sure there is no space in your file name.");
    AddErrMsg("For WinZip 12.x, use \"Legacy compression (Zip 2.0 compatible)\"");
    return (0);
  }
  if(rmFile){
    RemoveFile(inPath);
  }
  return(1);
}

# clean data and remove -Inf, Inf, NA, negative and 0
# Inf -> 2*max(finite); -Inf and (optionally) NA / non-positive values ->
# min(finite)/2, i.e. half the smallest remaining value.
CleanData <-function(bdata, removeNA=T, removeNeg=T){
  # FIX: use which() to build indices -- the original logical masks contained
  # NA whenever bdata had missing values, and `bdata[mask] <- NA` errors on
  # NA subscripts. which() silently drops NA positions.
  inx <- which(bdata == Inf);
  if(length(inx) > 0){
    bdata[inx] <- NA;
    bdata[inx] <- max(bdata, na.rm=T)*2
  }
  inx <- which(bdata == -Inf);
  if(length(inx) > 0){
    bdata[inx] <- NA;
    bdata[inx] <- min(bdata, na.rm=T)/2
  }
  if(removeNA){
    if(sum(is.na(bdata))>0){
      bdata[is.na(bdata)] <- min(bdata, na.rm=T)/2
    }
  }
  if(removeNeg){
    inx <- which(bdata <= 0);
    if(length(inx) > 0){
      bdata[inx] <- NA;
      bdata[inx] <- min(bdata, na.rm=T)/2
    }
  }
  bdata;
}

# replace -Inf, Inf to 99999 and -99999
CleanNumber <-function(bdata){
  # FIX: which() for NA-safe subscripting (see CleanData above).
  inx <- which(bdata == Inf);
  if(length(inx) > 0){
    bdata[inx] <- NA;
    bdata[inx] <- 999999;
  }
  inx <- which(bdata == -Inf);
  if(length(inx) > 0){
    bdata[inx] <- NA;
    bdata[inx] <- -999999;
  }
  bdata;
}

# remove file
# Recursively delete a folder via `rm -r`; returns 1 on (apparent) success.
RemoveFolder<-function(folderName){
  # a<-unzip(inPath, exdir=outPath);
  a<-system(paste("rm", "-r", folderName), intern=T);
  # NOTE(review): a successful `rm -r` normally produces NO output, so this
  # reports an error on success -- confirm the intended success criterion.
  if(length(a) == 0){
    AddErrMsg(paste("Could not remove file -", folderName));
    return (0);
  }
  return(1);
}

# remove files
RemoveFile<-function(fileName){
  if(file.exists(fileName)){
    file.remove(fileName);
  }
}

# clear the current folder and objects in memory
ClearUserDir<-function(){
  # remove physical files
  unlink(dir(), recursive=T);
  # reset the global state containers
  dataSet <<- list();
  analSet <<- list();
  imgSet <<- list();
  gc(); # remove objects in the memory
  # rm(list=ls(envir=sys.frame(-1)),envir=sys.frame(-1));
}

# utils to remove from
# within, leading and trailing spaces
ClearStrings<-function(query){
  # kill multiple white space
  query <- gsub(" +"," ",query);
  # remove leading and trailing space
  query<- sub("^[[:space:]]*(.*?)[[:space:]]*$", "\\1", query, perl=TRUE);
  return (query);
}

# remove HTML tag
# add escape for %
PrepareLatex <- function(stringVec){
  stringVec <- gsub("<(.|\n)*?>","",stringVec);
  stringVec <- gsub("%", "\\\\%", stringVec);
  stringVec;
}

# get last command from the Rhistory.R file
GetCMD<-function(regexp){
  # store all lines into a list object
  all.lines<-readLines("Rhistory.R");
  all.matches<-grep(regexp, all.lines, value=T);
  if(length(all.matches)==0){
    return(NULL);
  }else{
    # only return the last command
    return(all.matches[length(all.matches)]);
  }
}

# determine value label for plotting
GetValueLabel<-function(){
  if(dataSet$type=="conc"){
    return("Concentration");
  }else {
    return("Intensity");
  }
}

# determine variable label for plotting
GetVariableLabel<-function(){
  if(dataSet$type=="conc"){
    return("Compounds");
  }else if(dataSet$type=="specbin"){
    return("Spectra Bins");
  }else if(dataSet$type=="nmrpeak"){
    return("Peaks (ppm)");
  }else if(dataSet$type=="mspeak"){
    if(dataSet$peakSet$ncol==2){
      return("Peaks (mass)");
    }else{
      return("Peaks (mz/rt)");
    }
  }else{
    return("Peaks(mz/rt)");
  }
}

# determine the number of rows and columns for a given total
# number of plots (used by Kmeans and SOM plots)
GetXYCluster<-function(total){
  if(total>16){
    ncol<-4;
    nrow<-5;
  }else if(total>12){
    ncol<-4;
    nrow<-4;
  }else if(total>9){
    ncol<-3;
    nrow<-4;
  }else if(total>6){
    ncol<-3;
    nrow<-3;
  }else if(total>4){
    ncol<-2;
    nrow<-3;
  }else{
    ncol<-1;
    nrow<-total;
  }
  c(nrow, ncol);
}

###################################################
### ====== utility classes for peak grouping=== ###
###################################################

# Calls compiled xcms C code; NOTE(review): DUP = FALSE was deprecated and
# later removed from .C() in modern R -- confirm against the target R version.
# (closing brace comes from the next chunk)
rectUnique <- function(m, order = seq(length = nrow(m)), xdiff = 0, ydiff = 0) {
  nr <- nrow(m)
  nc <- ncol(m)
  if (!is.double(m)) m <- as.double(m)
  .C("RectUnique", m, as.integer(order-1), nr, nc, as.double(xdiff), as.double(ydiff), logical(nrow(m)), DUP = FALSE, PACKAGE = "xcms")[[7]]
}

# Index of first element of x >= each value (compiled xcms C routine).
findEqualGreaterM <- function(x, values) {
  if (!is.double(x)) x <- as.double(x)
  if (!is.double(values)) values <- as.double(values)
  .C("FindEqualGreaterM", x, length(x), values, length(values), index = integer(length(values)), DUP = FALSE, PACKAGE = "xcms")$index + 1
}

# Descend from istart to the surrounding local minima (compiled xcms C routine).
descendMin <- function(y, istart = which.max(y)) {
  if (!is.double(y)) y <- as.double(y)
  unlist(.C("DescendMin", y, length(y), as.integer(istart-1), ilower = integer(1), iupper = integer(1), DUP = FALSE, PACKAGE = "xcms")[4:5]) + 1
}

# obtain a random subset of numbers from a total number
GetRandomSubsetIndex<-function(total, sub.num = 50){
  if(total < sub.num){
    1:total;
  }else{
    sample(1:total, sub.num);
  }
}

# Classification accuracy from a confusion matrix: trace / total.
Get.Accuracy <- function(cm) {
  sum(diag(cm)) / sum(cm);
}

# Get a subsets of data ranked by their p values from t tests
GetTTSubsetIndex<-function(data = dataSet$norm, sub.num=50){
  if(ncol(data) < sub.num){
    1:ncol(data);
  }else{
    if(is.null(analSet$tt)){
      Ttests.Anal(0.75);
    }
    all.lod <- -log10(analSet$tt$p.value);
    sub.inx <-order(all.lod, decreasing = T)[1:sub.num];
    sel.inx <- 1:ncol(data) %in% sub.inx;
    sel.inx;
  }
}

# generate Latex table
# Prints the top (<=50) significant features as a LaTeX xtable.
GetSigTable<-function(mat, method){
  suppressMessages(require(xtable));
  if(!isEmptyMatrix(mat)){ # test if empty
    cap<-"Important features identified by";
    if(nrow(mat)>50){
      smat<-as.matrix(mat[1:50,]); # only print top 50 if too many
      colnames(smat)<-colnames(mat); # make sure column names are also copied
      mat<-smat;
      cap<-"Top 50 features identified by";
    }
    # change the rowname to first column
    col1<-rownames(mat);
    cname<-colnames(mat);
    cname<-c(GetVariableLabel(), cname);
    mat<-cbind(col1, mat);
    rownames(mat)<-NULL;
    colnames(mat)<-cname;
    # NOTE(review): stray empty argument (", ,") in this print() call --
    # apparently harmless but looks accidental.
    print(xtable(mat, caption=paste(cap, method)), ,caption.placement="top", size="\\scriptsize");
  }else{
    print(paste("No significant features were found using the given threshold for", method));
  }
}

# test if a sig table matrix is empty
isEmptyMatrix<-function(mat){
  if(is.null(mat) | length(mat)==0){
    return(TRUE);
  }
  if(nrow(mat)==0 | ncol(mat)==0){
    return(TRUE);
  }
  if(is.na(mat[1,1])){
    return(TRUE);
  }
  return(FALSE);
}

# Compute BSS/WSS for each row of a matrix which may have NA
# Columns have labels
# x is a numeric vector,
# cl is consecutive integers
Get.bwss<-function(x, cl){
  K <- max(cl) - min(cl) + 1
  tvar <- var.na(x);
  tn <- sum(!is.na(x));
  wvar <- wn <- numeric(K);
  for(i in (1:K)) {
    if(sum(cl == (i + min(cl) - 1)) == 1){
      wvar[i] <- 0;
      wn[i] <- 1;
    }
    if(sum(cl == (i + min(cl) - 1)) > 1) {
      wvar[i] <- var.na(x[cl == (i + min(cl) - 1)]);
      wn[i] <- sum(!is.na(x[cl == (i + min(cl) - 1)]));
    }
  }
  WSS <- sum.na(wvar * (wn - 1));
  TSS <- tvar * (tn - 1)
  (TSS - WSS)/WSS;
}

# Compute SSQ for each row of a matrix which may have NA
# Columns have labels cl=consecutive integers
# note: this is desgined for ASCA parition data
# in which Within group (WSS) is
# zero, so, we only need TSS
Get.tss<-function(x, cl){
  K <- max(cl) - min(cl) + 1
  tvar <- apply(x, 1, var.na);
  tn <- apply(!is.na(x), 1, sum);
  wvar <- matrix(0, nrow(x), K);
  wn <- matrix(0, nrow(x), K);
  for(i in (1:K)) {
    if(sum(cl == (i + min(cl) - 1)) == 1){
      wvar[, i] <- 0;
      wn[, i] <- 1;
    }
    if(sum(cl == (i + min(cl) - 1)) > 1) {
      wvar[, i] <- apply(x[, cl == (i + min(cl) - 1)], 1, var.na);
      wn[, i] <- apply(!is.na(x[, cl == (i + min(cl) - 1)]), 1, sum);
    }
  }
  WSS <- apply(wvar * (wn - 1), 1, sum.na)
  TSS <- tvar * (tn - 1)
  return(TSS);
}

# Sum ignoring NA and Inf; returns NA when no finite values remain.
sum.na <- function(x,...){
  res <- NA
  tmp <- !(is.na(x) | is.infinite(x))
  if(sum(tmp) > 0) res <- sum(x[tmp])
  res
}

# Variance ignoring NA and Inf; needs at least two finite values.
var.na <- function(x){
  res <- NA
  tmp <- !(is.na(x) | is.infinite(x))
  if(sum(tmp) > 1){
    res <- var(x[tmp])
  }
  res
}

#######################################################
## calculate Fisher's Least Significant Difference (LSD)
## adapted from the 'agricolae' package
##############################################
# (body continues on the next chunk)
LSD.test <- function (y, trt, alpha = 0.05){
  clase<-c("aov","lm")
  name.y <- paste(deparse(substitute(y)))
  name.t <- paste(deparse(substitute(trt)))
  # Extract response/treatment from a fitted aov/lm model when given one.
  if("aov"%in%class(y) | "lm"%in%class(y)){
    A<-y$model
# (continuation of LSD.test, begun above)
  DFerror<-df.residual(y)
  MSerror<-deviance(y)/DFerror
  y<-A[,1]
  ipch<-pmatch(trt,names(A))
  name.t <-names(A)[ipch]
  trt<-A[,ipch]
  name.y <- names(A)[1]
  }
  # drop missing responses, then per-group mean / sd / n
  junto <- subset(data.frame(y, trt), is.na(y) == FALSE)
  means <- tapply.stat(junto[, 1], junto[, 2], stat="mean") #change
  sds <- tapply.stat(junto[, 1], junto[, 2], stat="sd") #change
  nn <- tapply.stat(junto[, 1], junto[, 2], stat="length") #change
  std.err <- sds[, 2]/sqrt(nn[, 2])
  Tprob <- qt(1 - alpha/2, DFerror)
  LCL <- means[,2]-Tprob*std.err
  UCL <- means[,2]+Tprob*std.err
  means <- data.frame(means, std.err, replication = nn[, 2], LCL, UCL)
  names(means)[1:2] <- c(name.t, name.y)
  #row.names(means) <- means[, 1]
  ntr <- nrow(means)
  nk <- choose(ntr, 2)
  nr <- unique(nn[, 2])
  # all pairwise treatment comparisons
  comb <- combn(ntr, 2)
  nn <- ncol(comb)
  dif <- rep(0, nn)
  LCL1<-dif
  UCL1<-dif
  sig<-NULL
  pvalue <- rep(0, nn)
  for (k in 1:nn) {
    i <- comb[1, k]
    j <- comb[2, k]
    # order each pair so the larger mean comes first
    if (means[i, 2] < means[j, 2]){
      comb[1, k]<-j
      comb[2, k]<-i
    }
    dif[k] <- abs(means[i, 2] - means[j, 2])
    sdtdif <- sqrt(MSerror * (1/means[i, 4] + 1/means[j,4]))
    # two-sided t test on the standardized mean difference
    pvalue[k] <- 2 * (1 - pt(dif[k]/sdtdif, DFerror));
    pvalue[k] <- round(pvalue[k],6);
    LCL1[k] <- dif[k] - Tprob*sdtdif
    UCL1[k] <- dif[k] + Tprob*sdtdif
    # significance stars by the usual thresholds
    sig[k]<-" "
    if (pvalue[k] <= 0.001) sig[k]<-"***"
    else if (pvalue[k] <= 0.01) sig[k]<-"**"
    else if (pvalue[k] <= 0.05) sig[k]<-"*"
    else if (pvalue[k] <= 0.1) sig[k]<-"."
  }
  tr.i <- means[comb[1, ],1]
  tr.j <- means[comb[2, ],1]
  output<-data.frame("Difference" = dif, pvalue = pvalue,sig,LCL=LCL1,UCL=UCL1)
  rownames(output)<-paste(tr.i,tr.j,sep=" - ");
  output;
}

# Group-wise summary helper (from 'agricolae'): applies 'stat' to y within
# each combination of the columns of x and returns a data.frame.
tapply.stat <-function (y, x, stat = "mean"){
  cx<-deparse(substitute(x))
  cy<-deparse(substitute(y))
  x<-data.frame(c1=1,x)
  y<-data.frame(v1=1,y)
  nx<-ncol(x)
  ny<-ncol(y)
  namex <- names(x)
  namey <- names(y)
  if (nx==2) namex <- c("c1",cx)
  if (ny==2) namey <- c("v1",cy)
  namexy <- c(namex,namey)
  for(i in 1:nx) {
    x[,i]<-as.character(x[,i])
  }
  # build a composite grouping key, "&"-joined across the x columns
  z<-NULL
  for(i in 1:nx) {
    z<-paste(z,x[,i],sep="&")
  }
  w<-NULL
  for(i in 1:ny) {
    m <-tapply(y[,i],z,stat)
    m<-as.matrix(m)
    w<-cbind(w,m)
  }
  nw<-nrow(w)
  # NOTE(review): 'c' is reused as a data variable here; calls like
  # c(nw,nx) below still resolve to base::c because R skips non-function
  # bindings when looking up a function, but the shadowing is confusing.
  c<-rownames(w)
  v<-rep("",nw*nx)
  dim(v)<-c(nw,nx)
  for(i in 1:nw) {
    for(j in 1:nx) {
      # split the composite key back into its components
      v[i,j]<-strsplit(c[i],"&")[[1]][j+1]
    }
  }
  rownames(w)<-NULL
  junto<-data.frame(v[,-1],w)
  junto<-junto[,-nx]
  names(junto)<-namexy[c(-1,-(nx+1))]
  return(junto)
}

########################################
#### Scatterplot3D
#### adapted for better visualization
#######################################
Plot3D <- function(x, y = NULL, z = NULL, color = par("col"), pch = NULL,
  main = NULL, sub = NULL, xlim = NULL, ylim = NULL, zlim = NULL,
  xlab = NULL, ylab = NULL, zlab = NULL, scale.y = 1, angle = 40,
  axis = TRUE, tick.marks = TRUE, label.tick.marks = TRUE,
  x.ticklabs = NULL, y.ticklabs = NULL, z.ticklabs = NULL,
  y.margin.add = 0, grid = TRUE, box = TRUE, lab = par("lab"),
  lab.z = mean(lab[1:2]), type = "p", highlight.3d = FALSE,
  mar = c(5, 3, 4, 3) + 0.1, col.axis = par("col.axis"),
  col.grid = "grey", col.lab = par("col.lab"),
  cex.symbols = par("cex"), cex.axis = 0.8 * par("cex.axis"),
  cex.lab = par("cex.lab"), font.axis = par("font.axis"),
  font.lab = par("font.lab"), lty.axis = par("lty"), lty.grid = 2,
  lty.hide = 1, lty.hplot = par("lty"), log = "", ...)
  # log not yet implemented
{
  ## Uwe Ligges ,
  ## http://www.statistik.tu-dortmund.de/~ligges
  ##
  ## For MANY ideas and improvements thanks to Martin Maechler!!!
## Parts of the help files are stolen from the standard plotting functions in R.
  mem.par <- par(mar = mar)
  x.scal <- y.scal <- z.scal <- 1
  xlabel <- if (!missing(x)) deparse(substitute(x))
  ylabel <- if (!missing(y)) deparse(substitute(y))
  zlabel <- if (!missing(z)) deparse(substitute(z))
  ## verification, init, ...
  if(highlight.3d && !missing(color)) warning("color is ignored when highlight.3d = TRUE")
  ## color as part of `x' (data.frame or list):
  if(!is.null(d <- dim(x)) && (length(d) == 2) && (d[2] >= 4)) color <- x[,4]
  else if(is.list(x) && !is.null(x$color)) color <- x$color
  ## convert 'anything' -> vector
  xyz <- xyz.coords(x=x, y=y, z=z, xlab=xlabel, ylab=ylabel, zlab=zlabel, log=log)
  if(is.null(xlab)) { xlab <- xyz$xlab; if(is.null(xlab)) xlab <- "" }
  if(is.null(ylab)) { ylab <- xyz$ylab; if(is.null(ylab)) ylab <- "" }
  if(is.null(zlab)) { zlab <- xyz$zlab; if(is.null(zlab)) zlab <- "" }
  # recycle a single color; otherwise lengths must match the data
  if(length(color) == 1) color <- rep(color, length(xyz$x))
  else if(length(color) != length(xyz$x)) stop("length(color) ", "must be equal length(x) or 1")
  # fold the viewing angle into [0, 4) quarter-turns; yx.f / yz.f are the
  # projection factors of the depth (y) axis onto screen x and z
  angle <- (angle %% 360) / 90
  yz.f <- scale.y * abs(if(angle < 1) angle else if(angle > 3) angle - 4 else 2 - angle)
  yx.f <- scale.y * (if(angle < 2) 1 - angle else angle - 3)
  if(angle > 2) { ## switch y and x axis to ensure righthand oriented coord.
    temp <- xyz$x; xyz$x <- xyz$y; xyz$y <- temp
    temp <- xlab; xlab <- ylab; ylab <- temp
    temp <- xlim; xlim <- ylim; ylim <- temp
  }
  angle.1 <- (1 < angle && angle < 2) || angle > 3
  angle.2 <- 1 <= angle && angle <= 3
  dat <- cbind(as.data.frame(xyz[c("x","y","z")]), col = color)
  ## xlim, ylim, zlim -- select the points inside the limits
  if(!is.null(xlim)) {
    xlim <- range(xlim)
    dat <- dat[ xlim[1] <= dat$x & dat$x <= xlim[2] , , drop = FALSE]
  }
  if(!is.null(ylim)) {
    ylim <- range(ylim)
    dat <- dat[ ylim[1] <= dat$y & dat$y <= ylim[2] , , drop = FALSE]
  }
  if(!is.null(zlim)) {
    zlim <- range(zlim)
    dat <- dat[ zlim[1] <= dat$z & dat$z <= zlim[2] , , drop = FALSE]
  }
  n <- nrow(dat)
  if(n < 1) stop("no data left within (x|y|z)lim")
  y.range <- range(dat$y[is.finite(dat$y)])
  ### 3D-highlighting / colors / sort by y
  if(type == "p" || type == "h") {
    # draw far points first so near points overplot them
    y.ord <- rev(order(dat$y))
    dat <- dat[y.ord, ]
    if(length(pch) > 1)
      if(length(pch) != length(y.ord)) stop("length(pch) ", "must be equal length(x) or 1")
      else pch <- pch[y.ord]
    daty <- dat$y
    daty[!is.finite(daty)] <- mean(daty[is.finite(daty)])
    # darker red for points farther away when highlight.3d is on
    if(highlight.3d && !(all(diff(daty) == 0)))
      dat$col <- rgb(seq(0, 1, length = n) * (y.range[2] - daty) / diff(y.range), g=0, b=0)
  }
  ### optim. axis scaling
  p.lab <- par("lab")
  ## Y
  y.range <- range(dat$y[is.finite(dat$y)], ylim)
  y.prty <- pretty(y.range, n = lab[2], min.n = max(1, min(.5 * lab[2], p.lab[2])))
  y.scal <- round(diff(y.prty[1:2]), digits = 12)
  y.add <- min(y.prty)
  dat$y <- (dat$y - y.add) / y.scal
  y.max <- (max(y.prty) - y.add) / y.scal
  if(!is.null(ylim)) y.max <- max(y.max, ceiling((ylim[2] - y.add) / y.scal))
  # if(angle > 2) dat$y <- y.max - dat$y ## turn y-values around
  ## X
  x.range <- range(dat$x[is.finite(dat$x)], xlim)
  x.prty <- pretty(x.range, n = lab[1], min.n = max(1, min(.5 * lab[1], p.lab[1])))
  x.scal <- round(diff(x.prty[1:2]), digits = 12)
  dat$x <- dat$x / x.scal
  x.range <- range(x.prty) / x.scal
  x.max <- ceiling(x.range[2])
  x.min <- floor(x.range[1])
  if(!is.null(xlim)) {
    x.max <- max(x.max, ceiling(xlim[2] / x.scal))
    x.min <- min(x.min, floor(xlim[1] / x.scal))
  }
  x.range <- range(x.min, x.max)
  ## Z
  z.range <- range(dat$z[is.finite(dat$z)], zlim)
  z.prty <- pretty(z.range, n = lab.z, min.n = max(1, min(.5 * lab.z, p.lab[2])))
  z.scal <- round(diff(z.prty[1:2]), digits = 12)
  dat$z <- dat$z / z.scal
  z.range <- range(z.prty) / z.scal
  z.max <- ceiling(z.range[2])
  z.min <- floor(z.range[1])
  if(!is.null(zlim)) {
    z.max <- max(z.max, ceiling(zlim[2] / z.scal))
    z.min <- min(z.min, floor(zlim[1] / z.scal))
  }
  z.range <- range(z.min, z.max)
  ### init graphics
  plot.new()
  if(angle.2) {x1 <- x.min + yx.f * y.max; x2 <- x.max}
  else {x1 <- x.min; x2 <- x.max + yx.f * y.max}
  plot.window(c(x1, x2), c(z.min, z.max + yz.f * y.max))
  temp <- strwidth(format(rev(y.prty))[1], cex = cex.axis/par("cex"))
  # widen the window so the y tick labels fit
  if(angle.2) x1 <- x1 - temp - y.margin.add else x2 <- x2 + temp + y.margin.add
  plot.window(c(x1, x2), c(z.min, z.max + yz.f * y.max))
  if(angle > 2) par("usr" = par("usr")[c(2, 1, 3:4)])
  usr <- par("usr") # we have to remind it for use in closures
  title(main, sub, ...)
  ### draw axis, tick marks, labels, grid, ...
  xx <- if(angle.2) c(x.min, x.max) else c(x.max, x.min)
  if(grid) {
    ## grids
    ###################
    # XY wall
    i <- x.min:x.max;
    segments(i, z.min, i + (yx.f * y.max), yz.f * y.max + z.min, col = col.grid, lty = lty.grid);
    i <- 0:y.max;
    segments(x.min + (i * yx.f), i * yz.f + z.min, x.max + (i * yx.f), i * yz.f + z.min, col = col.grid, lty = lty.grid);
    ######################
    # XZ wall
    # verticle lines
    temp <- yx.f * y.max;
    temp1 <- yz.f * y.max;
    i <- (x.min + temp):(x.max + temp);
    segments(i, z.min + temp1, i, z.max + temp1, col = col.grid, lty = lty.grid);
    # horizontal lines
    i <- (z.min + temp1):(z.max + temp1);
    segments(x.min + temp, i, x.max + temp, i, col = col.grid, lty = lty.grid)
    ##################
    # YZ wall
    # horizontal lines
    i <- xx[2]:x.min;
    mm <- z.min:z.max;
    segments(i, mm, i + temp, mm + temp1, col = col.grid, lty = lty.grid);
    # verticle lines
    i <- 0:y.max;
    segments(x.min + (i * yx.f), i * yz.f + z.min, xx[2] + (i * yx.f), i * yz.f + z.max, col = col.grid, lty = lty.grid)
    # make the axis into solid line
    segments(x.min, z.min, x.min + (yx.f * y.max), yz.f * y.max + z.min, col = col.grid, lty = lty.hide);
    segments(x.max, z.min, x.max + (yx.f * y.max), yz.f * y.max + z.min, col = col.axis, lty = lty.hide);
    segments(x.min + (y.max * yx.f), y.max * yz.f + z.min, x.max + (y.max* yx.f), y.max * yz.f + z.min, col = col.grid, lty = lty.hide);
    segments(x.min + temp, z.min + temp1, x.min + temp, z.max + temp1, col = col.grid, lty = lty.hide);
    segments(x.max + temp, z.min + temp1, x.max + temp, z.max + temp1, col = col.axis, lty = lty.hide);
    segments(x.min + temp, z.max + temp1, x.max + temp, z.max + temp1, col = col.axis, lty = lty.hide);
    segments(xx[2], z.max, xx[2] + temp, z.max + temp1, col = col.axis, lty = lty.hide);
  }
  if(axis) {
    if(tick.marks) { ## tick marks
      xtl <- (z.max - z.min) * (tcl <- -par("tcl")) / 50
      ztl <- (x.max - x.min) * tcl / 50
      mysegs <- function(x0,y0, x1,y1) segments(x0,y0, x1,y1, col=col.axis, lty=lty.axis)
      ## Y
      i.y <- 0:y.max
      mysegs(yx.f * i.y - ztl + xx[1], yz.f * i.y + z.min, yx.f * i.y + ztl + xx[1], yz.f * i.y + z.min)
      ## X
      i.x <- x.min:x.max
      mysegs(i.x, -xtl + z.min, i.x, xtl + z.min)
      ## Z
      i.z <- z.min:z.max
      mysegs(-ztl + xx[2], i.z, ztl + xx[2], i.z)
      if(label.tick.marks) { ## label tick marks
        las <- par("las")
        mytext <- function(labels, side, at, ...) mtext(text = labels, side = side, at = at, line = -.5, col=col.lab, cex=cex.axis, font=font.lab, ...)
        ## X
        if(is.null(x.ticklabs)) x.ticklabs <- format(i.x * x.scal)
        mytext(x.ticklabs, side = 1, at = i.x)
        ## Z
        if(is.null(z.ticklabs)) z.ticklabs <- format(i.z * z.scal)
        mytext(z.ticklabs, side = if(angle.1) 4 else 2, at = i.z, adj = if(0 < las && las < 3) 1 else NA)
        ## Y
        temp <- if(angle > 2) rev(i.y) else i.y ## turn y-labels around
        if(is.null(y.ticklabs)) y.ticklabs <- format(y.prty)
        else if (angle > 2) y.ticklabs <- rev(y.ticklabs)
        text(i.y * yx.f + xx[1], i.y * yz.f + z.min, y.ticklabs, pos=if(angle.1) 2 else 4, offset=1, col=col.lab, cex=cex.axis/par("cex"), font=font.lab)
      }
    }
    ## axis and labels
    mytext2 <- function(lab, side, line, at) mtext(lab, side = side, line = line, at = at, col = col.lab, cex = cex.lab, font = font.axis, las = 0)
    ## X
    lines(c(x.min, x.max), c(z.min, z.min), col = col.axis, lty = lty.axis)
    mytext2(xlab, 1, line = 1.5, at = mean(x.range))
    ## Y
    lines(xx[1] + c(0, y.max * yx.f), c(z.min, y.max * yz.f + z.min), col = col.axis, lty = lty.axis)
    mytext2(ylab, if(angle.1) 2 else 4, line= 0.5, at = z.min + y.max * yz.f)
    ## Z
    lines(xx[c(2,2)], c(z.min, z.max), col = col.axis, lty = lty.axis)
    mytext2(zlab, if(angle.1) 4 else 2, line= 1.5, at = mean(z.range))
  }
  ### plot points
  # project (x, y, z) data onto the 2D screen using the y-axis factors
  x <- dat$x + (dat$y * yx.f)
  z <- dat$z + (dat$y * yz.f)
  col <- as.character(dat$col)
  if(type == "h") {
    z2 <- dat$y * yz.f + z.min
    segments(x, z, x, z2, col = col, cex = cex.symbols, lty = lty.hplot, ...)
    points(x, z, type = "p", col = col, pch = pch, cex = cex.symbols, ...)
  } else points(x, z, type = type, col = col, pch = pch, cex = cex.symbols, ...)
### box-lines in front of points (overlay)
  if(axis && box) {
    lines(c(x.min, x.max), c(z.max, z.max), col = col.axis, lty = lty.axis)
    lines(c(0, y.max * yx.f) + x.max, c(0, y.max * yz.f) + z.max, col = col.axis, lty = lty.axis)
    lines(xx[c(1,1)], c(z.min, z.max), col = col.axis, lty = lty.axis)
  }
  # par(mem.par) # we MUST NOT set the margins back
  ### Return Function Object
  ob <- ls()
  ## remove all unused objects from the result's enviroment:
  rm(list = ob[!ob %in% c("angle", "mar", "usr", "x.scal", "y.scal", "z.scal", "yx.f", "yz.f", "y.add", "z.min", "z.max", "x.min", "x.max", "y.max", "x.prty", "y.prty", "z.prty")])
  rm(ob)
  # the returned closures share this function's environment, so they can
  # convert user 3D coordinates and draw into the existing plot later
  invisible(list(
    # map user (x, y, z) to 2D plot coordinates
    xyz.convert = function(x, y=NULL, z=NULL) {
      xyz <- xyz.coords(x, y, z)
      if(angle > 2) { ## switch y and x axis to ensure righthand oriented coord.
        temp <- xyz$x; xyz$x <- xyz$y; xyz$y <- temp
      }
      y <- (xyz$y - y.add) / y.scal
      return(list(x = xyz$x / x.scal + yx.f * y, y = xyz$z / z.scal + yz.f * y))
    },
    # add points (or type "h" drop lines) to the existing 3D plot
    points3d = function(x, y = NULL, z = NULL, type = "p", ...) {
      xyz <- xyz.coords(x, y, z)
      if(angle > 2) { ## switch y and x axis to ensure righthand oriented coord.
        temp <- xyz$x; xyz$x <- xyz$y; xyz$y <- temp
      }
      y2 <- (xyz$y - y.add) / y.scal
      x <- xyz$x / x.scal + yx.f * y2
      y <- xyz$z / z.scal + yz.f * y2
      mem.par <- par(mar = mar, usr = usr)
      on.exit(par(mem.par))
      if(type == "h") {
        y2 <- z.min + yz.f * y2
        segments(x, y, x, y2, ...)
        points(x, y, type = "p", ...)
      } else points(x, y, type = type, ...)
    },
    # draw the plane z = Intercept + x.coef*x + y.coef*y (or a fitted
    # model whose coef() gives these three values)
    plane3d = function(Intercept, x.coef = NULL, y.coef = NULL, lty = "dashed", lty.box = NULL, ...){
      if(!is.atomic(Intercept) && !is.null(coef(Intercept))) Intercept <- coef(Intercept)
      if(is.null(lty.box)) lty.box <- lty
      if(is.null(x.coef) && length(Intercept) == 3){
        x.coef <- Intercept[if(angle > 2) 3 else 2]
        y.coef <- Intercept[if(angle > 2) 2 else 3]
        Intercept <- Intercept[1]
      }
      mem.par <- par(mar = mar, usr = usr)
      on.exit(par(mem.par))
      x <- x.min:x.max
      ltya <- c(lty.box, rep(lty, length(x)-2), lty.box)
      x.coef <- x.coef * x.scal
      z1 <- (Intercept + x * x.coef + y.add * y.coef) / z.scal
      z2 <- (Intercept + x * x.coef + (y.max * y.scal + y.add) * y.coef) / z.scal
      segments(x, z1, x + y.max * yx.f, z2 + yz.f * y.max, lty = ltya, ...)
      y <- 0:y.max
      ltya <- c(lty.box, rep(lty, length(y)-2), lty.box)
      y.coef <- (y * y.scal + y.add) * y.coef
      z1 <- (Intercept + x.min * x.coef + y.coef) / z.scal
      z2 <- (Intercept + x.max * x.coef + y.coef) / z.scal
      segments(x.min + y * yx.f, z1 + y * yz.f, x.max + y * yx.f, z2 + y * yz.f, lty = ltya, ...)
    },
    # NOTE(review): wall3d duplicates plane3d line-for-line -- confirm
    # whether a distinct wall rendering was intended.
    wall3d = function(Intercept, x.coef = NULL, y.coef = NULL, lty = "dashed", lty.box = NULL, ...){
      if(!is.atomic(Intercept) && !is.null(coef(Intercept))) Intercept <- coef(Intercept)
      if(is.null(lty.box)) lty.box <- lty
      if(is.null(x.coef) && length(Intercept) == 3){
        x.coef <- Intercept[if(angle > 2) 3 else 2]
        y.coef <- Intercept[if(angle > 2) 2 else 3]
        Intercept <- Intercept[1]
      }
      mem.par <- par(mar = mar, usr = usr)
      on.exit(par(mem.par))
      x <- x.min:x.max
      ltya <- c(lty.box, rep(lty, length(x)-2), lty.box)
      x.coef <- x.coef * x.scal
      z1 <- (Intercept + x * x.coef + y.add * y.coef) / z.scal
      z2 <- (Intercept + x * x.coef + (y.max * y.scal + y.add) * y.coef) / z.scal
      segments(x, z1, x + y.max * yx.f, z2 + yz.f * y.max, lty = ltya, ...)
      y <- 0:y.max
      ltya <- c(lty.box, rep(lty, length(y)-2), lty.box)
      y.coef <- (y * y.scal + y.add) * y.coef
      z1 <- (Intercept + x.min * x.coef + y.coef) / z.scal
      z2 <- (Intercept + x.max * x.coef + y.coef) / z.scal
      segments(x.min + y * yx.f, z1 + y * yz.f, x.max + y * yx.f, z2 + y * yz.f, lty = ltya, ...)
    },
    # redraw the bounding box of the 3D plot
    box3d = function(...){
      mem.par <- par(mar = mar, usr = usr)
      on.exit(par(mem.par))
      lines(c(x.min, x.max), c(z.max, z.max), ...)
      lines(c(0, y.max * yx.f) + x.max, c(0, y.max * yz.f) + z.max, ...)
      lines(c(0, y.max * yx.f) + x.min, c(0, y.max * yz.f) + z.max, ...)
      lines(c(x.max, x.max), c(z.min, z.max), ...)
      lines(c(x.min, x.min), c(z.min, z.max), ...)
      lines(c(x.min, x.max), c(z.min, z.min), ...)
    }
  ))
}

###################################################
## Utilities for create pathway maps for MetPA
#################################################

# a function to deal with long string names: names longer than
# wrap.len + tol.len are split at natural breaks and re-wrapped
Wrap.Names<-function(cName, wrap.len=10, tol.len=5){
  nc <- nchar(cName);
  long.inx <- nc > (wrap.len+tol.len);
  long.nms <- cName[long.inx];
  # first get positions of the natural breaks space or hyphen
  pos.list <- gregexpr("[ -]", long.nms);
  for(i in 1:length(pos.list)){
    current.nm <- long.nms[i];
    pos <- pos.list[[i]]+1;
    start.pos<- c(0, pos);
    end.pos <- c(pos, nchar(current.nm)+1);
    splits <- sapply(1:(length(pos)+1), function(x) substring(current.nm, start.pos[x], end.pos[x]-1));
    long.nms[i]<-CheckMergeSplittedNames(splits);
  }
  cName[long.inx] <- long.nms;
  return (cName);
}

# given a vector with naturally splitted string elements
# check if a particular element is too long and need to be
# break by brute force
CheckMergeSplittedNames<-function(nms, wrap.len=10, tol.len=5){
  clean.nm <- "";
  current.nm <- "";
  for(i in 1:length(nms)){
    current.nm <- paste(current.nm, nms[i], sep="");
    current.len <- nchar(current.nm);
    # if too long, break into halves
    if(current.len > wrap.len + tol.len){
      break.pt <- round(current.len/2);
      current.nm <- paste(substr(current.nm, 0, break.pt), "-", "\n",
substr(current.nm, break.pt+1, current.len), sep="");
      clean.nm <- paste(clean.nm, "\n", current.nm, sep="");
      current.nm <- "";
    }else if(current.len > tol.len){
      clean.nm <- paste(clean.nm, "\n", current.nm, sep="");
      current.nm <- "";
    }else{
      if(i == length(nms)){
        # NOTE(review): the source appears truncated here -- the tail of
        # CheckMergeSplittedNames and the head of the pathway-library
        # builder (which populates graph.list/rbc.list/dgr.list/ms.list
        # from KGML files) are missing between these statements.
        clean.nm <- paste(clean.nm, current.nm, sep=ifelse(nchar(current.nm) 0){ rbc <- round(rbc/sum(rbc),5); }
    # out-degree of each node, normalized to sum to 1
    dgr <- degree(g)$outDegree;
    if(sum(dgr) >0){ dgr <- round(dgr/sum(dgr),5); }
    rbc.list[[i]] <- rbc;
    dgr.list[[i]] <- dgr;
    ms.list[[i]] <- nds;
  }
  # name every list by the file name without its 4-character extension
  names(ms.list) <- names(graph.list) <- names(dgr.list) <- names(rbc.list) <- substr(files, 0, nchar(files)-4);
  # the variables that will be saved
  metpa <- list();
  metpa$mset.list <- ms.list;
  metpa$rbc.list <- rbc.list;
  metpa$dgr.list <- dgr.list;
  metpa$uniq.count <- length(unique(unlist(ms.list)));
  metpa$graph.list <- graph.list;
  metpa$path.ids <- path.ids;
  save(metpa, file=paste(nm.cp, ".rda", sep=""));
}

# given a vector of KEGGID, return a vector of KEGG compound names
# (looks up the global cmpd.map table; prints any IDs with no match)
KEGGID2Name<-function(ids){
  hit.inx<- match(ids, cmpd.map$kegg);
  if(sum(is.na(hit.inx))>0){
    print(ids[is.na(hit.inx)]);
  }
  return(cmpd.map[hit.inx, 3]);
}

# get all the KEGG compounds from the pathway databases
# (walks dirName/<folder>/<file> KGML files; writes kegg_uniq.csv)
getCmpdID<-function(dirName){
  require(KEGGgraph);
  folds<-dir(dirName);
  all.nms <- "";
  for(m in 1:length(folds)){
    files <- dir(paste(dirName, "/", folds[m], sep=""));
    cmpd.nms <- "";
    for(i in 1:length(files)){
      f <- paste(dirName, "/", folds[m],"/",files[i], sep="");
      print(f);
      g <- KEGGpathway2reactionGraph(parseKGML(f));
      nms <- nodes(g);
      # node names carry a "db:" prefix -- strip up to the colon
      start.pos <- unlist(gregexpr(":", nms))+1;
      nms <- substr(nms, start.pos, nchar(nms));
      cmpd.nms <- c(cmpd.nms, nms);
    }
    all.nms <- c(all.nms, unique(cmpd.nms));
  }
  write.csv(unique(all.nms), file="kegg_uniq.csv", row.names=F)
}

# map each KGML file in dirName to its pathway title and save as CSV
getPathName<-function(dirName, saveName){
  require(KEGGgraph);
  files<-dir(dirName);
  nm.mat<-matrix("NA", nrow=length(files), ncol=2);
  for(i in 1:length(files)){
    f <- files[i];
    print(f);
    path <- parseKGML(paste(dirName,"/",f, sep=""));
nm.mat[i,]<-c(f, path@pathwayInfo@title); } write.csv(nm.mat, file=saveName); } IdentifyDuplicateCmpdInMsets <- function(cmpdFile){ cmpd.db <- read.csv(cmpdFile, as.is=T, header=T); common.nms <- tolower(cmpd.db$name); syns.list <- strsplit(cmpd.db$synonym, "; *"); for(i in 1:length(syns)){ current <- strsplit(syns[i], "; *")[[1]]; if(length(current)>length(unique(current))){ print(i); } } } # extend the axis range to both end # vec is the values for that axis # unit is the width to extend, 10 will increase by 1/10 of the range GetExtendRange<-function(vec, unit=10){ var.max <- max(vec, na.rm=T); var.min <- min(vec, na.rm=T); exts <- (var.max - var.min)/unit; c(var.min-exts, var.max+exts); } # to return a shorter names # break long names at space, append "..." to indicate # the abbrev GetShortNames<-function(nm.vec, max.len= 45){ new.nms <- vector(mode="character", length=length(nm.vec)); for(i in 1:length(nm.vec)){ nm <- nm.vec[i]; if(nchar(nm) <= max.len){ new.nms[i] <- nm; }else{ wrds <- strsplit(nm, "[[:space:]]+")[[1]]; new.nm <- ""; if(length(wrds)>1){ for(m in 1:length(wrds)){ wrd <- wrds[m]; if(nchar(new.nm)+4+nchar(wrd) <= max.len){ new.nm <- paste(new.nm, wrd); }else{ new.nms[i] <- paste (new.nm, "...", sep=""); break; } } }else{ new.nms[i] <- paste (substr(nm, 0, 21), "...", sep=""); } } } return (new.nms); } # count the number of digits in the values getndp <- function(x, tol=2*.Machine$double.eps){ ndp <- 0 while(!isTRUE(all.equal(x, round(x, ndp), tol=tol))) ndp <- ndp+1 if(ndp > -log10(tol)) { warning("Tolerance reached, ndp possibly underestimated.") } ndp } ### convert usr coords (as used in current plot) to pixels in a png ## adapted from the imagemap package usr2png <- function(xy,im){ xy <- usr2dev(xy,dev.cur()) cbind( ceiling(xy[,1]*im$Width), ceiling((1-xy[,2])*im$Height) ) } usr2plt <- function(xy,dev=dev.cur()){ olddev <- dev.cur() dev.set(dev) usr <- par("usr") dev.set(olddev) xytrans(xy,usr) } plt2fig <- function(xy,dev=dev.cur()){ olddev 
<- dev.cur() dev.set(dev) plt <- par("plt") dev.set(olddev) xytrans2(xy,plt) } fig2dev <- function(xy,dev=dev.cur()){ olddev <- dev.cur() dev.set(dev) fig <- par("fig") dev.set(olddev) xytrans2(xy,fig) } usr2dev <- function(xy,dev=dev.cur()){ fig2dev(plt2fig(usr2plt(xy,dev),dev),dev) } xytrans2 <- function(xy,par){ cbind(par[1]+((par[2]-par[1])*xy[,1]), par[3]+((par[4]-par[3])*xy[,2])) } xytrans <- function(xy,par){ cbind((xy[,1]-par[1])/(par[2]-par[1]), (xy[,2]-par[3])/(par[4]-par[3])) } # VENN DIAGRAM COUNTS AND PLOTS getVennCounts <- function(x,include="both") { x <- as.matrix(x) include <- match.arg(include,c("both","up","down")) x <- sign(switch(include, both = abs(x), up = x > 0, down = x < 0 )) nprobes <- nrow(x) ncontrasts <- ncol(x) names <- colnames(x) if(is.null(names)) names <- paste("Group",1:ncontrasts) noutcomes <- 2^ncontrasts outcomes <- matrix(0,noutcomes,ncontrasts) colnames(outcomes) <- names for (j in 1:ncontrasts) outcomes[,j] <- rep(0:1,times=2^(j-1),each=2^(ncontrasts-j)) xlist <- list() for (i in 1:ncontrasts) xlist[[i]] <- factor(x[,ncontrasts-i+1],levels=c(0,1)) counts <- as.vector(table(xlist)) structure(cbind(outcomes,Counts=counts),class="VennCounts") } # Plot Venn diagram # Gordon Smyth, James Wettenhall. # Capabilities for multiple counts and colors by Francois Pepin. # 4 July 2003. Last modified 12 March 2010. plotVennDiagram <- function(object,include="both",names,mar=rep(0,4),cex=1.2,lwd=1,circle.col,counts.col,show.include,...) 
{
  # convert raw matrices to VennCounts first; with two 'include' modes the
  # second set of counts goes into object.2
  # NOTE(review): in the else branch below, 'length(include==2)' is almost
  # certainly meant to be 'length(include)==2' -- as written it tests the
  # length of a logical vector, which is non-zero whenever include is
  # non-empty.
  if (!is(object, "VennCounts")){
    if (length(include)>2) stop("Cannot plot Venn diagram for more than 2 sets of counts")
    if (length(include)==2) object.2 <- getVennCounts(object, include = include[2])
    object <- getVennCounts(object, include = include[1])
  } else if(length(include==2)) include <- include[1]
  nsets <- ncol(object)-1
  if(nsets > 3) stop("Can't plot Venn diagram for more than 3 sets")
  if(missing(names)) names <- colnames(object)[1:nsets]
  counts <- object[,"Counts"]
  if(length(include)==2) counts.2 <- object.2[, "Counts"]
  if(missing(circle.col)) circle.col <- par('col')
  # NOTE(review): the source appears truncated here -- the remainder of
  # plotVennDiagram and the head of the label-spreading helper
  # (spread.labs, credited to Greg Snow below) are missing between these
  # two statements.
  if(length(circle.col) max ) { # don't move top
    tmp2 <- rev( as.logical( cumprod( rev(tmp) ) ) )
    tmp <- tmp & !tmp2
  }
  x[ tmp ] <- x[ tmp] + stp
  df <- x[-1] - x[-length(x)]
  i <- i + 1
  if( i > maxiter ) {
    warning("Maximum iterations reached")
    break
  }
  }
  # return the spread values in the caller's original order
  x[unsort]
}

# borrowed from
### http://www.r-statistics.com/2011/01/how-to-label-all-the-outliers-in-a-boxplot/
# Draw a boxplot and write the label of every outlier point next to it,
# optionally spreading overlapping labels apart.
boxplot.with.outlier.label <- function(y, label_name, ...,
  spread_text = T,
  data,
  plot = T,
  range = 1.5,
  label.col = "blue",
  push_text_right = 1.0, # enlarge push_text_right in order to push the text labels further from their point
  segement_width_as_percent_of_label_dist = .45, # Change this if you want to have the line closer to the label (range should be between 0 to 1
  jitter_if_duplicate = T,
  jitter_only_positive_duplicates = F)
{
  require(plyr) # for is.formula and ddply
  if(missing(data)) {
    boxdata <- boxplot(y, plot = plot,range = range ,...)
  } else {
    boxdata <- boxplot(y, plot = plot,data = data, range = range ,...)
  }
  # creating a data.frame with information from the boxplot output about the outliers (location and group)
  boxdata_group_name <- factor(boxdata$group)
  levels(boxdata_group_name) <- boxdata$names[as.numeric(levels(boxdata_group_name))] # the subseting is for cases where we have some sub groups with no outliers
  if(!is.null(list(...)$at)) { # if the user chose to use the "at" parameter, then we would like the function to still function (added on 19.04.2011)
    boxdata$group <- list(...)$at[boxdata$group]
  }
  boxdata_outlier_df <- data.frame(group = boxdata_group_name, y = boxdata$out, x = boxdata$group)
  # Let's extract the x,y variables from the formula:
  if(is.formula(y)) {
    model_frame_y <- model.frame(y)
    y <- model_frame_y[,1]
    x <- model_frame_y[,-1]
    if(!is.null(dim(x))) { # then x is a matrix/data.frame of the type x1*x2*..and so on - and we should merge all the variations...
      x <- apply(x,1, paste, collapse = ".")
    }
  } else {
    # if(missing(x)) x <- rep(1, length(y))
    x <- rep(1, length(y)) # we do this in case y comes as a vector and without x
  }
  # and put all the variables (x, y, and outlier label name) into one data.frame
  DATA <- data.frame(label_name, x ,y)
  if(!is.null(list(...)$names)) { # if the user chose to use the names parameter, then we would like the function to still function (added on 19.04.2011)
    DATA$x <- factor(DATA$x, levels = unique(DATA$x))
    levels(DATA$x) = list(...)$names # enable us to handle when the user adds the "names" parameter # fixed on 19.04.11
    # notice that DATA$x must be of the "correct" order (that's why I used split above
  }
  # NOTE(review): detach() without a visible matching attach() looks
  # suspicious -- confirm whether callers attach 'data' beforehand.
  if(!missing(data)) detach(data) # we don't need to have "data" attached anymore.
  # let's only keep the rows with our outliers
  boxplot.outlier.data <- function(xx, y_name = "y"){
    y <- xx[,y_name]
    boxplot_range <- range(boxplot.stats(y, coef = range )$stats)
    ss <- (y < boxplot_range[1]) | (y > boxplot_range[2])
    return(xx[ss,])
  }
  outlier_df <-ddply(DATA, .(x), boxplot.outlier.data)
  # create propor x/y locations to handle over-laping dots...
  if(spread_text) {
    # credit: Greg Snow
    temp_x <- boxdata_outlier_df[,"x"]
    temp_y1 <- boxdata_outlier_df[,"y"]
    temp_y2 <- temp_y1
    for(i in unique(temp_x)){
      tmp <- temp_x == i
      temp_y2[tmp] <- spread.labs( temp_y2[ tmp ], 1.3*strheight('A'), maxiter=6000, stepsize = 0.05) #, min=0 )
    }
  }
  # plotting the outlier labels :) (I wish there was a non-loop wise way for doing this)
  for(i in seq_len(dim(boxdata_outlier_df)[1])) {
    ss <- (outlier_df[,"x"] %in% boxdata_outlier_df[i,]$group) & (outlier_df[,"y"] %in% boxdata_outlier_df[i,]$y);
    current_label <- outlier_df[ss,"label_name"]
    temp_x <- boxdata_outlier_df[i,"x"]
    temp_y <- boxdata_outlier_df[i,"y"]
    if(spread_text) {
      temp_y_new <- temp_y2[i] # not ss
      move_text_right <- strwidth(current_label) * push_text_right
      text( temp_x+move_text_right, temp_y_new, current_label, col = label.col) # strwidth
      # connect the label to its point with a short segment
      segments( temp_x+(move_text_right/6), temp_y, temp_x+(move_text_right*segement_width_as_percent_of_label_dist), temp_y_new )
    } else {
      text(temp_x, temp_y, current_label, pos = 4, col = label.col)
    }
  }
}

# borrowed from Hmisc
# test whether all non-blank strings in x are numeric; with
# what = "vector", return the converted numeric vector instead
all.numeric <- function (x, what = c("test", "vector"), extras = c(".", "NA")){
  what <- match.arg(what)
  # silence coercion warnings for the duration of the call
  old <- options(warn = -1)
  on.exit(options(old));
  x <- sub("[[:space:]]+$", "", x);
  x <- sub("^[[:space:]]+", "", x);
  inx <- x %in% c("", extras);
  xs <- x[!inx];
  isnum <- !any(is.na(as.numeric(xs)))
  if (what == "test") isnum else if (isnum) as.numeric(x) else x
}

# cleaning up the "cmpd_name.csv"
clean_cmpds_lib <- function(dat){
  # remove rarely used cmpds
  rminx <- duplicated(dat$kegg_id) & is.na(dat$pubchem_id) & is.na(dat$chebi_id)
  sum(rminx)
  dat <-
dat[!rminx,];
  # add labels for lipids (those will be only be used for exact match)
  lipid.prefix <- c("CE(", "CL(", "DG(", "MG(", "PC(","PE(","PA(", "PG(", "PGP(", "PS(", "TG(", "SM(", "Cer(", "CerP(", "CPA(", "LPA(", "PIP(", "PIP2(", "CDP-DG(", "Ganglioside GD1a (", "Ganglioside GD1b (", "Ganglioside GD2 (", "Ganglioside GD3 (", "Ganglioside GM1 (", "Ganglioside GM2 (", "Ganglioside GM3 (", "Ganglioside GQ1c (", "Ganglioside GT1b (", "Ganglioside GT1c (", "Ganglioside GT2 (", "Ganglioside GT3 (", "Glucosylceramide (", "Lactosyceramide (", "Lactosylceramide (", "Galactosylceramide (", "Tetrahexosylceramide (", "LysoPC(", "LysoPE(", "Ganglioside GM1 (", "Ganglioside GM3 (", "Ganglioside GM2 (", "Trihexosylceramide (", "3-O-Sulfogalactosylceramide (", "Galabiosylceramide (", "Ganglioside GD3 (", "Ganglioside GA2 (", "Ganglioside GA1 (");
  # mark every compound whose name starts with one of the lipid prefixes
  hit <- rep(F, length(dat$name));
  for(name in lipid.prefix){
    hitInx <- substring(dat$name, 1, nchar(name)) == name;
    hit <- hit | hitInx;
  }
  exactinx <- (duplicated(dat$kegg_id) | is.na(dat$kegg_id)) & hit
  dat <- cbind(dat, lipid=as.numeric(exactinx));
  write.csv(dat, file="cmpd_name_update9.csv", row.names=F)
}

# read a whole text file into a single newline-joined string
GetFileContentAsString <- function(file.nm){
  content <- paste(readLines(file.nm), collapse="\n");
  return(content);
}

# replace NA/Inf/-Inf with sentinel codes (-777/-999/-111) for export
ClearNumerics <-function(dat.mat){
  dat.mat[is.na(dat.mat)] <- -777;
  dat.mat[dat.mat == Inf] <- -999;
  dat.mat[dat.mat == -Inf] <- -111;
  dat.mat;
}

# adapted from ropls package
# Fit an OPLS(-DA) model of x on y via .coreOPLS, then assess it with
# permutation testing (Szymanska et al, 2012) and attach summary info.
perform_opls <- function (x, y = NULL, predI = NA, orthoI = 0, crossvalI = 7,
  log10L = FALSE, permI = 20,
  scaleC = c("none", "center", "pareto", "standard")[4], ...)
{
  xMN <- x
  yMCN <- matrix(y, ncol = 1);
  rownames(yMCN) <- rownames(xMN)
  colnames(yMCN) <- paste0("y", 1:ncol(yMCN))
  yLevelVc <- NULL;
  xZeroVarVi <- NULL;
  epsN <- .Machine[["double.eps"]]
  opl <- .coreOPLS(xMN = xMN, yMCN = yMCN, orthoI = orthoI, predI = predI, scaleC = scaleC, crossvalI = crossvalI);
  opl$suppLs[["y"]] <- y
  opl$typeC <- "OPLS-DA";
  ## Permutation testing (Szymanska et al, 2012)
  if(permI > 0) {
    modSumVc <- colnames(opl$summaryDF)
    permMN <- matrix(0, nrow = 1 + permI, ncol = length(modSumVc), dimnames = list(NULL, modSumVc))
    perSimVn <- numeric(1 + permI)
    perSimVn[1] <- 1
    permMN[1, ] <- as.matrix(opl$summaryDF)
    # refit the model on label-permuted y to build the null distribution
    for(k in 1:permI) {
      yVcn <- drop(opl$suppLs[["yMCN"]])
      yPerVcn <- sample(yVcn)
      yPerMCN <- matrix(yPerVcn, ncol = 1)
      perOpl <- .coreOPLS(xMN = xMN, yMCN = yPerMCN, orthoI = opl$summaryDF[, "ort"], predI = opl$summaryDF[, "pre"], scaleC = scaleC, crossvalI = crossvalI)
      permMN[1 + k, ] <- as.matrix(perOpl$summaryDF);
      perSimVn[1 + k] <- .similarityF(opl$suppLs[["yMCN"]], yPerMCN)
    }
    permMN <- cbind(permMN, sim = perSimVn);
    # empirical p-values: fraction of permuted fits at least as good
    perPvaVn <- c(pR2Y = (1 + length(which(permMN[-1, "R2Y(cum)"] >= permMN[1, "R2Y(cum)"]))) / (nrow(permMN) - 1),
                  pQ2 = (1 + length(which(permMN[-1, "Q2(cum)"] >= permMN[1, "Q2(cum)"]))) / (nrow(permMN) - 1));
    opl$summaryDF[, "pR2Y"] <- perPvaVn["pR2Y"];
    opl$summaryDF[, "pQ2"] <- perPvaVn["pQ2"];
    opl$suppLs[["permMN"]] <- permMN;
  }
  ##------------------------------------
  ## Numerical results
  ##------------------------------------
  totN <- length(c(xMN))
  nasN <- sum(is.na(c(xMN)))
  if(!is.null(opl$suppLs[["yMCN"]])) {
    totN <- totN + length(c(opl$suppLs[["yMCN"]]))
    nasN <- nasN + sum(is.na(c(opl$suppLs[["yMCN"]])))
  }
  ## Raw summary
  ##------------
  opl$suppLs[["topLoadI"]] <- 3
  # keep a small representative subset of columns (spread by variance)
  if(ncol(xMN) > opl$suppLs[["topLoadI"]]) {
    xVarVn <- apply(xMN, 2, var)
    names(xVarVn) <- 1:length(xVarVn)
    xVarVn <- sort(xVarVn)
    xVarSorVin <- as.numeric(names(xVarVn[seq(1, length(xVarVn), length = opl$suppLs[["topLoadI"]])]))
    opl$suppLs[["xSubIncVarMN"]] <- xMN[, xVarSorVin, drop = FALSE]
  } else{
    opl$suppLs[["xSubIncVarMN"]] <- xMN
  }
  if(ncol(xMN) <= 100) {
    xCorMN <- cor(xMN, use = "pairwise.complete.obs")
    xCorMN[lower.tri(xCorMN, diag = TRUE)] <- 0
    # keep only the strongest pairwise correlations
    if(ncol(xMN) > opl$suppLs[["topLoadI"]]) {
      xCorNexDF <- which(abs(xCorMN) >= sort(abs(xCorMN), decreasing = TRUE)[opl$suppLs[["topLoadI"]] + 1], arr.ind = TRUE);
      xCorDisMN <- matrix(0, nrow = nrow(xCorNexDF), ncol = nrow(xCorNexDF), dimnames = list(colnames(xMN)[xCorNexDF[, "row"]], colnames(xMN)[xCorNexDF[, "col"]]))
      for(k in 1:nrow(xCorDisMN)){
        xCorDisMN[k, k] <- xCorMN[xCorNexDF[k, "row"], xCorNexDF[k, "col"]]
      }
    } else {
      xCorDisMN <- xCorMN
    }
    opl$suppLs[["xCorMN"]] <- xCorDisMN
    rm(xCorDisMN)
  }
  return(invisible(opl))
}

# Core NIPALS OPLS engine (adapted from ropls): fits predI predictive and
# orthoI orthogonal components with crossvalI-fold cross-validation.
# NOTE: this function continues beyond the end of this chunk.
.coreOPLS <- function (xMN, yMCN, orthoI, predI, scaleC, crossvalI) {
  epsN <- .Machine[["double.eps"]]
  varVn <- NULL
  yMeanVn <- NULL
  ySdVn <- NULL
  wMN <- NULL
  cMN <- NULL
  uMN <- NULL
  rMN <- NULL
  bMN <- NULL
  vipVn <- NULL
  yPreMN <- NULL
  yTesMN <- NULL
  toMN <- NULL
  poMN <- NULL
  woMN <- NULL
  coMN <- NULL
  orthoVipVn <- NULL
  naxVi <- which(is.na(c(xMN)))
  naxL <- length(naxVi) > 0
  nayVi <- integer()
  nayL <- FALSE;
  yMN <- yMCN;
  obsNamVc <- rownames(xMN)
  autNcoL <- autNcpL <- FALSE
  autMaxN <- min(c(10, dim(xMN)))
  # resolve automatic (NA) component counts
  if (is.na(orthoI)) {
    if (autMaxN == 1) {
      orthoI <- 0
      predI <- 1
      warning("The data contain a single variable (or sample): A PLS model with a single component will be built", call. = FALSE)
    } else {
      orthoI <- autMaxN - 1
      predI <- 1
      autNcoL <- TRUE
    }
  }
  if (is.na(predI)) {
    if (orthoI > 0) {
      if (autMaxN == 1) {
        orthoI <- 0
        warning("The data contain a single variable (or sample): A PLS model with a single component will be built", call. = FALSE)
      } else warning("OPLS(-DA): The number of predictive component is set to 1 for a single response model", call. = FALSE)
      predI <- 1
      if ((predI + orthoI) > min(dim(xMN)))
        stop("The sum of 'predI' (", predI, ") and 'orthoI' (", orthoI, ") exceeds the minimum dimension of the 'x' data matrix (", min(dim(xMN)), ")", call. = FALSE)
    } else {
      predI <- autMaxN
      autNcpL <- TRUE
    }
  }
  # column-wise centering/scaling of x according to scaleC
  xVarVn <- apply(xMN, 2, function(colVn) var(colVn, na.rm = TRUE))
  xMeanVn <- apply(xMN, 2, function(colVn) mean(colVn, na.rm = TRUE))
  switch(scaleC, none = {
    xMeanVn <- rep(0, ncol(xMN))
    xSdVn <- rep(1, times = ncol(xMN))
  }, center = {
    xSdVn <- rep(1, times = ncol(xMN))
  }, pareto = {
    xSdVn <- apply(xMN, 2, function(colVn) sqrt(sd(colVn, na.rm = TRUE)))
  }, standard = {
    xSdVn <- apply(xMN, 2, function(colVn) sd(colVn, na.rm = TRUE))
  })
  xMN <- scale(xMN, center = xMeanVn, scale = xSdVn)
  if (!is.null(colnames(xMN))) {
    xvaNamVc <- colnames(xMN)
  } else xvaNamVc <- paste("x", 1:ncol(xMN), sep = "")
  preNamVc <- paste("p", 1:predI, sep = "")
  pMN <- matrix(0, nrow = ncol(xMN), ncol = predI, dimnames = list(xvaNamVc, preNamVc))
  tMN <- uMN <- matrix(0, nrow = nrow(xMN), ncol = predI, dimnames = list(obsNamVc, preNamVc))
  ssxTotN <- sum(xMN^2, na.rm = TRUE)
  yMeanVn <- apply(yMN, 2, function(colVn) mean(colVn,na.rm = TRUE))
  # NOTE(review): yMeanVn is immediately overwritten with zeros, so y is
  # effectively not centered here -- confirm this is intended.
  yMeanVn <- rep(0, times = ncol(yMN))
  ySdVn <- rep(1, times = ncol(yMN))
  yMN <- scale(yMN, center = yMeanVn, scale = ySdVn)
  yvaNamVc <- paste("y", 1:ncol(yMN), sep = "")
  wMN <- pMN
  uMN <- tMN
  cMN <- matrix(0, nrow = ncol(yMN), ncol = predI, dimnames = list(yvaNamVc, preNamVc))
  cvfNamVc <- paste("cv", 1:crossvalI, sep = "")
  # observation folds for cross-validation
  cvfOutLs <- split(1:nrow(xMN), rep(1:crossvalI, length = nrow(xMN)))
  prkVn <- numeric(crossvalI)
  ru1ThrN <- ifelse(orthoI == 0, ifelse(nrow(xMN) > 100, yes = 0, no = 0.05), 0.01)
  ssyTotN <- rs0N <- sum(yMN^2, na.rm = TRUE)
  hN <- 1
  orthoNamVc <- paste("o", 1:orthoI, sep = "");
  toMN <- matrix(0, nrow = nrow(xMN), ncol = orthoI, dimnames = list(obsNamVc, orthoNamVc));
  woMN <- poMN <- matrix(0, nrow = ncol(xMN), ncol = orthoI, dimnames = list(xvaNamVc, orthoNamVc));
  coMN <- matrix(0, nrow = ncol(yMN), ncol = orthoI, dimnames = list(yvaNamVc, orthoNamVc));
  modelDF <- as.data.frame(matrix(NA, nrow = 1 + orthoI + 1, ncol = 7, dimnames = list(c("p1", orthoNamVc, "sum"), c("R2X", "R2X(cum)", "R2Y", "R2Y(cum)", "Q2", "Q2(cum)", "Signif."))));
  for (j in 1:ncol(modelDF)){
    mode(modelDF[, j]) <- ifelse(colnames(modelDF)[j] == "Signif.", "character", "numeric")
  }
  # train/test splits per fold, plus a final entry holding the full data
  xcvTraLs <- lapply(cvfOutLs, function(obsVi) xMN[-obsVi, , drop = FALSE])
  xcvTesLs <- lapply(cvfOutLs, function(obsVi) xMN[obsVi, , drop = FALSE])
  ycvTraLs <- lapply(cvfOutLs, function(obsVi) yMN[-obsVi, , drop = FALSE])
  ycvTesLs <- lapply(cvfOutLs, function(obsVi) yMN[obsVi, , drop = FALSE])
  xcvTraLs <- c(xcvTraLs, list(xMN))
  ycvTraLs <- c(ycvTraLs, list(yMN))
  breL <- FALSE
  for (noN in 1:(orthoI + 1)) {
    if (breL){
      break
    }
    for (cvN in 1:length(xcvTraLs)) {
      xcvTraMN <- xcvTraLs[[cvN]]
      ycvTraMN <- ycvTraLs[[cvN]]
      if (ncol(ycvTraMN) > 1) {
        wwMN <- apply(ycvTraMN, 2, function(colVn) crossprod(xcvTraMN, colVn)/drop(crossprod(colVn)))
        wwSvdLs <- svd(wwMN)
        wwNcpVin <- which(wwSvdLs[["d"]]^2 > epsN * sum(wwSvdLs[["d"]]^2))
        twMN <- wwSvdLs[["u"]][, wwNcpVin, drop = FALSE] %*% diag(wwSvdLs[["d"]][wwNcpVin], nrow = length(wwNcpVin))
      }
      # NIPALS inner loop for one predictive component
      uOldVn <- ycvTraMN[, 1, drop = FALSE]
      repeat {
        wVn <- crossprod(xcvTraMN, uOldVn)/drop(crossprod(uOldVn))
        wVn <- wVn/sqrt(drop(crossprod(wVn)))
        tVn <- xcvTraMN %*% wVn
        cVn <- crossprod(ycvTraMN, tVn)/drop(crossprod(tVn))
        uVn <- ycvTraMN %*% cVn/drop(crossprod(cVn))
        dscN <- drop(sqrt(crossprod((uVn - uOldVn)/uVn)))
        if (ncol(ycvTraMN) == 1 || dscN < 1e-10) {
          break
        }else {
          uOldVn <- uVn
        }
      }
      pVn <- crossprod(xcvTraMN, tVn)/drop(crossprod(tVn))
      # orthogonal loading: remove the predictive part from the x loading
      if (ncol(ycvTraMN) > 1){
        for (j in 1:ncol(twMN)) {
          woVn <- pVn - drop(crossprod(twMN[, j, drop = FALSE], pVn))/drop(crossprod(twMN[, j, drop = FALSE])) * twMN[, j, drop = FALSE];
        }
      } else {
        woVn <- pVn - drop(crossprod(wVn, pVn))/drop(crossprod(wVn)) * wVn
      }
      woVn <- woVn/sqrt(drop(crossprod(woVn)))
      toVn <- xcvTraMN %*% woVn
      coVn <- crossprod(ycvTraMN, toVn)/drop(crossprod(toVn))
      poVn <- crossprod(xcvTraMN, toVn)/drop(crossprod(toVn))
      if (cvN <= crossvalI) {
        xcvTesMN <- xcvTesLs[[cvN]]
        ycvTesMN <- ycvTesLs[[cvN]]
        if (any(is.na(xcvTesMN))) {
prxVn <- numeric(nrow(xcvTesMN)) for (r in 1:length(prxVn)) { comVl <- complete.cases(xcvTesMN[r, ]) prxVn[r] <- crossprod(xcvTesMN[r, comVl], wVn[comVl])/drop(crossprod(wVn[comVl])) } prkVn[cvN] <- sum((ycvTesMN - prxVn %*% t(cVn))^2, na.rm = TRUE) } else { prkVn[cvN] <- sum((ycvTesMN - xcvTesMN %*% wVn %*% t(cVn))^2, na.rm = TRUE) } toTesVn <- xcvTesMN %*% woVn xcvTesLs[[cvN]] <- xcvTesMN - tcrossprod(toTesVn, poVn) if (cvN == crossvalI) { q2N <- 1 - sum(prkVn)/rs0N if (noN == 1) { modelDF["p1", "Q2(cum)"] <- modelDF["p1", "Q2"] <- q2N } else { modelDF[noN, "Q2(cum)"] <- q2N - modelDF["p1", "Q2"] modelDF[noN, "Q2"] <- q2N - sum(modelDF[1:(noN - 1), "Q2"], na.rm = TRUE) } } } else { r2yN <- sum(tcrossprod(tVn, cVn)^2)/ssyTotN if (noN == 1) { modelDF["p1", "R2Y(cum)"] <- modelDF["p1", "R2Y"] <- r2yN } else { modelDF[noN, "R2Y(cum)"] <- r2yN - modelDF["p1", "R2Y"] modelDF[noN, "R2Y"] <- r2yN - sum(modelDF[1:(noN - 1), "R2Y"], na.rm = TRUE) } if (noN <= orthoI) { modelDF[paste0("o", noN), "R2X"] <- sum(tcrossprod(toVn,poVn)^2)/ssxTotN poMN[, noN] <- poVn toMN[, noN] <- toVn woMN[, noN] <- woVn coMN[, noN] <- coVn } if (modelDF[noN, "R2Y"] < 0.01) { modelDF[noN, "Signif."] <- "N4" } else if (modelDF[noN, "Q2"] < ru1ThrN) { modelDF[noN, "Signif."] <- "NS" } else { modelDF[noN, "Signif."] <- "R1" } if (autNcoL && modelDF[noN, "Signif."] != "R1" && noN > 2) { breL <- TRUE break } else { cMN[, 1] <- cVn pMN[, 1] <- pVn tMN[, 1] <- tVn uMN[, 1] <- uVn wMN[, 1] <- wVn } } if (breL) { break; } if (noN < orthoI + 1){ xcvTraLs[[cvN]] <- xcvTraMN - tcrossprod(toVn, poVn); } } } rm(xcvTraLs) rm(xcvTesLs) rm(ycvTraLs) modelDF["p1", "R2X(cum)"] <- modelDF["p1", "R2X"] <- sum(tcrossprod(tMN, pMN)^2)/ssxTotN modelDF[1:(1 + orthoI), "R2X(cum)"] <- cumsum(modelDF[1:(1 + orthoI), "R2X"]); if (autNcoL) { if (all(modelDF[, "Signif."] == "R1", na.rm = TRUE)) { orthoI <- noN - 1 }else{ orthoI <- noN - 3 } if (orthoI == autMaxN - 1){ warning("The maximum number of orthogonal components in 
the automated mode (", autMaxN - 1, ") has been reached whereas R2Y (", round(modelDF[1 + orthoI, "R2Y"] * 100), "%) is above 1% and Q2Y (", round(modelDF[1 + orthoI, "Q2"] * 100), "%) is still above ", round(ru1ThrN * 100), "%.", call. = FALSE) } poMN <- poMN[, 1:orthoI, drop = FALSE] toMN <- toMN[, 1:orthoI, drop = FALSE] woMN <- woMN[, 1:orthoI, drop = FALSE] coMN <- coMN[, 1:orthoI, drop = FALSE] orthoNamVc <- orthoNamVc[1:orthoI] modelDF <- modelDF[c(1:(orthoI + 1), nrow(modelDF)), ] } modelDF["sum", "R2X(cum)"] <- modelDF[1 + orthoI, "R2X(cum)"] modelDF["sum", "R2Y(cum)"] <- sum(modelDF[, "R2Y"], na.rm = TRUE) modelDF["sum", "Q2(cum)"] <- sum(modelDF[, "Q2"], na.rm = TRUE) summaryDF <- modelDF["sum", c("R2X(cum)", "R2Y(cum)", "Q2(cum)")] rMN <- wMN bMN <- tcrossprod(rMN, cMN) yPreScaMN <- tcrossprod(tMN, cMN) yPreMN <- scale(scale(yPreScaMN, FALSE, 1/ySdVn), -yMeanVn, FALSE) attr(yPreMN, "scaled:center") <- NULL attr(yPreMN, "scaled:scale") <- NULL yActMCN <- yMCN yActMN <- yActMCN summaryDF[, "RMSEE"] <- sqrt(.errorF(yActMN, yPreMN)^2 * nrow(yActMN)/(nrow(yActMN) - (1 + predI + orthoI))) yTestMCN <- NULL sxpVn <- sapply(1:ncol(tMN), function(h) sum(drop(tcrossprod(tMN[, h], pMN[, h])^2))) sxpCumN <- sum(sxpVn) sxoVn <- sapply(1:ncol(toMN), function(h) sum(drop(tcrossprod(toMN[, h], poMN[, h])^2))) sxoCumN <- sum(sxoVn) ssxCumN <- sxpCumN + sxoCumN sypVn <- sapply(1:ncol(tMN), function(h) sum(drop(tcrossprod(tMN[, h], cMN[, h])^2))) sypCumN <- sum(sypVn) syoVn <- sapply(1:ncol(toMN), function(h) sum(drop(tcrossprod(toMN[, h], coMN[, h])^2))) syoCumN <- sum(syoVn) ssyCumN <- sypCumN + syoCumN kpN <- nrow(wMN)/(sxpCumN/ssxCumN + sypCumN/ssyCumN) pNorMN <- sweep(pMN, 2, sqrt(colSums(pMN^2)), "/") vipVn <- sqrt(kpN * (rowSums(sweep(pNorMN^2, 2, sxpVn, "*"))/ssxCumN + rowSums(sweep(pNorMN^2, 2, sypVn, "*"))/ssyCumN)) koN <- nrow(wMN)/(sxoCumN/ssxCumN + syoCumN/ssyCumN) poNorMN <- sweep(poMN, 2, sqrt(colSums(poMN^2)),"/") orthoVipVn <- sqrt(koN * 
(rowSums(sweep(poNorMN^2, 2, sxoVn, "*"))/ssxCumN + rowSums(sweep(poNorMN^2, 2, syoVn, "*"))/ssyCumN)) summaryDF[, "pre"] <- predI summaryDF[, "ort"] <- orthoI rownames(summaryDF) <- "Total" sigNamVc <- c("R2X", "R2X(cum)", "R2Y", "R2Y(cum)", "Q2", "Q2(cum)", "RMSEE", "RMSEP") for (namC in intersect(colnames(modelDF), sigNamVc)) modelDF[, namC] <- signif(modelDF[, namC], 3) for (namC in intersect(colnames(summaryDF), sigNamVc)) summaryDF[, namC] <- signif(summaryDF[, namC], 3) retLs <- list(typeC = NULL, modelDF = modelDF, summaryDF = summaryDF, pcaVarVn = varVn, vipVn = vipVn, orthoVipVn = orthoVipVn, fitted = NULL, tested = NULL, coefficients = bMN, residuals = NULL, xMeanVn = xMeanVn, xSdVn = xSdVn, yMeanVn = yMeanVn, ySdVn = ySdVn, xZeroVarVi = NULL, scoreMN = tMN, loadingMN = pMN, weightMN = wMN, orthoScoreMN = toMN, orthoLoadingMN = poMN, orthoWeightMN = woMN, cMN = cMN, uMN = uMN, weightStarMN = rMN, coMN = coMN, suppLs = list(yLevelVc = NULL, naxL = naxL, nayL = nayL, nayVi = nayVi, permMN = NULL, scaleC = scaleC, topLoadI = NULL, yMCN = yMCN, xSubIncVarMN = NULL, xCorMN = NULL, xModelMN = xMN, yModelMN = yMN, yPreMN = yPreMN, yTesMN = yTesMN)) } .errorF <- function(x, y){ sqrt(mean(drop((x - y)^2), na.rm = TRUE)) } .similarityF <- function(x, y) { return(cor(x, y, use = "pairwise.complete.obs")) } ################################################################################################# ################################################################################################# ################################################################################################# ################################################################################################# ################################### ############################ ################################### Altered/Wrote Scripts ############################ ################################### ############################ 
#################################################################################################
#################################################################################################
#################################################################################################
#################################################################################################
##############################################
################## Summary plot ##############
##############################################
## Draw a 4-panel before/after normalization summary (box plots + density
## plots).  Reads the globals dataSet$proc (pre-normalization) and
## dataSet$norm (post-normalization).  The Cairo file-output code from the
## original MetaboAnalyst version is kept commented out because the Shiny app
## renders to the active device instead.
## Args: imgName/format/dpi/width are retained for interface compatibility
## but are unused while the Cairo block is disabled.
PlotNormSum<-function(imgName, format="png", dpi=72, width=NA){
  #imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  #if(is.na(width)){
  # w <- 10.5; h <- 12;
  #}else if(width == 0){
  # w <- 7.2;h <- 9;
  #imgSet$norm<<-imgName;
  #}else{
  # w <- 7.2; h <- 9;
  #}
  #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  ## Panel layout: two tall box plots (1,3) each with a density strip (2,4).
  layout(matrix(c(1,1,1,2,3,3,3,4), 4, 2, byrow = FALSE))
  # since there may be too many compounds, only plot a subsets (50) in box plot
  # but density plot will use all the data
  pre.inx<-GetRandomSubsetIndex(ncol(dataSet$proc), sub.num=50);
  namesVec <- colnames(dataSet$proc[,pre.inx]);
  # only keep features present in both the processed and normalized tables
  nm.inx <- namesVec %in% colnames(dataSet$norm)
  namesVec <- namesVec[nm.inx];
  pre.inx <- pre.inx[nm.inx];
  norm.inx<-match(namesVec, colnames(dataSet$norm));
  namesVec <- substr(namesVec, 1, 12);
  # use abbreviated names (12 chars) so box plot labels fit
  rangex.pre <- range(dataSet$proc[, pre.inx], na.rm=T);
  rangex.norm <- range(dataSet$norm[, norm.inx], na.rm=T);
  x.label<-GetValueLabel();
  ## NOTE(review): y.label is assigned but never used below.
  y.label<-GetVariableLabel();
  # fig 1: box plot of raw (processed) values
  op<-par(mar=c(0,7,4,0), xaxt="n");
  boxplot(dataSet$proc[,pre.inx], names= namesVec, ylim=rangex.pre, las = 2, col="lightgreen", horizontal=T);
  mtext("Before Normalization",3, 1)
  # fig 2: density of per-feature means before normalization
  op<-par(mar=c(7,7,0,0), xaxt="s");
  plot(density(apply(dataSet$proc, 2, mean, na.rm=TRUE)), col='darkblue', las =2, lwd=2, main="", xlab="", ylab="");
  mtext("Density", 2, 5);
  mtext(x.label, 1, 5);
  # fig 3: box plot of normalized values
  op<-par(mar=c(0,7,4,2), xaxt="n");
  boxplot(dataSet$norm[,norm.inx], names=namesVec, ylim=rangex.norm, las = 2, col="lightgreen", horizontal=T);
  mtext("After Normalization",3, 1);
  # fig 4: density of per-feature means after normalization
  op<-par(mar=c(7,7,0,2), xaxt="s");
  plot(density(apply(dataSet$norm, 2, mean, na.rm=TRUE)), col='darkblue', las=2, lwd =2, main="", xlab="", ylab="");
  mtext(paste("Normalized",x.label),1, 5);
  #dev.off();
}

################################
######## Anova test ############
################################
## One-way ANOVA (or Kruskal-Wallis when nonpar=TRUE) over every feature of
## dataSet$norm, with FDR adjustment and optional post-hoc comparisons.
## Writes "anova_posthoc.csv" to the working directory and stores the result
## list in the global analSet$aov (via <<-).  Returns 1 on completion.
## Args:
##   nonpar   TRUE -> Kruskal-Wallis via kwtest(); FALSE -> one-way ANOVA
##            via aof() (both helpers are defined elsewhere in this file)
##   thresh   raw p-value significance cutoff (default 0.05)
##   post.hoc "tukey" or "fisher" (parametric branch only)
ANOVA.Anal<-function(nonpar=F, thresh=0.05, post.hoc="fisher"){
  if(nonpar){
    aov.nm <- "Kruskal Wallis Test";
    anova.res<-apply(as.matrix(dataSet$norm), 2, kwtest);
    #extract all p values
    p.value<-unlist(lapply(anova.res, function(x) {x$p.value}));
    names(p.value)<-colnames(dataSet$norm);
    fdr.p <- p.adjust(p.value, "fdr");
    inx.imp <- p.value <= thresh;
    if(sum(inx.imp) == 0){
      # no significant features at the default cutoff: fall back to the top
      # 20% (capped at 50) smallest p-values
      cutpt <- round(0.2*length(p.value));
      cutpt <- ifelse(cutpt>50, 50, cutpt);
      inx <- which(rank(p.value) == cutpt);
      thresh <- p.value[inx];
      inx.imp <- p.value <= thresh;
    }
    sig.p <- p.value[inx.imp];
    fdr.p <- fdr.p[inx.imp];
    ## No post-hoc test in the non-parametric branch; the column is 'NA'.
    sig.mat <- data.frame(signif(sig.p,5), signif(-log10(sig.p),5), signif(fdr.p,5), 'NA');
    rownames(sig.mat) <- names(sig.p);
    colnames(sig.mat) <- c("p.value", "-log10(p)", "FDR", "Post-Hoc");
    # order the result simultaneously
    ord.inx <- order(sig.p, decreasing = FALSE);
    sig.mat <- sig.mat[ord.inx,];
    fileName <- "anova_posthoc.csv";
    my.mat <- sig.mat[,1:3];
    colnames(my.mat) <- c("pval_KW", "-log10(p)", "FDR");
    write.csv(my.mat,file=fileName);
  }else{
    aov.nm <- "One-way ANOVA";
    aov.res<-apply(as.matrix(dataSet$norm), 2, aof);
    anova.res<-lapply(aov.res, anova);
    #extract all p values
    p.value<-unlist(lapply(anova.res, function(x) { x["Pr(>F)"][1,]}));
    names(p.value)<-colnames(dataSet$norm);
    fdr.p <- p.adjust(p.value, "fdr");
    # do post-hoc only for significant entries
    inx.imp <- p.value <= thresh;
    if(sum(inx.imp) == 0){
      # no significant features with the default threshold:
      # readjust threshold to top 20% or top 50
      cutpt <- round(0.2*length(p.value));
      cutpt <- ifelse(cutpt>50, 50, cutpt);
      inx <- which(rank(p.value) == cutpt);
      thresh <- p.value[inx];
      inx.imp <- p.value <= thresh;
    }
    aov.imp <- aov.res[inx.imp];
    sig.p <- p.value[inx.imp];
    fdr.p <- fdr.p[inx.imp];
    cmp.res <- NULL;
    post.nm <- NULL;
    if(post.hoc=="tukey"){
      tukey.res<-lapply(aov.imp, TukeyHSD, conf.level=1-thresh);
      cmp.res <- unlist(lapply(tukey.res, parseTukey, cut.off=thresh));
      post.nm = "Tukey's HSD";
    }else{
      fisher.res<-lapply(aov.imp, FisherLSD, thresh);
      cmp.res <- unlist(lapply(fisher.res, parseFisher, cut.off=thresh));
      post.nm = "Fisher's LSD";
    }
    # create the result dataframe,
    # note, the last column is string, not double
    sig.mat <- data.frame(signif(sig.p,5), signif(-log10(sig.p),5), signif(fdr.p,5), cmp.res);
    rownames(sig.mat) <- names(sig.p);
    colnames(sig.mat) <- c("p.value", "-log10(p)", "FDR", post.nm);
    # order the result simultaneously
    ord.inx <- order(sig.p, decreasing = FALSE);
    sig.mat <- sig.mat[ord.inx,];
    fileName <- "anova_posthoc.csv";
    write.csv(sig.mat,file=fileName);
  }
  ## NOTE(review): local 'aov' shadows stats::aov within this function.
  aov<-list (
    aov.nm = aov.nm,
    raw.thresh = thresh,
    thresh = -log10(thresh), # only used for plot threshold line
    p.value = p.value,
    p.log = -log10(p.value),
    inx.imp = inx.imp,
    post.hoc = post.hoc,
    sig.mat = sig.mat
  );
  analSet$aov<<-aov;
  return(1);
}

################################
###### ANOVA Table##############
#################################
## Return analSet$aov$sig.mat with the row names promoted to a leading
## "Name" column (for display in a Shiny table).
MyANOVATable <-function(){
  m=cbind(row.names(analSet$aov$sig.mat),analSet$aov$sig.mat)
  colnames(m)[1] <- c("Name")
  return(m)
}

#########################
# ANOVA plot
#########################
## Scatter plot of -log10(p) per feature from analSet$aov; significant
## features in color c1, the rest in c2, with a dotted threshold line.
## NOTE(review): base plot() returns NULL invisibly, so AnovaPlot (the
## returned value) is NULL; callers presumably rely on the side effect of
## drawing on the active device -- confirm.
PlotLiveANOVA<-function(c1,c2){
  lod <- analSet$aov$p.log;
  AnovaPlot= plot(lod, ylab="-log10(p)", xlab = GetVariableLabel(), main=analSet$aov$aov.nm, type="n");
  grid();
  red.inx<- which(analSet$aov$inx.imp);
  blue.inx <- which(!analSet$aov$inx.imp);
  points(red.inx, lod[red.inx], bg=c1, cex=1.2, pch=21);
  points(blue.inx, lod[blue.inx], bg=c2, pch=21);
  abline (h=analSet$aov$thresh, lty=3);
  return(AnovaPlot)
  #dev.off();
}

#################################
###### t-test plot#############
################################
## Same plot as PlotLiveANOVA but for the t-test results in analSet$tt.
## c1/c2 are the fill colors for significant / non-significant points.
MyPlotTT<-function(c1,c2){
  lod=analSet$tt$p.log;
  #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  ttestgraph=plot(lod, ylab="-log10(p)", xlab=GetVariableLabel(), main=analSet$tt$tt.nm, type="n");
  grid();
  red.inx<- which(analSet$tt$inx.imp);
  blue.inx <- which(!analSet$tt$inx.imp);
  points(red.inx, lod[red.inx], bg=c1, cex=1.2, pch=21);
  points(blue.inx, lod[blue.inx], bg=c2, pch=21);
  abline (h=analSet$tt$thresh, lty=3);
  axis(4);
  return(ttestgraph)
  #dev.off();
}

##################################
############ T-test ##############
##################################
## Return analSet$tt$sig.mat with row names as a leading "Name" column.
MyTTTable <-function(){
  m=cbind(row.names(analSet$tt$sig.mat),analSet$tt$sig.mat)
  colnames(m) <- c("Name","t.stat","p.value","-log10(p)","FDR")
  return(m)
}

##############################################
#Interval plot methods of individual compound
################################################
## ggplot interval plot (group means with SE or SD error bars) for a single
## compound of dataSet$norm, grouped by dataSet$cls.
## Args:
##   cmpdNm compound (column) name; dpi kept only for the commented ggsave;
##   colors vector of group colors; calc "se" or "sd" selects the error bar.
## NOTE(review): if calc is neither "se" nor "sd", 'plot' is never assigned
## and return(plot) will error -- confirm callers only pass these two values.
IntervalPlot<-function(cmpdNm, dpi=200, colors, calc){
  cmpDat = split(dataSet$norm[,cmpdNm], dataSet$cls)
  cmpName = colnames(dataSet$norm[cmpdNm])
  numOfLvl = length(levels(dataSet$cls))
  ## Per-group summaries: labels, sizes, means, SD, SE and 95% CI half-width.
  groups=c()
  for(i in 1:numOfLvl){
    groups[i]=levels(dataSet$cls)[i]
  }
  N=c()
  for(j in 1:numOfLvl){
    N[j]=as.numeric(summary(dataSet$cls)[j])
  }
  means=c()
  for(k in 1:numOfLvl){
    means[k] = colMeans(as.data.frame(cmpDat[k],StringsAsFactors = FALSE))
  }
  sd=c()
  for(l in 1:numOfLvl){
    sd[l]= sapply((as.data.frame(cmpDat[l],StringsAsFactors = FALSE)), sd)
  }
  se=c()
  for(m in 1:numOfLvl){
    se[m]= sd[m]/sqrt(as.numeric(summary(dataSet$cls)[m]))
  }
  ##### needs to check to check with other dataset
  ## 95% CI half-width from the t quantile with n-1 degrees of freedom.
  ci=c()
  for (n in 1:numOfLvl){
    ci[n]=qt( .95/2 + .5, as.numeric(summary(dataSet$cls)[n])-1)
    ci[n] = se[n]*ci[n]
  }
  if(calc=="se"){
    dfp = data.frame(groups,N, means,sd,se,ci)
    SE.up = as.numeric(dfp$means)+as.numeric(dfp$se)
    SE.dn = as.numeric(dfp$means)-as.numeric(dfp$se)
    imgName <- gsub("\\/", "_", cmpdNm);
    imgName <- paste(imgName, "_dpi", dpi, ".", "png", sep="");
    plot=ggplot(dfp, aes(x=dfp$groups, y=dfp$means, group=dfp$groups, color=dfp$groups))+ theme_bw()+ theme(panel.grid.major = element_blank(), panel.grid.minor=element_blank(),panel.background=element_blank(), axis.line = element_line(size = 0.3,colour = "black"), axis.text=element_text(size=12,colour="black"),axis.title=element_text(size=14,face="bold"))+ geom_errorbar(aes(ymin=SE.dn, ymax=SE.up), width=.2) + geom_point(size=3.5) + scale_colour_manual(name = "Groups",values=colors) + xlab(" ") + ylab(" ") + ggtitle(cmpName) + list()
  } else if(calc=="sd"){
    dfp = data.frame(groups,N, means,sd,se,ci)
    SD.up = as.numeric(dfp$means)+as.numeric(dfp$sd)
    SD.dn = as.numeric(dfp$means)-as.numeric(dfp$sd)
    imgName <- gsub("\\/", "_", cmpdNm);
    imgName <- paste(imgName, "_dpi", dpi, ".", "png", sep="");
    plot=ggplot(dfp, aes(x=dfp$groups, y=dfp$means, group=dfp$groups, color=dfp$groups))+ theme_bw()+ theme(panel.grid.major = element_blank(), panel.grid.minor=element_blank(),panel.background=element_blank(), axis.line = element_line(size = 0.3,colour = "black"), axis.text=element_text(size=12,colour="black"),axis.title=element_text(size=14,face="bold"))+ geom_errorbar(aes(ymin=SD.dn, ymax=SD.up), width=.2) + geom_point(size=3.5) + scale_colour_manual(name = "Groups",values=colors) + xlab(" ") + ylab(" ") + ggtitle(cmpName) + list()
  }
  return(plot)
  #ggsave(plot = plot, imgName, h = 9/3, w = 16/3, dpi=dpi, type = "cairo-png")
}

####################################################################
###################OPLSDA 2D scrore plot############################
####################################################################
## 2D OPLS-DA score plot: predictive component pcx on x vs orthogonal
## component ocy on y, with per-group confidence ellipses (level = reg).
## Reads the global model 'exoplsda' (built elsewhere) plus dataSet/imgSet.
## show: 1 = label points with sample names; grey.scale: 1 = grayscale mode.
## Output goes to a Cairo PNG file named from imgName.
PlotOPLSDA2DScore <- function(imgName, format="png", dpi=72, width=NA, pcx, ocy, reg = 0.95, show=1, grey.scale = 0){
  ## NOTE(review): axis labels always report components 1 (modelDF rows 1-2)
  ## even when pcx/ocy select other components -- confirm intended.
  xlabel = paste("PC",1, "(", round(100*exoplsda$modelDF[1,1],1), "%)");
  ylabel = paste("OC",1, "(", round(100*exoplsda$modelDF[2,1],1), "%)");
  pc1 = exoplsda$scoreMN[, pcx];
  pc2 = exoplsda$orthoScoreMN[, ocy];
  text.lbls<-substr(names(pc1),1,14) # some names may be too long
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    imgSet$pca.score2d<<-imgName;
    w <- 7.2;
  }else{
    w <- width;
  }
  h <- w;
  Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  suppressMessages(require('ellipse'));
  op<-par(mar=c(5,5,3,3));
  if(dataSet$cls.type == "disc"){
    # obtain ellipse points to the scatter plot for each category
    lvs <- levels(dataSet$cls);
    pts.array <- array(0, dim=c(100,2,length(lvs)));
    for(i in 1:length(lvs)){
      inx <-dataSet$cls == lvs[i];
      groupVar<-var(cbind(pc1[inx],pc2[inx]), na.rm=T);
      groupMean<-cbind(mean(pc1[inx], na.rm=T),mean(pc2[inx], na.rm=T));
      pts.array[,,i] <- ellipse(groupVar, centre = groupMean, level = reg, npoints=100);
    }
    ## Expand axis limits so the ellipses fit with an ~8% margin.
    xrg <- range (pc1, pts.array[,1,]);
    yrg <- range (pc2, pts.array[,2,]);
    x.ext<-(xrg[2]-xrg[1])/12;
    y.ext<-(yrg[2]-yrg[1])/12;
    xlims<-c(xrg[1]-x.ext, xrg[2]+x.ext);
    ylims<-c(yrg[1]-y.ext, yrg[2]+y.ext);
    cols <- GetColorSchema(grey.scale==1);
    uniq.cols <- unique(cols);
    ## NOTE(review): 'color=' is not a base-graphics argument (plot uses
    ## 'col='); with type='n' nothing is drawn anyway -- confirm harmless.
    plot(pc1, pc2, xlab=xlabel, xlim=xlims, ylim=ylims, ylab=ylabel, type='n', main="OPLSDA Scores Plot", color=cols, pch=as.numeric(dataSet$cls)+1);
    ## added
    grid(col = "lightgray", lty = "dotted", lwd = 1);
    # make sure name and number of the same order DO NOT USE levels, which may be different
    legend.nm <- unique(as.character(dataSet$cls));
    ## uniq.cols <- unique(cols);
    ## BHAN: when same color is choosen; it makes an error
    if ( length(uniq.cols) > 1 ) {
      names(uniq.cols) <- legend.nm;
    }
    # draw ellipse
    for(i in 1:length(lvs)){
      if (length(uniq.cols) > 1) {
        polygon(pts.array[,,i], col=adjustcolor(uniq.cols[lvs[i]], alpha=0.25), border=NA);
      } else {
        polygon(pts.array[,,i], col=adjustcolor(uniq.cols, alpha=0.25), border=NA);
      }
      if(grey.scale) {
        lines(pts.array[,,i], col=adjustcolor("black", alpha=0.5), lty=2);
      }
    }
    pchs <- GetShapeSchema(show, grey.scale);
    if(grey.scale) {
      cols <- rep("black", length(cols));
    }
    if(show == 1){
      ## Label every point with its (truncated) sample name.
      text(pc1, pc2, label=text.lbls, pos=4, xpd=T, cex=0.75);
      points(pc1, pc2, pch=pchs, col=cols);
    }else{
      if(length(uniq.cols) == 1){
        points(pc1, pc2, pch=pchs, col=cols, cex=1.0);
      }else{
        if(grey.scale == 1 | (exists("shapeVec") && all(shapeVec>0))){
          points(pc1, pc2, pch=pchs, col=cols, cex=1.8);
        }else{
          points(pc1, pc2, pch=21, bg=cols, cex=2);
        }
      }
    }
    uniq.pchs <- unique(pchs);
    if(grey.scale) {
      uniq.cols <- "black";
    }
    legend("topright", legend = legend.nm, pch=uniq.pchs, col=uniq.cols);
  }else{
    ## Continuous class labels: plain scatter without ellipses.
    plot(pc1, pc2, xlab=xlabel, ylab=ylabel, type='n', main="Scores Plot");
    points(pc1, pc2, pch=15, col="magenta");
    text(pc1, pc2, label=text.lbls, pos=4, col ="blue", xpd=T, cex=0.8);
  }
  par(op);
  #dev.off();
}

##########################################################
################ 3Dplot for PCA #########################
##########################################################
## Interactive rgl 3D scatter of PCA scores (analSet$pca$x), one sphere per
## sample colored by group, with optional 95% ellipsoids (ell) and grid
## (grd).  inx1-3 select the components; group.col gives per-group colors.
Graphs3DPCA <- function(inx1=1, inx2=2, inx3=3, pointSize=0.2, transparency=0.1, Title="PCA PLOT", grd=TRUE, ell=TRUE, group.col) {
  x1=analSet$pca$x[,inx1]
  y1=analSet$pca$x[,inx2]
  z1=analSet$pca$x[,inx3]
  numOfLvl <-length(levels(dataSet$cls))
  grouplabels=c()
  for(i in 1:numOfLvl){
    grouplabels[i]=levels(dataSet$cls)[i]
  }
  ## NOTE(review): pchs/uniq.pchs/legend.nm are computed but unused below.
  pchs <- as.numeric(dataSet$cls)+1;
  uniq.pchs <- unique(pchs);
  legend.nm <- unique(as.character(dataSet$cls))
  #open3d()
  # par3d(windowRect = c(216,30, 958, 695))
  groups <- dataSet$cls
  levs <-levels(groups)
  ## Axis labels report the fraction of explained variance per component.
  xlabel = paste("PC",inx1, "(", round(100*analSet$pca$variance[inx1],1), "%)")
  ylabel = paste("PC",inx2, "(", round(100*analSet$pca$variance[inx2],1), "%)")
  zlabel = paste("PC",inx3, "(", round(100*analSet$pca$variance[inx3],1), "%)")
  plot3d(x1, y1, z1, xlab = xlabel, ylab=ylabel, zlab=zlabel, col=group.col[as.numeric(groups)], size=pointSize, type='s');
  if(ell==TRUE){
    ## One 95% covariance ellipsoid per group.
    for (i in 1:length(levs)) {
      group <- levs[i]
      selected <- groups == group
      xx <- x1[selected];
      yy <- y1[selected];
      zz <- z1[selected];
      ellips <- ellipse3d(cov(cbind(xx,yy,zz)), centre=c(mean(xx), mean(yy), mean(zz)), level = 0.95)
      #or use shade3d
      plot3d(ellips, col = group.col[i], add=TRUE, alpha = transparency)
      # show group labels
      # texts3d(mean(xx),mean(yy), mean(zz), text = group,
      #         col= group.col[i], cex = 2)
    }
  }
  decorate3d(main=Title, box=FALSE)
  legend3d("topright", legend = paste(grouplabels), pch=16, col = group.col, cex=1, inset=c(0.02))
  #rgl.spheres(x1, y1, z1, r = pointSize,
  #            color = group.col[as.numeric(groups)])
  if(grd==TRUE){
    grid3d(side=c("x","y","z"), at = NULL, col = "gray", lwd = 1, lty = 1, n = 5)
  }
  aspect3d(1,1,1)
  rgl.material(color = "blue")
  # rglwidget()
  # dev.off();
}

##########################################################
################ 3Dplot for PLSDA #######################
##########################################################
## Same 3D rgl scatter as Graphs3DPCA but for PLS-DA scores
## (analSet$plsr$score); axis labels use Xvar/Xtotvar explained variance.
Graphs3DPLSDA <- function(inx1=1, inx2=2, inx3=3, pointSize=0.2, transparency=0.1, Title="PLSDA 3D PLOT", grd=TRUE, ell=TRUE, group.col) {
  x2=analSet$plsr$score[,inx1]
  y2=analSet$plsr$score[,inx2]
  z2=analSet$plsr$score[,inx3]
  numOfLvl <-length(levels(dataSet$cls))
  grouplabels=c()
  for(i in 1:numOfLvl){
    grouplabels[i]=levels(dataSet$cls)[i]
  }
  ## NOTE(review): pchs/uniq.pchs/legend.nm are computed but unused below.
  pchs <- as.numeric(dataSet$cls)+1;
  uniq.pchs <- unique(pchs);
  legend.nm <- unique(as.character(dataSet$cls))
  #Viewing Window Size
  #open3d()
  #par3d(windowRect = c(216,30, 958, 695))
  groups <- dataSet$cls
  levs <-levels(groups)
  xlabel <- paste("Component", inx1, "(", round(100*analSet$plsr$Xvar[inx1]/analSet$plsr$Xtotvar,1), "%)");
  ylabel <- paste("Component", inx2, "(", round(100*analSet$plsr$Xvar[inx2]/analSet$plsr$Xtotvar,1), "%)");
  zlabel <- paste("Component", inx3, "(", round(100*analSet$plsr$Xvar[inx3]/analSet$plsr$Xtotvar,1), "%)");
  plot3d(x2, y2, z2, xlab = xlabel, ylab=ylabel, zlab=zlabel, col=group.col[as.numeric(groups)], size=pointSize, type='s');
  if(ell==TRUE){
    ## One 95% covariance ellipsoid per group.
    for (i in 1:length(levs)) {
      group <- levs[i]
      selected <- groups == group
      xx <- x2[selected];
      yy <- y2[selected];
      zz <- z2[selected];
      ellips <- ellipse3d(cov(cbind(xx,yy,zz)), centre=c(mean(xx), mean(yy), mean(zz)), level = 0.95)
      #or use shade3d
      plot3d(ellips, col = group.col[i], add=TRUE, alpha = transparency)
      # show group labels
      # texts3d(mean(xx),mean(yy), mean(zz), text = group,
      #         col= group.col[i], cex = 2)
    }
  }
  decorate3d(main=Title, box=FALSE)
  legend3d("topright", legend = paste(grouplabels), pch=16, col = group.col, cex=1, inset=c(0.02))
  # rgl.spheres(x2, y2, z2, r = pointSize,
  #             color = group.col[as.numeric(groups)])
  if(grd==TRUE){
    grid3d(side=c("x","y","z"), at = NULL, col = "gray", lwd = 1, lty = 1, n = 5)
  }
  aspect3d(1,1,1)
  rgl.material(color = "blue")
  #dev.off();
}

###############################################################################
############################### Cmpd plot #####################################
###############################################################################
## Box plot of one compound's normalized values split by class.  colr gives
## the box fill color(s); dpi only feeds the (unused) imgName string.
PlotCmpdBoxView<-function(cmpdNm, dpi=200, colr){
  imgName <- gsub("\\/", "_", cmpdNm);
  imgName <- paste(imgName, "_dpi", dpi, ".", sep="");
  par(mar=c(4,3,1,2), oma=c(0,0,1,0));
  bplot=boxplot(dataSet$norm[, cmpdNm]~dataSet$cls,las=2, col= colr);
  title(main=cmpdNm, out=T);
  #dev.off();
  return(bplot);
}

#################################################################################
############################## FC Plot ##########################################
#################################################################################
## Fold-change overview plot from analSet$fc.  Paired data: up/down counts
## per feature as segments.  Unpaired: log2(FC) scatter, or (when
## raw.thresh == 0) the two group mean profiles side by side with +/-2 SD
## reference lines.
MyPlotFC<-function(imgName="Fold Change Plot", format="png", dpi=72, width=NA){
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 8;
  }else if(width == 0){
    w <- 7;
    imgSet$fc<<-imgName;
  }else{
    w <- width;
  }
  h <- w*6/8;
  par(mar=c(5,5,2,3));
  fc = analSet$fc;
  if(fc$paired){
    ylim<-c(-nrow(dataSet$norm)/2, nrow(dataSet$norm)/2);
    xlim<-c(0, ncol(dataSet$norm));
    plot(NULL, xlim=xlim, ylim=ylim, xlab = GetVariableLabel(), ylab=paste("Count with FC >=", fc$max.thresh, "or <=", fc$min.thresh));
    ## Row 1 of fc.all = up counts (drawn upward), row 2 = down counts
    ## (drawn downward); significant features highlighted in magenta.
    for(i in 1:ncol(fc$fc.all)){
      segments(i,0, i, fc$fc.all[1,i], col= ifelse(fc$inx.up[i],"magenta", "darkgrey"), lwd= ifelse(fc$inx.up[i], 2, 1));
      segments(i,0, i, -fc$fc.all[2,i], col= ifelse(fc$inx.down[i], "magenta", "darkgrey"), lwd= ifelse(fc$inx.down[i], 2, 1));
    }
    abline(h=fc$max.thresh, lty=3);
    abline(h=fc$min.thresh, lty=3);
    abline(h=0, lwd=1);
  }else{
    if(fc$raw.thresh > 0){
      # be symmetrical
      topVal <- max(abs(fc$fc.log));
      ylim <- c(-topVal, topVal);
      plot(fc$fc.log, ylab="Log2 (FC)", ylim = ylim, xlab = GetVariableLabel(), pch=19, axes=F, col= ifelse(fc$inx.imp, "magenta", "darkgrey"));
      axis(2);
      axis(4); # added by Beomsoo
      abline(h=log(fc$max.thresh,2), lty=3);
      abline(h=log(fc$min.thresh,2), lty=3);
      abline(h=0, lwd=1);
    }else{
      # plot side by side: per-feature means of the two groups, separated
      # by 5 NA positions, with mean and +/-2 SD guides per group
      dat1 <- dataSet$norm[as.numeric(dataSet$cls) == 1, ];
      dat2 <- dataSet$norm[as.numeric(dataSet$cls) == 2, ];
      mns1 <- apply(dat1, 2, mean);
      mn1 <- mean(mns1);
      sd1 <- sd(mns1);
      msd1.top <- mn1 + 2*sd1;
      msd1.low <- mn1 - 2*sd1;
      mns2 <- apply(dat2, 2, mean);
      mn2 <- mean(mns2);
      sd2 <- sd(mns2);
      msd2.top <- mn2 + 2*sd2;
      msd2.low <- mn2 - 2*sd2;
      ylims <- range(c(mns1, mns2, msd1.top, msd2.top, msd1.low, msd2.low));
      new.mns <- c(mns1, rep(NA, 5), mns2);
      cols <- c(rep("magenta", length(mns1)), rep(NA, 5), rep("blue", length(mns2)));
      pchs <- c(rep(15, length(mns1)), rep(NA, 5), rep(19, length(mns2)));
      plot(new.mns, ylim=ylims, pch = pchs, col = cols, cex = 1.25, axes=F, ylab="");
      axis(2);
      axis(4); # added by Beomsoo
      abline(h=mn1, col="magenta", lty=3, lwd=2);
      abline(h=msd1.low, col="magenta", lty=3, lwd=1);
      abline(h=msd1.top, col="magenta", lty=3, lwd=1);
      abline(h=mn2, col="blue", lty=3, lwd=2);
      abline(h=msd2.low, col="blue", lty=3, lwd=1);
      abline(h=msd2.top, col="blue", lty=3, lwd=1);
      # abline(h=mean(all.mns), col="darkgrey", lty=3);
      axis(1, at=1:length(new.mns), labels=c(1:length(mns1),rep(NA, 5),1:length(mns2)));
    }
  }
  #dev.off();
}

#################################################################################
############################## FC Table ##########################################
#################################################################################
## Return analSet$fc$sig.mat with row names as a leading "Name" column
## (for display in a Shiny table).
MyFCTable <-function(){
  m=cbind(row.names(analSet$fc$sig.mat),analSet$fc$sig.mat)
  colnames(m) <- c("Name","Fold Change","Log2(FC)")
  return(m)
}

###########################################################################################################
############################################# my volcano plot #############################################
###########################################################################################################
## Volcano plot from analSet$volcano: fold change (x) vs -log10(p) (y).
## Significant features are highlighted and the top hits (by p-value and by
## fold change, via GetTopInx) are labeled with truncated feature names.
## Handles both paired (count-based fc.all matrix) and unpaired (fc.log)
## results; dotted guides mark the x/y thresholds.
MyPlotVolcano2<-function(imgName="Volcano Plot", format="png", dpi=72, width=NA){
  vcn<-analSet$volcano;
  ## Semi-transparent point colors (alpha via the 4th rgb channel).
  MyGray <- rgb(t(col2rgb("black")), alpha=40, maxColorValue=255);
  MyHighlight <- rgb(t(col2rgb("magenta")), alpha=80, maxColorValue=255);
  if(vcn$paired){
    xlim<-c(-nrow(dataSet$norm)/2, nrow(dataSet$norm)/2)*1.2;
    # merge fc.all two rows into one, bigger one win
    fc.all <- apply(vcn$fc.all, 2, function(x){ if(x[1] > x[2]){return(x[1])}else{return(-x[2])}})
    hit.inx <- vcn$inx.p & (vcn$inx.up | vcn$inx.down);
    plot(fc.all, vcn$p.log, xlim=xlim, pch=20, cex=ifelse(hit.inx, 1.2, 0.8), col = ifelse(hit.inx, MyHighlight, MyGray), xlab="Count of Significant Pairs", ylab="-log10(p)");
    ## Label the top up-regulated hits (significant & top-5 p & top-5 FC).
    sig.upInx <- vcn$inx.p & vcn$inx.up;
    p.topInx <- GetTopInx(vcn$p.log, 5, T) & vcn$inx.up;
    fc.rtInx <- GetTopInx(vcn$fc.all[1,], 5, T);
    lblInx <- p.topInx & sig.upInx & fc.rtInx;
    if(sum(lblInx, na.rm=T) > 0){
      text.lbls<-substr(colnames(dataSet$norm)[lblInx],1,14) # some names may be too long
      text(vcn$fc.all[1,lblInx], vcn$p.log[lblInx],labels=text.lbls, pos=4, col="blue", srt=30, xpd=T, cex=0.8);
    }
    ## Label the top down-regulated hits (drawn on the negative x side).
    sig.dnInx <- vcn$inx.p & vcn$inx.down;
    p.topInx <- GetTopInx(vcn$p.log, 5, T) & vcn$inx.down;
    fc.leftInx <- GetTopInx(vcn$fc.all[2,], 5, T) & vcn$inx.down;
    lblInx <-p.topInx & sig.dnInx & fc.leftInx;
    if(sum(lblInx, na.rm=T) > 0){
      text.lbls<-substr(colnames(dataSet$norm)[lblInx],1,14) # some names may be too long
      text(-vcn$fc.all[2,lblInx], vcn$p.log[lblInx],labels=text.lbls, pos=2, col="blue", srt=-30, xpd=T, cex=0.8);
    }
  }else{
    imp.inx<-(vcn$inx.up | vcn$inx.down) & vcn$inx.p;
    plot(vcn$fc.log, vcn$p.log, pch=20, cex=ifelse(imp.inx, 1.2, 0.7), col = ifelse(imp.inx, "Red", "blue"), xlab="log2 (FC)", ylab="-log10(p)");
    grid();
    sig.inx <- imp.inx;
    ## Left side labels: top-5 p among down-regulated, or 5 most negative FC.
    p.topInx <- GetTopInx(vcn$p.log, 5, T) & (vcn$inx.down);
    fc.leftInx <- GetTopInx(vcn$fc.log, 5, F);
    lblInx <- sig.inx & (p.topInx | fc.leftInx);
    if(sum(lblInx, na.rm=T) > 0){
      text.lbls<-substr(colnames(dataSet$norm)[lblInx],1,14) # some names may be too long
      text(vcn$fc.log[lblInx], vcn$p.log[lblInx],labels=text.lbls, pos=2, col="blue", srt=-30, xpd=T, cex=0.8);
    }
    ## Right side labels: top-5 p among up-regulated, or 5 largest FC.
    p.topInx <- GetTopInx(vcn$p.log, 5, T) & (vcn$inx.up);
    fc.rtInx <- GetTopInx(vcn$fc.log, 5, T);
    lblInx <- sig.inx & (p.topInx | fc.rtInx);
    if(sum(lblInx, na.rm=T) > 0){
      text.lbls<-substr(colnames(dataSet$norm)[lblInx],1,14) # some names may be too long
      text(vcn$fc.log[lblInx], vcn$p.log[lblInx],labels=text.lbls, pos=4, col="blue", srt=30, xpd=T, cex=0.8);
    }
  }
  ## Threshold guides: vertical fold-change cutoffs, horizontal p cutoff.
  abline (v = vcn$max.xthresh, lty=3);
  abline (v = vcn$min.xthresh, lty=3);
  abline (h = vcn$thresh.y, lty=3);
  axis(4); # added by Beomsoo
}

###########################################################################################################
############################################# volcano Table #############################################
###########################################################################################################
## Return analSet$volcano$sig.mat with row names as a leading "Name" column.
MyVOLTable <-function(){
  m=cbind(row.names(analSet$volcano$sig.mat),analSet$volcano$sig.mat)
  colnames(m) <- c("Name","FC","log2(FC)","p.value","-log10(p)")
  return(m)
}

##################################################################################################################
######################################### CorrHeatMap ############################################################
##################################################################################################################
# Correlation heatmap of the (optionally filtered) normalized data columns.
# - Reduces to the 1000 most variable features (IQR) when there are more.
# - Optionally keeps only the topNum features by total absolute correlation.
# - Renders with pheatmap; writes the matrix to correlation_table.csv.
# Side effects: may set imgSet$heatmap / imgSet$corr.heatmap (<<-) when width == 0.
MyPlotCorrHeatMap<-function(imgName, format="png", dpi=72, width=NA, cor.method,
                            colors, viewOpt, fix.col, no.clst, top, topNum){
  main <- xlab <- ylab <- NULL;
  data <- dataSet$norm;
  if(ncol(data) > 1000){
    # keep the 1000 features with largest IQR to keep the heatmap tractable
    filter.val <- apply(data.matrix(data), 2, IQR, na.rm=T);
    rk <- rank(-filter.val, ties.method='random');
    data <- as.data.frame(data[,rk <=1000]);
    print("Data is reduced to 1000 vars ..");
  }
  colnames(data)<-substr(colnames(data), 1, 18);
  corr.mat<-cor(data, method=cor.method);
  # use total abs(correlation) to select the topNum most connected features
  if(top){
    cor.sum <- apply(abs(corr.mat), 1, sum);
    cor.rk <- rank(-cor.sum);
    var.sel <- cor.rk <= topNum;
    corr.mat <- corr.mat[var.sel, var.sel];
  }
  # set up color palette for the heatmap
  suppressMessages(require(RColorBrewer));
  suppressMessages(require(gplots));
  if(colors=="gbr"){
    colors <- colorRampPalette(c("green", "black", "red"), space="rgb")(256);
  }else if (colors=="wnvyb"){
    colors <- colorRampPalette(c("white","navyblue"), space="Lab")(256);
  }else if (colors=="rwg"){
    colors <- colorRampPalette(c("red", "white", "green"), space="rgb")(256);
  }else if (colors=="rwb"){
    colors <- colorRampPalette(c("red","white","blue"),space="rgb")(256);
  }else if(colors == "heat"){
    colors <- heat.colors(256);
  }else if(colors == "topo"){
    colors <- topo.colors(256);
  }else if(colors == "gray"){
    colors <- colorRampPalette(c("grey90", "grey10"))(256);
  }else{
    colors <- rev(colorRampPalette(brewer.pal(10, "RdBu"))(256));
  }
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  # figure sizing (w/h only feed the commented-out Cairo call below)
  if(viewOpt == "overview"){
    if(is.na(width)){
      w <- 9;
    }else if(width == 0){
      w <- 7.2;
      imgSet$heatmap<<-imgName;
    }else{
      w <- 7.2;
    }
    h <- w;
  }else{
    # detailed view: height grows with the number of features
    if(ncol(corr.mat) > 50){
      myH <- ncol(corr.mat)*12 + 40;
    }else if(ncol(corr.mat) > 20){
      myH <- ncol(corr.mat)*12 + 60;
    }else{
      myH <- ncol(corr.mat)*12 + 120;
    }
    h <- round(myH/72,2);
    if(is.na(width)){
      w <- h;
    }else if(width == 0){
      w <- h <- 7.2;
      imgSet$corr.heatmap<<-imgName;
    }else{
      w <- h <- 7.2;
    }
  }
  #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  if(no.clst){
    rowv=FALSE;
    colv=FALSE;
    dendro= "none";
  }else{
    rowv=TRUE;
    colv=TRUE;
    dendro= "both";
  }
  require(pheatmap);
  if(fix.col){
    # fixed color scale across the full correlation range [-1, 1]
    breaks <- seq(from = -1, to = 1, length = 257);
    pheatmap(corr.mat, fontsize=8, fontsize_row=8,
             cluster_rows = colv, cluster_cols = rowv,
             color = colors, breaks = breaks );
  }else{
    pheatmap(corr.mat, fontsize=8, fontsize_row=8,
             cluster_rows = colv, cluster_cols = rowv,
             color = colors );
  }
  #dev.off();
  write.csv(signif(corr.mat,5), file="correlation_table.csv")
}
######################################################################################
###############################correlation plot for pattern search ###################
######################################################################################
# Horizontal bar chart of the features most correlated with a pattern
# (analSet$corr$cor.mat: col 1 = correlation, col 3 = p-value).
# Shows at most the top 25 by p-value, ordered by correlation sign/size.
MyPlotCorr <- function(imgName="corr2", format="png", dpi=72, width=NA){
  cor.res <- analSet$corr$cor.mat;
  pattern <- analSet$corr$pattern;
  title <- paste(GetVariableLabel(), "correlated with the", pattern);
  if(nrow(cor.res) > 25){
    # first get most signficant ones (p value)
    ord.inx<-order(cor.res[,3]);
    cor.res <- cor.res[ord.inx, ];
    cor.res <- cor.res[1:25, ];
    # then order by their direction (correlation)
    ord.inx<-order(cor.res[,1]);
    if(sum(cor.res[,1] > 0) == 0){ # all negative correlation
      ord.inx <- rev(ord.inx);
    }
    cor.res <- cor.res[ord.inx, ];
    title <- paste("Top 25", tolower(GetVariableLabel()), "correlated with the", pattern);
  }
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- h <- 7.2;
  }else if(width == 0){
    w <- h <- 7.2;  # fix: h was previously left unset in this branch
    imgSet$corr<<-imgName;
  }else{
    w <- h <- width;
  }
  # Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mar=c(5,6,4,3))
  rownames(cor.res)<-substr(rownames(cor.res), 1, 18);
  cols <- ifelse(cor.res[,1] >0, "mistyrose","lightblue");
  # dotchart draws the named axis frame; the bars are overlaid below.
  # NOTE(review): the xlab string intentionally keeps the original embedded
  # line break so the rendered label is unchanged.
  dotchart(cor.res[,1], pch="", xlim=c(-1,1), xlab="Correlation
coefficients", main=title);
  rownames(cor.res) <- NULL;
  barplot(cor.res[,1], space=c(0.5, rep(0, nrow(cor.res)-1)),
          xlim=c(-1,1), xaxt="n", col = cols, add=T,horiz=T);
  #dev.off();
}
######################################################################################
###############################correlation Table  ####################################
######################################################################################
# Pattern-search correlation table with feature names as the first column.
MyCORRTable <-function(){
  m=cbind(row.names(analSet$corr$cor.mat),analSet$corr$cor.mat)
  colnames(m)[1] <- c("Name")
  return(m)
}
##############################################################################################
#################################### PCA Summry Plot #########################################
##############################################################################################
# Pairs plot of the first pc.num principal components, with explained
# variance in each diagonal label. Colored by class for discrete outcomes.
MyPlotPCAPairSummary<-function(imgName="PCA Summary", format="png", dpi=72, width=NA, pc.num){
  pclabels <- paste("PC", 1:pc.num, "\n", round(100*analSet$pca$variance[1:pc.num],1), "%");
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 10;
  }else if(width == 0){
    w <- 8;
    imgSet$pca.pair <<- imgName;
  }else{
    w <- width;
  }
  h <- w;
  #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  if(dataSet$cls.type == "disc"){
    pairs(analSet$pca$x[,1:pc.num], col=GetColorSchema(),
          pch=as.numeric(dataSet$cls)+1, labels=pclabels);
  }else{
    pairs(analSet$pca$x[,1:pc.num], labels=pclabels);
  }
  #dev.off();
}
#####################################################################################################
###################################################### Scree Plot ###################################
#####################################################################################################
# Scree plot: per-component variance (blue) and cumulative variance (green)
# for the first scree.num principal components.
MyPlotPCAScree<-function(imgName="PCA Scree Plot", format="png", dpi=72, width=NA, scree.num){
  # fix: removed unused local `stds` (analSet$pca$std was read but never used)
  pcvars<-analSet$pca$variance[1:scree.num];
  cumvars<-analSet$pca$cum.var[1:scree.num];
  ylims <- range(c(pcvars,cumvars));
  extd<-(ylims[2]-ylims[1])/10
  # pad the y-range by 10%, clamped to [0, 1]
  miny<- ifelse(ylims[1]-extd>0, ylims[1]-extd, 0);
  maxy<- ifelse(ylims[2]+extd>1, 1.0, ylims[2]+extd);
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 10;
  }else if(width == 0){
    w <- 8;
    imgSet$pca.scree<<-imgName;
  }else{
    w <- width;
  }
  h <- w*2/3;
  #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mar=c(5,5,6,3));
  plot(pcvars, type='l', col='blue', main='Scree plot', xlab='PC index',
       ylab='Variance explained', ylim=c(miny, maxy), axes=F)
  text(pcvars, labels =paste(100*round(pcvars,3),'%'), adj=c(-0.3, -0.5), srt=45, xpd=T)
  points(pcvars, col='red');
  lines(cumvars, type='l', col='green')
  text(cumvars, labels =paste(100*round(cumvars,3),'%'), adj=c(-0.3, -0.5), srt=45, xpd=T)
  points(cumvars, col='red');
  abline(v=1:scree.num, lty=3);
  axis(2);
  axis(1, 1:length(pcvars), 1:length(pcvars));
  #dev.off();
}
##################################################################################################
######################################### 2D PCA #################################################
##################################################################################################
# 2D PCA scores plot (components pcx vs pcy) with per-class confidence
# ellipses (level = reg). show=1 adds sample labels; grey.scale=1 renders
# in black and white. Side effect: imgSet$pca.score2d when width == 0.
MyPlotPCA2DScore <- function(imgName="2D PCA PLOT", format="png", dpi=72, width=NA, pcx, pcy, reg = 0.95, show=1, grey.scale = 0){
  xlabel = paste("PC",pcx, "(", round(100*analSet$pca$variance[pcx],1), "%)");
  ylabel = paste("PC",pcy, "(", round(100*analSet$pca$variance[pcy],1), "%)");
  pc1 = analSet$pca$x[, pcx];
  pc2 = analSet$pca$x[, pcy];
  text.lbls<-substr(names(pc1),1,14) # some names may be too long
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    imgSet$pca.score2d<<-imgName;
    w <- 7.2;
  }else{
    w <- width;
  }
  h <- w;
  # Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  suppressMessages(require('ellipse'));
  op<-par(mar=c(5,5,3,3));
  if(dataSet$cls.type == "disc"){
    # obtain ellipse points to the scatter plot for each category
    lvs <- levels(dataSet$cls);
    pts.array <- array(0, dim=c(100,2,length(lvs)));
    for(i in 1:length(lvs)){
      inx <-dataSet$cls == lvs[i];
      groupVar<-var(cbind(pc1[inx],pc2[inx]), na.rm=T);
      groupMean<-cbind(mean(pc1[inx], na.rm=T),mean(pc2[inx], na.rm=T));
      pts.array[,,i] <- ellipse(groupVar, centre = groupMean, level = reg, npoints=100);
    }
    # extend axis limits so the ellipses fit inside the frame
    xrg <- range (pc1, pts.array[,1,]);
    yrg <- range (pc2, pts.array[,2,]);
    x.ext<-(xrg[2]-xrg[1])/12;
    y.ext<-(yrg[2]-yrg[1])/12;
    xlims<-c(xrg[1]-x.ext, xrg[2]+x.ext);
    ylims<-c(yrg[1]-y.ext, yrg[2]+y.ext);
    cols <- GetColorSchema(grey.scale==1);
    uniq.cols <- unique(cols);
    # fix: dropped invalid `color=` argument (R's parameter is `col`) and the
    # pointless pch from this type='n' frame-only call — nothing is drawn here,
    # and `color` is not a graphical parameter (it raised a warning each call).
    # This now matches the equivalent PLS/OPLS score-plot calls.
    plot(pc1, pc2, xlab=xlabel, xlim=xlims, ylim=ylims, ylab=ylabel,
         type='n', main="Scores Plot");
    grid(col = "lightgray", lty = "dotted", lwd = 1);
    # make sure name and number of the same order DO NOT USE levels, which may be different
    legend.nm <- unique(as.character(dataSet$cls));
    ## BHAN: when same color is choosen; it makes an error
    if ( length(uniq.cols) > 1 ) {
      names(uniq.cols) <- legend.nm;
    }
    # draw ellipse
    for(i in 1:length(lvs)){
      if (length(uniq.cols) > 1) {
        polygon(pts.array[,,i], col=adjustcolor(uniq.cols[lvs[i]], alpha=0.25), border=NA);
      } else {
        polygon(pts.array[,,i], col=adjustcolor(uniq.cols, alpha=0.25), border=NA);
      }
      if(grey.scale) {
        lines(pts.array[,,i], col=adjustcolor("black", alpha=0.5), lty=2);
      }
    }
    pchs <- GetShapeSchema(show, grey.scale);
    if(grey.scale) {
      cols <- rep("black", length(cols));
    }
    if(show == 1){
      text(pc1, pc2, label=text.lbls, pos=4, xpd=T, cex=0.75);
      points(pc1, pc2, pch=pchs, col=cols);
    }else{
      if(length(uniq.cols) == 1){
        points(pc1, pc2, pch=pchs, col=cols, cex=1.0);
      }else{
        if(grey.scale == 1 | (exists("shapeVec") && all(shapeVec>0))){
          points(pc1, pc2, pch=pchs, col=cols, cex=1.8);
        }else{
          points(pc1, pc2, pch=21, bg=cols, cex=2);
        }
      }
    }
    uniq.pchs <- unique(pchs);
    if(grey.scale) {
      uniq.cols <- "black";
    }
    legend("topright", legend = legend.nm, pch=uniq.pchs, col=uniq.cols);
  }else{
    # continuous outcome: plain scatter with sample labels
    plot(pc1, pc2, xlab=xlabel, ylab=ylabel, type='n', main="Scores Plot");
    points(pc1, pc2, pch=15, col="magenta");
    text(pc1, pc2, label=text.lbls, pos=4, col ="blue", xpd=T, cex=0.8);
  }
  par(op);
  #dev.off();
}
##################################################################################################
######################################### PCA Loading Plot #######################################
##################################################################################################
# Plot PCA loadings (components inx1/inx2) as a scatter or paired barplots,
# and stash the loading matrix for display.
# Side effects: analSet$pca$load.x.uniq, analSet$pca$imp.loads,
# pca.axis.lims (<<-), imgSet$pca.loading when width == 0.
MyPlotPCALoading<-function(imgName="Loading Plot", format="png", dpi=72, width=NA, inx1, inx2, plotType, lbl.feat=1){
  loadings<-signif(as.matrix(cbind(analSet$pca$rotation[,inx1],analSet$pca$rotation[,inx2])),5);
  ldName1<-paste("Loadings", inx1);
  ldName2<-paste("Loadings", inx2);
  colnames(loadings)<-c(ldName1, ldName2);
  # jittered x-values give each feature a unique position for later lookup
  load.x.uniq <- jitter(loadings[,1]);
  names(load.x.uniq) <- rownames(loadings);
  analSet$pca$load.x.uniq <<- load.x.uniq;
  analSet$pca$imp.loads<<-loadings; # set up the loading matrix
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    w <- 7.2;
    imgSet$pca.loading<<-imgName;
  }else{
    w <- width;
  }
  h <- w;
  #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  if(plotType=="scatter"){
    #par(mar=c(6,5,2,6));
    plot(loadings[,1],loadings[,2], las=2, xlab=ldName1, ylab=ldName2);
    pca.axis.lims <<- par("usr"); # x1, x2, y1 ,y2
    grid(col = "lightgray", lty = "dotted", lwd = 1);
    points(loadings[,1],loadings[,2], pch=19, col="blue");
    if(lbl.feat > 0){
      text(loadings[,1],loadings[,2], labels=substr(rownames(loadings), 1, 12),
           pos=4, col="blue", xpd=T);
    }
  }else{ # barplot: component 1 on top (1/5 height), component 2 below
    layout(matrix(c(1,1,2,2,2), nrow=5, byrow=T), respect = FALSE)
    cmpd.nms <- substr(rownames(loadings), 1, 14);
    hlims <- c(min(loadings[,1], loadings[,2]), max(loadings[,1], loadings[,2]));
    par(mar=c(1,4,4,1));
    barplot(loadings[,1], names.arg=NA, las=2, ylim=hlims, main =ldName1);
    par(mar=c(10,4,3,1));
    barplot(loadings[,2], names.arg=cmpd.nms, las=2, cex.names=1.0, ylim=hlims, main =ldName2);
  }
  #dev.off();
}
##################################################################################################
################################  PCA loading table ##############################################
##################################################################################################
# PCA loading table for components x and y, names as the first column.
MyLOADTable <- function(x,y){
  loadings<-signif(as.matrix(cbind(analSet$pca$rotation[,x],analSet$pca$rotation[,y])),5);
  ldName1<-paste("Loadings", x);
  ldName2<-paste("Loadings", y);
  colnames(loadings)<-c(ldName1, ldName2);
  m=cbind(row.names(loadings),loadings)
  colnames(m)[1] <- c("Name")
  return(m)
}
##################################################################################################
######################################### Biplot #################################################
##################################################################################################
# PCA biplot of components inx1/inx2; scores are rescaled by sdev*sqrt(n)
# (standard biplot scaling) and loadings scaled up by the same factor.
MyPlotPCABiplot<-function(imgName="biplot", format="png", dpi=72, width=NA, inx1, inx2){
  choices = c(inx1, inx2);
  scores<-analSet$pca$x;
  lam <- analSet$pca$sdev[choices]
  n <- NROW(scores)
  lam <- lam * sqrt(n);
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    w <- 7.2;
    imgSet$pca.biplot<<-imgName;
  }else{
    w <- width;
  }
  h <- w;
  #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  biplot(t(t(scores[, choices]) / lam), t(t(analSet$pca$rotation[, choices]) * lam),
         xpd =T, cex=0.9);
  #dev.off();
}
#################################################################################################
####################################### PLSDA Summary plot ######################################
#################################################################################################
# Pairs plot of the first pc.num PLS components; diagonal labels carry the
# percentage of X-variance explained by each component (Xvar/Xtotvar).
MyPlotPLSPairSummary<-function(imgName="PLSDA Summary Plot", format="png", dpi=72, width=NA, pc.num){
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    w <- 7.2;
    imgSet$pls.pair <<- imgName;
  }else{
    w <- width;
  }
  h <- w;
  #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  pclabels <- paste("Component", 1:pc.num, "\n",
                    round(100*analSet$plsr$Xvar[1:pc.num]/analSet$plsr$Xtotvar,1), "%");
  # pairs(analSet$plsr$scores[,1:pc.num], col=as.numeric(dataSet$cls)+1, pch=as.numeric(dataSet$cls)+1, labels=pclabels)
  pairs(analSet$plsr$scores[,1:pc.num], col=GetColorSchema(),
        pch=as.numeric(dataSet$cls)+1, labels=pclabels)
  #dev.off();
}
##################################################################################################
################################ PLSDA 2D plot ###################################################
##################################################################################################
# 2D PLS-DA scores plot (components inx1 vs inx2) with per-class confidence
# ellipses (level = reg). show=1 adds sample labels; grey.scale=1 renders in
# black and white. Side effect: imgSet$pls.score2d when width == 0.
MyPlotPLS2DScore<-function(imgName="PLSDA2D Plot", format="png", dpi=72, width=NA, inx1, inx2, reg=0.95, show=1, grey.scale=0){
  suppressMessages(require('ellipse'));
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    w <- 7.2;
    imgSet$pls.score2d<<-imgName;
  }else{
    w <- width;
  }
  h <- w;
  #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mar=c(5,5,3,3));
  lv1 <- analSet$plsr$scores[,inx1];
  lv2 <- analSet$plsr$scores[,inx2];
  xlabel <- paste("Component", inx1, "(", round(100*analSet$plsr$Xvar[inx1]/analSet$plsr$Xtotvar,1), "%)");
  ylabel <- paste("Component", inx2, "(", round(100*analSet$plsr$Xvar[inx2]/analSet$plsr$Xtotvar,1), "%)");
  text.lbls<-substr(rownames(dataSet$norm),1,12) # some names may be too long
  # obtain ellipse points to the scatter plot for each category
  lvs <- levels(dataSet$cls);
  pts.array <- array(0, dim=c(100,2,length(lvs)));
  for(i in 1:length(lvs)){
    inx <-dataSet$cls == lvs[i];
    groupVar<-var(cbind(lv1[inx],lv2[inx]), na.rm=T);
    groupMean<-cbind(mean(lv1[inx], na.rm=T),mean(lv2[inx], na.rm=T));
    pts.array[,,i] <- ellipse(groupVar, centre = groupMean, level = reg, npoints=100);
  }
  # extend axis limits so the ellipses fit inside the frame
  xrg <- range (lv1, pts.array[,1,]);
  yrg <- range (lv2, pts.array[,2,]);
  x.ext<-(xrg[2]-xrg[1])/12;
  y.ext<-(yrg[2]-yrg[1])/12;
  xlims<-c(xrg[1]-x.ext, xrg[2]+x.ext);
  ylims<-c(yrg[1]-y.ext, yrg[2]+y.ext);
  ## cols = as.numeric(dataSet$cls)+1;
  cols <- GetColorSchema(grey.scale==1);
  uniq.cols <- unique(cols);
  plot(lv1, lv2, xlab=xlabel, xlim=xlims, ylim=ylims, ylab=ylabel,
       type='n', main="Scores Plot");
  grid(col = "lightgray", lty = "dotted", lwd = 1);
  # make sure name and number of the same order DO NOT USE levels, which may be different
  legend.nm <- unique(as.character(dataSet$cls));
  ## uniq.cols <- unique(cols);
  ## BHAN: when same color is choosen for black/white; it makes an error
  # names(uniq.cols) <- legend.nm;
  if ( length(uniq.cols) > 1 ) {
    names(uniq.cols) <- legend.nm;
  }
  # draw ellipse
  for(i in 1:length(lvs)){
    if ( length(uniq.cols) > 1) {
      polygon(pts.array[,,i], col=adjustcolor(uniq.cols[lvs[i]], alpha=0.25), border=NA);
    } else {
      polygon(pts.array[,,i], col=adjustcolor(uniq.cols, alpha=0.25), border=NA);
    }
    if(grey.scale) {
      lines(pts.array[,,i], col=adjustcolor("black", alpha=0.5), lty=2);
    }
  }
  pchs <- GetShapeSchema(show, grey.scale);
  if(grey.scale) {
    cols <- rep("black", length(cols));
  }
  if(show==1){ # display sample name set on
    text(lv1, lv2, label=text.lbls, pos=4, xpd=T, cex=0.75);
    points(lv1, lv2, pch=pchs, col=cols);
  }else{
    if (length(uniq.cols) == 1) {
      points(lv1, lv2, pch=pchs, col=cols, cex=1.0);
    } else {
      if(grey.scale == 1 | (exists("shapeVec") && all(shapeVec>0))){
        points(lv1, lv2, pch=pchs, col=cols, cex=1.8);
      }else{
        points(lv1, lv2, pch=21, bg=cols, cex=2);
      }
    }
  }
  uniq.pchs <- unique(pchs);
  if(grey.scale) {
    uniq.cols <- "black";
  }
  legend("topright", legend = legend.nm, pch=uniq.pchs, col=uniq.cols);
  #dev.off();
}
##################################################################################################
################################ PLSDA loading plot ##############################################
##################################################################################################
# Plot PLS loadings (components inx1/inx2) as a scatter or paired barplots.
# Side effects: analSet$plsr$load.x.uniq, analSet$plsr$imp.loads,
# pls.axis.lims (<<-), imgSet$pls.loading when width == 0.
MyPlotPLSLoading<-function(imgName="PLSDA Loading plot", format="png", dpi=72, width=NA, inx1, inx2, plotType, lbl.feat=1){
  # named vector
  load1<-analSet$plsr$loadings[,inx1];
  load2<-analSet$plsr$loadings[,inx2];
  loadings = signif(as.matrix(cbind(load1, load2)),5);
  ldName1<-paste("Loadings", inx1);
  ldName2<-paste("Loadings", inx2)
  colnames(loadings)<-c(ldName1, ldName2);
  # jittered x-values give each feature a unique position for later lookup
  load.x.uniq <- jitter(loadings[,1]);
  names(load.x.uniq) <- rownames(loadings);
  analSet$plsr$load.x.uniq <<- load.x.uniq;
  analSet$plsr$imp.loads<<-loadings; # set up loading matrix
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    w <- 7.2;
    imgSet$pls.loading<<-imgName;
  }else{
    w <- width;
  }
  h <- w;
  #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  if(plotType == "scatter"){
    par(mar=c(6,4,4,5));
    plot(loadings[,1],loadings[,2], las=2, xlab=ldName1, ylab=ldName2);
    pls.axis.lims <<- par("usr"); # x1, x2, y1 ,y2
    grid(col = "lightgray", lty = "dotted", lwd = 1);
    points(loadings[,1],loadings[,2], pch=19, col="magenta");
    if(lbl.feat > 0){
      text(loadings[,1],loadings[,2], labels=substr(rownames(loadings), 1, 12),
           pos=4, col="blue", xpd=T);
    }
  }else{ # barplot: component 1 on top (1/5 height), component 2 below
    cmpd.nms <- substr(rownames(loadings), 1, 14);
    hlims <- c(min(loadings[,1], loadings[,2]), max(loadings[,1], loadings[,2]));
    layout(matrix(c(1,1,2,2,2), nrow=5, byrow=T))
    par(mar=c(1,4,4,1));
    barplot(loadings[,1], names.arg=NA, las=2, ylim=hlims, main = ldName1);
    par(mar=c(10,4,3,1));
    barplot(loadings[,2], names.arg=cmpd.nms, cex.names=1.0, las=2, ylim=hlims, main = ldName2);
  }
  #dev.off();
}
##############################################################################################
################################ PLSDA Loading Table #########################################
##############################################################################################
# PLS loading table for components inx and iny, names as the first column.
MyLOADTable2 <- function(inx,iny){
  loadings<-signif(as.matrix(cbind(analSet$plsr$loadings[,inx],analSet$plsr$loadings[,iny])),5);
  ldName1<-paste("Loadings", inx);
  ldName2<-paste("Loadings", iny);
  colnames(loadings)<-c(ldName1, ldName2);
  m=cbind(row.names(loadings),loadings)
  colnames(m)[1] <- c("Name")
  return(m)
}
##################################################################################################
################################ PLSDA CV Plot ###################################################
##################################################################################################
# Grouped barplot of cross-validation performance (analSet$plsda$fit.info,
# assumed rows: accuracy/R2/Q2) per component count; the best component
# (by `choice`) is starred in red.
MyPlotPLS.Classification<-function(imgName="PLSCV", format="png", dpi=200, width=NA){
  res<-analSet$plsda$fit.info;
  colnames(res) <- 1:ncol(res);
  best.num <- analSet$plsda$best.num;
  choice <- analSet$plsda$choice;
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 7;
  }else if(width == 0){
    w <- 7;
    imgSet$pls.class<<-imgName;
  }else{
    w <- width;
  }
  h <- w*5/7;
  #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mar=c(5,5,2,7)); # put legend on the right outside
  barplot(res, beside = TRUE, col = c("lightblue", "mistyrose","lightcyan"),
          ylim= c(0,1.05), xlab="Number of components", ylab="Performance");
  # star the chosen measure's bar for the best component; x-position offsets
  # (2.5/1.5/0.5) pick the 3rd/2nd/1st bar inside the 4-slot group
  if(choice == "Q2"){
    text((best.num-1)*3 + best.num + 2.5, res[3,best.num]+ 0.02, labels = "*", cex=2.5, col="red");
  }else if(choice == "R2"){
    text((best.num-1)*3 + best.num + 1.5, res[2,best.num]+ 0.02, labels = "*", cex=2.5, col="red");
  }else{
    text((best.num-1)*3 + best.num + 0.5, res[1,best.num]+ 0.02, labels = "*", cex=2.5, col="red");
  }
  # calculate the maximum y position, each bar is 1, place one space between the group
  xpos <- ncol(res)*3 + ncol(res) + 1;
  legend(xpos, 1.0, rownames(res), fill = c("lightblue", "mistyrose","lightcyan"), xpd=T);
  #dev.off();
}
##################################################################################################
################################ PLSDA CV Table ###################################################
##################################################################################################
# Cross-validation performance table, rounded to 5 significant digits.
MyCVTable <- function(){
  m=cbind(row.names(signif(analSet$plsda$fit.info, 5)),signif(analSet$plsda$fit.info, 5))
  colnames(m)[1] <- c(" ")
  return(m)
}
##################################################################################################
################################ PLSDA Imp Plot ##################################################
##################################################################################################
# Variable-importance plot: either VIP scores or regression coefficients for
# feature column feat.nm, drawn via the project helper PlotImpVar.
# Side effect: records the chosen importance type in analSet$plsda$imp.type.
MyPlotPLS.Imp<-function(imgName="PLSDA Imp", format="png", dpi=72, width=NA, type, feat.nm, feat.num, color.BW=FALSE){
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 8;
  }else if(width == 0){
    w <- 7;
    imgSet$pls.imp<<-imgName;
  }else{
    w <- width;
  }
  h <- w;
  #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  if(type=="vip"){
    analSet$plsda$imp.type<<-"vip";
    vips<-analSet$plsda$vip.mat[,feat.nm];
    PlotImpVar(vips, "VIP scores", feat.num, color.BW);
  }else{
    analSet$plsda$imp.type<<-"coef";
    data<-analSet$plsda$coef.mat[,feat.nm];
    PlotImpVar(data, "Coefficients", feat.num, color.BW);
  }
  #dev.off();
}
##################################################################################################
################################ PLSDA Imp table ##################################################
##################################################################################################
# VIP score table (5 significant digits), names as the first column.
VIPTab <-function(){
  m=signif(as.matrix(analSet$plsda$vip.mat),5)
  c=cbind(row.names(m),m)
  colnames(c)[1] <- c("Name")
  return(c)
}
# PLS-DA coefficient table (5 significant digits), names as the first column.
COEFTab <- function(){
  m=signif(as.matrix(analSet$plsda$coef.mat),5)
  c=cbind(row.names(m),m)
  colnames(c)[1] <- c("Name")
  return(c)
}
##################################################################################################
################################ PLSDA Perm Plot #################################################
##################################################################################################
# Histogram of the PLS-DA permutation test statistics (analSet$plsda$permut);
# the first element is the observed statistic, marked with a red arrow and
# annotated with the stored permutation p-value.
MyPlotPLS.Permutation<-function(imgName="PLSDA Permutation", format="png", dpi=72, width=NA){
  bw.vec<-analSet$plsda$permut;
  len<-length(bw.vec);
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 8;
  }else if(width == 0){
    w <- 7;
    imgSet$pls.permut<<-imgName;
  }else{
    w <- width;
  }
  h <- w*6/8;
  #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mar=c(5,5,2,4));
  hst <- hist(bw.vec, breaks = "FD", freq=T,
              ylab="Frequency", xlab= 'Permutation test statistics',
              col="lightblue", main="");
  # add the indicator using original label
  h <- max(hst$counts)
  arrows(bw.vec[1], h/5, bw.vec[1], 0, col="red", lwd=2);
  text(bw.vec[1], h/3.5, paste('Observed \n statistic \n', analSet$plsda$permut.p), xpd=T);
  #dev.off();
}
##################################################################################################
################################ OPLSDA Score Plot ###############################################
##################################################################################################
# OPLS-DA scores plot: predictive component t[1] vs. orthogonal component
# to[1], with per-class confidence ellipses (level = reg). Axis percentages
# come from the modelDF R2X entries. Side effect: imgSet$opls.score2d.
MyPlotOPLS2DScore<-function(imgName="OPLSDA Score", format="png", dpi=72, width=NA, inx1, inx2, reg=0.95, show=1, grey.scale=0){
  suppressMessages(require('ellipse'));
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 9;
  }else if(width == 0){
    w <- 7.2;
    imgSet$opls.score2d<<-imgName;
  }else{
    w <- width;
  }
  h <- w;
  #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mar=c(5,5,3,3));
  # NOTE(review): inx1/inx2 are accepted but the plot is fixed to the first
  # predictive and first orthogonal component.
  lv1 <- analSet$oplsda$scoreMN[,1];
  lv2 <- analSet$oplsda$orthoScoreMN[,1];
  xlabel <- paste("T score [1]", "(", round(100*analSet$oplsda$modelDF["p1", "R2X"],1), "%)");
  ylabel <- paste("Orthogonal T score [1]", "(", round(100*analSet$oplsda$modelDF["o1", "R2X"],1), "%)");
  text.lbls<-substr(rownames(dataSet$norm),1,12) # some names may be too long
  # obtain ellipse points to the scatter plot for each category
  lvs <- levels(dataSet$cls);
  pts.array <- array(0, dim=c(100,2,length(lvs)));
  for(i in 1:length(lvs)){
    inx <-dataSet$cls == lvs[i];
    groupVar<-var(cbind(lv1[inx],lv2[inx]), na.rm=T);
    groupMean<-cbind(mean(lv1[inx], na.rm=T),mean(lv2[inx], na.rm=T));
    pts.array[,,i] <- ellipse(groupVar, centre = groupMean, level = reg, npoints=100);
  }
  # extend axis limits so the ellipses fit inside the frame
  xrg <- range (lv1, pts.array[,1,]);
  yrg <- range (lv2, pts.array[,2,]);
  x.ext<-(xrg[2]-xrg[1])/12;
  y.ext<-(yrg[2]-yrg[1])/12;
  xlims<-c(xrg[1]-x.ext, xrg[2]+x.ext);
  ylims<-c(yrg[1]-y.ext, yrg[2]+y.ext);
  ## cols = as.numeric(dataSet$cls)+1;
  cols <- GetColorSchema(grey.scale==1);
  uniq.cols <- unique(cols);
  plot(lv1, lv2, xlab=xlabel, xlim=xlims, ylim=ylims, ylab=ylabel,
       type='n', main="Scores Plot");
  grid(col = "lightgray", lty = "dotted", lwd = 1);
  # make sure name and number of the same order DO NOT USE levels, which may be different
  legend.nm <- unique(as.character(dataSet$cls));
  ## uniq.cols <- unique(cols);
  ## BHAN: when same color is choosen for black/white; it makes an error
  # names(uniq.cols) <- legend.nm;
  if ( length(uniq.cols) > 1 ) {
    names(uniq.cols) <- legend.nm;
  }
  # draw ellipse
  for(i in 1:length(lvs)){
    if ( length(uniq.cols) > 1) {
      polygon(pts.array[,,i], col=adjustcolor(uniq.cols[lvs[i]], alpha=0.25), border=NA);
    } else {
      polygon(pts.array[,,i], col=adjustcolor(uniq.cols, alpha=0.25), border=NA);
    }
    if(grey.scale) {
      lines(pts.array[,,i], col=adjustcolor("black", alpha=0.5), lty=2);
    }
  }
  pchs <- GetShapeSchema(show, grey.scale);
  if(grey.scale) {
    cols <- rep("black", length(cols));
  }
  if(show==1){ # display sample name set on
    text(lv1, lv2, label=text.lbls, pos=4, xpd=T, cex=0.75);
    points(lv1, lv2, pch=pchs, col=cols);
  }else{
    if (length(uniq.cols) == 1) {
      points(lv1, lv2, pch=pchs, col=cols, cex=1.0);
    } else {
      if(grey.scale == 1 | (exists("shapeVec") && all(shapeVec>0))){
        points(lv1, lv2, pch=pchs, col=cols, cex=1.8);
      }else{
        points(lv1, lv2, pch=21, bg=cols, cex=2);
      }
    }
  }
  uniq.pchs <- unique(pchs);
  if(grey.scale) {
    uniq.cols <- "black";
  }
  legend("topright", legend = legend.nm, pch=uniq.pchs, col=uniq.cols);
  #dev.off();
}
##################################################################################################
################################ OPLSDA S-Plot ###################################################
##################################################################################################
# OPLS-DA S-plot: covariance p[1] vs. correlation p(corr)[1] of each feature
# with the predictive score vector. Writes oplsda_splot.csv and stores the
# (jittered) matrix in analSet$oplsda$splot.mat for later lookup.
# plotType: "all" labels every feature; "custom" labels only the features in
# the global custom.cmpds; anything else draws no labels.
MyPlotOPLS.Splot<-function(imgName='OPLSDA S-Plot', format="png", dpi=72, width=NA, plotType){
  s <- as.matrix(dataSet$norm);
  T <- as.matrix(analSet$oplsda$scoreMN)
  # per-feature covariance with the score vector (column vector p1)
  p1 <- c()
  for (i in 1:ncol(s)) {
    scov <- cov(s[,i], T)
    p1 <- matrix(c(p1, scov), ncol=1)
  }
  # per-feature correlation: covariance scaled by both standard deviations
  pcorr1 <- c()
  for (i in 1:nrow(p1)) {
    den <- apply(T, 2, sd)*sd(s[,i])
    corr1 <- p1[i,]/den
    pcorr1 <- matrix(c(pcorr1, corr1), ncol=1)
  }
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- h <- 8;
  }else if(width == 0){
    # NOTE(review): w/h are not set in this branch; harmless while the Cairo
    # call below stays commented out, but worth confirming before re-enabling.
    imgSet$opls.loading<<-imgName;
  }else{
    w <- h <- width;
  }
  #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mar=c(5,5,4,7))
  plot(p1, pcorr1, pch=19, xlab="p[1]", ylab ="p(corr)[1]", main = "S-plot", col="magenta");
  opls.axis.lims <<- par("usr");
  if(plotType=="all"){
    text(p1, pcorr1, labels=colnames(s), cex=0.8, pos=4, xpd=TRUE, col="blue");
  }else if(plotType == "custom"){
    if(length(custom.cmpds) > 0){
      hit.inx <- colnames(dataSet$norm) %in% custom.cmpds;
      text(p1[hit.inx], pcorr1[hit.inx], labels=colnames(s)[hit.inx], pos=4, xpd=TRUE, col="blue");
    }
  }else{
    # do nothing
  }
  #dev.off();
  splot.mat <- cbind(jitter(p1),p1, pcorr1);
  rownames(splot.mat) <- colnames(s);
  colnames(splot.mat) <- c("jitter", "p[1]","p(corr)[1]");
  write.csv(signif(splot.mat[,2:3],5), file="oplsda_splot.csv");
  analSet$oplsda$splot.mat <<- splot.mat;
}
##################################################################################################
################################ OPLSDA Tab ######################################################
##################################################################################################
# S-plot value table (p[1] and p(corr)[1] columns), names as first column.
OPLSTab <- function(){
  m=as.matrix(analSet$oplsda$splot.mat[,c(2,3)])
  c=cbind(row.names(m),m)
  colnames(c)[1] <- c("Name")
  return(c)
}
##################################################################################################
################################ OPLSDA Overview Plot ############################################
##################################################################################################
# Model-overview barplot: R2X, R2Y and Q2 per component from the OPLS-DA
# modelDF (the "sum" row is dropped), with values printed above the bars.
# NOTE(review): stores the image name under imgSet$pls.class — same slot as
# the PLS-DA CV plot; confirm whether that reuse is intentional.
MyPlotOPLS.MDL <- function(imgName="OPLSDA OverView", format="png", dpi=72, width=NA){
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 8;
  }else if(width == 0){
    w <- 8;
    imgSet$pls.class<<-imgName;
  }else{
    w <- width;
  }
  h <- w*6/8;
  #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  # the model R2Y and Q2Y
  par(mar=c(5,5,4,7)); # put legend on the right outside
  modBarDF <- analSet$oplsda$modelDF[!(rownames(analSet$oplsda$modelDF) %in% c("sum")), ];
  mod.dat <- rbind(modBarDF[, "R2X"] ,modBarDF[, "R2Y"], modBarDF[, "Q2"]);
  bplt <- barplot(mod.dat,beside=TRUE, names.arg = rownames(modBarDF),xlab = "");
  axis(2, lwd.ticks=1);
  barplot(mod.dat,add = TRUE, beside = TRUE, col = c("lightblue", "mistyrose","lightgreen"));
  text(x=bplt, y=mod.dat+max(mod.dat)/25, labels=as.character(mod.dat), xpd=TRUE)
  xpos <- nrow(modBarDF)*3 + nrow(modBarDF) + 0.5;
  ypos <- max(mod.dat)/2;
  legend(xpos, ypos, legend = c("R2X","R2Y", "Q2"), pch=15,
         col=c("lightblue", "mistyrose","lightgreen"), xpd=TRUE, bty="n");
  #dev.off();
}
##################################################################################################
################################ OPLSDA permutation Plot #########################################
##################################################################################################
# OPLS-DA permutation test: refits the model with `num` class-label
# permutations (perform_opls), overlays histograms of the permuted R2Y and
# Q2 values, marks the observed values, and returns a message string with
# the empirical p-values. Side effect: imgSet$pls.permut when width == 0.
MyPlotOPLS.Permutation<-function(imgName="OPLSDA permutation Plot", format="png", dpi=72, num, width=NA){
  cls<-scale(as.numeric(dataSet$cls))[,1];
  datmat<-as.matrix(dataSet$norm);
  # cross-validation segments capped at 7 (and at n-1 samples)
  cv.num <- min(7, dim(dataSet$norm)[1]-1);
  #perm.res<-performOPLS(datmat,cls, predI=1, orthoI=NA, permI=num, crossvalI=cv.num);
  perm.res<-perform_opls(datmat,cls, predI=1, orthoI=NA, permI=num, crossvalI=cv.num);
  # first row of permMN is the unpermuted (observed) model
  r.vec<-perm.res$suppLs[["permMN"]][, "R2Y(cum)"];
  q.vec<-perm.res$suppLs[["permMN"]][, "Q2(cum)"];
  rng <- range(c(r.vec, q.vec, 1));
  imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
  if(is.na(width)){
    w <- 8;
  }else if(width == 0){
    w <- 8;
    imgSet$pls.permut<<-imgName;
  }else{
    w <- width;
  }
  h <- w*6/8;
  #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
  par(mar=c(5,5,2,7));
  # compute both histograms on a shared bin width so they overlay cleanly
  rhst <- hist(r.vec[-1], plot=FALSE);
  qhst <- hist(q.vec[-1], plot=FALSE);
  h <- max(c(rhst$counts, qhst$counts))+1;
  bin.size <- min(c(rhst$breaks[2]-rhst$breaks[1], qhst$breaks[2]-qhst$breaks[1]));
  rbins <- seq(min(rhst$breaks),max(rhst$breaks),bin.size);
  qbins <- seq(min(qhst$breaks),max(qhst$breaks),bin.size);
  hist(r.vec[-1], xlim=rng, ylim=c(0, h), breaks=rbins, border=F,
       ylab="Frequency", xlab= 'Permutations',
       col=adjustcolor("lightblue", alpha=0.6), main="");
  hist(q.vec[-1], add=TRUE,breaks=qbins, border=F, col=adjustcolor("mistyrose", alpha=0.6));
  # mark the observed R2Y and Q2 with arrows and labels
  arrows(r.vec[1], h/3, r.vec[1], 0, length=0.1,angle=30,lwd=2);
  text(r.vec[1], h/2.5, paste('Observed \n R2Y:', r.vec[1]), xpd=TRUE);
  arrows(q.vec[1], h/2, q.vec[1], 0, length=0.1,angle=30,lwd=2);
  text(q.vec[1], h/1.8, paste('Observed \n Q2:', q.vec[1]), xpd=TRUE);
  legend(1, h/3, legend = c("Perm R2Y", "Perm Q2"), pch=15,
         col=c("lightblue", "mistyrose"), xpd=T, bty="n");
  #dev.off();
  # empirical p-value for R2Y: fraction of permutations >= observed
  better.rhits <- sum(r.vec[-1]>=r.vec[1]);
  if(better.rhits == 0) {
    pr <- paste("p < ", 1/num, " (", better.rhits, "/", num, ")", sep="");
  }else{
    p <- better.rhits/num;
    pr <- paste("p = ", signif(p, digits=5), " (", better.rhits, "/", num, ")", sep="");
  }
  # empirical p-value for Q2
  better.qhits <- sum(q.vec[-1]>=q.vec[1]);
  if(better.qhits == 0) {
    pq <- paste("p < ", 1/num, " (", better.qhits, "/", num, ")", sep="");
  }else{
    p <- better.qhits/num;
    pq <- paste("p = ", signif(p, digits=5), " (", better.qhits, "/", num, ")", sep="");
  }
  msg <- paste0("Empirical p-values R2Y: ", pr, " and Q2: ", pq)
  return(msg);
}
###################################################################################################
################################ PCA Trajectory Plot ##############################################
###################################################################################################
# ggplot trajectory of group means in PC space (pc1 vs pc2) with standard
# error bars per group. (Continues past this chunk; the closing portion of
# this function is outside the edited range.)
PlotTraPCA<-function(pc1,pc2,title,ptsSize,extPer,colors,errW){
  pointx=analSet$pca$x[,pc1]# pc1
  pointy=analSet$pca$x[,pc2]# pc2
  groups <- dataSet$cls
  levs <-levels(groups)
  meanX=c()
  sdX=c()
  seX=c()
  meanY=c()
  sdY=c()
  seY=c()
  gr=c()
  for(i in 1:length(levs)){
    gr[i]=levels(dataSet$cls)[i]
  }
  # per-group mean, sd and standard error of the x coordinate
  for (i in 1:length(levs)) {
    group <- levs[i]
    selected <- groups == group
    x1 <- pointx[selected];
    meanX[i]<-mean(x1)
    sdX[i]=sapply(as.data.frame(x1), sd)
    seX[i]= sdX[i]/sqrt(as.numeric(summary(dataSet$cls)[i]))
  }
  # per-group mean, sd and standard error of the y coordinate
  for(i in 1:length(levs)){
    group <- levs[i]
    selected <-groups ==group
    y1 <- pointy[selected];
    meanY[i]<-mean(y1)
    sdY[i]=sapply(as.data.frame(y1), sd)
    seY[i]= sdY[i]/sqrt(as.numeric(summary(dataSet$cls)[i]))
  }
  dFrame = data.frame(gr,meanX,meanY,seX,seY)
  # error-bar endpoints (mean +/- standard error) in both dimensions
  SEX.up = as.numeric(dFrame$meanX)+as.numeric(dFrame$seX)
  SEX.dn = as.numeric(dFrame$meanX)-as.numeric(dFrame$seX)
  SEY.up = as.numeric(dFrame$meanY)+as.numeric(dFrame$seY)
  SEY.dn = as.numeric(dFrame$meanY)-as.numeric(dFrame$seY)
  pts.arrayX <- c(meanX,SEX.up,SEX.dn)
  pts.arrayY <-
c(meanY,SEY.up,SEY.dn) xrg <- range (min(pts.arrayX), max(pts.arrayX)); yrg <- range (min(pts.arrayY), max(pts.arrayY)); x.ext<-abs(xrg[2]-xrg[1])*extPer; y.ext<-abs(yrg[2]-yrg[1])*extPer; xlims<-c(xrg[1]-x.ext, xrg[2]+x.ext); ylims<-c(yrg[1]-y.ext, yrg[2]+y.ext); ggplot(data = dFrame,aes(x = meanX,y = meanY,group=dFrame$gr, color=dFrame$gr)) + theme_bw()+ theme(panel.grid.major = element_blank(), panel.grid.minor=element_blank(),panel.background=element_blank(), axis.line = element_line(size = 0.3,colour = "black"), axis.text=element_text(size=12,colour="black"),axis.title=element_text(size=14,face="bold"))+ geom_point(size=ptsSize) + scale_colour_manual(name = "Groups",values=colors) + ggtitle(title) + xlim(xlims[1], xlims[2]) + ylim(ylims[1], ylims[2]) + xlab(paste(c("PC",pc1),collapse=" ")) + ylab(paste(c("PC",pc2),collapse=" ")) + geom_errorbar(aes(ymin = SEY.dn,ymax = SEY.up),width=errW) + geom_errorbarh(aes(xmin = SEX.dn,xmax = SEX.up),height=errW) } ################################################################################################### ############################### PLSDA Trajectory Plot ########################################### ################################################################################################### PlotTraPLSDA <- function(inx1, inx2,title,ptsSize,extPer,colors,errW){ lv1 <- analSet$plsr$scores[,inx1];# plsScore1 lv2 <- analSet$plsr$scores[,inx2];# plsScore2 groups <- dataSet$cls levs <-levels(groups) meanX=c() sdX=c() seX=c() meanY=c() sdY=c() seY=c() gr=c() for(i in 1:length(levs)){ gr[i]=levels(dataSet$cls)[i] } for (i in 1:length(levs)) { group <- levs[i] selected <- groups == group x1 <- lv1[selected]; meanX[i]<-mean(x1) sdX[i]=sapply(as.data.frame(x1), sd) seX[i]= sdX[i]/sqrt(as.numeric(summary(dataSet$cls)[i])) } for(i in 1:length(levs)){ group <- levs[i] selected <-groups ==group y1 <- lv2[selected]; meanY[i]<-mean(y1) sdY[i]=sapply(as.data.frame(y1), sd) seY[i]= 
sdY[i]/sqrt(as.numeric(summary(dataSet$cls)[i])) } dFrame = data.frame(gr,meanX,meanY,seX,seY) SEX.up = as.numeric(dFrame$meanX)+as.numeric(dFrame$seX) SEX.dn = as.numeric(dFrame$meanX)-as.numeric(dFrame$seX) SEY.up = as.numeric(dFrame$meanY)+as.numeric(dFrame$seY) SEY.dn = as.numeric(dFrame$meanY)-as.numeric(dFrame$seY) pts.arrayX <- c(meanX,SEX.up,SEX.dn) pts.arrayY <- c(meanY,SEY.up,SEY.dn) xrg <- range (min(pts.arrayX), max(pts.arrayX)); yrg <- range (min(pts.arrayY), max(pts.arrayY)); x.ext<-abs(xrg[2]-xrg[1])*extPer; y.ext<-abs(yrg[2]-yrg[1])*extPer; xlims<-c(xrg[1]-x.ext, xrg[2]+x.ext); ylims<-c(yrg[1]-y.ext, yrg[2]+y.ext); ggplot(data = dFrame,aes(x = meanX,y = meanY,group=dFrame$gr, color=dFrame$gr)) + theme_bw()+ theme(panel.grid.major = element_blank(), panel.grid.minor=element_blank(),panel.background=element_blank(), axis.line = element_line(size = 0.3,colour = "black"), axis.text=element_text(size=12,colour="black"),axis.title=element_text(size=14,face="bold"))+ geom_point(size=ptsSize) + scale_colour_manual(name = "Groups",values=colors) + ggtitle(title) + xlim(xlims[1], xlims[2]) + ylim(ylims[1], ylims[2]) + xlab(paste(c("Component",inx1),collapse=" ")) + ylab(paste(c("Component",inx2),collapse=" ")) + geom_errorbar(aes(ymin = SEY.dn,ymax = SEY.up),width=errW) + geom_errorbarh(aes(xmin = SEX.dn,xmax = SEX.up),height=errW) } ################################################################################################### ##################################### SAM Plot ################################################### ################################################################################################### MyPlotSAM.FDR<-function(delta, imgName="SAM FDR Plot", format="png", dpi=72, width=NA){ imgName = paste(imgName, "dpi", dpi, ".", format, sep=""); if(is.na(width)){ w <- 10; }else if(width == 0){ w <- 7.2; imgSet$sam.fdr<<-imgName; } h <- w*3/5; #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white"); 
par(mfrow=c(1,2), mar=c(5,6,4,1)); mat.fdr<-analSet$sam@mat.fdr; plot(mat.fdr[,"Delta"],mat.fdr[,"FDR"],xlab='Delta',ylab=NA,type="b", col='blue', las=2); abline(v = delta, lty=3, col="magenta"); mtext("FDR", side=2, line=5); par(mar=c(5,5,4,2)) plot(mat.fdr[,"Delta"],mat.fdr[,"Called"],xlab='Delta',ylab="Significant feaure No.",type="b", col='blue', las=2); abline(v = delta, lty=3, col="magenta"); hit.inx <- mat.fdr[,"Delta"] <= delta; my.fdr <- signif(min(mat.fdr[,"FDR"][hit.inx]), 3); my.sigs <- min(mat.fdr[,"Called"][hit.inx]); mtext(paste("Delta:", delta, " FDR:", my.fdr, " Sig. cmpds:", my.sigs), line=-2, side = 3, outer = TRUE, font=2) #dev.off(); } ################################################################################################### #################################### SAM plot 2 #################################################### ################################################################################################### SAMResPlot <- function(delta){ sam.plot2(analSet$sam,delta) } ################################################################################################### #################################### SAM Table #################################################### ################################################################################################### SAMTable <- function(del){ SetSAMSigMat(delta=del) m=cbind(row.names(analSet$sam.cmpds),analSet$sam.cmpds) colnames(m)[1] <- c("Name") return(m) } ################################################################################################### ##################################### Dendrogram Plot ############################################ ################################################################################################### MyPlotHCTree<-function(imgName="Dendrpgram", format="png", dpi=600, width=NA, smplDist, clstDist){ # set up data set hc.dat<-as.matrix(dataSet$norm); colnames(hc.dat)<-substr(colnames(hc.dat), 1, 18) # some names are too long 
# set up distance matrix if(smplDist == 'euclidean'){ dist.mat<-dist(hc.dat, method = smplDist); }else{ dist.mat<-dist(1-cor(t(hc.dat), method = smplDist)); } # record the paramters analSet$tree<<-list(dist.par=smplDist, clust.par=clstDist); # build the tree hc_tree<-hclust(dist.mat, method=clstDist); # plot the tree imgName = paste(imgName, "dpi", dpi, ".", format, sep=""); if(is.na(width)){ w <- minH <- 630; myH <- nrow(hc.dat)*10 + 150; if(myH < minH){ myH <- minH; } w <- round(w/72,2); h <- round(myH/72,2); }else if(width == 0){ w <- h <- 7.2; imgSet$tree<<-imgName; }else{ w <- h <- 7.2; } #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white"); par(cex=0.8, mar=c(4,2,2,8)); if(dataSet$cls.type == "disc"){ clusDendro<-as.dendrogram(hc_tree); cols <- GetColorSchema(); names(cols) <- rownames(hc.dat); labelColors <- cols[hc_tree$order]; colLab <- function(n){ if(is.leaf(n)) { a <- attributes(n) labCol <- labelColors[a$label]; attr(n, "nodePar") <- if(is.list(a$nodePar)) c(a$nodePar, lab.col = labCol,pch=NA) else list(lab.col = labCol,pch=NA) } n } clusDendro<-dendrapply(clusDendro, colLab) plot(clusDendro,horiz=T,axes=T); par(cex=1); legend.nm <- as.character(dataSet$cls); legend("topleft", legend = unique(legend.nm), pch=15, col=unique(cols), bty = "n"); }else{ plot(as.dendrogram(hc_tree), hang=-1, main=paste("Cluster with", clstDist, "method"), xlab=NULL, sub=NULL, horiz=TRUE); } #dev.off(); } ################################################################################################### ##################################### Cluster Analysis Heatmap #################################### ################################################################################################### MyPlotSubHeatMap <- function(imgName="Sub Heat Map", format="png", dpi=600, width=NA, dataOpt, scaleOpt, smplDist, clstDist, palette, method.nm, top.num, viewOpt, rowV=T, colV=T, border=T){ var.nms = colnames(dataSet$norm); if(top.num < 
length(var.nms)){ if(method.nm == 'tanova'){ if(GetGroupNumber() == 2){ if(is.null(analSet$tt)){ Ttests.Anal(); } var.nms <- names(sort(analSet$tt$p.value))[1:top.num]; }else{ if(is.null(analSet$aov)){ ANOVA.Anal(); } var.nms <- names(sort(analSet$aov$p.value))[1:top.num]; } }else if(method.nm == 'cor'){ if(is.null(analSet$cor.res)){ Match.Pattern(); } # re-order for pretty view cor.res <- analSet$cor.res; ord.inx<-order(cor.res[,3]); cor.res <- cor.res[ord.inx, ]; ord.inx<-order(cor.res[,1]); cor.res <- cor.res[ord.inx, ]; var.nms <- rownames(cor.res)[1:top.num]; }else if(method.nm == 'vip'){ if(is.null(analSet$plsda)){ PLSR.Anal(); PLSDA.CV(); } vip.vars <- analSet$plsda$vip.mat[,1];# use the first component var.nms <- names(rev(sort(vip.vars)))[1:top.num]; }else if(method.nm == 'rf'){ if(is.null(analSet$rf)){ RF.Anal(); } var.nms <- GetRFSigRowNames()[1:top.num]; } } var.inx <- match(var.nms, colnames(dataSet$norm)); MyPlotHeatMap(imgName, format, dpi, width, dataOpt, scaleOpt, smplDist, clstDist, palette, viewOpt, rowV, colV, var.inx, border); } MyPlotHeatMap<-function(imgName="Heat Map", format="png", dpi=72, width=NA, dataOpt, scaleOpt, smplDist, clstDist, palette, viewOpt="detail", rowV=T, colV=T, var.inx=NA, border=T){ # record the paramters analSet$htmap<<-list(dist.par=smplDist, clust.par=clstDist); # set up data set if(dataOpt=="norm"){ my.data <- dataSet$norm; }else{ my.data <- dataSet$proc; } if(is.na(var.inx)){ hc.dat<-as.matrix(my.data); }else{ hc.dat<-as.matrix(my.data[,var.inx]); } colnames(hc.dat)<-substr(colnames(hc.dat),1,18) # some names are too long hc.cls <- dataSet$cls; # set up colors for heatmap if(palette=="gbr"){ colors <- colorRampPalette(c("green", "black", "red"), space="rgb")(256); }else if (palette=="wnvyb"){ colors <- colorRampPalette(c("white","navyblue"), space="Lab")(256); }else if (palette=="rwg"){ colors <- colorRampPalette(c("red", "white", "green"), space="rgb")(256); }else if (palette=="rwb"){ colors <- 
colorRampPalette(c("red","white","blue"),space="rgb")(256); }else if(palette == "heat"){ colors <- heat.colors(256); }else if(palette == "topo"){ colors <- topo.colors(256); }else if(palette == "gray"){ colors <- colorRampPalette(c("grey90", "grey10"), space="rgb")(256); }else{ suppressMessages(require(RColorBrewer)); colors <- rev(colorRampPalette(brewer.pal(10, "RdBu"))(256)); } imgName = paste(imgName, "dpi", dpi, ".", format, sep=""); if(is.na(width)){ minW <- 630; myW <- nrow(hc.dat)*18 + 150; if(myW < minW){ myW <- minW; } w <- round(myW/72,2); }else if(width == 0){ w <- 7.2; imgSet$heatmap<<-imgName; }else{ w <- 7.2; } myH <- ncol(hc.dat)*18 + 150; h <- round(myH/72,2); if(viewOpt == "overview"){ if(is.na(width)){ if(w > 9){ w <- 9; } }else if(width == 0){ if(w > 7.2){ w <- 7.2; } imgSet$heatmap<<-imgName; }else{ w <- 7.2; } if(h > w){ h <- w; } } if(border){ border.col<-"grey60"; }else{ border.col <- NA; } #Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white"); if(dataSet$cls.type == "disc"){ require(pheatmap); annotation <- data.frame(class= hc.cls); rownames(annotation) <-rownames(hc.dat); # set up color schema for samples if(palette== "gray"){ cols <- GetColorSchema(T); uniq.cols <- unique(cols); }else{ cols <- GetColorSchema(); uniq.cols <- unique(cols); } names(uniq.cols) <- unique(as.character(dataSet$cls)); ann_colors <- list(class= uniq.cols); pheatmap(t(hc.dat), annotation=annotation, fontsize=8, fontsize_row=8, clustering_distance_rows = smplDist, clustering_distance_cols = smplDist, clustering_method = clstDist, border_color = border.col, cluster_rows = colV, cluster_cols = rowV, scale = scaleOpt, color = colors, annotation_colors = ann_colors ); }else{ heatmap(hc.dat, Rowv = rowTree, Colv=colTree, col = colors, scale="column"); } #dev.off(); } ##################################################################################################### ################################# K-Means 
###########################################################
#####################################################################################################
# Per-cluster profile plot for the stored k-means result: one panel per cluster
# (skipped entirely if more than 20 clusters), grey lines for member samples
# and a blue median profile.  Reads analSet$kmeans and dataSet$norm.
# Cairo output disabled (commented) -- draws on the active device.
MyPlotKmeans<-function(imgName="K-means Plot", format="png", dpi=72, width=NA){
clust.num <- max(analSet$kmeans$cluster);
if(clust.num>20) return();
# calculate arrangement of panel
ylabel<-GetValueLabel();
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
w <- 7;
imgSet$kmeans<<-imgName;
}else{
w <- width;
}
h <- w*8/9;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mfrow = GetXYCluster(clust.num), mar=c(5,4,2,2));
for (loop in 1:clust.num) {
# grey spaghetti of member profiles, blue median overlay
matplot(t(dataSet$norm[analSet$kmeans$cluster==loop,]), type="l", col='grey', ylab=ylabel, axes=F, main=paste("Cluster ",loop, ", n=", analSet$kmeans$size[loop], sep=""))
lines(apply(dataSet$norm[analSet$kmeans$cluster==loop,], 2, median), type="l", col='blue', lwd=1);
axis(2);
axis(1, 1:ncol(dataSet$norm), substr(colnames(dataSet$norm), 1, 7), las=2);
}
#dev.off();
}
###############################################################################################################
####################################### K-means Table #########################################################
###############################################################################################################
# One-column table listing, per k-means cluster, the space-separated sample
# names belonging to it; returned with a leading (blank-named) label column.
MyGetAllKMClusterMembers<-function(){
clust.df = data.frame();
rowNameVec = c();
i = 1;
clust.num<-max(analSet$kmeans$cluster);
while(i<=clust.num){
if(i==1){
clust.df <- rbind(paste(rownames(dataSet$norm)[analSet$kmeans$cluster== i], collapse = " "));
}else{
clust.df <- rbind(clust.df,paste(rownames(dataSet$norm)[analSet$kmeans$cluster== i], collapse = " "));
}
rowNameVec <- c(rowNameVec, paste("Cluster(", i, ")"));
i = i+1;
}
row.names(clust.df)<- rowNameVec;
colnames(clust.df)<-"Samples in each cluster";
#xtable(clust.df, align="l|p{8cm}", caption="Clustering result using K-means");
m=cbind(row.names(clust.df),clust.df)
colnames(m)[1] <- c(" ")
return(m)
}
###############################################################################################################
####################################### SOM Plot ##############################################################
###############################################################################################################
# Per-node profile plot for the stored SOM result: one panel per grid node
# (skipped if the grid has more than 20 nodes); empty nodes get a dummy panel.
# Reads analSet$som (xdim, ydim, visual) and dataSet$norm.
MyPlotSOM <- function(imgName="SOM Plot", format="png", dpi=72, width=NA){
xdim<-analSet$som$xdim;
ydim<-analSet$som$ydim;
total<-xdim*ydim;
if(total>20) {
return();
}
ylabel<-GetValueLabel();
clust<-analSet$som$visual;
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
w <- 7;
imgSet$som<<-imgName;
}else{
w <- width;
}
h <- w*8/9;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
par(mfrow = GetXYCluster(total), mar=c(5,4,2,2));
for (i in 0:(xdim-1)) {
xTrue<-clust$x == i;
for (j in 0:(ydim-1)) {
yTrue<-clust$y == j;
sel.inx<-xTrue & yTrue; # selected row
if(sum(sel.inx)>0){ # some cluster may not contain any member
matplot(t(dataSet$norm[sel.inx, ]), type="l", col='grey', axes=F, ylab=ylabel, main=paste("Cluster(", i, ",", j,")", ", n=", sum(sel.inx), sep=""))
lines(apply(dataSet$norm[sel.inx, ], 2, median), type="l", col='blue', lwd=1);
}else{ # plot a dummy
plot(t(dataSet$norm[1, ]), type="n", axes=F, ylab=ylabel, main=paste("Cluster(", i, ",", j,")",", n=", sum(sel.inx),sep=""))
}
axis(2);
axis(1, 1:ncol(dataSet$norm), substr(colnames(dataSet$norm), 1, 7), las=2);
}
}
#dev.off();
}
###############################################################################################################
####################################### SOM Table #############################################################
###############################################################################################################
# One-column table listing, per SOM grid node (i,j), the space-separated
# sample names assigned to it; returned with a leading label column.
MyGetAllSOMClusterMembers<-function(){
clust<-analSet$som$visual;
xdim<-analSet$som$xdim;
ydim<-analSet$som$ydim;
clust.df = data.frame();
rowNameVec = c();
i = 0;
while(i < xdim){
j = 0;
while(j < ydim){
xTrue<-clust$x == i;
yTrue<-clust$y == j;
if(i==0 & j==0){ # bug in R, the first one need to be different
clust.df <- rbind(paste(rownames(dataSet$norm)[xTrue & yTrue], collapse = " "));
rowNameVec <- c(paste("Cluster(", i, ",", j,")"));
}else{
clust.df <- rbind(clust.df, paste(rownames(dataSet$norm)[xTrue & yTrue], collapse=" "));
rowNameVec <- c(rowNameVec, paste("Cluster(", i, ",", j,")"));
}
j = j+1;
}
i = i+1;
}
row.names(clust.df)<- rowNameVec;
colnames(clust.df)<-"Samples in each cluster";
#xtable(clust.df, align="l|p{8cm}", caption="Clustering result using SOM");
m=cbind(row.names(clust.df),clust.df)
colnames(m)[1] <- c(" ")
return(m)
}
############################################################################################################
####################################### Random Forest Plot #################################################
#############################################################################################################
# Plots the random-forest error-rate curves (overall + one per class) for the
# stored model analSet$rf, with a legend.  Cairo output disabled.
MyPlotRF.Classify<-function(imgName="Random Forest Classification Plot", format="png", dpi=72, width=NA){
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 8;
}else if(width == 0){
w <- 8;
imgSet$rf.cls<<-imgName;
}else{
w <- width;
}
h <- w*5/8;
# Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
#par(mfrow=c(2,1));
par(mar=c(4,4,3,2));
cols <- rainbow(length(levels(dataSet$cls))+1); # +1 for the overall error curve
plot(analSet$rf, main="Random Forest classification", col=cols);
legend("topright", legend = c("Overall", levels(dataSet$cls)), lty=2, lwd=1, col=cols);
#PlotConfusion(analSet$rf$confusion);
#dev.off();
}
###################################################################################################
#################################### Random Forest Table
##########################################
##################################################################################################
# Random-forest confusion matrix with row names promoted into a leading
# (blank-named) column, for display as a table.
MyGetRFConf.Table<-function(){
m=cbind(row.names(analSet$rf$confusion),analSet$rf$confusion)
colnames(m)[1] <- c(" ")
return(m)
}
###################################################################################################
#################################### Random Forest VIP ############################################
##################################################################################################
# Variable-importance plot (MeanDecreaseAccuracy, descending) for the stored
# random-forest model; drawing is delegated to PlotImpVar.
MyPlotRF.VIP<-function(imgName="Random Forest VIP", format="png", dpi=72, width=NA){
vip.score <- rev(sort(analSet$rf$importance[,"MeanDecreaseAccuracy"]));
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 8;
}else if(width == 0){
w <- 7;
imgSet$rf.imp<<-imgName;
}else{
w <- width;
}
h <- w*7/8;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
PlotImpVar(vip.score,"MeanDecreaseAccuracy");
#dev.off();
}
###################################################################################################
#################################### Random Forest VIP Table ######################################
##################################################################################################
# Significant-feature importance table from analSet$rf.sigmat, with a leading
# "Names" column.  NOTE(review): colnames(m) <- c("Names","MeanDecreaseAccuracy")
# assumes rf.sigmat has exactly one column -- confirm upstream.
MyRFVipTab <-function(){
m=cbind(row.names(analSet$rf.sigmat),analSet$rf.sigmat)
colnames(m) <- c("Names","MeanDecreaseAccuracy")
return(m)
}
########################################################################################################
################################## Random Forest Outlier Detection Plot ################################
########################################################################################################
# Outlyingness plot from randomForest::outlier() on the stored model: stem plot
# of per-sample outlying measure coloured by group, labels on the top-5 most
# outlying samples, and a separate legend panel on the right.
MyPlotRF.Outlier<-function(imgName="Random Forest Outlier Dectection Plot", format="png", dpi=72, width=NA){
cols <- GetColorSchema();
uniq.cols <- unique(cols);
legend.nm <- unique(as.character(dataSet$cls));
dist.res <- outlier(analSet$rf);
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 9;
}else if(width == 0){
w <- 7.2;
imgSet$rf.outlier<<-imgName;
}else{
w <- width;
}
h <- w*7/9;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
layout(matrix(c(1,2), 1, 2, byrow = TRUE), width=c(4,1));
op<-par(mar=c(5,5,4,0));
plot(dist.res, type="h", col=cols, xlab="Samples", xaxt="n", ylab="Outlying Measures", bty="n");
# add sample names to top 5
rankres <- rank(-abs(dist.res), ties.method="random");
inx.x <- which(rankres < 6);
inx.y <- dist.res[inx.x];
nms <- names(dist.res)[inx.x];
text(inx.x, inx.y, nms, pos=ifelse(inx.y >= 0, 3, 1), xpd=T)
op<-par(mar=c(5,0,4,1));
plot.new();
plot.window(c(0,1), c(0,1));
legend("center", legend =legend.nm, pch=15, col=uniq.cols);
#dev.off();
}
########################################################################################################
################################## SVM Plot ############################################################
########################################################################################################
# Recursive-SVM classification plot: error rate vs number of retained variables
# from analSet$svm$Error, with percentage labels and the best level in red.
MyPlotRSVM.Classification<-function(imgName="SVM Plot", format="png", dpi=72, width=NA){
res<-analSet$svm$Error;
edge<-(max(res)-min(res))/100; # expand y uplimit for text
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 8;
}else if(width == 0){
w <- 7;
imgSet$svm.class<<-imgName;
}else{
w <- width;
}
h <- w*6/8;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
plot(res,type='l',xlab='Number of variables (levels)',ylab='Error Rate', ylim = c(min(res)-5*edge, max(res)+18*edge), axes=F, main="Recursive SVM classification")
text(res,labels =paste(100*round(res,3),'%'), adj=c(-0.3, -0.5), srt=45, xpd=T)
points(res, col=ifelse(1:length(res)==analSet$svm$best.inx,"red","blue"));
axis(2);
axis(1, 1:length(res), names(res));
#dev.off();
}
########################################################################################################
################################## SVM Plot Var.importance #############################################
########################################################################################################
# Variable-importance (selection frequency) plot for the recursive-SVM result;
# drawing delegated to PlotImpVar.
MyPlotRSVM.Cmpd<-function(imgName="SVm var.importance plot", format="png", dpi=72, width=NA){
sigs<-analSet$svm$sig.mat;
data<-sigs[,1];
imgName = paste(imgName, "dpi", dpi, ".", format, sep="");
if(is.na(width)){
w <- 8;
}else if(width == 0){
w <- 7;
imgSet$svm<<-imgName;
}else{
w <- width;
}
h <- w*7/8;
#Cairo(file = imgName, unit="in", dpi=dpi, width=w, height=h, type=format, bg="white");
PlotImpVar(data,"Frequency");
#dev.off();
}
# Fits an O-scores PLS regression of class (as numeric) on the normalized data
# with `no` components, stores the model in the global theModel, and draws a
# measured-vs-predicted plot with overlaid points.
PlsRegPlot <- function(no,color,ptsize){
cls<-as.numeric(dataSet$cls);
datmat<-as.matrix(dataSet$norm);
theModel <<-plsr(cls~datmat,method='oscorespls', ncomp=no)
summary(theModel)
tempMatrix1 <- predictionFunc(theModel,ncomp=no)
inx.one <- tempMatrix1
predplot(theModel, ncomp = no, asp = 1, line=TRUE, cex=ptsize)
points(inx.one, bg=color, cex=ptsize, pch=21);
}
# Same as PlsRegPlot but with leave-one-out cross-validation; stores the model
# in the global cvModel.
plsRegPlotCV <- function(no,color,ptsize){
cls<-as.numeric(dataSet$cls);
datmat<-as.matrix(dataSet$norm);
cvModel <<-plsr(cls~datmat,method='oscorespls', ncomp=no, validation="LOO")
summary(cvModel)
tempMatrix2 <- predictionFunc(cvModel,ncomp=no)
inx.two <- tempMatrix2
predplot(cvModel, ncomp = no, asp = 1, line=TRUE, cex=ptsize)
points(inx.two, bg=color, cex=ptsize, pch=21);
}
# Overlays fitted (color1) and LOO cross-validated (color2) predictions of the
# `no`-component PLS regression on a single measured-vs-predicted plot.
# Updates the globals theModel and cvModel as a side effect.
predOvrlyPlt <- function(no,color1,color2,ptsize){
cls<-as.numeric(dataSet$cls);
datmat<-as.matrix(dataSet$norm);
theModel <<-plsr(cls~datmat,method='oscorespls', ncomp=no)
cvModel <<-plsr(cls~datmat,method='oscorespls', ncomp=no, validation="LOO")
tempMatrix1 <- predictionFunc(theModel,ncomp=no)
tempMatrix2 <- predictionFunc(cvModel,ncomp=no)
inx.one <- tempMatrix1
inx.two <- tempMatrix2
predplot(theModel, ncomp = no, asp = 1, line=TRUE, cex=ptsize, main=paste("Cls,", no, "comps overlay", collapse=" "))
points(inx.one, bg=color1, cex=ptsize, pch=21);
points(inx.two, bg=color2, cex=ptsize, pch=21);
}
# Summary table for a `no`-component PLS model: per-component LOOCV Accuracy
# (from caret::train), plus R2 and Q2 (from pls::R2 on a LOO-validated plsr
# fit).  Stores the fitted model in the global modelcv; returns a 3-row matrix.
PLSR.Table<-function(no){
cls<-as.numeric(dataSet$cls)
datmat<-as.matrix(dataSet$norm);
plsda.cls <- train(dataSet$norm, dataSet$cls, "pls", trControl=trainControl(method="LOOCV"), tuneLength=no);
# use the classifical regression to get R2 and Q2 measure
modelcv<<-plsr(cls~datmat,method='oscorespls', ncomp=no, validation="LOO")
fit.info <- pls::R2(modelcv, estimate = "all")$val[,1,];
# combine accuracy, R2 and Q2
accu <- plsda.cls$results[,2]
all.info <- rbind(accu, fit.info[,-1]);
rownames(all.info) <- c("Accuracy", "R2", "Q2")
return(all.info);
}
# Adapted from pls::predplot: collects measured and predicted responses for the
# requested estimate type(s) ("train", "validation", "test") and model sizes,
# lays out a grid of panels, and delegates each panel to predplotXYFunc.
# Returns (invisibly) the measured/predicted matrix of the LAST panel drawn.
predictionFunc <- function(object, ncomp = object$ncomp, which, newdata, nCols, nRows, xlab = "measured", ylab = "predicted", main, ..., font.main, cex.main)
{
## Select type(s) of prediction
if (missing(which)) {
## Pick the `best' alternative.
if (!missing(newdata)) {
which <- "test"
} else {
if (!is.null(object$validation)) {
which <- "validation"
} else {
which <- "train"
}
}
} else {
## Check the supplied `which'
allTypes <- c("train", "validation", "test")
which <- allTypes[pmatch(which, allTypes)]
if (length(which) == 0 || any(is.na(which)))
stop("`which' should be a subset of ", paste(allTypes, collapse = ", "))
}
## Help variables
nEst <- length(which)
nSize <- length(ncomp)
nResp <- dim(object$fitted.values)[2]
## Set plot parametres as needed:
dims <- c(nEst, nSize, nResp)
dims <- dims[dims > 1]
nPlots <- prod(dims)
if (nPlots > 1) {
## Set up default font.main and cex.main for individual titles:
if (missing(font.main)) font.main <- 1
if (missing(cex.main)) cex.main <- 1.1
## Show the *labs in the margin:
mXlab <- xlab
mYlab <- ylab
xlab <- ylab <- ""
if(missing(nCols)) nCols <- min(c(3, dims[1]))
if(missing(nRows)) nRows <- min(c(3, ceiling(prod(dims[1:2], na.rm = TRUE) / nCols)))
# restore the caller's graphics state when done
opar <- par(no.readonly = TRUE)
on.exit(par(opar))
par(mfrow = c(nRows, nCols), oma = c(1, 1, if(missing(main)) 0 else 2, 0) + 0.1, mar = c(3,3,3,1) + 0.1)
if (nRows * nCols < nPlots && dev.interactive()) par(ask = TRUE)
} else {
## Set up default font.main and cex.main for the main title:
if (missing(font.main)) font.main <- par("font.main")
if (missing(cex.main)) cex.main <- par("cex.main")
nCols <- nRows <- 1
}
## Set up measured and predicted for all model sizes, responses and
## estimates:
if ("train" %in% which) {
train.measured <- as.matrix(model.response(model.frame(object)))
train.predicted <- object$fitted.values[,,ncomp, drop = FALSE]
}
if ("validation" %in% which) {
if (is.null(object$validation)) stop("`object' has no `validation' component.")
if(!exists("train.measured")) train.measured <- as.matrix(model.response(model.frame(object)))
validation.predicted <- object$validation$pred[,,ncomp, drop = FALSE]
}
if ("test" %in% which) {
if (missing(newdata)) stop("Missing `newdata'.")
test.measured <- as.matrix(model.response(model.frame(formula(object), data = newdata)))
test.predicted <- predict(object, ncomp = ncomp, newdata = newdata)
}
## Do the plots
plotNo <- 0
for (resp in 1:nResp) {
for (size in 1:nSize) {
for (est in 1:nEst) {
plotNo <- plotNo + 1
if (nPlots == 1 && !missing(main)) {
lmain <- main
} else {
lmain <- sprintf("%s, %d comps, %s", respnames(object)[resp], ncomp[size], which[est])
}
sub <- which[est]
switch(which[est],
train = {
measured <- train.measured[,resp]
predicted <- train.predicted[,resp,size]
},
validation = {
measured <- train.measured[,resp]
predicted <- validation.predicted[,resp,size]
},
test = {
measured <- test.measured[,resp]
predicted <- test.predicted[,resp,size]
}
)
xy <- predplotXYFunc(measured, predicted, main = lmain, font.main = font.main, cex.main = cex.main, xlab = xlab, ylab = ylab, ...)
if (nPlots > 1 && (plotNo %% (nCols * nRows) == 0 || plotNo == nPlots)) {
## Last plot on a page; add outer margin text and title:
mtext(mXlab, side = 1, outer = TRUE)
mtext(mYlab, side = 2, outer = TRUE)
if (!missing(main)) title(main = main, outer = TRUE)
}
}
}
}
invisible(xy)
}
# Adapted from pls:::predplotXY: draws one measured-vs-predicted panel
# (optionally with point labels and a y=x target line) and invisibly returns
# cbind(measured, predicted).  NOTE(review): the plot() call is commented out,
# so no new panel is opened here -- text()/abline() draw on the current device;
# callers appear to rely on pls::predplot for the actual panel.  Confirm.
predplotXYFunc <- function(x, y, line = FALSE, labels, type = "p", main = "Prediction plot", xlab = "measured response", ylab = "predicted response", line.col = par("col"), line.lty = NULL, line.lwd = NULL, ...)
{
if (!missing(labels)) {
## Set up point labels
if (length(labels) == 1) {
labels <- switch(match.arg(labels, c("names", "numbers")), names = names(y), numbers = as.character(1:length(y)) )
}
## Override plot type:
type <- "n"
}
#plot(y ~ x, type = type, main = main, xlab = xlab, ylab = ylab, ...)
if (!missing(labels)) text(x, y, labels, ...)
if (line) abline(0, 1, col = line.col, lty = line.lty, lwd = line.lwd)
invisible(cbind(measured = x, predicted = as.vector(y)))
}
#session$onSessionEnded(function() {
#stopApp()
#q("no")
# })
# Installs any missing packages from the list below and attaches them all,
# returning the per-package TRUE/FALSE status shown on the "Load all Packages"
# tab.  NOTE(review): uses require() inside sapply for attach-status reporting;
# several listed packages are Bioconductor packages, which install.packages()
# alone cannot install -- confirm the intended installation path.
LoadAllPackages<-function(){
packages <- c("xtable","ggplot2","shiny","rgl", "pca3d", "ellipse", "scatterplot3d","pls", "caret","lattice", "Cairo", "randomForest", "e1071","gplots", "som","RColorBrewer", "genefilter", "pheatmap","preprocessCore","xcms", "impute","siggenes","sva", "ropls","RBGL","pcaMethods")
# loading packages
ipak <- function(pkg){
new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
if (length(new.pkg)) install.packages(new.pkg, dependencies = TRUE)
sapply(pkg, require, character.only = TRUE)
}
ipak(packages)
}
} # closes the Shiny server function (opened before this chunk)
shinyApp(ui = ui, server = server)