From beab0b9e6463727fba3541f6efa82c59fc803cec Mon Sep 17 00:00:00 2001 From: Taha Ahmed Date: Fri, 2 Sep 2011 00:45:52 +0200 Subject: [PATCH] Solved the issue with repeating diffractometry analyses by wrapper functions. Caught some errors and made some improvements in the other functions. --- Renishaw/Raman2df.R | 47 +++++++++++---- Renishaw/RamanWrapper.R | 64 ++++++++++++++++++++ Renishaw/RamanWrapper.tex | 4 ++ Renishaw/Ramanpk.R | 113 +++++++++++++++++++---------------- Renishaw/SiliconWrapper.R | 65 ++++++++++++++++++++ XRF-SPECTRO/xrfpk.R | 104 ++++++++++++++++++++++++++++++++ XRF-SPECTRO/xrfpkWrapper.R | 67 +++++++++++++++++++++ XRF-SPECTRO/xrfpkWrapper.tex | 3 + XRF-SPECTRO/xrfspectro2df.R | 86 ++++++++++++++++++++++++++ common/ProvideSampleId.R | 57 ++++++++++-------- common/ProvideSampleId.tex | 6 ++ 11 files changed, 527 insertions(+), 89 deletions(-) create mode 100644 Renishaw/RamanWrapper.R create mode 100644 Renishaw/RamanWrapper.tex create mode 100644 Renishaw/SiliconWrapper.R create mode 100644 XRF-SPECTRO/xrfpk.R create mode 100644 XRF-SPECTRO/xrfpkWrapper.R create mode 100644 XRF-SPECTRO/xrfpkWrapper.tex create mode 100644 XRF-SPECTRO/xrfspectro2df.R create mode 100644 common/ProvideSampleId.tex diff --git a/Renishaw/Raman2df.R b/Renishaw/Raman2df.R index a6678e3..bfe7ca6 100644 --- a/Renishaw/Raman2df.R +++ b/Renishaw/Raman2df.R @@ -1,10 +1,25 @@ -source("/home/taha/chepec/chetex/common/R/common.R") +source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R") -################################################## -################### Raman2df ####################### -################################################## Raman2df <- function(datafilename) { - # Function description: for reading Raman spectrum into dataframe + ## Description: + ## Reads Raman data in ASCII format + ## (wavenumber, counts) + ## and returns a dataframe with the original data, + ## as well as interpolated wavenumber and counts values + ## (interpolated data is evenly spaced along x-axis) + ## Usage: + ## Raman2df(datafilename) + ## Arguments: + ## datafilename: text string with full path to experimental file + ## Value: + ## Dataframe with the following columns (and no extra attributes): + ## $ sampleid : chr (id) + ## $ wavenum : num (measure) + ## $ counts : num (measure) + ## $ wnum.interp : num (measure) + ## $ cts.interp : num (measure) + ## Note: + ## # datafile <- file(datafilename, "r") chifile <- readLines(datafile, n = -1) #read all lines of input file @@ -20,15 +35,21 @@ Raman2df <- function(datafilename) { matrix(scan(zz, what = numeric(), sep = "\t"), ncol = 2, byrow = T))) close(zz) - names(ff) <- c("sampleid", "shift", "counts") - # Re-order by increasing shift - ff <- ff[order(ff$shift), ] - # And fix the row.names + names(ff) <- c("sampleid", "wavenum", "counts") + # Sort dataframe by increasing wavenumbers + ff <- ff[order(ff$wavenum), ] + # ... sort the rownames as well row.names(ff) <- seq(1, dim(ff)[1]) - # Do not re-calculate the spectrum with evenly spaced points here! - # You must first remove cosmic peaks, and as long as that is done - # manually, re-calculation to evenly spaced shifts must also be - # done manually. + # Add interpolated, evenly spaced data to dataframe + ff <- cbind(ff, + wnum.interp = approx(x = ff$wavenum, + y = ff$counts, + method = "linear", + n = length(ff$wavenum))$x, + cts.interp = approx(x = ff$wavenum, + y = ff$counts, + method = "linear", + n = length(ff$wavenum))$y) ## return(ff) } \ No newline at end of file diff --git a/Renishaw/RamanWrapper.R b/Renishaw/RamanWrapper.R new file mode 100644 index 0000000..275da96 --- /dev/null +++ b/Renishaw/RamanWrapper.R @@ -0,0 +1,64 @@ +RamanWrapper <- + function(data.exp, run, override = FALSE, + kerpk = 1, fitmaxiter = 50, gam = 0.6, scl.factor = 0.1) { + # the override flag is currently not used + + print("... Started RamanWrapper") + + # check if Ramanpk has already completed successfully for the current job + current.dirname <- getwd() + print(current.dirname) + current.filename <- "raman-peak-data.rda" + ramandatafile <- paste(current.dirname, current.filename, sep = "/") + + + + if (file.exists(ramandatafile)) { + print("... Started if-clause 1") + + # File already exists + # return the data using load() or data() + + load(file = ramandatafile) + + if (run > length(ramres)) { + + print("... Started if-clause 1:1") + + # the it does not really exist + ramres[[run]] <- Ramanpk(data.exp, + kerpk = kerpk, + fitmaxiter = fitmaxiter, + gam = gam, + scl.factor = scl.factor) + save(ramres, file = ramandatafile) + + print("... Ended if-clause 1:1") + } + + print("... Ended if-clause 1") + + return(ramres) + } else { + + print("... Started else-clause 1") + + if (!exists("ramres")) { + ramres <- list() + print("... ramres list created") + } + + # Need to call Ramanpk() and save its results to file as above + ramres[[run]] <- Ramanpk(data.exp, + kerpk = kerpk, + fitmaxiter = fitmaxiter, + gam = gam, + scl.factor = scl.factor) + save(ramres, file = ramandatafile) + + print("... Ended else-clause 1") + + return(ramres) + } + +} \ No newline at end of file diff --git a/Renishaw/RamanWrapper.tex b/Renishaw/RamanWrapper.tex new file mode 100644 index 0000000..7c8f49b --- /dev/null +++ b/Renishaw/RamanWrapper.tex @@ -0,0 +1,4 @@ +I wanted to run the \texttt{diffractometry} peak-analysis code (which can be both time-consuming and prone to stray errors halting compilation) without the need to re-run the peak analysis every time the document is re-compiled. + +For that purpose, I created this wrapper function. +Its job is to call \Rfun{Ramanpk()} only if necessary (i.e., peak analysis has not already been performed). That is made possible by saving the results of a successful analysis to a \texttt{raman-peak-data.rda} file in the directory of the report. diff --git a/Renishaw/Ramanpk.R b/Renishaw/Ramanpk.R index 5e15de6..131fb6f 100644 --- a/Renishaw/Ramanpk.R +++ b/Renishaw/Ramanpk.R @@ -1,95 +1,104 @@ -source("/home/taha/chepec/chetex/common/R/common.R") - -################################################## -################## Ramanpk ####################### -################################################## -Ramanpk <- function(Raman.exp, kerpk = 1, fitmaxiter = 50, gam = 0.6, scl.factor = 0.1) { +Ramanpk <- + function(data.exp, kerpk = 1, fitmaxiter = 50, gam = 0.6, scl.factor = 0.1) { - Raman.base <- baselinefit(Raman.exp, tau = 2.0, gam = gam, scl.factor = scl.factor, maxwdth = 200) + print("... Starting baseline fitting") + + data.basl <- baselinefit(data.exp, + tau = 2.0, + gam = gam, + scl.factor = scl.factor, + maxwdth = 200) + + print("... Ended baseline fitting") # This loop deals with the output from baselinefit() # It makes a "melted" dataframe in long form for each # separated peak for some baseline parameters - Raman.pks <- data.frame() - Raman.pks.basl <- data.frame() - Raman.pks.pmg <- data.frame() - Raman.pks.spl <- data.frame() - peaks <- 1:length(Raman.base$npks) + data.pks <- data.frame() + data.pks.basl <- data.frame() + data.pks.pmg <- data.frame() + data.pks.spl <- data.frame() + peaks <- 1:length(data.basl$npks) + + for (s in peaks) { # recorded data in long form by separated peak - Raman.pks <- rbind(Raman.pks, # column names assigned after loop + data.pks <- rbind(data.pks, # column names assigned after loop data.frame(peak = factor(peaks[s]), kernel = NA, - Raman.exp[Raman.base$indlsep[s]:Raman.base$indrsep[s], ])) + data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], ])) # the calculated baseline in long form by separated peak - Raman.pks.basl <- rbind(Raman.pks.basl, + data.pks.basl <- rbind(data.pks.basl, data.frame(peak = factor(peaks[s]), kernel = NA, - x = Raman.exp[Raman.base$indlsep[s]:Raman.base$indrsep[s]], - y = Raman.base$baseline$basisl[Raman.base$indlsep[s]:Raman.base$indrsep[s]])) + x = data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], 1], + y = data.basl$baseline$basisl[data.basl$indlsep[s]:data.basl$indrsep[s]])) # the taut string estimation in long form by separated peak - Raman.pks.pmg <- rbind(Raman.pks.pmg, + data.pks.pmg <- rbind(data.pks.pmg, data.frame(peak = factor(peaks[s]), kernel = NA, - x = Raman.exp[Raman.base$indlsep[s]:Raman.base$indrsep[s]], - y = Raman.base$pmg$fn[Raman.base$indlsep[s]:Raman.base$indrsep[s]])) + x = data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], 1], + y = data.basl$pmg$fn[data.basl$indlsep[s]:data.basl$indrsep[s]])) # the weighted smoothed spline in long form by separated peak - Raman.pks.spl <- rbind(Raman.pks.spl, + data.pks.spl <- rbind(data.pks.spl, data.frame(peak = factor(peaks[s]), kernel = NA, - x = Raman.exp[Raman.base$indlsep[s]:Raman.base$indrsep[s]], - y = Raman.base$spl$reg[Raman.base$indlsep[s]:Raman.base$indrsep[s]])) + x = data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], 1], + y = data.basl$spl$reg[data.basl$indlsep[s]:data.basl$indrsep[s]])) } + + + # Column names assigned to d.pks - names(Raman.pks) <- c("peak", "kernel", "x", "y") + names(data.pks) <- c("peak", "kernel", "x", "y") # This loop calls pkdecompint() on each peak separately # It makes a "melted" dataframe in long form for: - Raman.fit <- list() # holds pkdecompint output - Raman.fit.fitpk <- data.frame() # contains fitting curves - Raman.fit.parpk <- data.frame() # physical parameters by peak and kernel - Raman.nobasl <- data.frame() # data with baseline removed - peaks <- 1:length(Raman.base$npks) + data.fit <- list() # holds pkdecompint output + data.fit.fitpk <- data.frame() # contains fitting curves + data.fit.parpk <- data.frame() # physical parameters by peak and kernel + data.fit.basl <- data.frame() # data with baseline removed + peaks <- 1:length(data.basl$npks) for (s in peaks) { ######## PKDECOMPINT ######## if (length(kerpk) > 1) { # set number of kernels per peak manually - Raman.fit[[s]] <- pkdecompint(Raman.base, intnum = s, + data.fit[[s]] <- pkdecompint(data.basl, intnum = s, k = kerpk[s], maxiter = fitmaxiter) } else { # use number of kernels determined by baselinefit() - Raman.fit[[s]] <- pkdecompint(Raman.base, intnum = s, - k = Raman.base$npks[s], maxiter = fitmaxiter) + data.fit[[s]] <- pkdecompint(data.basl, intnum = s, + k = data.basl$npks[s], maxiter = fitmaxiter) } # Setup the dataframe that makes up the peak table - for (kernel in 1:Raman.fit[[s]]$num.ker) { - Raman.fit.parpk <- rbind(Raman.fit.parpk, - data.frame(peak = factor(Raman.fit[[s]]$intnr), + for (kernel in 1:data.fit[[s]]$num.ker) { + data.fit.parpk <- rbind(data.fit.parpk, + data.frame(peak = factor(data.fit[[s]]$intnr), kernel = factor(kernel), - x = Raman.fit[[s]]$parpks[kernel, "loc"], - height = Raman.fit[[s]]$parpks[kernel, "height"], - area = Raman.fit[[s]]$parpks[kernel, "intens"], - fwhm = Raman.fit[[s]]$parpks[kernel, "FWHM"], - m = Raman.fit[[s]]$parpks[kernel, "m"], - accept = Raman.fit[[s]]$accept)) - Raman.fit.fitpk <- rbind(Raman.fit.fitpk, + x = data.fit[[s]]$parpks[kernel, "loc"], + height = data.fit[[s]]$parpks[kernel, "height"], + area = data.fit[[s]]$parpks[kernel, "intens"], + fwhm = data.fit[[s]]$parpks[kernel, "FWHM"], + m = data.fit[[s]]$parpks[kernel, "m"], + accept = data.fit[[s]]$accept)) + data.fit.fitpk <- rbind(data.fit.fitpk, data.frame(peak = factor(peaks[s]), kernel = factor(kernel), - x = Raman.fit[[s]]$x, - y = Raman.fit[[s]]$fitpk[kernel, ])) + x = data.fit[[s]]$x, + y = data.fit[[s]]$fitpk[kernel, ])) } - Raman.nobasl <- rbind(Raman.nobasl, + data.fit.basl <- rbind(data.fit.basl, data.frame(peak = factor(peaks[s]), - x = Raman.fit[[s]]$x, - y = Raman.fit[[s]]$y)) + x = data.fit[[s]]$x, + y = data.fit[[s]]$y)) } - return(list(Raman.base = Raman.base, - Raman.peaks = Raman.pks, - Raman.fit.parpk = Raman.fit.parpk, - Raman.fit.fitpk = Raman.fit.fitpk, - Raman.nobasl = Raman.nobasl)) + return(list(data.basl = data.basl, + data.peaks = data.pks, + data.fit.parpk = data.fit.parpk, + data.fit.fitpk = data.fit.fitpk, + data.fit.basl = data.fit.basl)) } \ No newline at end of file diff --git a/Renishaw/SiliconWrapper.R b/Renishaw/SiliconWrapper.R new file mode 100644 index 0000000..3672e20 --- /dev/null +++ b/Renishaw/SiliconWrapper.R @@ -0,0 +1,65 @@ +SiliconWrapper <- + function(data.exp, run, override = FALSE, + kerpk = 1, fitmaxiter = 50, gam = 0.6, scl.factor = 0.1) { + # the override flag is currently not used + + print("... Started SiliconWrapper") + + # check if Ramanpk has already completed successfully for the current job + current.dirname <- getwd() + print(current.dirname) + current.filename <- "silicon-peak-data.rda" + ramandatafile <- paste(current.dirname, current.filename, sep = "/") + + + + if (file.exists(ramandatafile) && !override) { + print("... Started if-clause 1") + + # File already exists + # return the data using load() or data() + + load(file = ramandatafile) + + if (run > length(ramres)) { + + print("... Started if-clause 1:1") + + # the it does not really exist + ramres[[run]] <- Ramanpk(data.exp, + kerpk = kerpk, + fitmaxiter = fitmaxiter, + gam = gam, + scl.factor = scl.factor) + save(ramres, file = ramandatafile) + + print("... Ended if-clause 1:1") + } + + print("... Ended if-clause 1") + + return(ramres) + } else { + # File does not exist, or override requested + + print("... Started else-clause 1") + + if (!exists("ramres")) { + ramres <- list() + print("... ramres list created") + } + + # Need to call Ramanpk() and save its results to file as above + ramres[[run]] <- Ramanpk(data.exp, + kerpk = kerpk, + fitmaxiter = fitmaxiter, + gam = gam, + scl.factor = scl.factor) + save(ramres, file = ramandatafile) + + print("... Ended else-clause 1") + + return(ramres) + } + +} \ No newline at end of file diff --git a/XRF-SPECTRO/xrfpk.R b/XRF-SPECTRO/xrfpk.R new file mode 100644 index 0000000..79e629e --- /dev/null +++ b/XRF-SPECTRO/xrfpk.R @@ -0,0 +1,104 @@ +xrfpk <- + function(data.exp, kerpk = 1, fitmaxiter = 50, gam = 0.6, scl.factor = 0.1, maxwdth = 200) { + + print("... Starting baseline fitting") + + data.basl <- baselinefit(data.exp, + tau = 2.0, + gam = gam, + scl.factor = scl.factor, + maxwdth = maxwdth) + + print("... Ended baseline fitting") + + # This loop deals with the output from baselinefit() + # It makes a "melted" dataframe in long form for each + # separated peak for some baseline parameters + data.pks <- data.frame() + data.pks.basl <- data.frame() + data.pks.pmg <- data.frame() + data.pks.spl <- data.frame() + peaks <- 1:length(data.basl$npks) + + + for (s in peaks) { + # recorded data in long form by separated peak + data.pks <- rbind(data.pks, # column names assigned after loop + data.frame(peak = factor(peaks[s]), + kernel = NA, + data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], ])) + # the calculated baseline in long form by separated peak + data.pks.basl <- rbind(data.pks.basl, + data.frame(peak = factor(peaks[s]), + kernel = NA, + x = data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], 1], + y = data.basl$baseline$basisl[data.basl$indlsep[s]:data.basl$indrsep[s]])) + # the taut string estimation in long form by separated peak + data.pks.pmg <- rbind(data.pks.pmg, + data.frame(peak = factor(peaks[s]), + kernel = NA, + x = data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], 1], + y = data.basl$pmg$fn[data.basl$indlsep[s]:data.basl$indrsep[s]])) + # the weighted smoothed spline in long form by separated peak + data.pks.spl <- rbind(data.pks.spl, + data.frame(peak = factor(peaks[s]), + kernel = NA, + x = data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], 1], + y = data.basl$spl$reg[data.basl$indlsep[s]:data.basl$indrsep[s]])) + } + + + + # Column names assigned to d.pks + names(data.pks) <- c("peak", "kernel", "x", "y") + + + # This loop calls pkdecompint() on each peak separately + # It makes a "melted" dataframe in long form for: + data.fit <- list() # holds pkdecompint output + data.fit.fitpk <- data.frame() # contains fitting curves + data.fit.parpk <- data.frame() # physical parameters by peak and kernel + data.fit.basl <- data.frame() # data with baseline removed + peaks <- 1:length(data.basl$npks) + for (s in peaks) { + ######## PKDECOMPINT ######## + if (length(kerpk) > 1) { + # set number of kernels per peak manually + data.fit[[s]] <- pkdecompint(data.basl, intnum = s, + k = kerpk[s], maxiter = fitmaxiter) + } else { + # use number of kernels determined by baselinefit() + data.fit[[s]] <- pkdecompint(data.basl, intnum = s, + k = data.basl$npks[s], maxiter = fitmaxiter) + } + # Setup the dataframe that makes up the peak table + for (kernel in 1:data.fit[[s]]$num.ker) { + data.fit.parpk <- rbind(data.fit.parpk, + data.frame(peak = factor(data.fit[[s]]$intnr), + kernel = factor(kernel), + x = data.fit[[s]]$parpks[kernel, "loc"], + height = data.fit[[s]]$parpks[kernel, "height"], + area = data.fit[[s]]$parpks[kernel, "intens"], + fwhm = data.fit[[s]]$parpks[kernel, "FWHM"], + m = data.fit[[s]]$parpks[kernel, "m"], + accept = data.fit[[s]]$accept)) + data.fit.fitpk <- rbind(data.fit.fitpk, + data.frame(peak = factor(peaks[s]), + kernel = factor(kernel), + x = data.fit[[s]]$x, + y = data.fit[[s]]$fitpk[kernel, ])) + } + data.fit.basl <- rbind(data.fit.basl, + data.frame(peak = factor(peaks[s]), + x = data.fit[[s]]$x, + y = data.fit[[s]]$y)) + } + + + + return(list(data.basl = data.basl, + data.peaks = data.pks, + data.fit.parpk = data.fit.parpk, + data.fit.fitpk = data.fit.fitpk, + data.fit.basl = data.fit.basl)) +} \ No newline at end of file diff --git a/XRF-SPECTRO/xrfpkWrapper.R b/XRF-SPECTRO/xrfpkWrapper.R new file mode 100644 index 0000000..f04cf92 --- /dev/null +++ b/XRF-SPECTRO/xrfpkWrapper.R @@ -0,0 +1,67 @@ +xrfpkWrapper <- + function(data.exp, run, override = FALSE, + kerpk = 1, fitmaxiter = 100, gam = 0.64, scl.factor = 0.06, maxwdth = 10) { + # kerpk = 1, fitmaxiter = 50, gam = 0.6, scl.factor = 0.1) { + # the override flag is IN USE + + print("... Started xrfpkWrapper") + + # check if xrfpk has already completed successfully for the current job + current.dirname <- getwd() + print(current.dirname) + current.filename <- "xrf-peak-data.rda" + xrfdatafile <- paste(current.dirname, current.filename, sep = "/") + + + + if (file.exists(xrfdatafile) && !override) { + print("... Started if-clause 1") + + # File already exists + # return the data using load() or data() + + load(file = xrfdatafile) + + if (run > length(xrfres)) { + + print("... Started if-clause 1:1") + + # the it does not really exist + xrfres[[run]] <- xrfpk(data.exp, + kerpk = kerpk, + fitmaxiter = fitmaxiter, + gam = gam, + scl.factor = scl.factor, + maxwdth = maxwdth) + save(xrfres, file = xrfdatafile) + + print("... Ended if-clause 1:1") + } + + print("... Ended if-clause 1") + + return(xrfres) + } else { + + print("... Started else-clause 1") + + if (!exists("xrfres")) { + xrfres <- list() + print("... xrfres list created") + } + + # Need to call xrfpk() and save its results to file as above + xrfres[[run]] <- xrfpk(data.exp, + kerpk = kerpk, + fitmaxiter = fitmaxiter, + gam = gam, + scl.factor = scl.factor, + maxwdth = maxwdth) + save(xrfres, file = xrfdatafile) + + print("... Ended else-clause 1") + + return(xrfres) + } + +} \ No newline at end of file diff --git a/XRF-SPECTRO/xrfpkWrapper.tex b/XRF-SPECTRO/xrfpkWrapper.tex new file mode 100644 index 0000000..1c2ef2a --- /dev/null +++ b/XRF-SPECTRO/xrfpkWrapper.tex @@ -0,0 +1,3 @@ +I have been attempting to fine-tune the parameters of the \texttt{baselinefit()} function. + +Increasing the number of iterations above 100 have been useless. Small changes in \texttt{gam} leads to noticeable changes in the peak and kernel distribution. diff --git a/XRF-SPECTRO/xrfspectro2df.R b/XRF-SPECTRO/xrfspectro2df.R new file mode 100644 index 0000000..77ea0fb --- /dev/null +++ b/XRF-SPECTRO/xrfspectro2df.R @@ -0,0 +1,86 @@ +source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R") + +xrfspectro2df <- function(smpfile) { + ## Description: + ## Reads XRF textfile from XLAB SPECTRO XRF. + ## Stores data in data frame and parameters in an attributed dataframe. + ## Usage: + ## xrfspectro2df(smpfile) + ## Arguments: + ## smpfile: character string, the full filename + ## (with path) to one SMP file (ASCII). + ## Value: + ## A dataframe with attributed dataframe + # + filecon <- file(smpfile, "r") + smpcontents <- readLines(filecon, n = -1) #read all lines of input file + close(filecon) + # + sampleid <- ProvideSampleId(smpfile) + # + rgxp.data <- "^Kanal\\s[\\d]+:" + # + numrow.idx <- regexpr(rgxp.data, smpcontents, perl = TRUE) + # scrap the match.length attribute + attr(numrow.idx, "match.length") <- NULL + # + # Determine how many columns the data contains + smpdata.cols <- length(strsplit(smpcontents[which(numrow.idx == 1)][1], "\t")[[1]]) - 1 + # While we are at it, save row count to a variable as well + smpdata.rows <- length(smpcontents[which(numrow.idx == 1)]) + # strip prefix off each data row + #smpdata <- matrix(NA, ncol = smpdata.cols, nrow = smpdata.rows) + smpdata.txt <- vector(length = smpdata.rows) + for (i in 1:smpdata.rows) { + smpdata.txt[i] <- strsplit(smpcontents[which(numrow.idx == 1)][i], ":")[[1]][2] + } + smpdata.txt.clean <- gsub("^\\s", "", + gsub("\\t", " ", smpdata.txt)) + + zz <- textConnection(smpdata.txt.clean, "r") + ff <- data.frame(stringsAsFactors = FALSE, + sampleid = sampleid, + channel = seq(1, smpdata.rows), + matrix(scan(zz, what = numeric(), sep = " "), + ncol = smpdata.cols, byrow = TRUE)) + close(zz) + names(ff) <- c("sampleid", "channel", paste("Y", seq(1, smpdata.cols), sep = "")) + + + # + ### Collect attributes of this experiment + SMPattrEdit <- matrix(c("Voltage", "^Voltage:", + "Current", "^Current:", + "Target", "^Target:", + "Duration", "^Meas\\.\\sDuration:", + "Impulse", "^Imp\\.\\sRate:", + "DeadTime", "^Rel\\.\\sDead\\sTime:", + "FirstChannel", "^First\\sChannel:", + "LastChannel", "^Last\\sChannel:", + "PeakTime", "^Peak\\sTime:", + "Gain", "^Gain:", + "ZeroPeak", "^Zero\\sPeak:"), + ncol = 2, byrow = T) + + SMPattr <- matrix(NA, nrow = smpdata.cols + 1, ncol = dim(SMPattrEdit)[1]) + + for (c in 1:dim(SMPattrEdit)[1]) { + SMPattr[1, c] <- SMPattrEdit[c, 1] + SMPattr[2:dim(SMPattr)[1], c] <- + matrix(strsplit(gsub("^\\t", "", + strsplit(smpcontents[which(regexpr(SMPattrEdit[c, 2], smpcontents) == 1)], + ":")[[1]][2]), "\\t")[[1]], ncol = smpdata.cols) + } + SMPdf <- data.frame(stringsAsFactors = FALSE, + SMPattr[2:dim(SMPattr)[1], ]) + colnames(SMPdf) <- SMPattr[1, ] + + + ### Now calculate the energy (keV) scale (convert from channels to energy) + ff$X <- ff$channel * (as.numeric(SMPdf$Gain[1]) / as.numeric(SMPdf$LastChannel[1])) + + # Attach parameters to returned dataframe + attr(ff, "parameters") <- SMPdf + # + return(ff) +} \ No newline at end of file diff --git a/common/ProvideSampleId.R b/common/ProvideSampleId.R index 6b8b7c1..6f1449e 100644 --- a/common/ProvideSampleId.R +++ b/common/ProvideSampleId.R @@ -1,25 +1,34 @@ -################################################## -################ ProvideSampleId ################# -################################################## -ProvideSampleId <- function (fullpathwithfilename) { - ### OBS! Only very rudimentary error-checking. - ### If the filename is formatted as \w*-\w*-\w*, we use the middle segment, - ### otherwise we use the whole string (excluding the extension) - # Extract the name of the parent directory of the datafilename argument - substrateid <- basename(dirname(fullpathwithfilename)) - # Extract the name of the method from the filename-part - # First split the filename over all hyphens - nameparts <- strsplit(basename(fullpathwithfilename), "-")[[1]] - # If the number of nameparts exceed 3, save the whole filename as methodid, otherwise use the middle part - if (length(nameparts) > 3) { - # We need to lose the file extension from the last namepart - nameparts[length(nameparts)] <- strsplit(nameparts[length(nameparts)], "\\.")[[1]][1] - methodid <- paste(nameparts, collapse = "-") - } else { - methodid <- nameparts[2] - } - # Make an informative sampleid - sampleid <- paste(substrateid, methodid, sep = "-") - # - return(sampleid) +ProvideSampleId <- function (pathexpfile) { + # Returns a "unique" sample ID when supplied + # with a path to an experimental file. + ## Note: the sample ID must derive directly from the file or path. + + sampleid <- sub("\\.[\\w]+$", "", basename(pathexpfile), perl = TRUE) + + #### The code below is the old ProvideSampleId() function + # ### OBS! Only very rudimentary error-checking. + # ### If the filename is formatted as \w*-\w*-\w*, we use the middle segment, + # ### otherwise we use the whole string (excluding the extension) + # # Extract the name of the parent directory of the datafilename argument + # substrateid <- basename(dirname(fullpathwithfilename)) + # # Extract the name of the method from the filename-part + # # First split the filename over all hyphens + # nameparts <- strsplit(basename(fullpathwithfilename), "-")[[1]] + # # If the number of nameparts exceed 3, save the whole filename + # # as methodid, otherwise use the middle part + # if (length(nameparts) > 3) { + # # We need to lose the file extension from the last namepart + # nameparts[length(nameparts)] <- + # strsplit(nameparts[length(nameparts)], "\\.")[[1]][1] + # methodid <- paste(nameparts, collapse = "-") + # } else { + # methodid <- nameparts[2] + # } + # # Make an informative sampleid + # sampleid <- paste(substrateid, methodid, sep = "-") + # # + # return(sampleid) + #### + + return(sampleid) } \ No newline at end of file diff --git a/common/ProvideSampleId.tex b/common/ProvideSampleId.tex new file mode 100644 index 0000000..c7d96c3 --- /dev/null +++ b/common/ProvideSampleId.tex @@ -0,0 +1,6 @@ +The following function, \Rfun{ProvideSampleId()}, strives to supply a unique sample ID from any path supplied to it. Of course, a certain structure on the part of the path and filename are assumed, namely: +\begin{itemize} +\item That the filename (not including the extension) consists of three alphanumeric strings separated by hyphens. +\item That\ldots +\end{itemize} +