Solved the issue of needlessly re-running diffractometry analyses by adding wrapper functions.

Caught some errors and made some improvements in the other functions.
master
Taha Ahmed 13 years ago
parent 4fff82be0c
commit beab0b9e64

@ -1,10 +1,25 @@
source("/home/taha/chepec/chetex/common/R/common.R") source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")
##################################################
################### Raman2df #######################
##################################################
Raman2df <- function(datafilename) {
## Description:
## Reads Raman data in ASCII format
## (wavenumber, counts)
## and returns a dataframe with the original data,
## as well as interpolated wavenumber and counts values
## (interpolated data is evenly spaced along x-axis)
## Usage:
## Raman2df(datafilename)
## Arguments:
## datafilename: text string with full path to experimental file
## Value:
## Dataframe with the following columns (and no extra attributes):
## $ sampleid : chr (id)
## $ wavenum : num (measure)
## $ counts : num (measure)
## $ wnum.interp : num (measure)
## $ cts.interp : num (measure)
## Note:
##
#
datafile <- file(datafilename, "r")
chifile <- readLines(datafile, n = -1) # read all lines of input file
@ -20,15 +35,21 @@ Raman2df <- function(datafilename) {
matrix(scan(zz, what = numeric(), sep = "\t"),
ncol = 2, byrow = T)))
close(zz)
names(ff) <- c("sampleid", "wavenum", "counts")
# Sort dataframe by increasing wavenumbers
ff <- ff[order(ff$wavenum), ]
# ... sort the rownames as well
row.names(ff) <- seq(1, dim(ff)[1])
# Add interpolated, evenly spaced data to dataframe
ff <- cbind(ff,
wnum.interp = approx(x = ff$wavenum,
y = ff$counts,
method = "linear",
n = length(ff$wavenum))$x,
cts.interp = approx(x = ff$wavenum,
y = ff$counts,
method = "linear",
n = length(ff$wavenum))$y)
##
return(ff)
}
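A minimal usage sketch (the path is hypothetical and Raman2df() is assumed to be sourced; the input is a tab-separated two-column ASCII file of wavenumber and counts):
raman <- Raman2df("/home/taha/chepec/raman/sampleA-Raman-01.txt")
str(raman) # sampleid, wavenum, counts, wnum.interp, cts.interp
plot(counts ~ wavenum, data = raman, type = "l") # quick look at the raw spectrum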

@ -0,0 +1,64 @@
RamanWrapper <-
function(data.exp, run, override = FALSE,
kerpk = 1, fitmaxiter = 50, gam = 0.6, scl.factor = 0.1) {
# the override flag is currently not used
print("... Started RamanWrapper")
# check if Ramanpk has already completed successfully for the current job
current.dirname <- getwd()
print(current.dirname)
current.filename <- "raman-peak-data.rda"
ramandatafile <- paste(current.dirname, current.filename, sep = "/")
if (file.exists(ramandatafile)) {
print("... Started if-clause 1")
# File already exists
# return the data using load() or data()
load(file = ramandatafile)
if (run > length(ramres)) {
print("... Started if-clause 1:1")
# then the requested run does not yet exist in the saved results
ramres[[run]] <- Ramanpk(data.exp,
kerpk = kerpk,
fitmaxiter = fitmaxiter,
gam = gam,
scl.factor = scl.factor)
save(ramres, file = ramandatafile)
print("... Ended if-clause 1:1")
}
print("... Ended if-clause 1")
return(ramres)
} else {
print("... Started else-clause 1")
if (!exists("ramres")) {
ramres <- list()
print("... ramres list created")
}
# Need to call Ramanpk() and save its results to file as above
ramres[[run]] <- Ramanpk(data.exp,
kerpk = kerpk,
fitmaxiter = fitmaxiter,
gam = gam,
scl.factor = scl.factor)
save(ramres, file = ramandatafile)
print("... Ended else-clause 1")
return(ramres)
}
}

@ -0,0 +1,4 @@
I wanted to run the \texttt{diffractometry} peak-analysis code (which can be both time-consuming and prone to stray errors that halt compilation) without having to re-run the peak analysis every time the document is re-compiled.
For that purpose, I created this wrapper function.
Its job is to call \Rfun{Ramanpk()} only when necessary, that is, when the peak analysis has not already been performed. This is made possible by saving the results of every successful analysis to a \texttt{raman-peak-data.rda} file in the directory of the report.
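For example, a report chunk might contain something along these lines (the data object \texttt{raman.spectrum} and the run number are hypothetical; each element of the returned list holds the output of one \Rfun{Ramanpk()} run):
ramres <- RamanWrapper(raman.spectrum, run = 1) # loads raman-peak-data.rda if it already exists
peaktable <- ramres[[1]]$data.fit.parpk # peak positions, heights, areas, FWHM per kernel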

@ -1,95 +1,104 @@
source("/home/taha/chepec/chetex/common/R/common.R") Ramanpk <-
function(data.exp, kerpk = 1, fitmaxiter = 50, gam = 0.6, scl.factor = 0.1) {
print("... Starting baseline fitting")
data.basl <- baselinefit(data.exp,
tau = 2.0,
gam = gam,
scl.factor = scl.factor,
maxwdth = 200)
print("... Ended baseline fitting")
# This loop deals with the output from baselinefit()
# It makes a "melted" dataframe in long form for each
# separated peak for some baseline parameters
data.pks <- data.frame()
data.pks.basl <- data.frame()
data.pks.pmg <- data.frame()
data.pks.spl <- data.frame()
peaks <- 1:length(data.basl$npks)
for (s in peaks) {
# recorded data in long form by separated peak
data.pks <- rbind(data.pks, # column names assigned after loop
data.frame(peak = factor(peaks[s]),
kernel = NA,
data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], ]))
# the calculated baseline in long form by separated peak
data.pks.basl <- rbind(data.pks.basl,
data.frame(peak = factor(peaks[s]),
kernel = NA,
x = data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], 1],
y = data.basl$baseline$basisl[data.basl$indlsep[s]:data.basl$indrsep[s]]))
# the taut string estimation in long form by separated peak
data.pks.pmg <- rbind(data.pks.pmg,
data.frame(peak = factor(peaks[s]),
kernel = NA,
x = data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], 1],
y = data.basl$pmg$fn[data.basl$indlsep[s]:data.basl$indrsep[s]]))
# the weighted smoothed spline in long form by separated peak
data.pks.spl <- rbind(data.pks.spl,
data.frame(peak = factor(peaks[s]),
kernel = NA,
x = data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], 1],
y = data.basl$spl$reg[data.basl$indlsep[s]:data.basl$indrsep[s]]))
}
# Column names assigned to data.pks
names(data.pks) <- c("peak", "kernel", "x", "y")
# This loop calls pkdecompint() on each peak separately
# It makes a "melted" dataframe in long form for:
data.fit <- list() # holds pkdecompint output
data.fit.fitpk <- data.frame() # contains fitting curves
data.fit.parpk <- data.frame() # physical parameters by peak and kernel
data.fit.basl <- data.frame() # data with baseline removed
peaks <- 1:length(data.basl$npks)
for (s in peaks) {
######## PKDECOMPINT ########
if (length(kerpk) > 1) {
# set number of kernels per peak manually
data.fit[[s]] <- pkdecompint(data.basl, intnum = s,
k = kerpk[s], maxiter = fitmaxiter)
} else {
# use number of kernels determined by baselinefit()
data.fit[[s]] <- pkdecompint(data.basl, intnum = s,
k = data.basl$npks[s], maxiter = fitmaxiter)
}
# Setup the dataframe that makes up the peak table
for (kernel in 1:data.fit[[s]]$num.ker) {
data.fit.parpk <- rbind(data.fit.parpk,
data.frame(peak = factor(data.fit[[s]]$intnr),
kernel = factor(kernel),
x = data.fit[[s]]$parpks[kernel, "loc"],
height = data.fit[[s]]$parpks[kernel, "height"],
area = data.fit[[s]]$parpks[kernel, "intens"],
fwhm = data.fit[[s]]$parpks[kernel, "FWHM"],
m = data.fit[[s]]$parpks[kernel, "m"],
accept = data.fit[[s]]$accept))
data.fit.fitpk <- rbind(data.fit.fitpk,
data.frame(peak = factor(peaks[s]),
kernel = factor(kernel),
x = data.fit[[s]]$x,
y = data.fit[[s]]$fitpk[kernel, ]))
}
data.fit.basl <- rbind(data.fit.basl,
data.frame(peak = factor(peaks[s]),
x = data.fit[[s]]$x,
y = data.fit[[s]]$y))
}
return(list(data.basl = data.basl,
data.peaks = data.pks,
data.fit.parpk = data.fit.parpk,
data.fit.fitpk = data.fit.fitpk,
data.fit.basl = data.fit.basl))
}
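For reference, a direct-call sketch (the input path is hypothetical; it assumes baselinefit() and pkdecompint() are available from the diffractometry package, as the accompanying note suggests):
library(diffractometry) # assumed source of baselinefit() and pkdecompint()
raman <- Raman2df("/home/taha/chepec/raman/sampleA-Raman-01.txt") # hypothetical path
spec <- data.frame(x = raman$wnum.interp, y = raman$cts.interp) # two-column (wavenumber, counts) input
res <- Ramanpk(spec, kerpk = 1, fitmaxiter = 50)
res$data.fit.parpk # peak table: position, height, area, fwhm, m, accept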

@ -0,0 +1,65 @@
SiliconWrapper <-
function(data.exp, run, override = FALSE,
kerpk = 1, fitmaxiter = 50, gam = 0.6, scl.factor = 0.1) {
# the override flag is IN USE (TRUE forces a fresh analysis)
print("... Started SiliconWrapper")
# check if Ramanpk has already completed successfully for the current job
current.dirname <- getwd()
print(current.dirname)
current.filename <- "silicon-peak-data.rda"
ramandatafile <- paste(current.dirname, current.filename, sep = "/")
if (file.exists(ramandatafile) && !override) {
print("... Started if-clause 1")
# File already exists
# return the data using load() or data()
load(file = ramandatafile)
if (run > length(ramres)) {
print("... Started if-clause 1:1")
# then the requested run does not yet exist in the saved results
ramres[[run]] <- Ramanpk(data.exp,
kerpk = kerpk,
fitmaxiter = fitmaxiter,
gam = gam,
scl.factor = scl.factor)
save(ramres, file = ramandatafile)
print("... Ended if-clause 1:1")
}
print("... Ended if-clause 1")
return(ramres)
} else {
# File does not exist, or override requested
print("... Started else-clause 1")
if (!exists("ramres")) {
ramres <- list()
print("... ramres list created")
}
# Need to call Ramanpk() and save its results to file as above
ramres[[run]] <- Ramanpk(data.exp,
kerpk = kerpk,
fitmaxiter = fitmaxiter,
gam = gam,
scl.factor = scl.factor)
save(ramres, file = ramandatafile)
print("... Ended else-clause 1")
return(ramres)
}
}

@ -0,0 +1,104 @@
xrfpk <-
function(data.exp, kerpk = 1, fitmaxiter = 50, gam = 0.6, scl.factor = 0.1, maxwdth = 200) {
print("... Starting baseline fitting")
data.basl <- baselinefit(data.exp,
tau = 2.0,
gam = gam,
scl.factor = scl.factor,
maxwdth = maxwdth)
print("... Ended baseline fitting")
# This loop deals with the output from baselinefit()
# It makes a "melted" dataframe in long form for each
# separated peak for some baseline parameters
data.pks <- data.frame()
data.pks.basl <- data.frame()
data.pks.pmg <- data.frame()
data.pks.spl <- data.frame()
peaks <- 1:length(data.basl$npks)
for (s in peaks) {
# recorded data in long form by separated peak
data.pks <- rbind(data.pks, # column names assigned after loop
data.frame(peak = factor(peaks[s]),
kernel = NA,
data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], ]))
# the calculated baseline in long form by separated peak
data.pks.basl <- rbind(data.pks.basl,
data.frame(peak = factor(peaks[s]),
kernel = NA,
x = data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], 1],
y = data.basl$baseline$basisl[data.basl$indlsep[s]:data.basl$indrsep[s]]))
# the taut string estimation in long form by separated peak
data.pks.pmg <- rbind(data.pks.pmg,
data.frame(peak = factor(peaks[s]),
kernel = NA,
x = data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], 1],
y = data.basl$pmg$fn[data.basl$indlsep[s]:data.basl$indrsep[s]]))
# the weighted smoothed spline in long form by separated peak
data.pks.spl <- rbind(data.pks.spl,
data.frame(peak = factor(peaks[s]),
kernel = NA,
x = data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], 1],
y = data.basl$spl$reg[data.basl$indlsep[s]:data.basl$indrsep[s]]))
}
# Column names assigned to data.pks
names(data.pks) <- c("peak", "kernel", "x", "y")
# This loop calls pkdecompint() on each peak separately
# It makes a "melted" dataframe in long form for:
data.fit <- list() # holds pkdecompint output
data.fit.fitpk <- data.frame() # contains fitting curves
data.fit.parpk <- data.frame() # physical parameters by peak and kernel
data.fit.basl <- data.frame() # data with baseline removed
peaks <- 1:length(data.basl$npks)
for (s in peaks) {
######## PKDECOMPINT ########
if (length(kerpk) > 1) {
# set number of kernels per peak manually
data.fit[[s]] <- pkdecompint(data.basl, intnum = s,
k = kerpk[s], maxiter = fitmaxiter)
} else {
# use number of kernels determined by baselinefit()
data.fit[[s]] <- pkdecompint(data.basl, intnum = s,
k = data.basl$npks[s], maxiter = fitmaxiter)
}
# Setup the dataframe that makes up the peak table
for (kernel in 1:data.fit[[s]]$num.ker) {
data.fit.parpk <- rbind(data.fit.parpk,
data.frame(peak = factor(data.fit[[s]]$intnr),
kernel = factor(kernel),
x = data.fit[[s]]$parpks[kernel, "loc"],
height = data.fit[[s]]$parpks[kernel, "height"],
area = data.fit[[s]]$parpks[kernel, "intens"],
fwhm = data.fit[[s]]$parpks[kernel, "FWHM"],
m = data.fit[[s]]$parpks[kernel, "m"],
accept = data.fit[[s]]$accept))
data.fit.fitpk <- rbind(data.fit.fitpk,
data.frame(peak = factor(peaks[s]),
kernel = factor(kernel),
x = data.fit[[s]]$x,
y = data.fit[[s]]$fitpk[kernel, ]))
}
data.fit.basl <- rbind(data.fit.basl,
data.frame(peak = factor(peaks[s]),
x = data.fit[[s]]$x,
y = data.fit[[s]]$y))
}
return(list(data.basl = data.basl,
data.peaks = data.pks,
data.fit.parpk = data.fit.parpk,
data.fit.fitpk = data.fit.fitpk,
data.fit.basl = data.fit.basl))
}

@ -0,0 +1,67 @@
xrfpkWrapper <-
function(data.exp, run, override = FALSE,
kerpk = 1, fitmaxiter = 100, gam = 0.64, scl.factor = 0.06, maxwdth = 10) {
# kerpk = 1, fitmaxiter = 50, gam = 0.6, scl.factor = 0.1) {
# the override flag is IN USE
print("... Started xrfpkWrapper")
# check if xrfpk has already completed successfully for the current job
current.dirname <- getwd()
print(current.dirname)
current.filename <- "xrf-peak-data.rda"
xrfdatafile <- paste(current.dirname, current.filename, sep = "/")
if (file.exists(xrfdatafile) && !override) {
print("... Started if-clause 1")
# File already exists
# return the data using load() or data()
load(file = xrfdatafile)
if (run > length(xrfres)) {
print("... Started if-clause 1:1")
# then the requested run does not yet exist in the saved results
xrfres[[run]] <- xrfpk(data.exp,
kerpk = kerpk,
fitmaxiter = fitmaxiter,
gam = gam,
scl.factor = scl.factor,
maxwdth = maxwdth)
save(xrfres, file = xrfdatafile)
print("... Ended if-clause 1:1")
}
print("... Ended if-clause 1")
return(xrfres)
} else {
print("... Started else-clause 1")
if (!exists("xrfres")) {
xrfres <- list()
print("... xrfres list created")
}
# Need to call xrfpk() and save its results to file as above
xrfres[[run]] <- xrfpk(data.exp,
kerpk = kerpk,
fitmaxiter = fitmaxiter,
gam = gam,
scl.factor = scl.factor,
maxwdth = maxwdth)
save(xrfres, file = xrfdatafile)
print("... Ended else-clause 1")
return(xrfres)
}
}

@ -0,0 +1,3 @@
I have been attempting to fine-tune the parameters of the \texttt{baselinefit()} function.
Increasing the number of iterations above 100 has been useless. Small changes in \texttt{gam} lead to noticeable changes in the peak and kernel distribution.
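A parameter-sweep sketch of the kind used for this tuning (the data object \texttt{xrf.spectrum} and the candidate values are hypothetical; only the number of separated peak intervals is recorded):
gam.values <- c(0.55, 0.60, 0.64, 0.70) # candidate smoothing parameters
npks <- sapply(gam.values, function(g) {
  basl <- baselinefit(xrf.spectrum, tau = 2.0, gam = g,
                      scl.factor = 0.06, maxwdth = 10)
  length(basl$npks) # number of separated peak intervals found
})
data.frame(gam = gam.values, peaks.found = npks)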

@ -0,0 +1,86 @@
source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")
xrfspectro2df <- function(smpfile) {
## Description:
## Reads XRF textfile from XLAB SPECTRO XRF.
## Stores data in data frame and parameters in an attributed dataframe.
## Usage:
## xrfspectro2df(smpfile)
## Arguments:
## smpfile: character string, the full filename
## (with path) to one SMP file (ASCII).
## Value:
## A dataframe with attributed dataframe
#
filecon <- file(smpfile, "r")
smpcontents <- readLines(filecon, n = -1) #read all lines of input file
close(filecon)
#
sampleid <- ProvideSampleId(smpfile)
#
rgxp.data <- "^Kanal\\s[\\d]+:"
#
numrow.idx <- regexpr(rgxp.data, smpcontents, perl = TRUE)
# scrap the match.length attribute
attr(numrow.idx, "match.length") <- NULL
#
# Determine how many columns the data contains
smpdata.cols <- length(strsplit(smpcontents[which(numrow.idx == 1)][1], "\t")[[1]]) - 1
# While we are at it, save row count to a variable as well
smpdata.rows <- length(smpcontents[which(numrow.idx == 1)])
# strip prefix off each data row
#smpdata <- matrix(NA, ncol = smpdata.cols, nrow = smpdata.rows)
smpdata.txt <- vector(length = smpdata.rows)
for (i in 1:smpdata.rows) {
smpdata.txt[i] <- strsplit(smpcontents[which(numrow.idx == 1)][i], ":")[[1]][2]
}
smpdata.txt.clean <- gsub("^\\s", "",
gsub("\\t", " ", smpdata.txt))
zz <- textConnection(smpdata.txt.clean, "r")
ff <- data.frame(stringsAsFactors = FALSE,
sampleid = sampleid,
channel = seq(1, smpdata.rows),
matrix(scan(zz, what = numeric(), sep = " "),
ncol = smpdata.cols, byrow = TRUE))
close(zz)
names(ff) <- c("sampleid", "channel", paste("Y", seq(1, smpdata.cols), sep = ""))
#
### Collect attributes of this experiment
SMPattrEdit <- matrix(c("Voltage", "^Voltage:",
"Current", "^Current:",
"Target", "^Target:",
"Duration", "^Meas\\.\\sDuration:",
"Impulse", "^Imp\\.\\sRate:",
"DeadTime", "^Rel\\.\\sDead\\sTime:",
"FirstChannel", "^First\\sChannel:",
"LastChannel", "^Last\\sChannel:",
"PeakTime", "^Peak\\sTime:",
"Gain", "^Gain:",
"ZeroPeak", "^Zero\\sPeak:"),
ncol = 2, byrow = T)
SMPattr <- matrix(NA, nrow = smpdata.cols + 1, ncol = dim(SMPattrEdit)[1])
for (c in 1:dim(SMPattrEdit)[1]) {
SMPattr[1, c] <- SMPattrEdit[c, 1]
SMPattr[2:dim(SMPattr)[1], c] <-
matrix(strsplit(gsub("^\\t", "",
strsplit(smpcontents[which(regexpr(SMPattrEdit[c, 2], smpcontents) == 1)],
":")[[1]][2]), "\\t")[[1]], ncol = smpdata.cols)
}
SMPdf <- data.frame(stringsAsFactors = FALSE,
SMPattr[2:dim(SMPattr)[1], ])
colnames(SMPdf) <- SMPattr[1, ]
### Now calculate the energy (keV) scale (convert from channels to energy)
ff$X <- ff$channel * (as.numeric(SMPdf$Gain[1]) / as.numeric(SMPdf$LastChannel[1]))
# Attach parameters to returned dataframe
attr(ff, "parameters") <- SMPdf
#
return(ff)
}
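A usage sketch (the SMP path is hypothetical; the instrument parameters travel along as an attribute of the returned dataframe):
xrf <- xrfspectro2df("/home/taha/chepec/xrf/sampleA-XRF-01.smp")
head(xrf[, c("sampleid", "channel", "X", "Y1")]) # energy scale in X, counts in Y1, Y2, ...
attr(xrf, "parameters") # Voltage, Current, Target, Gain, ...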

@ -1,25 +1,34 @@
ProvideSampleId <- function (pathexpfile) {
# Returns a "unique" sample ID when supplied
# with a path to an experimental file.
## Note: the sample ID must derive directly from the file or path.
sampleid <- sub("\\.[\\w]+$", "", basename(pathexpfile), perl = TRUE)
#### The code below is the old ProvideSampleId() function
# ### OBS! Only very rudimentary error-checking.
# ### If the filename is formatted as \w*-\w*-\w*, we use the middle segment,
# ### otherwise we use the whole string (excluding the extension)
# # Extract the name of the parent directory of the datafilename argument
# substrateid <- basename(dirname(fullpathwithfilename))
# # Extract the name of the method from the filename-part
# # First split the filename over all hyphens
# nameparts <- strsplit(basename(fullpathwithfilename), "-")[[1]]
# # If the number of nameparts exceed 3, save the whole filename
# # as methodid, otherwise use the middle part
# if (length(nameparts) > 3) {
# # We need to lose the file extension from the last namepart
# nameparts[length(nameparts)] <-
# strsplit(nameparts[length(nameparts)], "\\.")[[1]][1]
# methodid <- paste(nameparts, collapse = "-")
# } else {
# methodid <- nameparts[2]
# }
# # Make an informative sampleid
# sampleid <- paste(substrateid, methodid, sep = "-")
# #
# return(sampleid)
####
return(sampleid)
}

@ -0,0 +1,6 @@
The following function, \Rfun{ProvideSampleId()}, strives to supply a unique sample ID from any path supplied to it. Of course, a certain structure of the path and filename is assumed (see the sketch after the list), namely:
\begin{itemize}
\item That the filename (not including the extension) consists of three alphanumeric strings separated by hyphens.
\item That\ldots
\end{itemize}
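A minimal behaviour sketch (the paths are hypothetical; the current implementation simply strips the extension from the basename):
ProvideSampleId("/home/taha/chepec/raman/ABC12-Raman-03.txt") # returns "ABC12-Raman-03"
ProvideSampleId("/data/xrf/sampleB.smp") # returns "sampleB"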