Solved the issue with repeating diffractometry analyses by wrapper functions.

Caught some errors and made some improvements in the other functions.
14 years ago · beab0b9e64
parent 4fff82be0c
commit beab0b9e64
11 changed files with 527 additions and 89 deletions
--- a/Renishaw/Raman2df.R
+++ b/Renishaw/Raman2df.R
@ -1,10 +1,25 @@
-source("/home/taha/chepec/chetex/common/R/common.R")
+source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")

-##################################################
-################### Raman2df #######################
-##################################################
 Raman2df <- function(datafilename) {
-   # Function description: for reading Raman spectrum into dataframe
+   ## Description:
+   ##   Reads Raman data in ASCII format 
+   ##   (wavenumber, counts)
+   ##   and returns a dataframe with the original data, 
+   ##   as well as interpolated wavenumber and counts values
+   ##   (interpolated data is evenly spaced along x-axis)
+   ## Usage:
+   ##   Raman2df(datafilename)
+   ## Arguments:
+   ##   datafilename: text string with full path to experimental file
+   ## Value:
+   ##   Dataframe with the following columns (and no extra attributes):
+   ##   $ sampleid        : chr (id)
+   ##   $ wavenum         : num (measure)
+   ##   $ counts          : num (measure)
+   ##   $ wnum.interp     : num (measure)
+   ##   $ cts.interp      : num (measure)
+   ## Note:
+   ##   
   #
   datafile <- file(datafilename, "r")
   chifile <- readLines(datafile, n = -1) #read all lines of input file
@ -20,15 +35,21 @@ Raman2df <- function(datafilename) {
            matrix(scan(zz, what = numeric(), sep = "\t"),
            ncol = 2, byrow = T)))
   close(zz)
-   names(ff) <- c("sampleid", "shift", "counts")
-   # Re-order by increasing shift
-   ff <- ff[order(ff$shift), ]
-   # And fix the row.names
+   names(ff) <- c("sampleid", "wavenum", "counts")
+   # Sort dataframe by increasing wavenumbers
+   ff <- ff[order(ff$wavenum), ]
+   # ... sort the rownames as well
   row.names(ff) <- seq(1, dim(ff)[1])
-   # Do not re-calculate the spectrum with evenly spaced points here!
-   # You must first remove cosmic peaks, and as long as that is done
-   # manually, re-calculation to evenly spaced shifts must also be
-   # done manually.
+   # Add interpolated, evenly spaced data to dataframe
+   ff <- cbind(ff, 
+               wnum.interp = approx(x = ff$wavenum, 
+                                    y = ff$counts, 
+                                    method = "linear", 
+                                    n = length(ff$wavenum))$x,
+               cts.interp = approx(x = ff$wavenum, 
+                                    y = ff$counts, 
+                                    method = "linear", 
+                                    n = length(ff$wavenum))$y)
   ##
   return(ff)
 }
--- a/Renishaw/RamanWrapper.R
+++ b/Renishaw/RamanWrapper.R
@ -0,0 +1,64 @@
+RamanWrapper <- 
+   function(data.exp, run, override = FALSE, 
+            kerpk = 1, fitmaxiter = 50, gam = 0.6, scl.factor = 0.1) { 
+   # the override flag is currently not used
+      
+   print("... Started RamanWrapper")
+      
+   # check if Ramanpk has already completed successfully for the current job
+   current.dirname <- getwd()
+   print(current.dirname)
+   current.filename <- "raman-peak-data.rda"
+   ramandatafile <- paste(current.dirname, current.filename, sep = "/")
+   
+   
+   
+   if (file.exists(ramandatafile)) {
+      print("... Started if-clause 1")
+      
+      # File already exists
+      # return the data using load() or data()
+      
+      load(file = ramandatafile)
+            
+      if (run > length(ramres)) {
+         
+         print("... Started if-clause 1:1")
+         
+         # the it does not really exist
+         ramres[[run]] <- Ramanpk(data.exp, 
+                               kerpk = kerpk, 
+                               fitmaxiter = fitmaxiter, 
+                               gam = gam, 
+                               scl.factor = scl.factor)
+         save(ramres, file = ramandatafile)
+         
+         print("... Ended if-clause 1:1")
+      }
+      
+      print("... Ended if-clause 1")
+      
+      return(ramres)
+   } else {
+      
+      print("... Started else-clause 1")
+      
+      if (!exists("ramres")) {
+         ramres <- list()
+         print("... ramres list created")
+      }   
+      
+      # Need to call Ramanpk() and save its results to file as above
+      ramres[[run]] <- Ramanpk(data.exp, 
+                               kerpk = kerpk, 
+                               fitmaxiter = fitmaxiter, 
+                               gam = gam, 
+                               scl.factor = scl.factor)
+      save(ramres, file = ramandatafile)
+      
+      print("... Ended else-clause 1")
+      
+      return(ramres)
+   }
+      
+}
--- a/Renishaw/RamanWrapper.tex
+++ b/Renishaw/RamanWrapper.tex
@ -0,0 +1,4 @@
+I wanted to run the \texttt{diffractometry} peak-analysis code (which can be both time-consuming and prone to stray errors halting compilation) without the need to re-run the peak analysis every time the document is re-compiled.
+
+For that purpose, I created this wrapper function.
+Its job is to call \Rfun{Ramanpk()} only if necessary (i.e., peak analysis has not already been performed). That is made possible by saving the results of a successful analysis to a \texttt{raman-peak-data.rda} file in the directory of the report.
--- a/Renishaw/Ramanpk.R
+++ b/Renishaw/Ramanpk.R
@ -1,95 +1,104 @@
-source("/home/taha/chepec/chetex/common/R/common.R")
-
-##################################################
-################## Ramanpk #######################
-##################################################
-Ramanpk <- function(Raman.exp, kerpk = 1, fitmaxiter = 50, gam = 0.6, scl.factor = 0.1) {
+Ramanpk <- 
+   function(data.exp, kerpk = 1, fitmaxiter = 50, gam = 0.6, scl.factor = 0.1) {
   
-   Raman.base <- baselinefit(Raman.exp, tau = 2.0, gam = gam, scl.factor = scl.factor, maxwdth = 200)
+   print("... Starting baseline fitting")
+      
+   data.basl <- baselinefit(data.exp, 
+                            tau = 2.0, 
+                            gam = gam, 
+                            scl.factor = scl.factor, 
+                            maxwdth = 200)
+   
+   print("... Ended baseline fitting")
   
   # This loop deals with the output from baselinefit()
   # It makes a "melted" dataframe in long form for each 
   # separated peak for some baseline parameters
-   Raman.pks      <- data.frame()
-   Raman.pks.basl <- data.frame()
-   Raman.pks.pmg  <- data.frame()
-   Raman.pks.spl  <- data.frame()
-   peaks <- 1:length(Raman.base$npks)
+   data.pks      <- data.frame()
+   data.pks.basl <- data.frame()
+   data.pks.pmg  <- data.frame()
+   data.pks.spl  <- data.frame()
+   peaks <- 1:length(data.basl$npks)
+   
+   
   for (s in peaks) {
      # recorded data in long form by separated peak
-      Raman.pks <- rbind(Raman.pks, # column names assigned after loop
+      data.pks <- rbind(data.pks, # column names assigned after loop
         data.frame(peak = factor(peaks[s]),
            kernel = NA,
-            Raman.exp[Raman.base$indlsep[s]:Raman.base$indrsep[s], ]))
+            data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], ]))
      # the calculated baseline in long form by separated peak
-      Raman.pks.basl <- rbind(Raman.pks.basl, 
+      data.pks.basl <- rbind(data.pks.basl, 
         data.frame(peak = factor(peaks[s]),
            kernel = NA,
-            x = Raman.exp[Raman.base$indlsep[s]:Raman.base$indrsep[s]],
-            y = Raman.base$baseline$basisl[Raman.base$indlsep[s]:Raman.base$indrsep[s]]))
+            x = data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], 1],
+            y = data.basl$baseline$basisl[data.basl$indlsep[s]:data.basl$indrsep[s]]))
      # the taut string estimation in long form by separated peak
-      Raman.pks.pmg <- rbind(Raman.pks.pmg,
+      data.pks.pmg <- rbind(data.pks.pmg,
         data.frame(peak = factor(peaks[s]),
            kernel = NA,
-            x = Raman.exp[Raman.base$indlsep[s]:Raman.base$indrsep[s]],
-            y = Raman.base$pmg$fn[Raman.base$indlsep[s]:Raman.base$indrsep[s]]))
+            x = data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], 1],
+            y = data.basl$pmg$fn[data.basl$indlsep[s]:data.basl$indrsep[s]]))
      # the weighted smoothed spline in long form by separated peak
-      Raman.pks.spl <- rbind(Raman.pks.spl,
+      data.pks.spl <- rbind(data.pks.spl,
         data.frame(peak = factor(peaks[s]),
            kernel = NA,
-            x = Raman.exp[Raman.base$indlsep[s]:Raman.base$indrsep[s]],
-            y = Raman.base$spl$reg[Raman.base$indlsep[s]:Raman.base$indrsep[s]]))
+            x = data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], 1],
+            y = data.basl$spl$reg[data.basl$indlsep[s]:data.basl$indrsep[s]]))
   }
+   
+   
+   
   # Column names assigned to d.pks
-   names(Raman.pks) <- c("peak", "kernel", "x", "y")
+   names(data.pks) <- c("peak", "kernel", "x", "y")
   
   
   # This loop calls pkdecompint() on each peak separately
   # It makes a "melted" dataframe in long form for:
-   Raman.fit       <- list()       # holds pkdecompint output
-   Raman.fit.fitpk <- data.frame() # contains fitting curves
-   Raman.fit.parpk <- data.frame() # physical parameters by peak and kernel
-   Raman.nobasl    <- data.frame() # data with baseline removed
-   peaks <- 1:length(Raman.base$npks)
+   data.fit       <- list()       # holds pkdecompint output
+   data.fit.fitpk <- data.frame() # contains fitting curves
+   data.fit.parpk <- data.frame() # physical parameters by peak and kernel
+   data.fit.basl  <- data.frame() # data with baseline removed
+   peaks <- 1:length(data.basl$npks)
   for (s in peaks) {
      ######## PKDECOMPINT ########
      if (length(kerpk) > 1) {
         # set number of kernels per peak manually
-         Raman.fit[[s]] <- pkdecompint(Raman.base, intnum = s,
+         data.fit[[s]] <- pkdecompint(data.basl, intnum = s,
            k = kerpk[s], maxiter = fitmaxiter)
      } else {
         # use number of kernels determined by baselinefit()
-         Raman.fit[[s]] <- pkdecompint(Raman.base, intnum = s,
-            k = Raman.base$npks[s], maxiter = fitmaxiter)
+         data.fit[[s]] <- pkdecompint(data.basl, intnum = s,
+            k = data.basl$npks[s], maxiter = fitmaxiter)
      }
      # Setup the dataframe that makes up the peak table
-      for (kernel in 1:Raman.fit[[s]]$num.ker) {
-         Raman.fit.parpk <- rbind(Raman.fit.parpk,
-            data.frame(peak   = factor(Raman.fit[[s]]$intnr),
+      for (kernel in 1:data.fit[[s]]$num.ker) {
+         data.fit.parpk <- rbind(data.fit.parpk,
+            data.frame(peak   = factor(data.fit[[s]]$intnr),
                       kernel = factor(kernel),
-                       x      = Raman.fit[[s]]$parpks[kernel, "loc"],
-                       height = Raman.fit[[s]]$parpks[kernel, "height"],
-                       area   = Raman.fit[[s]]$parpks[kernel, "intens"],
-                       fwhm   = Raman.fit[[s]]$parpks[kernel, "FWHM"],
-                       m      = Raman.fit[[s]]$parpks[kernel, "m"],
-                       accept = Raman.fit[[s]]$accept))
-         Raman.fit.fitpk <- rbind(Raman.fit.fitpk, 
+                       x      = data.fit[[s]]$parpks[kernel, "loc"],
+                       height = data.fit[[s]]$parpks[kernel, "height"],
+                       area   = data.fit[[s]]$parpks[kernel, "intens"],
+                       fwhm   = data.fit[[s]]$parpks[kernel, "FWHM"],
+                       m      = data.fit[[s]]$parpks[kernel, "m"],
+                       accept = data.fit[[s]]$accept))
+         data.fit.fitpk <- rbind(data.fit.fitpk, 
            data.frame(peak = factor(peaks[s]),
                       kernel = factor(kernel),
-                       x = Raman.fit[[s]]$x,
-                       y = Raman.fit[[s]]$fitpk[kernel, ]))
+                       x = data.fit[[s]]$x,
+                       y = data.fit[[s]]$fitpk[kernel, ]))
      }
-      Raman.nobasl <- rbind(Raman.nobasl,
+      data.fit.basl <- rbind(data.fit.basl,
         data.frame(peak = factor(peaks[s]),
-                    x = Raman.fit[[s]]$x,
-                    y = Raman.fit[[s]]$y))
+                    x = data.fit[[s]]$x,
+                    y = data.fit[[s]]$y))
   }
   
   
   
-   return(list(Raman.base = Raman.base, 
-      Raman.peaks = Raman.pks, 
-      Raman.fit.parpk = Raman.fit.parpk, 
-      Raman.fit.fitpk = Raman.fit.fitpk,
-      Raman.nobasl = Raman.nobasl)) 
+   return(list(data.basl = data.basl, 
+      data.peaks = data.pks, 
+      data.fit.parpk = data.fit.parpk, 
+      data.fit.fitpk = data.fit.fitpk,
+      data.fit.basl = data.fit.basl))
 }
--- a/Renishaw/SiliconWrapper.R
+++ b/Renishaw/SiliconWrapper.R
@ -0,0 +1,65 @@
+SiliconWrapper <- 
+   function(data.exp, run, override = FALSE, 
+            kerpk = 1, fitmaxiter = 50, gam = 0.6, scl.factor = 0.1) { 
+   # the override flag is currently not used
+      
+   print("... Started SiliconWrapper")
+      
+   # check if Ramanpk has already completed successfully for the current job
+   current.dirname <- getwd()
+   print(current.dirname)
+   current.filename <- "silicon-peak-data.rda"
+   ramandatafile <- paste(current.dirname, current.filename, sep = "/")
+   
+   
+   
+   if (file.exists(ramandatafile) && !override) {
+      print("... Started if-clause 1")
+      
+      # File already exists
+      # return the data using load() or data()
+      
+      load(file = ramandatafile)
+            
+      if (run > length(ramres)) {
+         
+         print("... Started if-clause 1:1")
+         
+         # the it does not really exist
+         ramres[[run]] <- Ramanpk(data.exp, 
+                               kerpk = kerpk, 
+                               fitmaxiter = fitmaxiter, 
+                               gam = gam, 
+                               scl.factor = scl.factor)
+         save(ramres, file = ramandatafile)
+         
+         print("... Ended if-clause 1:1")
+      }
+      
+      print("... Ended if-clause 1")
+      
+      return(ramres)
+   } else {
+      # File does not exist, or override requested
+      
+      print("... Started else-clause 1")
+      
+      if (!exists("ramres")) {
+         ramres <- list()
+         print("... ramres list created")
+      }   
+      
+      # Need to call Ramanpk() and save its results to file as above
+      ramres[[run]] <- Ramanpk(data.exp, 
+                               kerpk = kerpk, 
+                               fitmaxiter = fitmaxiter, 
+                               gam = gam, 
+                               scl.factor = scl.factor)
+      save(ramres, file = ramandatafile)
+      
+      print("... Ended else-clause 1")
+      
+      return(ramres)
+   }
+      
+}
--- a/XRF-SPECTRO/xrfpk.R
+++ b/XRF-SPECTRO/xrfpk.R
@ -0,0 +1,104 @@
+xrfpk <- 
+   function(data.exp, kerpk = 1, fitmaxiter = 50, gam = 0.6, scl.factor = 0.1, maxwdth = 200) {
+   
+   print("... Starting baseline fitting")
+      
+   data.basl <- baselinefit(data.exp, 
+                            tau = 2.0, 
+                            gam = gam, 
+                            scl.factor = scl.factor, 
+                            maxwdth = maxwdth)
+   
+   print("... Ended baseline fitting")
+   
+   # This loop deals with the output from baselinefit()
+   # It makes a "melted" dataframe in long form for each 
+   # separated peak for some baseline parameters
+   data.pks      <- data.frame()
+   data.pks.basl <- data.frame()
+   data.pks.pmg  <- data.frame()
+   data.pks.spl  <- data.frame()
+   peaks <- 1:length(data.basl$npks)
+   
+   
+   for (s in peaks) {
+      # recorded data in long form by separated peak
+      data.pks <- rbind(data.pks, # column names assigned after loop
+         data.frame(peak = factor(peaks[s]),
+            kernel = NA,
+            data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], ]))
+      # the calculated baseline in long form by separated peak
+      data.pks.basl <- rbind(data.pks.basl, 
+         data.frame(peak = factor(peaks[s]),
+            kernel = NA,
+            x = data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], 1],
+            y = data.basl$baseline$basisl[data.basl$indlsep[s]:data.basl$indrsep[s]]))
+      # the taut string estimation in long form by separated peak
+      data.pks.pmg <- rbind(data.pks.pmg,
+         data.frame(peak = factor(peaks[s]),
+            kernel = NA,
+            x = data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], 1],
+            y = data.basl$pmg$fn[data.basl$indlsep[s]:data.basl$indrsep[s]]))
+      # the weighted smoothed spline in long form by separated peak
+      data.pks.spl <- rbind(data.pks.spl,
+         data.frame(peak = factor(peaks[s]),
+            kernel = NA,
+            x = data.exp[data.basl$indlsep[s]:data.basl$indrsep[s], 1],
+            y = data.basl$spl$reg[data.basl$indlsep[s]:data.basl$indrsep[s]]))
+   }
+   
+   
+   
+   # Column names assigned to d.pks
+   names(data.pks) <- c("peak", "kernel", "x", "y")
+   
+   
+   # This loop calls pkdecompint() on each peak separately
+   # It makes a "melted" dataframe in long form for:
+   data.fit       <- list()       # holds pkdecompint output
+   data.fit.fitpk <- data.frame() # contains fitting curves
+   data.fit.parpk <- data.frame() # physical parameters by peak and kernel
+   data.fit.basl  <- data.frame() # data with baseline removed
+   peaks <- 1:length(data.basl$npks)
+   for (s in peaks) {
+      ######## PKDECOMPINT ########
+      if (length(kerpk) > 1) {
+         # set number of kernels per peak manually
+         data.fit[[s]] <- pkdecompint(data.basl, intnum = s,
+            k = kerpk[s], maxiter = fitmaxiter)
+      } else {
+         # use number of kernels determined by baselinefit()
+         data.fit[[s]] <- pkdecompint(data.basl, intnum = s,
+            k = data.basl$npks[s], maxiter = fitmaxiter)
+      }
+      # Setup the dataframe that makes up the peak table
+      for (kernel in 1:data.fit[[s]]$num.ker) {
+         data.fit.parpk <- rbind(data.fit.parpk,
+            data.frame(peak   = factor(data.fit[[s]]$intnr),
+                       kernel = factor(kernel),
+                       x      = data.fit[[s]]$parpks[kernel, "loc"],
+                       height = data.fit[[s]]$parpks[kernel, "height"],
+                       area   = data.fit[[s]]$parpks[kernel, "intens"],
+                       fwhm   = data.fit[[s]]$parpks[kernel, "FWHM"],
+                       m      = data.fit[[s]]$parpks[kernel, "m"],
+                       accept = data.fit[[s]]$accept))
+         data.fit.fitpk <- rbind(data.fit.fitpk, 
+            data.frame(peak = factor(peaks[s]),
+                       kernel = factor(kernel),
+                       x = data.fit[[s]]$x,
+                       y = data.fit[[s]]$fitpk[kernel, ]))
+      }
+      data.fit.basl <- rbind(data.fit.basl,
+         data.frame(peak = factor(peaks[s]),
+                    x = data.fit[[s]]$x,
+                    y = data.fit[[s]]$y))
+   }
+   
+   
+   
+   return(list(data.basl = data.basl, 
+      data.peaks = data.pks, 
+      data.fit.parpk = data.fit.parpk, 
+      data.fit.fitpk = data.fit.fitpk,
+      data.fit.basl = data.fit.basl))
+}
--- a/XRF-SPECTRO/xrfpkWrapper.R
+++ b/XRF-SPECTRO/xrfpkWrapper.R
@ -0,0 +1,67 @@
+xrfpkWrapper <- 
+   function(data.exp, run, override = FALSE, 
+            kerpk = 1, fitmaxiter = 100, gam = 0.64, scl.factor = 0.06, maxwdth = 10) { 
+          # kerpk = 1, fitmaxiter = 50, gam = 0.6, scl.factor = 0.1) {
+   # the override flag is IN USE
+      
+   print("... Started xrfpkWrapper")
+      
+   # check if xrfpk has already completed successfully for the current job
+   current.dirname <- getwd()
+   print(current.dirname)
+   current.filename <- "xrf-peak-data.rda"
+   xrfdatafile <- paste(current.dirname, current.filename, sep = "/")
+   
+   
+   
+   if (file.exists(xrfdatafile) && !override) {
+      print("... Started if-clause 1")
+      
+      # File already exists
+      # return the data using load() or data()
+      
+      load(file = xrfdatafile)
+            
+      if (run > length(xrfres)) {
+         
+         print("... Started if-clause 1:1")
+         
+         # the it does not really exist
+         xrfres[[run]] <- xrfpk(data.exp, 
+                                kerpk = kerpk, 
+                                fitmaxiter = fitmaxiter, 
+                                gam = gam, 
+                                scl.factor = scl.factor,
+                                maxwdth = maxwdth)
+         save(xrfres, file = xrfdatafile)
+         
+         print("... Ended if-clause 1:1")
+      }
+      
+      print("... Ended if-clause 1")
+      
+      return(xrfres)
+   } else {
+      
+      print("... Started else-clause 1")
+      
+      if (!exists("xrfres")) {
+         xrfres <- list()
+         print("... xrfres list created")
+      }   
+      
+      # Need to call xrfpk() and save its results to file as above
+      xrfres[[run]] <- xrfpk(data.exp, 
+                             kerpk = kerpk, 
+                             fitmaxiter = fitmaxiter, 
+                             gam = gam, 
+                             scl.factor = scl.factor,
+                             maxwdth = maxwdth)
+      save(xrfres, file = xrfdatafile)
+      
+      print("... Ended else-clause 1")
+      
+      return(xrfres)
+   }
+      
+}
--- a/XRF-SPECTRO/xrfpkWrapper.tex
+++ b/XRF-SPECTRO/xrfpkWrapper.tex
@ -0,0 +1,3 @@
+I have been attempting to fine-tune the parameters of the \texttt{baselinefit()} function.
+
+Increasing the number of iterations above 100 have been useless. Small changes in \texttt{gam} leads to noticeable changes in the peak and kernel distribution. 
--- a/XRF-SPECTRO/xrfspectro2df.R
+++ b/XRF-SPECTRO/xrfspectro2df.R
@ -0,0 +1,86 @@
+source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")
+
+xrfspectro2df <- function(smpfile) {
+   ## Description:
+   ##   Reads XRF textfile from XLAB SPECTRO XRF.
+   ##   Stores data in data frame and parameters in an attributed dataframe.
+   ## Usage:
+   ##   xrfspectro2df(smpfile)
+   ## Arguments:
+   ##    smpfile: character string, the full filename
+   ##                (with path) to one SMP file (ASCII).
+   ## Value:
+   ##   A dataframe with attributed dataframe
+   #
+   filecon <- file(smpfile, "r")
+   smpcontents <- readLines(filecon, n = -1) #read all lines of input file
+   close(filecon)
+   #
+   sampleid <- ProvideSampleId(smpfile)
+   #
+   rgxp.data <- "^Kanal\\s[\\d]+:"
+   #
+   numrow.idx <- regexpr(rgxp.data, smpcontents, perl = TRUE)
+   # scrap the match.length attribute
+   attr(numrow.idx, "match.length") <- NULL
+   #
+   # Determine how many columns the data contains
+   smpdata.cols <- length(strsplit(smpcontents[which(numrow.idx == 1)][1], "\t")[[1]]) - 1
+   # While we are at it, save row count to a variable as well
+   smpdata.rows <- length(smpcontents[which(numrow.idx == 1)])
+   # strip prefix off each data row
+   #smpdata <- matrix(NA, ncol = smpdata.cols, nrow = smpdata.rows)
+   smpdata.txt <- vector(length = smpdata.rows)
+   for (i in 1:smpdata.rows) {
+      smpdata.txt[i] <- strsplit(smpcontents[which(numrow.idx == 1)][i], ":")[[1]][2]
+   }
+   smpdata.txt.clean <- gsub("^\\s", "", 
+                             gsub("\\t", " ", smpdata.txt))
+   
+   zz <- textConnection(smpdata.txt.clean, "r")
+   ff <- data.frame(stringsAsFactors = FALSE, 
+                    sampleid = sampleid,
+                    channel = seq(1, smpdata.rows),
+                    matrix(scan(zz, what = numeric(), sep = " "), 
+                           ncol = smpdata.cols, byrow = TRUE))
+   close(zz)
+   names(ff) <- c("sampleid", "channel", paste("Y", seq(1, smpdata.cols), sep = ""))
+   
+
+   #
+   ### Collect attributes of this experiment
+   SMPattrEdit <- matrix(c("Voltage",      "^Voltage:",
+                           "Current",      "^Current:",
+                           "Target",       "^Target:",
+                           "Duration",     "^Meas\\.\\sDuration:",
+                           "Impulse",      "^Imp\\.\\sRate:",
+                           "DeadTime",     "^Rel\\.\\sDead\\sTime:",
+                           "FirstChannel", "^First\\sChannel:",
+                           "LastChannel",  "^Last\\sChannel:",
+                           "PeakTime",     "^Peak\\sTime:",
+                           "Gain",         "^Gain:",
+                           "ZeroPeak",     "^Zero\\sPeak:"),
+                         ncol = 2, byrow = T)
+                         
+   SMPattr <- matrix(NA, nrow = smpdata.cols + 1, ncol = dim(SMPattrEdit)[1])
+   
+   for (c in 1:dim(SMPattrEdit)[1]) {
+      SMPattr[1, c] <- SMPattrEdit[c, 1]
+      SMPattr[2:dim(SMPattr)[1], c] <- 
+         matrix(strsplit(gsub("^\\t", "", 
+            strsplit(smpcontents[which(regexpr(SMPattrEdit[c, 2], smpcontents) == 1)],
+               ":")[[1]][2]), "\\t")[[1]], ncol = smpdata.cols)
+   }
+   SMPdf <- data.frame(stringsAsFactors = FALSE, 
+                       SMPattr[2:dim(SMPattr)[1], ])
+   colnames(SMPdf) <- SMPattr[1, ]
+                           
+                           
+   ### Now calculate the energy (keV) scale (convert from channels to energy)
+   ff$X <- ff$channel * (as.numeric(SMPdf$Gain[1]) / as.numeric(SMPdf$LastChannel[1]))
+
+   # Attach parameters to returned dataframe
+   attr(ff, "parameters") <- SMPdf
+   #
+   return(ff)
+}
--- a/common/ProvideSampleId.R
+++ b/common/ProvideSampleId.R
@ -1,25 +1,34 @@
-##################################################
-################ ProvideSampleId #################
-##################################################
-ProvideSampleId <- function (fullpathwithfilename) {
-     ### OBS! Only very rudimentary error-checking.
-     ### If the filename is formatted as \w*-\w*-\w*, we use the middle segment, 
-     ### otherwise we use the whole string (excluding the extension)
-     # Extract the name of the parent directory of the datafilename argument
-     substrateid <- basename(dirname(fullpathwithfilename))
-     # Extract the name of the method from the filename-part
-     # First split the filename over all hyphens
-     nameparts <- strsplit(basename(fullpathwithfilename), "-")[[1]]
-     # If the number of nameparts exceed 3, save the whole filename as methodid, otherwise use the middle part
-     if (length(nameparts) > 3) {
-        # We need to lose the file extension from the last namepart
-        nameparts[length(nameparts)] <- strsplit(nameparts[length(nameparts)], "\\.")[[1]][1]
-        methodid <- paste(nameparts, collapse = "-")
-     } else {
-        methodid <- nameparts[2]
-     }
-     # Make an informative sampleid
-     sampleid <- paste(substrateid, methodid, sep = "-")
-     #
-     return(sampleid)
+ProvideSampleId <- function (pathexpfile) {
+   # Returns a "unique" sample ID when supplied 
+   # with a path to an experimental file.
+   ## Note: the sample ID must derive directly from the file or path.
+   
+   sampleid <- sub("\\.[\\w]+$", "", basename(pathexpfile), perl = TRUE)
+   
+   #### The code below is the old ProvideSampleId() function   
+   #  ### OBS! Only very rudimentary error-checking.
+   #  ### If the filename is formatted as \w*-\w*-\w*, we use the middle segment, 
+   #  ### otherwise we use the whole string (excluding the extension)
+   #  # Extract the name of the parent directory of the datafilename argument
+   #  substrateid <- basename(dirname(fullpathwithfilename))
+   #  # Extract the name of the method from the filename-part
+   #  # First split the filename over all hyphens
+   #  nameparts <- strsplit(basename(fullpathwithfilename), "-")[[1]]
+   #  # If the number of nameparts exceed 3, save the whole filename
+   #  # as methodid, otherwise use the middle part
+   #  if (length(nameparts) > 3) {
+   #     # We need to lose the file extension from the last namepart
+   #     nameparts[length(nameparts)] <- 
+   #        strsplit(nameparts[length(nameparts)], "\\.")[[1]][1]
+   #     methodid <- paste(nameparts, collapse = "-")
+   #  } else {
+   #     methodid <- nameparts[2]
+   #  }
+   #  # Make an informative sampleid
+   #  sampleid <- paste(substrateid, methodid, sep = "-")
+   #  #
+   #  return(sampleid)
+   ####
+   
+   return(sampleid)
 }
--- a/common/ProvideSampleId.tex
+++ b/common/ProvideSampleId.tex
@ -0,0 +1,6 @@
+The following function, \Rfun{ProvideSampleId()}, strives to supply a unique sample ID from any path supplied to it. Of course, a certain structure on the part of the path and filename are assumed, namely:
+\begin{itemize}
+\item That the filename (not including the extension) consists of three alphanumeric strings separated by hyphens.
+\item That\ldots
+\end{itemize}
+