R-common/XRF-SPECTRO/xrfspectro2df.R

source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")

xrfspectro2df <- function(smpfile) {
   ## Description:
   ##   Total remake of xrfspectro2df(). Idea is to accomodate all 6 possible
   ##   datasets of each SMP file, plus the attributes.
   ##   Reads XRF textfile from XLAB SPECTRO XRF.
   ##   Stores data in data frame and parameters in an attributed dataframe.
   ## Usage:
   ##   xrfspectro2df(smpfile)
   ## Arguments:
   ##    smpfile: character string, the full filename
   ##                (with path) to one SMP file (ASCII).
   ## Value:
   ##   A dataframe with attributed dataframe

   #### ONLY BOTHER WITH THE FIRST MEASUREMENT IN THE SMP-FILE.

   filecon <- file(smpfile, "r")
   smpcontents <- readLines(filecon, n = -1) #read all lines of input file
   close(filecon)

   # Parameter table
   # Those are the parameter we may access later in this function
   xrf.param <- data.frame(stringsAsFactors = FALSE,
      matrix(c("Method",       "^Method:",
               "Job",          "^Job:",
               "Status",       "^Status:",
               "Description",  "^Description:",
               "Date",         "^Date\\sof\\sMeasurement:",
               "Measurements", "^Measurements:",
               "Voltage",      "^Voltage:",
               "Current",      "^Current:",
               "Target",       "^Target:",
               "Duration",     "^Meas\\.\\sDuration:",
               "Impulse",      "^Imp\\.\\sRate:",
               "DeadTime",     "^Rel\\.\\sDead\\sTime:",
               "FirstChannel", "^First\\sChannel:",
               "LastChannel",  "^Last\\sChannel:",
               "PeakTime",     "^Peak\\sTime:",
               "Gain",         "^Gain:",
               "ZeroPeak",     "^Zero\\sPeak:",
               "Data",         "^Kanal\\s[\\d]+:"),
             ncol = 2, byrow = T))
   names(xrf.param) <- c("parameter", "regexp")


   # Data table
   # Contains the regexp used for identifiying rows containing data
   xrf.data <- data.frame(stringsAsFactors = FALSE,
      matrix(c("Data", "^Kanal\\s[\\d]+:"), ncol = 2, byrow = T))
   names(xrf.data) <- c("parameter", "regexp")


   # Find out how many measurements we have in this
   # file by accessing the Measurements field
   n_measurements <- as.numeric(strsplit(gsub("^\\t", "",
      strsplit(smpcontents[which(regexpr(subset(xrf.param,
         parameter == "Measurements", select = "regexp")$regexp,
         smpcontents) == 1)], ":")[[1]][2]), "\\t")[[1]])
   # If more than one measurement, issue warning
   if (n_measurements > 1) {
      warning(paste(paste(n_measurements, " measurements detected in ",
                          basename(smpfile), sep = ""),
                    "Only the first measurement will be recorded",
                    sep = "\n", collapse = ""))
   }


   # How many rows of data?
   n_rowsdata <-
      length(which(regexpr(subset(xrf.data, parameter == "Data",
         select = "regexp")$regexp, smpcontents, perl = TRUE) == 1))

   # Build an empty matrix big enough to hold all data
   # (i.e., ncol = 3, and nrow = n_rowsdata * n_measurements)
   data.long <- data.frame(matrix(NA, ncol = 5, nrow = 6 * n_rowsdata))
   names(data.long) <- c("sampleid", "measurement", "channel", "energy", "counts")


   data.mtx <- matrix(NA, ncol = 6, nrow = n_rowsdata)
   for (j in 1:n_rowsdata) {
      data.mtx[j, ] <- as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.data, parameter == "Data",
         select = "regexp")$regexp, smpcontents, perl = TRUE) == 1)], ":\\t")[[j]][2], "\\t")[[1]])
   }


   # Sampleid to column 1
   data.long[, 1] <- rep(ProvideSampleId(smpfile), n_rowsdata)
   # Channel to column 3
   data.long[, 3] <- rep(seq(1, n_rowsdata), dim(data.mtx)[2])
   for (c in 1:6) {
      # Measurement no. in column 2
      data.long[((c * n_rowsdata) - n_rowsdata + 1):(((c + 1) * n_rowsdata) - n_rowsdata), 2] <- rep(c, n_rowsdata)
      # Counts in column 5
      data.long[((c * n_rowsdata) - n_rowsdata + 1):(((c + 1) * n_rowsdata) - n_rowsdata), 5] <- data.mtx[, c]
   }

   # Drop all rows with measurement-number not equal to 1
   data.long <- subset(data.long, measurement == 1)


   # Fetch the measurement parameters
   data.long[, subset(xrf.param, parameter == "Date")$parameter] <-
   rep(substr(strsplit(gsub("^\\t", "", strsplit(smpcontents[which(regexpr(subset(xrf.param,
      parameter == "Date")$regexp, smpcontents) == 1)], ":")[[1]][2]),
         "\\t")[[1]][1], 1, 8), n_rowsdata)

   data.long[, subset(xrf.param, parameter == "Method")$parameter] <-
   rep(strsplit(gsub("^\\t", "", strsplit(smpcontents[which(regexpr(subset(xrf.param,
      parameter == "Method")$regexp, smpcontents) == 1)], ":")[[1]][2]),
         "\\t")[[1]][1], n_rowsdata)

   data.long[, subset(xrf.param, parameter == "Job")$parameter] <-
   rep(strsplit(gsub("^\\t", "", strsplit(smpcontents[which(regexpr(subset(xrf.param,
      parameter == "Job")$regexp, smpcontents) == 1)], ":")[[1]][2]),
         "\\t")[[1]][1], n_rowsdata)

   data.long[, subset(xrf.param, parameter == "Status")$parameter] <-
   rep(strsplit(gsub("^\\t", "", strsplit(smpcontents[which(regexpr(subset(xrf.param,
      parameter == "Status")$regexp, smpcontents) == 1)], ":")[[1]][2]),
         "\\t")[[1]][1], n_rowsdata)

   data.long[, subset(xrf.param, parameter == "Description")$parameter] <-
   rep(gsub("^\\t", "", strsplit(smpcontents[which(regexpr(subset(xrf.param,
      parameter == "Description")$regexp, smpcontents) == 1)], ":")[[1]][2]),
      n_rowsdata)

   data.long[, subset(xrf.param, parameter == "Voltage")$parameter] <-
      rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
         parameter == "Voltage")$regexp, smpcontents, perl = TRUE) == 1)],
            ":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)

   data.long[, subset(xrf.param, parameter == "Current")$parameter] <-
      rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
         parameter == "Current")$regexp, smpcontents, perl = TRUE) == 1)],
            ":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)

   data.long[, subset(xrf.param, parameter == "Target")$parameter] <-
      rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
         parameter == "Target")$regexp, smpcontents, perl = TRUE) == 1)],
            ":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)

   data.long[, subset(xrf.param, parameter == "Duration")$parameter] <-
      rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
         parameter == "Duration")$regexp, smpcontents, perl = TRUE) == 1)],
            ":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)

   data.long[, subset(xrf.param, parameter == "Impulse")$parameter] <-
      rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
         parameter == "Impulse")$regexp, smpcontents, perl = TRUE) == 1)],
            ":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)

   data.long[, subset(xrf.param, parameter == "DeadTime")$parameter] <-
      rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
         parameter == "DeadTime")$regexp, smpcontents, perl = TRUE) == 1)],
            ":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)

   data.long[, subset(xrf.param, parameter == "FirstChannel")$parameter] <-
      rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
         parameter == "FirstChannel")$regexp, smpcontents, perl = TRUE) == 1)],
            ":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)

   data.long[, subset(xrf.param, parameter == "LastChannel")$parameter] <-
      rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
         parameter == "LastChannel")$regexp, smpcontents, perl = TRUE) == 1)],
            ":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)

   data.long[, subset(xrf.param, parameter == "PeakTime")$parameter] <-
      rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
         parameter == "PeakTime")$regexp, smpcontents, perl = TRUE) == 1)],
            ":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)

   data.long[, subset(xrf.param, parameter == "Gain")$parameter] <-
      rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
         parameter == "Gain")$regexp, smpcontents, perl = TRUE) == 1)],
            ":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)

   data.long[, subset(xrf.param, parameter == "ZeroPeak")$parameter] <-
      rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
         parameter == "ZeroPeak")$regexp, smpcontents, perl = TRUE) == 1)],
            ":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)


   # Convert channel into energy scale
   # Using the following assumptions:
   #    1. Zero peak is always the strongest (highest) peak in the spectrum
   # The channel with maximum counts should correspond to 0 keV
   # This gives a one-channel deviation from what the instrument shows
   # for a 12.5 keV range measurement using 1024 channels (so far)
   # This is good enough for our purposes, since the peak energies for most
   # ions do not match with reference values without a correction term anyway.
   max.channel <- which(data.long$counts == max(data.long$counts))
   data.long$energy <- (data.long$channel * (data.long$Gain / data.long$LastChannel)) -
                       ((max.channel / data.long$LastChannel) * data.long$Gain)
   # Save the maxchannel to the returned dataframe
   data.long$ZeroChannel <- rep(max.channel, n_rowsdata)

   # Calculate energy from channel # this is no longer viable
   #data.long$energy <- (data.long$channel * (data.long$Gain / data.long$LastChannel)) -
   #   ((24 / data.long$LastChannel) * data.long$Gain)

   return(data.long)
}