|
|
|
@ -2,6 +2,8 @@ source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")
|
|
|
|
|
|
|
|
|
|
xrfspectro2df <- function(smpfile) {
|
|
|
|
|
## Description:
|
|
|
|
|
## Total remake of xrfspectro2df(). Idea is to accomodate all 6 possible
|
|
|
|
|
## datasets of each SMP file, plus the attributes.
|
|
|
|
|
## Reads XRF textfile from XLAB SPECTRO XRF.
|
|
|
|
|
## Stores data in data frame and parameters in an attributed dataframe.
|
|
|
|
|
## Usage:
|
|
|
|
@ -11,45 +13,23 @@ xrfspectro2df <- function(smpfile) {
|
|
|
|
|
## (with path) to one SMP file (ASCII).
|
|
|
|
|
## Value:
|
|
|
|
|
## A dataframe with attributed dataframe
|
|
|
|
|
#
|
|
|
|
|
|
|
|
|
|
#### ONLY BOTHER WITH THE FIRST MEASUREMENT IN THE SMP-FILE.
|
|
|
|
|
|
|
|
|
|
filecon <- file(smpfile, "r")
|
|
|
|
|
smpcontents <- readLines(filecon, n = -1) #read all lines of input file
|
|
|
|
|
close(filecon)
|
|
|
|
|
#
|
|
|
|
|
sampleid <- ProvideSampleId(smpfile)
|
|
|
|
|
#
|
|
|
|
|
rgxp.data <- "^Kanal\\s[\\d]+:"
|
|
|
|
|
#
|
|
|
|
|
numrow.idx <- regexpr(rgxp.data, smpcontents, perl = TRUE)
|
|
|
|
|
# scrap the match.length attribute
|
|
|
|
|
attr(numrow.idx, "match.length") <- NULL
|
|
|
|
|
#
|
|
|
|
|
# Determine how many columns the data contains
|
|
|
|
|
smpdata.cols <- length(strsplit(smpcontents[which(numrow.idx == 1)][1], "\t")[[1]]) - 1
|
|
|
|
|
# While we are at it, save row count to a variable as well
|
|
|
|
|
smpdata.rows <- length(smpcontents[which(numrow.idx == 1)])
|
|
|
|
|
# strip prefix off each data row
|
|
|
|
|
#smpdata <- matrix(NA, ncol = smpdata.cols, nrow = smpdata.rows)
|
|
|
|
|
smpdata.txt <- vector(length = smpdata.rows)
|
|
|
|
|
for (i in 1:smpdata.rows) {
|
|
|
|
|
smpdata.txt[i] <- strsplit(smpcontents[which(numrow.idx == 1)][i], ":")[[1]][2]
|
|
|
|
|
}
|
|
|
|
|
smpdata.txt.clean <- gsub("^\\s", "",
|
|
|
|
|
gsub("\\t", " ", smpdata.txt))
|
|
|
|
|
|
|
|
|
|
zz <- textConnection(smpdata.txt.clean, "r")
|
|
|
|
|
ff <- data.frame(stringsAsFactors = FALSE,
|
|
|
|
|
sampleid = sampleid,
|
|
|
|
|
channel = seq(1, smpdata.rows),
|
|
|
|
|
matrix(scan(zz, what = numeric(), sep = " "),
|
|
|
|
|
ncol = smpdata.cols, byrow = TRUE))
|
|
|
|
|
close(zz)
|
|
|
|
|
names(ff) <- c("sampleid", "channel", paste("Y", seq(1, smpdata.cols), sep = ""))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
### Collect attributes of this experiment
|
|
|
|
|
SMPattrEdit <- matrix(c("Voltage", "^Voltage:",
|
|
|
|
|
|
|
|
|
|
# Parameter table
|
|
|
|
|
# Those are the parameter we may access later in this function
|
|
|
|
|
xrf.param <- data.frame(stringsAsFactors = FALSE,
|
|
|
|
|
matrix(c("Method", "^Method:",
|
|
|
|
|
"Job", "^Job:",
|
|
|
|
|
"Status", "^Status:",
|
|
|
|
|
"Description", "^Description:",
|
|
|
|
|
"Date", "^Date\\sof\\sMeasurement:",
|
|
|
|
|
"Measurements", "^Measurements:",
|
|
|
|
|
"Voltage", "^Voltage:",
|
|
|
|
|
"Current", "^Current:",
|
|
|
|
|
"Target", "^Target:",
|
|
|
|
|
"Duration", "^Meas\\.\\sDuration:",
|
|
|
|
@ -59,28 +39,167 @@ xrfspectro2df <- function(smpfile) {
|
|
|
|
|
"LastChannel", "^Last\\sChannel:",
|
|
|
|
|
"PeakTime", "^Peak\\sTime:",
|
|
|
|
|
"Gain", "^Gain:",
|
|
|
|
|
"ZeroPeak", "^Zero\\sPeak:"),
|
|
|
|
|
ncol = 2, byrow = T)
|
|
|
|
|
"ZeroPeak", "^Zero\\sPeak:",
|
|
|
|
|
"Data", "^Kanal\\s[\\d]+:"),
|
|
|
|
|
ncol = 2, byrow = T))
|
|
|
|
|
names(xrf.param) <- c("parameter", "regexp")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Data table
|
|
|
|
|
# Contains the regexp used for identifiying rows containing data
|
|
|
|
|
xrf.data <- data.frame(stringsAsFactors = FALSE,
|
|
|
|
|
matrix(c("Data", "^Kanal\\s[\\d]+:"), ncol = 2, byrow = T))
|
|
|
|
|
names(xrf.data) <- c("parameter", "regexp")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Find out how many measurements we have in this
|
|
|
|
|
# file by accessing the Measurements field
|
|
|
|
|
n_measurements <- as.numeric(strsplit(gsub("^\\t", "",
|
|
|
|
|
strsplit(smpcontents[which(regexpr(subset(xrf.param,
|
|
|
|
|
parameter == "Measurements", select = "regexp")$regexp,
|
|
|
|
|
smpcontents) == 1)], ":")[[1]][2]), "\\t")[[1]])
|
|
|
|
|
# If more than one measurement, issue warning
|
|
|
|
|
if (n_measurements > 1) {
|
|
|
|
|
warning(paste(paste(n_measurements, " measurements detected in ",
|
|
|
|
|
basename(smpfile), sep = ""),
|
|
|
|
|
"Only the first measurement will be recorded",
|
|
|
|
|
sep = "\n", collapse = ""))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# How many rows of data?
|
|
|
|
|
n_rowsdata <-
|
|
|
|
|
length(which(regexpr(subset(xrf.data, parameter == "Data",
|
|
|
|
|
select = "regexp")$regexp, smpcontents, perl = TRUE) == 1))
|
|
|
|
|
|
|
|
|
|
SMPattr <- matrix(NA, nrow = smpdata.cols + 1, ncol = dim(SMPattrEdit)[1])
|
|
|
|
|
# Build an empty matrix big enough to hold all data
|
|
|
|
|
# (i.e., ncol = 3, and nrow = n_rowsdata * n_measurements)
|
|
|
|
|
data.long <- data.frame(matrix(NA, ncol = 5, nrow = 6 * n_rowsdata))
|
|
|
|
|
names(data.long) <- c("sampleid", "measurement", "channel", "energy", "counts")
|
|
|
|
|
|
|
|
|
|
for (c in 1:dim(SMPattrEdit)[1]) {
|
|
|
|
|
SMPattr[1, c] <- SMPattrEdit[c, 1]
|
|
|
|
|
SMPattr[2:dim(SMPattr)[1], c] <-
|
|
|
|
|
matrix(strsplit(gsub("^\\t", "",
|
|
|
|
|
strsplit(smpcontents[which(regexpr(SMPattrEdit[c, 2], smpcontents) == 1)],
|
|
|
|
|
":")[[1]][2]), "\\t")[[1]], ncol = smpdata.cols)
|
|
|
|
|
|
|
|
|
|
data.mtx <- matrix(NA, ncol = 6, nrow = n_rowsdata)
|
|
|
|
|
for (j in 1:n_rowsdata) {
|
|
|
|
|
data.mtx[j, ] <- as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.data, parameter == "Data",
|
|
|
|
|
select = "regexp")$regexp, smpcontents, perl = TRUE) == 1)], ":\\t")[[j]][2], "\\t")[[1]])
|
|
|
|
|
}
|
|
|
|
|
SMPdf <- data.frame(stringsAsFactors = FALSE,
|
|
|
|
|
SMPattr[2:dim(SMPattr)[1], ])
|
|
|
|
|
colnames(SMPdf) <- SMPattr[1, ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
### Now calculate the energy (keV) scale (convert from channels to energy)
|
|
|
|
|
ff$X <- ff$channel * (as.numeric(SMPdf$Gain[1]) / as.numeric(SMPdf$LastChannel[1]))
|
|
|
|
|
# Sampleid to column 1
|
|
|
|
|
data.long[, 1] <- rep(ProvideSampleId(smpfile), n_rowsdata)
|
|
|
|
|
# Channel to column 3
|
|
|
|
|
data.long[, 3] <- rep(seq(1, n_rowsdata), dim(data.mtx)[2])
|
|
|
|
|
for (c in 1:6) {
|
|
|
|
|
# Measurement no. in column 2
|
|
|
|
|
data.long[((c * n_rowsdata) - n_rowsdata + 1):(((c + 1) * n_rowsdata) - n_rowsdata), 2] <- rep(c, n_rowsdata)
|
|
|
|
|
# Counts in column 5
|
|
|
|
|
data.long[((c * n_rowsdata) - n_rowsdata + 1):(((c + 1) * n_rowsdata) - n_rowsdata), 5] <- data.mtx[, c]
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# Drop all rows with measurement-number not equal to 1
|
|
|
|
|
data.long <- subset(data.long, measurement == 1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Fetch the measurement parameters
|
|
|
|
|
data.long[, subset(xrf.param, parameter == "Date")$parameter] <-
|
|
|
|
|
rep(substr(strsplit(gsub("^\\t", "", strsplit(smpcontents[which(regexpr(subset(xrf.param,
|
|
|
|
|
parameter == "Date")$regexp, smpcontents) == 1)], ":")[[1]][2]),
|
|
|
|
|
"\\t")[[1]][1], 1, 8), n_rowsdata)
|
|
|
|
|
|
|
|
|
|
data.long[, subset(xrf.param, parameter == "Method")$parameter] <-
|
|
|
|
|
rep(strsplit(gsub("^\\t", "", strsplit(smpcontents[which(regexpr(subset(xrf.param,
|
|
|
|
|
parameter == "Method")$regexp, smpcontents) == 1)], ":")[[1]][2]),
|
|
|
|
|
"\\t")[[1]][1], n_rowsdata)
|
|
|
|
|
|
|
|
|
|
data.long[, subset(xrf.param, parameter == "Job")$parameter] <-
|
|
|
|
|
rep(strsplit(gsub("^\\t", "", strsplit(smpcontents[which(regexpr(subset(xrf.param,
|
|
|
|
|
parameter == "Job")$regexp, smpcontents) == 1)], ":")[[1]][2]),
|
|
|
|
|
"\\t")[[1]][1], n_rowsdata)
|
|
|
|
|
|
|
|
|
|
data.long[, subset(xrf.param, parameter == "Status")$parameter] <-
|
|
|
|
|
rep(strsplit(gsub("^\\t", "", strsplit(smpcontents[which(regexpr(subset(xrf.param,
|
|
|
|
|
parameter == "Status")$regexp, smpcontents) == 1)], ":")[[1]][2]),
|
|
|
|
|
"\\t")[[1]][1], n_rowsdata)
|
|
|
|
|
|
|
|
|
|
data.long[, subset(xrf.param, parameter == "Description")$parameter] <-
|
|
|
|
|
rep(gsub("^\\t", "", strsplit(smpcontents[which(regexpr(subset(xrf.param,
|
|
|
|
|
parameter == "Description")$regexp, smpcontents) == 1)], ":")[[1]][2]),
|
|
|
|
|
n_rowsdata)
|
|
|
|
|
|
|
|
|
|
data.long[, subset(xrf.param, parameter == "Voltage")$parameter] <-
|
|
|
|
|
rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
|
|
|
|
|
parameter == "Voltage")$regexp, smpcontents, perl = TRUE) == 1)],
|
|
|
|
|
":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)
|
|
|
|
|
|
|
|
|
|
data.long[, subset(xrf.param, parameter == "Current")$parameter] <-
|
|
|
|
|
rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
|
|
|
|
|
parameter == "Current")$regexp, smpcontents, perl = TRUE) == 1)],
|
|
|
|
|
":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)
|
|
|
|
|
|
|
|
|
|
data.long[, subset(xrf.param, parameter == "Target")$parameter] <-
|
|
|
|
|
rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
|
|
|
|
|
parameter == "Target")$regexp, smpcontents, perl = TRUE) == 1)],
|
|
|
|
|
":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)
|
|
|
|
|
|
|
|
|
|
data.long[, subset(xrf.param, parameter == "Duration")$parameter] <-
|
|
|
|
|
rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
|
|
|
|
|
parameter == "Duration")$regexp, smpcontents, perl = TRUE) == 1)],
|
|
|
|
|
":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)
|
|
|
|
|
|
|
|
|
|
data.long[, subset(xrf.param, parameter == "Impulse")$parameter] <-
|
|
|
|
|
rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
|
|
|
|
|
parameter == "Impulse")$regexp, smpcontents, perl = TRUE) == 1)],
|
|
|
|
|
":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)
|
|
|
|
|
|
|
|
|
|
data.long[, subset(xrf.param, parameter == "DeadTime")$parameter] <-
|
|
|
|
|
rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
|
|
|
|
|
parameter == "DeadTime")$regexp, smpcontents, perl = TRUE) == 1)],
|
|
|
|
|
":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)
|
|
|
|
|
|
|
|
|
|
data.long[, subset(xrf.param, parameter == "FirstChannel")$parameter] <-
|
|
|
|
|
rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
|
|
|
|
|
parameter == "FirstChannel")$regexp, smpcontents, perl = TRUE) == 1)],
|
|
|
|
|
":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)
|
|
|
|
|
|
|
|
|
|
data.long[, subset(xrf.param, parameter == "LastChannel")$parameter] <-
|
|
|
|
|
rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
|
|
|
|
|
parameter == "LastChannel")$regexp, smpcontents, perl = TRUE) == 1)],
|
|
|
|
|
":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)
|
|
|
|
|
|
|
|
|
|
data.long[, subset(xrf.param, parameter == "PeakTime")$parameter] <-
|
|
|
|
|
rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
|
|
|
|
|
parameter == "PeakTime")$regexp, smpcontents, perl = TRUE) == 1)],
|
|
|
|
|
":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)
|
|
|
|
|
|
|
|
|
|
data.long[, subset(xrf.param, parameter == "Gain")$parameter] <-
|
|
|
|
|
rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
|
|
|
|
|
parameter == "Gain")$regexp, smpcontents, perl = TRUE) == 1)],
|
|
|
|
|
":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)
|
|
|
|
|
|
|
|
|
|
data.long[, subset(xrf.param, parameter == "ZeroPeak")$parameter] <-
|
|
|
|
|
rep(as.numeric(strsplit(strsplit(smpcontents[which(regexpr(subset(xrf.param,
|
|
|
|
|
parameter == "ZeroPeak")$regexp, smpcontents, perl = TRUE) == 1)],
|
|
|
|
|
":\\t")[[1]][2], "\\t")[[1]])[1], n_rowsdata)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Convert channel into energy scale
|
|
|
|
|
# Using the following assumptions:
|
|
|
|
|
# 1. Zero peak is always the strongest (highest) peak in the spectrum
|
|
|
|
|
# The channel with maximum counts should correspond to 0 keV
|
|
|
|
|
# This gives a one-channel deviation from what the instrument shows
|
|
|
|
|
# for a 12.5 keV range measurement using 1024 channels (so far)
|
|
|
|
|
# This is good enough for our purposes, since the peak energies for most
|
|
|
|
|
# ions do not match with reference values without a correction term anyway.
|
|
|
|
|
max.channel <- which(data.long$counts == max(data.long$counts))
|
|
|
|
|
data.long$energy <- (data.long$channel * (data.long$Gain / data.long$LastChannel)) -
|
|
|
|
|
((max.channel / data.long$LastChannel) * data.long$Gain)
|
|
|
|
|
# Save the maxchannel to the returned dataframe
|
|
|
|
|
data.long$ZeroChannel <- rep(max.channel, n_rowsdata)
|
|
|
|
|
|
|
|
|
|
# Calculate energy from channel # this is no longer viable
|
|
|
|
|
#data.long$energy <- (data.long$channel * (data.long$Gain / data.long$LastChannel)) -
|
|
|
|
|
# ((24 / data.long$LastChannel) * data.long$Gain)
|
|
|
|
|
|
|
|
|
|
# Attach parameters to returned dataframe
|
|
|
|
|
attr(ff, "parameters") <- SMPdf
|
|
|
|
|
#
|
|
|
|
|
return(ff)
|
|
|
|
|
return(data.long)
|
|
|
|
|
}
|