|
|
|
source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")
|
|
|
|
|
|
|
|
|
|
|
|
##################################################
|
|
|
|
################### muxd2df ######################
|
|
|
|
##################################################
|
|
|
|
muxd2df <- function(uxdfile) {
|
|
|
|
## Description:
|
|
|
|
## Reads UXD files with multiple ranges (converted using XCH v1.0)
|
|
|
|
## Extracts both data (thth, intensity) and parameters
|
|
|
|
## Also automatically calculates cps is counts are present, and vice versa
|
|
|
|
## (note that this depends on specific strings in the UXD format).
|
|
|
|
## Usage:
|
|
|
|
## muxd2df(uxdfile)
|
|
|
|
## Arguments:
|
|
|
|
## uxdfile: text string with full path to UXD file
|
|
|
|
## Value:
|
|
|
|
## Dataframe with the following columns:
|
|
|
|
## $ sampleid : chr
|
|
|
|
## $ thth : num
|
|
|
|
## $ counts (or cps) : num
|
|
|
|
## $ steptime : num
|
|
|
|
## $ stepsize : num
|
|
|
|
## $ theta : num
|
|
|
|
## $ khi : num
|
|
|
|
## $ phi : num
|
|
|
|
## $ x : num
|
|
|
|
## $ y : num
|
|
|
|
## $ z : num
|
|
|
|
## $ divergence : num
|
|
|
|
## $ antiscatter : num
|
|
|
|
## $ cps (or counts) : num
|
|
|
|
#
|
|
|
|
range.header.start.rexp <- "^; \\(Data for Range" #regexp
|
|
|
|
range.header.end.rexp <- "^_2THETA[^=]" #regexp
|
|
|
|
|
|
|
|
# Read the input multirange file
|
|
|
|
ufile <- file(uxdfile, "r")
|
|
|
|
# Note that readLines apparently completely skips empty lines.
|
|
|
|
# In that case line numbers do not match between source and f.
|
|
|
|
f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
|
|
|
|
close(ufile)
|
|
|
|
|
|
|
|
# Fetch a sampleid for the current job
|
|
|
|
sampleid <- ProvideSampleId(uxdfile)
|
|
|
|
|
|
|
|
# Look for header start rows
|
|
|
|
range.header.start.rows <- which(regexpr(range.header.start.rexp, f) == 1)
|
|
|
|
# Look for header end rows
|
|
|
|
range.header.end.rows <- which(regexpr(range.header.end.rexp, f) == 1)
|
|
|
|
|
|
|
|
# Calculate number of ranges
|
|
|
|
ranges.total <- ifelse(length(range.header.start.rows) == length(range.header.end.rows), length(range.header.start.rows), NA)
|
|
|
|
if (is.na(ranges.total)) {
|
|
|
|
# Obviously something bad happened.
|
|
|
|
# Do something about it. echo an error message perhaps.
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
# Determine whether we have COUNTS or COUNTS PER SECOND in current UXD-file
|
|
|
|
# Assuming it is the same for all ranges in this job (a safe assumption).
|
|
|
|
if (f[range.header.end.rows][1] == "_2THETACOUNTS") {
|
|
|
|
# we got counts
|
|
|
|
counts.flag <- TRUE
|
|
|
|
cps.flag <- FALSE
|
|
|
|
}
|
|
|
|
if (f[range.header.end.rows][1] == "_2THETACPS") {
|
|
|
|
# we got counts per second
|
|
|
|
counts.flag <-FALSE
|
|
|
|
cps.flag <- TRUE
|
|
|
|
}
|
|
|
|
|
|
|
|
# Extract headers (as-is) and put them in a list (by range)
|
|
|
|
headers.raw <- list()
|
|
|
|
for (range in 1:ranges.total) {
|
|
|
|
headers.raw[[range]] <- f[range.header.start.rows[range]:range.header.end.rows[range]]
|
|
|
|
}
|
|
|
|
|
|
|
|
# Data always start on the row after header end
|
|
|
|
range.data.start.rows <- range.header.end.rows + 1
|
|
|
|
# Data end rows precedes header with one row, except for the first range
|
|
|
|
range.data.end.rows <- c(range.header.start.rows[2:length(range.header.start.rows)] - 1, length(f))
|
|
|
|
|
|
|
|
# Extract data (as-is) and put it an list (by range)
|
|
|
|
data.raw <- list()
|
|
|
|
for (range in 1:ranges.total) {
|
|
|
|
data.raw[[range]] <- f[range.data.start.rows[range]:range.data.end.rows[range]]
|
|
|
|
}
|
|
|
|
|
|
|
|
# Specify header parameters to include in dataframe
|
|
|
|
header.param.rexp <- c(steptime = "^_STEPTIME=",
|
|
|
|
stepsize = "^_STEPSIZE=",
|
|
|
|
theta = "^_THETA=",
|
|
|
|
khi = "^_KHI=",
|
|
|
|
phi = "^_PHI=",
|
|
|
|
x = "^_X=",
|
|
|
|
y = "^_Y=",
|
|
|
|
z = "^_Z=",
|
|
|
|
divergence = "^_DIVERGENCE=",
|
|
|
|
antiscatter = "^_ANTISCATTER=")
|
|
|
|
|
|
|
|
# Collect data and header parameters in dataframes, by range in a list
|
|
|
|
data <- list()
|
|
|
|
for (range in 1:ranges.total) {
|
|
|
|
zz <- textConnection(data.raw[[range]], "r")
|
|
|
|
data[[range]] <- data.frame(stringsAsFactors = F,
|
|
|
|
sampleid,
|
|
|
|
matrix(scan(zz, what = numeric()), ncol = 2, byrow = T))
|
|
|
|
close(zz)
|
|
|
|
# Collect header parameters
|
|
|
|
for (param in 1:length(header.param.rexp)) {
|
|
|
|
data[[range]] <- cbind(data[[range]],
|
|
|
|
as.numeric(strsplit(headers.raw[[range]][which(regexpr(unname(header.param.rexp[param]),
|
|
|
|
headers.raw[[range]]) == 1)], "=")[[1]][2]))
|
|
|
|
}
|
|
|
|
names(data[[range]]) <- c("sampleid", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp))
|
|
|
|
}
|
|
|
|
|
|
|
|
# Calculate the other of the pair counts <-> cps
|
|
|
|
if (counts.flag) {
|
|
|
|
for (range in 1:ranges.total) {
|
|
|
|
data[[range]] <- cbind(data[[range]], cps = data[[range]]$counts / data[[range]]$steptime)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (cps.flag) {
|
|
|
|
for (range in 1:ranges.total) {
|
|
|
|
data[[range]] <- cbind(data[[range]], counts = data[[range]]$cps * data[[range]]$steptime)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
# Return a unified dataframe
|
|
|
|
data.df <- data[[1]]
|
|
|
|
for (range in 2:ranges.total) {
|
|
|
|
data.df <- rbind(data.df, data[[range]])
|
|
|
|
}
|
|
|
|
|
|
|
|
return(data.df)
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#### OLD VERSION - DEPRECATE
|
|
|
|
##################################################
|
|
|
|
################### muxd2df ######################
|
|
|
|
##################################################
|
|
|
|
muxd2df.old <- function(uxdfile, range.descriptor) {
|
|
|
|
# Function that reads an UXD file which contains several ranges
|
|
|
|
# (created in a programmed run, for example)
|
|
|
|
# Arguments
|
|
|
|
# :: uxdfile (filename with extension)
|
|
|
|
# :: range.descriptor (an array with as many elements as
|
|
|
|
# there are ranges in the uxdfile)
|
|
|
|
# Returns: dataframe with 3 columns
|
|
|
|
|
|
|
|
cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD
|
|
|
|
cdata <- "[0-9]" #regexpr matching one character of any digit
|
|
|
|
# Create filenames for the output # no longer used, return dataframe instead
|
|
|
|
#datafile <- paste(uxdfile,"-",range.descriptor,".data",sep="")
|
|
|
|
|
|
|
|
# Read the input multirange file
|
|
|
|
ufile <- file(uxdfile, "r")
|
|
|
|
f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
|
|
|
|
close(ufile)
|
|
|
|
|
|
|
|
# This way we identify data rows by looking for numeric characters.
|
|
|
|
#wh <- regexpr("[0-9]", f)
|
|
|
|
# This way we identify header rows
|
|
|
|
# Later we will assume that all other rows are data
|
|
|
|
wh <- regexpr(cchar, f)
|
|
|
|
|
|
|
|
mh <- wh[1:length(wh)] # this gives you the corresponding index vector
|
|
|
|
# the value of each element corresponds to the position of the regexp match.
|
|
|
|
# value = 1 means the first character of the row is cchar (row is header)
|
|
|
|
# value =-1 means no cchar occur on the row (row is data)
|
|
|
|
|
|
|
|
#length(mh[mh == -1]) #total number of datarows in uxdfile
|
|
|
|
#mh[mh > 1 | mh < 0] <- 0 #set all header-rows to zero (just to make things easier)
|
|
|
|
|
|
|
|
i <- seq(1, length(mh) - 1, 1)
|
|
|
|
j <- seq(2, length(mh), 1)
|
|
|
|
starts <- which(mh[i] == 1 & mh[j] != 1) + 1 #start indices
|
|
|
|
ends <- which(mh[i] != 1 & mh[j] == 1) #end indices, except the last
|
|
|
|
ends <- c(ends, length(mh)) #fixed the last index of ends
|
|
|
|
|
|
|
|
ff <- data.frame(NULL)
|
|
|
|
for (s in 1:length(range.descriptor)) {
|
|
|
|
zz <- textConnection(f[starts[s]:ends[s]], "r")
|
|
|
|
ff <- rbind(ff, data.frame(range.descriptor[s],
|
|
|
|
matrix(scan(zz, what = numeric()), ncol=2, byrow=T)))
|
|
|
|
close(zz)
|
|
|
|
}
|
|
|
|
names(ff) <- c("sampleid", "angle", "intensity")
|
|
|
|
|
|
|
|
# Return dataframe
|
|
|
|
ff
|
|
|
|
}
|