xrdpkWrapper.R now correctly handles situations where file already exists,

override = TRUE, and run > 1. Previously only the latest run was actually saved,
although the created list had the right length.
Works correctly and as intended now, as far as I can tell.
master
Taha Ahmed 13 years ago
parent 3d4244f1fd
commit 70c39b9145

1
.gitignore vendored

@ -1,4 +1,5 @@
*.RData
*.Rdeprecated
*.Rhistory
*.Rhistory.save
*.ROLD

@ -63,7 +63,11 @@ matchpdf <- function(expcol, pdfrow) {
if (sum(rowSums(diff.indx)) == sum(colSums(diff.indx))) {
# Reset mtch
mtch <- list()
mtch <- list(csums = colSums(diff.indx), rsums = rowSums(diff.indx), expthth = expcol[colSums(diff.indx) != 0], pdfthth = pdfrow[rowSums(diff.indx) != 0], deltathth = expcol[colSums(diff.indx) != 0] - pdfrow[rowSums(diff.indx) != 0])
mtch <- list(csums = colSums(diff.indx),
rsums = rowSums(diff.indx),
expthth = expcol[colSums(diff.indx) != 0],
pdfthth = pdfrow[rowSums(diff.indx) != 0],
deltathth = expcol[colSums(diff.indx) != 0] - pdfrow[rowSums(diff.indx) != 0])
# List of 5
# $ csums : num - consisting of ones and zeroes. Shows you which positions of expcol matched.
# $ rsums : num - consisting of ones and zeroes. Shows you which positions of pdfrow matched.

@ -6,14 +6,15 @@ source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")
##################################################
muxd2df <- function(uxdfile) {
## Description:
## Reads UXD files with multiple ranges (converted using XCH v1.0)
## Reads UXD files with one or multiple ranges (converted using XCH v1.0)
## Extracts both data (thth, intensity) and parameters
## Also automatically calculates cps is counts are present, and vice versa
## Also automatically calculates cps if counts are present, and vice versa
## (note that this depends on specific strings in the UXD format).
## Usage:
## muxd2df(uxdfile)
## Arguments:
## uxdfile: text string with full path to UXD file
## containing single or multiple data ranges
## Value:
## Dataframe with the following columns:
## $ sampleid : chr
@ -50,10 +51,14 @@ muxd2df <- function(uxdfile) {
range.header.end.rows <- which(regexpr(range.header.end.rexp, f) == 1)
# Calculate number of ranges
ranges.total <- ifelse(length(range.header.start.rows) == length(range.header.end.rows), length(range.header.start.rows), NA)
ranges.total <-
ifelse(length(range.header.start.rows) == length(range.header.end.rows),
length(range.header.start.rows),
NA) #why would they not be equal?
if (is.na(ranges.total)) {
# Obviously something bad happened.
# Do something about it. echo an error message perhaps.
# But why would they not be equal?
}
@ -79,7 +84,15 @@ muxd2df <- function(uxdfile) {
# Data always start on the row after header end
range.data.start.rows <- range.header.end.rows + 1
# Data end rows precedes header with one row, except for the first range
range.data.end.rows <- c(range.header.start.rows[2:length(range.header.start.rows)] - 1, length(f))
# But only if data contained more than one range, obviously. Let's make the code check for that
if (ranges.total > 1) {
range.data.end.rows <- c(range.header.start.rows[2:length(range.header.start.rows)] - 1, length(f))
} else {
# Data in fact only contains one range
range.data.end.rows <- length(f)
}
####
# Extract data (as-is) and put it an list (by range)
data.raw <- list()
@ -113,7 +126,8 @@ muxd2df <- function(uxdfile) {
as.numeric(strsplit(headers.raw[[range]][which(regexpr(unname(header.param.rexp[param]),
headers.raw[[range]]) == 1)], "=")[[1]][2]))
}
names(data[[range]]) <- c("sampleid", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp))
names(data[[range]]) <-
c("sampleid", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp))
}
# Calculate the other of the pair counts <-> cps
@ -130,69 +144,11 @@ muxd2df <- function(uxdfile) {
# Return a unified dataframe
data.df <- data[[1]]
for (range in 2:ranges.total) {
data.df <- rbind(data.df, data[[range]])
if (ranges.total > 1) {
for (range in 2:ranges.total) {
data.df <- rbind(data.df, data[[range]])
}
}
return(data.df)
}
#### OLD VERSION - DEPRECATE
##################################################
################### muxd2df ######################
##################################################
muxd2df.old <- function(uxdfile, range.descriptor) {
# Function that reads an UXD file which contains several ranges
# (created in a programmed run, for example)
# Arguments
# :: uxdfile (filename with extension)
# :: range.descriptor (an array with as many elements as
# there are ranges in the uxdfile)
# Returns: dataframe with 3 columns
cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD
cdata <- "[0-9]" #regexpr matching one character of any digit
# Create filenames for the output # no longer used, return dataframe instead
#datafile <- paste(uxdfile,"-",range.descriptor,".data",sep="")
# Read the input multirange file
ufile <- file(uxdfile, "r")
f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
close(ufile)
# This way we identify data rows by looking for numeric characters.
#wh <- regexpr("[0-9]", f)
# This way we identify header rows
# Later we will assume that all other rows are data
wh <- regexpr(cchar, f)
mh <- wh[1:length(wh)] # this gives you the corresponding index vector
# the value of each element corresponds to the position of the regexp match.
# value = 1 means the first character of the row is cchar (row is header)
# value =-1 means no cchar occur on the row (row is data)
#length(mh[mh == -1]) #total number of datarows in uxdfile
#mh[mh > 1 | mh < 0] <- 0 #set all header-rows to zero (just to make things easier)
i <- seq(1, length(mh) - 1, 1)
j <- seq(2, length(mh), 1)
starts <- which(mh[i] == 1 & mh[j] != 1) + 1 #start indices
ends <- which(mh[i] != 1 & mh[j] == 1) #end indices, except the last
ends <- c(ends, length(mh)) #fixed the last index of ends
ff <- data.frame(NULL)
for (s in 1:length(range.descriptor)) {
zz <- textConnection(f[starts[s]:ends[s]], "r")
ff <- rbind(ff, data.frame(range.descriptor[s],
matrix(scan(zz, what = numeric()), ncol=2, byrow=T)))
close(zz)
}
names(ff) <- c("sampleid", "angle", "intensity")
# Return dataframe
ff
}

@ -1,63 +0,0 @@
##################################################
#################### uxd2df ######################
##################################################
uxd2df <- function(uxdfile) {
# Function for reading UXD files # Assumptions: data in two columns
# Args: uxdfile (filename with extension)
# Returns: dataframe with three columns
cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD
cdata <- "[0-9]" #regexpr matching one character of any digit
# A new file (datafile) containing only data will be created,
# extension ".data" appended to uxdfile
#datafile <- paste(uxdfile,".data",sep="")
ufile <- file(uxdfile, "r")
f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
close(ufile)
# This way we identify data rows by looking for numeric characters.
#wh <- regexpr("[0-9]", f)
# This way we identify header rows
# We assume that all other rows are data
wh <- regexpr(cchar, f)
mh <- wh[1:length(wh)] # this gives you the corresponding index vector
# the value of each element corresponds to the position of the regexp match.
# value = 1 means the first character of the row is cchar (row is header)
# value =-1 means no cchar occur on the row (row is data)
i <- seq(1, length(mh) - 1, 1)
j <- seq(2, length(mh), 1)
starts <- which(mh[i] == 1 & mh[j] != 1) + 1
ends <- length(mh)
f <- f[starts:ends]
rgxp.sampleid <- "[^/]*(?=\\.\\w*)" ## THIS REQUIRES perl=TRUE
# Regular expression that extracts the filename out of a full path.
# Matches and extracts everything from the last forward slash (assuming Unix slashes)
# up until a dot folllowed by an arbitrary number of alphanumeric characters.
sampleidmtch <- regexpr(rgxp.sampleid, uxdfile, perl=TRUE)
# Check that there was a match
if (sampleidmtch < 0) {
# -1 means no match
sampleid <- uxdfile
# If match was unsuccessful we use the argument as passed to this function as sampleid
}
sampleid <- substr(uxdfile, sampleidmtch, (sampleidmtch + attr(sampleidmtch, "match.length") - 1))
zz <- textConnection(f, "r")
ff <- data.frame(sampleid, matrix(scan(zz,
what = numeric()), ncol=2, byrow=T))
names(ff) <- c("sampleid", "angle", "intensity")
close(zz)
#zz <- file(datafile, "w") #open connection to datafile
#write.table(ff, file=datafile, row.names=F, sep=",")
#close(zz)
# Return dataframe
ff
}

@ -67,6 +67,7 @@ xrdpk <-
height = xrd.fit[[s]]$parpks[kernel, "height"],
area = xrd.fit[[s]]$parpks[kernel, "intens"],
fwhm = xrd.fit[[s]]$parpks[kernel, "FWHM"],
beta = xrd.fit[[s]]$parpks[kernel, "intens"] / xrd.fit[[s]]$parpks[kernel, "height"],
m = xrd.fit[[s]]$parpks[kernel, "m"],
accept = xrd.fit[[s]]$accept))
xrd.fit.fitpk <- rbind(xrd.fit.fitpk,

@ -39,22 +39,39 @@ xrdpkWrapper <-
return(xrdres)
} else {
# File does not exist
# OR override is TRUE
print("... Started else-clause 1")
if (!exists("xrdres")) {
# If file does not exist at all, run all necessary code to re-create it
if (!file.exists(xrddatafile)) {
xrdres <- list()
print("... xrdres list created")
}
# Need to call xrdpk() and save its results to file as above
xrdres[[run]] <- xrdpk(data.exp,
kerpk = kerpk,
fitmaxiter = fitmaxiter,
gam = gam,
scl.factor = scl.factor,
maxwdth = maxwdth)
save(xrdres, file = xrddatafile)
xrdres[[run]] <-
xrdpk(data.exp,
kerpk = kerpk,
fitmaxiter = fitmaxiter,
gam = gam,
scl.factor = scl.factor,
maxwdth = maxwdth)
save(xrdres, file = xrddatafile)
} else {
# File already exists, but override is TRUE
load(file = xrddatafile)
xrdres[[run]] <-
xrdpk(data.exp,
kerpk = kerpk,
fitmaxiter = fitmaxiter,
gam = gam,
scl.factor = scl.factor,
maxwdth = maxwdth)
save(xrdres, file = xrddatafile)
}
print("... Ended else-clause 1")

@ -0,0 +1,6 @@
# Function that rounds UP to the nearest interval specified by "nearest"
# http://stackoverflow.com/questions/6461209/how-to-round-up-to-the-nearest-10-or-100-or-x
roundup <- function(x, nearest=1000) {
ceiling(max(x+10^-9)/nearest + 1/nearest)*nearest
}
Loading…
Cancel
Save