From 70c39b914529874f3de957f2d42e69aec19afdb1 Mon Sep 17 00:00:00 2001 From: Taha Ahmed Date: Sat, 24 Sep 2011 12:30:37 +0200 Subject: [PATCH] xrdpkWrapper.R now correctly handles situations where file already exists, override = TRUE, and run > 1. Previously only the latest run was actually saved, although the created list had the right length. Works correctly and as intended now, as far as I can tell. --- .gitignore | 1 + XRD-TF/matchpdf.R | 6 ++- XRD-TF/muxd2df.R | 90 +++++++++++-------------------------------- XRD-TF/uxd2df.R | 63 ------------------------------ XRD-TF/xrdpk.R | 1 + XRD-TF/xrdpkWrapper.R | 39 +++++++++++++------ common/roundup.R | 6 +++ 7 files changed, 64 insertions(+), 142 deletions(-) delete mode 100644 XRD-TF/uxd2df.R create mode 100644 common/roundup.R diff --git a/.gitignore b/.gitignore index 57c2127..d6969e5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.RData +*.Rdeprecated *.Rhistory *.Rhistory.save *.ROLD diff --git a/XRD-TF/matchpdf.R b/XRD-TF/matchpdf.R index 6eb36dc..54eef60 100644 --- a/XRD-TF/matchpdf.R +++ b/XRD-TF/matchpdf.R @@ -63,7 +63,11 @@ matchpdf <- function(expcol, pdfrow) { if (sum(rowSums(diff.indx)) == sum(colSums(diff.indx))) { # Reset mtch mtch <- list() - mtch <- list(csums = colSums(diff.indx), rsums = rowSums(diff.indx), expthth = expcol[colSums(diff.indx) != 0], pdfthth = pdfrow[rowSums(diff.indx) != 0], deltathth = expcol[colSums(diff.indx) != 0] - pdfrow[rowSums(diff.indx) != 0]) + mtch <- list(csums = colSums(diff.indx), + rsums = rowSums(diff.indx), + expthth = expcol[colSums(diff.indx) != 0], + pdfthth = pdfrow[rowSums(diff.indx) != 0], + deltathth = expcol[colSums(diff.indx) != 0] - pdfrow[rowSums(diff.indx) != 0]) # List of 5 # $ csums : num - consisting of ones and zeroes. Shows you which positions of expcol matched. # $ rsums : num - consisting of ones and zeroes. Shows you which positions of pdfrow matched. diff --git a/XRD-TF/muxd2df.R b/XRD-TF/muxd2df.R index a67439a..9015530 100644 --- a/XRD-TF/muxd2df.R +++ b/XRD-TF/muxd2df.R @@ -6,14 +6,15 @@ source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R") ################################################## muxd2df <- function(uxdfile) { ## Description: - ## Reads UXD files with multiple ranges (converted using XCH v1.0) + ## Reads UXD files with one or multiple ranges (converted using XCH v1.0) ## Extracts both data (thth, intensity) and parameters - ## Also automatically calculates cps is counts are present, and vice versa + ## Also automatically calculates cps if counts are present, and vice versa ## (note that this depends on specific strings in the UXD format). ## Usage: ## muxd2df(uxdfile) ## Arguments: ## uxdfile: text string with full path to UXD file + ## containing single or multiple data ranges ## Value: ## Dataframe with the following columns: ## $ sampleid : chr @@ -50,10 +51,14 @@ muxd2df <- function(uxdfile) { range.header.end.rows <- which(regexpr(range.header.end.rexp, f) == 1) # Calculate number of ranges - ranges.total <- ifelse(length(range.header.start.rows) == length(range.header.end.rows), length(range.header.start.rows), NA) + ranges.total <- + ifelse(length(range.header.start.rows) == length(range.header.end.rows), + length(range.header.start.rows), + NA) #why would they not be equal? if (is.na(ranges.total)) { # Obviously something bad happened. # Do something about it. echo an error message perhaps. + # But why would they not be equal? } @@ -79,7 +84,15 @@ muxd2df <- function(uxdfile) { # Data always start on the row after header end range.data.start.rows <- range.header.end.rows + 1 # Data end rows precedes header with one row, except for the first range - range.data.end.rows <- c(range.header.start.rows[2:length(range.header.start.rows)] - 1, length(f)) + # But only if data contained more than one range, obviously. Let's make the code check for that + if (ranges.total > 1) { + range.data.end.rows <- c(range.header.start.rows[2:length(range.header.start.rows)] - 1, length(f)) + } else { + # Data in fact only contains one range + range.data.end.rows <- length(f) + } + + #### # Extract data (as-is) and put it an list (by range) data.raw <- list() @@ -113,7 +126,8 @@ muxd2df <- function(uxdfile) { as.numeric(strsplit(headers.raw[[range]][which(regexpr(unname(header.param.rexp[param]), headers.raw[[range]]) == 1)], "=")[[1]][2])) } - names(data[[range]]) <- c("sampleid", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp)) + names(data[[range]]) <- + c("sampleid", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp)) } # Calculate the other of the pair counts <-> cps @@ -130,69 +144,11 @@ muxd2df <- function(uxdfile) { # Return a unified dataframe data.df <- data[[1]] - for (range in 2:ranges.total) { - data.df <- rbind(data.df, data[[range]]) + if (ranges.total > 1) { + for (range in 2:ranges.total) { + data.df <- rbind(data.df, data[[range]]) + } } return(data.df) } - - - - - -#### OLD VERSION - DEPRECATE -################################################## -################### muxd2df ###################### -################################################## -muxd2df.old <- function(uxdfile, range.descriptor) { - # Function that reads an UXD file which contains several ranges - # (created in a programmed run, for example) - # Arguments - # :: uxdfile (filename with extension) - # :: range.descriptor (an array with as many elements as - # there are ranges in the uxdfile) - # Returns: dataframe with 3 columns - - cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD - cdata <- "[0-9]" #regexpr matching one character of any digit - # Create filenames for the output # no longer used, return dataframe instead - #datafile <- paste(uxdfile,"-",range.descriptor,".data",sep="") - - # Read the input multirange file - ufile <- file(uxdfile, "r") - f <- readLines(ufile, n=-1) #read _all_ lines from UXD file - close(ufile) - - # This way we identify data rows by looking for numeric characters. - #wh <- regexpr("[0-9]", f) - # This way we identify header rows - # Later we will assume that all other rows are data - wh <- regexpr(cchar, f) - - mh <- wh[1:length(wh)] # this gives you the corresponding index vector - # the value of each element corresponds to the position of the regexp match. - # value = 1 means the first character of the row is cchar (row is header) - # value =-1 means no cchar occur on the row (row is data) - - #length(mh[mh == -1]) #total number of datarows in uxdfile - #mh[mh > 1 | mh < 0] <- 0 #set all header-rows to zero (just to make things easier) - - i <- seq(1, length(mh) - 1, 1) - j <- seq(2, length(mh), 1) - starts <- which(mh[i] == 1 & mh[j] != 1) + 1 #start indices - ends <- which(mh[i] != 1 & mh[j] == 1) #end indices, except the last - ends <- c(ends, length(mh)) #fixed the last index of ends - - ff <- data.frame(NULL) - for (s in 1:length(range.descriptor)) { - zz <- textConnection(f[starts[s]:ends[s]], "r") - ff <- rbind(ff, data.frame(range.descriptor[s], - matrix(scan(zz, what = numeric()), ncol=2, byrow=T))) - close(zz) - } - names(ff) <- c("sampleid", "angle", "intensity") - - # Return dataframe - ff -} diff --git a/XRD-TF/uxd2df.R b/XRD-TF/uxd2df.R deleted file mode 100644 index 8d6fd5e..0000000 --- a/XRD-TF/uxd2df.R +++ /dev/null @@ -1,63 +0,0 @@ -################################################## -#################### uxd2df ###################### -################################################## -uxd2df <- function(uxdfile) { - # Function for reading UXD files # Assumptions: data in two columns - # Args: uxdfile (filename with extension) - # Returns: dataframe with three columns - - cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD - cdata <- "[0-9]" #regexpr matching one character of any digit - - # A new file (datafile) containing only data will be created, - # extension ".data" appended to uxdfile - #datafile <- paste(uxdfile,".data",sep="") - - ufile <- file(uxdfile, "r") - f <- readLines(ufile, n=-1) #read _all_ lines from UXD file - close(ufile) - - # This way we identify data rows by looking for numeric characters. - #wh <- regexpr("[0-9]", f) - # This way we identify header rows - # We assume that all other rows are data - wh <- regexpr(cchar, f) - - mh <- wh[1:length(wh)] # this gives you the corresponding index vector - # the value of each element corresponds to the position of the regexp match. - # value = 1 means the first character of the row is cchar (row is header) - # value =-1 means no cchar occur on the row (row is data) - - i <- seq(1, length(mh) - 1, 1) - j <- seq(2, length(mh), 1) - - starts <- which(mh[i] == 1 & mh[j] != 1) + 1 - ends <- length(mh) - f <- f[starts:ends] - - rgxp.sampleid <- "[^/]*(?=\\.\\w*)" ## THIS REQUIRES perl=TRUE - # Regular expression that extracts the filename out of a full path. - # Matches and extracts everything from the last forward slash (assuming Unix slashes) - # up until a dot folllowed by an arbitrary number of alphanumeric characters. - sampleidmtch <- regexpr(rgxp.sampleid, uxdfile, perl=TRUE) - # Check that there was a match - if (sampleidmtch < 0) { - # -1 means no match - sampleid <- uxdfile - # If match was unsuccessful we use the argument as passed to this function as sampleid - } - sampleid <- substr(uxdfile, sampleidmtch, (sampleidmtch + attr(sampleidmtch, "match.length") - 1)) - - zz <- textConnection(f, "r") - ff <- data.frame(sampleid, matrix(scan(zz, - what = numeric()), ncol=2, byrow=T)) - names(ff) <- c("sampleid", "angle", "intensity") - close(zz) - - #zz <- file(datafile, "w") #open connection to datafile - #write.table(ff, file=datafile, row.names=F, sep=",") - #close(zz) - - # Return dataframe - ff -} diff --git a/XRD-TF/xrdpk.R b/XRD-TF/xrdpk.R index 7d1f2f3..0401248 100644 --- a/XRD-TF/xrdpk.R +++ b/XRD-TF/xrdpk.R @@ -67,6 +67,7 @@ xrdpk <- height = xrd.fit[[s]]$parpks[kernel, "height"], area = xrd.fit[[s]]$parpks[kernel, "intens"], fwhm = xrd.fit[[s]]$parpks[kernel, "FWHM"], + beta = xrd.fit[[s]]$parpks[kernel, "intens"] / xrd.fit[[s]]$parpks[kernel, "height"], m = xrd.fit[[s]]$parpks[kernel, "m"], accept = xrd.fit[[s]]$accept)) xrd.fit.fitpk <- rbind(xrd.fit.fitpk, diff --git a/XRD-TF/xrdpkWrapper.R b/XRD-TF/xrdpkWrapper.R index c6fb0f6..2df5f1b 100644 --- a/XRD-TF/xrdpkWrapper.R +++ b/XRD-TF/xrdpkWrapper.R @@ -39,22 +39,39 @@ xrdpkWrapper <- return(xrdres) } else { + # File does not exist + # OR override is TRUE print("... Started else-clause 1") - if (!exists("xrdres")) { + # If file does not exist at all, run all necessary code to re-create it + if (!file.exists(xrddatafile)) { xrdres <- list() print("... xrdres list created") - } - - # Need to call xrdpk() and save its results to file as above - xrdres[[run]] <- xrdpk(data.exp, - kerpk = kerpk, - fitmaxiter = fitmaxiter, - gam = gam, - scl.factor = scl.factor, - maxwdth = maxwdth) - save(xrdres, file = xrddatafile) + + xrdres[[run]] <- + xrdpk(data.exp, + kerpk = kerpk, + fitmaxiter = fitmaxiter, + gam = gam, + scl.factor = scl.factor, + maxwdth = maxwdth) + + save(xrdres, file = xrddatafile) + } else { + # File already exists, but override is TRUE + load(file = xrddatafile) + + xrdres[[run]] <- + xrdpk(data.exp, + kerpk = kerpk, + fitmaxiter = fitmaxiter, + gam = gam, + scl.factor = scl.factor, + maxwdth = maxwdth) + + save(xrdres, file = xrddatafile) + } print("... Ended else-clause 1") diff --git a/common/roundup.R b/common/roundup.R new file mode 100644 index 0000000..21c02ca --- /dev/null +++ b/common/roundup.R @@ -0,0 +1,6 @@ +# Function that rounds UP to the nearest interval specified by "nearest" +# http://stackoverflow.com/questions/6461209/how-to-round-up-to-the-nearest-10-or-100-or-x + +roundup <- function(x, nearest=1000) { + ceiling(max(x+10^-9)/nearest + 1/nearest)*nearest +} \ No newline at end of file