xrdpkWrapper.R now correctly handles situations where file already exists,

override = TRUE, and run > 1. Previously only the latest run was actually saved, although the created list had the right length. Works correctly and as intended now, as far as I can tell.
15 years ago · 70c39b9145
parent 3d4244f1fd
commit 70c39b9145
7 changed files with 64 additions and 142 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,4 +1,5 @@
 *.RData
 *.Rdeprecated
 *.Rhistory
 *.Rhistory.save
 *.ROLD
--- a/XRD-TF/matchpdf.R
+++ b/XRD-TF/matchpdf.R
@ -63,7 +63,11 @@ matchpdf <- function(expcol, pdfrow) {
   if (sum(rowSums(diff.indx)) == sum(colSums(diff.indx))) {
      # Reset mtch
      mtch <- list()
-      mtch <- list(csums = colSums(diff.indx), rsums = rowSums(diff.indx), expthth = expcol[colSums(diff.indx) != 0], pdfthth = pdfrow[rowSums(diff.indx) != 0], deltathth = expcol[colSums(diff.indx) != 0] - pdfrow[rowSums(diff.indx) != 0])
+      mtch <- list(csums = colSums(diff.indx), 
                   rsums = rowSums(diff.indx), 
                   expthth = expcol[colSums(diff.indx) != 0], 
                   pdfthth = pdfrow[rowSums(diff.indx) != 0], 
                   deltathth = expcol[colSums(diff.indx) != 0] - pdfrow[rowSums(diff.indx) != 0])
      # List of 5
      # $ csums     : num - consisting of ones and zeroes. Shows you which positions of expcol matched.
      # $ rsums     : num - consisting of ones and zeroes. Shows you which positions of pdfrow matched.
--- a/XRD-TF/muxd2df.R
+++ b/XRD-TF/muxd2df.R
@ -6,14 +6,15 @@ source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")
 ##################################################
 muxd2df <- function(uxdfile) {
   ## Description:
-   ##   Reads UXD files with multiple ranges (converted using XCH v1.0)
+   ##   Reads UXD files with one or multiple ranges (converted using XCH v1.0)
   ##   Extracts both data (thth, intensity) and parameters
-   ##   Also automatically calculates cps is counts are present, and vice versa
+   ##   Also automatically calculates cps if counts are present, and vice versa
   ##   (note that this depends on specific strings in the UXD format).
   ## Usage:
   ##   muxd2df(uxdfile)
   ## Arguments:
   ##   uxdfile: text string with full path to UXD file
   ##            containing single or multiple data ranges
   ## Value:
   ##   Dataframe with the following columns:
   ##   $ sampleid        : chr
@ -50,10 +51,14 @@ muxd2df <- function(uxdfile) {
   range.header.end.rows <- which(regexpr(range.header.end.rexp, f) == 1)
   # Calculate number of ranges
-   ranges.total <- ifelse(length(range.header.start.rows) == length(range.header.end.rows), length(range.header.start.rows), NA)
+   ranges.total <- 
      ifelse(length(range.header.start.rows) == length(range.header.end.rows), 
             length(range.header.start.rows),
             NA) #why would they not be equal?
   if (is.na(ranges.total)) {
      # Obviously something bad happened.
      # Do something about it. echo an error message perhaps.
      # But why would they not be equal?
   }
@ -79,7 +84,15 @@ muxd2df <- function(uxdfile) {
   # Data always start on the row after header end
   range.data.start.rows <- range.header.end.rows + 1
   # Data end rows precedes header with one row, except for the first range
-   range.data.end.rows <- c(range.header.start.rows[2:length(range.header.start.rows)] - 1, length(f))
+   # But only if data contained more than one range, obviously. Let's make the code check for that
   if (ranges.total > 1) {
      range.data.end.rows <- c(range.header.start.rows[2:length(range.header.start.rows)] - 1, length(f))
   } else {
      # Data in fact only contains one range
      range.data.end.rows <- length(f)
   }
   ####
   # Extract data (as-is) and put it in a list (by range)
   data.raw <- list()
@ -113,7 +126,8 @@ muxd2df <- function(uxdfile) {
            as.numeric(strsplit(headers.raw[[range]][which(regexpr(unname(header.param.rexp[param]), 
               headers.raw[[range]]) == 1)], "=")[[1]][2]))
      }
-      names(data[[range]]) <- c("sampleid", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp))
+      names(data[[range]]) <- 
         c("sampleid", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp))
   }
   # Calculate the other of the pair counts <-> cps
@ -130,69 +144,11 @@ muxd2df <- function(uxdfile) {
   # Return a unified dataframe
   data.df <- data[[1]]
-   for (range in 2:ranges.total) {
+   if (ranges.total > 1) {
-      data.df <- rbind(data.df, data[[range]])
+      for (range in 2:ranges.total) {
         data.df <- rbind(data.df, data[[range]])
      }
   }
   return(data.df)
 }
 #### OLD VERSION - DEPRECATE
 ##################################################
 ################### muxd2df ######################
 ##################################################
 muxd2df.old <- function(uxdfile, range.descriptor) {
   ## Description:
   ##   DEPRECATED reader for UXD files which contain several ranges
   ##   (created in a programmed run, for example).
   ##   Rows whose first character is ';' or '_' are treated as header rows,
   ##   all other rows are treated as two-column numeric data.
   ## Usage:
   ##   muxd2df.old(uxdfile, range.descriptor)
   ## Arguments:
   ##   uxdfile: filename with extension
   ##   range.descriptor: an array with as many elements as
   ##                     there are ranges in the uxdfile
   ## Value:
   ##   Dataframe with 3 columns:
   ##   $ sampleid  : the element of range.descriptor belonging to the range
   ##   $ angle     : num
   ##   $ intensity : num
   ##   An empty dataframe (same columns) if range.descriptor is empty.
   cchar <- "[;_]" # regexp matching the comment characters used in Bruker's UXD
   # Nothing requested, nothing to read
   if (length(range.descriptor) == 0) {
      return(data.frame(sampleid = character(0),
                        angle = numeric(0),
                        intensity = numeric(0),
                        stringsAsFactors = FALSE))
   }
   # readLines() opens and closes the connection itself when given a path
   f <- readLines(uxdfile, n = -1) # read _all_ lines from UXD file
   # regexpr() gives the position of the first comment character on each row:
   # position 1 means the row is a header, anything else means data
   is.header <- as.vector(regexpr(cchar, f)) == 1
   n <- length(is.header)
   # A range starts on the row after a header -> data transition ...
   starts <- which(is.header[-n] & !is.header[-1]) + 1
   # ... and ends on the row before a data -> header transition;
   # the last range ends on the last row of the file
   ends <- c(which(!is.header[-n] & is.header[-1]), n)
   if (length(range.descriptor) > length(starts)) {
      stop("range.descriptor has ", length(range.descriptor),
           " elements, but only ", length(starts),
           " data ranges were found in ", uxdfile, call. = FALSE)
   }
   # Parse each requested range separately, then bind them in one go
   # (avoids growing the dataframe inside a loop)
   ranges <- lapply(seq_along(range.descriptor), function(s) {
      zz <- textConnection(f[starts[s]:ends[s]], "r")
      on.exit(close(zz)) # close the connection even if scan() fails
      data.frame(range.descriptor[s],
                 matrix(scan(zz, what = numeric()), ncol = 2, byrow = TRUE))
   })
   ff <- do.call(rbind, ranges)
   names(ff) <- c("sampleid", "angle", "intensity")
   # Return dataframe
   ff
 }
--- a/XRD-TF/uxd2df.R
+++ b/XRD-TF/uxd2df.R
@ -1,63 +0,0 @@
 ##################################################
 #################### uxd2df ######################
 ##################################################
 uxd2df <- function(uxdfile) {
   ## Description:
   ##   Reads a single-range UXD file with data in two columns.
   ##   Rows whose first character is ';' or '_' are treated as header rows.
   ##   Everything from the first header -> data transition to the end of
   ##   the file is parsed as numeric data.
   ## Usage:
   ##   uxd2df(uxdfile)
   ## Arguments:
   ##   uxdfile: filename with extension
   ## Value:
   ##   Dataframe with three columns:
   ##   $ sampleid  : filename stripped of path and extension, or uxdfile
   ##                 exactly as passed if that could not be extracted
   ##   $ angle     : num
   ##   $ intensity : num
   cchar <- "[;_]" # regexp matching the comment characters used in Bruker's UXD
   # readLines() opens and closes the connection itself when given a path
   f <- readLines(uxdfile, n = -1) # read _all_ lines from UXD file
   # regexpr() gives the position of the first comment character on each row:
   # position 1 means the row is a header, anything else means data
   is.header <- as.vector(regexpr(cchar, f)) == 1
   n <- length(is.header)
   # Data start on the row after a header -> data transition
   starts <- which(is.header[-n] & !is.header[-1]) + 1
   if (length(starts) == 0) {
      stop("No header row followed by a data row found in ", uxdfile,
           call. = FALSE)
   }
   # Only the first transition is used: this function reads a single range
   f <- f[starts[1]:n]
   rgxp.sampleid <- "[^/]*(?=\\.\\w*)" ## THIS REQUIRES perl=TRUE
   # Regular expression that extracts the filename out of a full path.
   # Matches a run of non-slash characters (assuming Unix slashes)
   # up until a dot followed by an arbitrary number of alphanumeric characters.
   sampleidmtch <- regexpr(rgxp.sampleid, uxdfile, perl = TRUE)
   if (sampleidmtch < 0) {
      # -1 means no match: use the argument as passed to this function
      sampleid <- uxdfile
   } else {
      # The substring must only be taken on a successful match, otherwise
      # it would replace the fallback above with an empty string
      sampleid <- substr(uxdfile, sampleidmtch,
                         sampleidmtch + attr(sampleidmtch, "match.length") - 1)
   }
   zz <- textConnection(f, "r")
   on.exit(close(zz), add = TRUE) # close the connection even if scan() fails
   ff <- data.frame(sampleid,
                    matrix(scan(zz, what = numeric()), ncol = 2, byrow = TRUE))
   names(ff) <- c("sampleid", "angle", "intensity")
   # Return dataframe
   ff
 }
--- a/XRD-TF/xrdpk.R
+++ b/XRD-TF/xrdpk.R
@ -67,6 +67,7 @@ xrdpk <-
                       height = xrd.fit[[s]]$parpks[kernel, "height"],
                       area   = xrd.fit[[s]]$parpks[kernel, "intens"],
                       fwhm   = xrd.fit[[s]]$parpks[kernel, "FWHM"],
                       beta   = xrd.fit[[s]]$parpks[kernel, "intens"] / xrd.fit[[s]]$parpks[kernel, "height"],
                       m      = xrd.fit[[s]]$parpks[kernel, "m"],
                       accept = xrd.fit[[s]]$accept))
         xrd.fit.fitpk <- rbind(xrd.fit.fitpk, 
--- a/XRD-TF/xrdpkWrapper.R
+++ b/XRD-TF/xrdpkWrapper.R
@ -39,22 +39,39 @@ xrdpkWrapper <-
      return(xrdres)
   } else {
      # File does not exist
      # OR override is TRUE
      print("... Started else-clause 1")
-      if (!exists("xrdres")) {
+      # If file does not exist at all, run all necessary code to re-create it
      if (!file.exists(xrddatafile)) {
         xrdres <- list()
         print("... xrdres list created")
-      }   
+         
-      
+         xrdres[[run]] <- 
-      # Need to call xrdpk() and save its results to file as above
+            xrdpk(data.exp, 
-      xrdres[[run]] <- xrdpk(data.exp, 
+                  kerpk = kerpk, 
-                             kerpk = kerpk, 
+                  fitmaxiter = fitmaxiter, 
-                             fitmaxiter = fitmaxiter, 
+                  gam = gam, 
-                             gam = gam, 
+                  scl.factor = scl.factor,
-                             scl.factor = scl.factor,
+                  maxwdth = maxwdth)
-                             maxwdth = maxwdth)
+         
-      save(xrdres, file = xrddatafile)
+         save(xrdres, file = xrddatafile)
      } else {
         # File already exists, but override is TRUE
         load(file = xrddatafile)
         xrdres[[run]] <- 
            xrdpk(data.exp, 
                  kerpk = kerpk, 
                  fitmaxiter = fitmaxiter, 
                  gam = gam, 
                  scl.factor = scl.factor,
                  maxwdth = maxwdth)
         save(xrdres, file = xrddatafile)
      }
      print("... Ended else-clause 1")
--- a/common/roundup.R
+++ b/common/roundup.R
@ -0,0 +1,6 @@
 # Rounds the largest value in x UP to a multiple of "nearest".
 # Always returns a single number, whatever the length of x. The result is in
 # effect the smallest multiple of "nearest" that is >= max(x) + 1, so a value
 # that already sits on a multiple is pushed up to the next multiple.
 # http://stackoverflow.com/questions/6461209/how-to-round-up-to-the-nearest-10-or-100-or-x
 roundup <- function(x, nearest = 1000) {
   # Largest input value, nudged upwards by a tiny epsilon
   top <- max(x + 10^-9)
   # Number of whole intervals needed to cover top plus one extra unit
   intervals <- ceiling(top / nearest + 1 / nearest)
   intervals * nearest
 }