xrdpkWrapper.R now correctly handles situations where file already exists,

override = TRUE, and run > 1. Previously only the latest run was actually saved, although the created list had the right length. As far as I can tell, it now works correctly and as intended.
14 years ago · 70c39b9145
parent 3d4244f1fd
commit 70c39b9145
7 changed files with 64 additions and 142 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,4 +1,5 @@
 *.RData
+*.Rdeprecated
 *.Rhistory
 *.Rhistory.save
 *.ROLD
--- a/XRD-TF/matchpdf.R
+++ b/XRD-TF/matchpdf.R
@ -63,7 +63,11 @@ matchpdf <- function(expcol, pdfrow) {
   if (sum(rowSums(diff.indx)) == sum(colSums(diff.indx))) {
      # Reset mtch
      mtch <- list()
-      mtch <- list(csums = colSums(diff.indx), rsums = rowSums(diff.indx), expthth = expcol[colSums(diff.indx) != 0], pdfthth = pdfrow[rowSums(diff.indx) != 0], deltathth = expcol[colSums(diff.indx) != 0] - pdfrow[rowSums(diff.indx) != 0])
+      mtch <- list(csums = colSums(diff.indx), 
+                   rsums = rowSums(diff.indx), 
+                   expthth = expcol[colSums(diff.indx) != 0], 
+                   pdfthth = pdfrow[rowSums(diff.indx) != 0], 
+                   deltathth = expcol[colSums(diff.indx) != 0] - pdfrow[rowSums(diff.indx) != 0])
      # List of 5
      # $ csums     : num - consisting of ones and zeroes. Shows you which positions of expcol matched.
      # $ rsums     : num - consisting of ones and zeroes. Shows you which positions of pdfrow matched.
--- a/XRD-TF/muxd2df.R
+++ b/XRD-TF/muxd2df.R
@ -6,14 +6,15 @@ source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")
 ##################################################
 muxd2df <- function(uxdfile) {
   ## Description:
-   ##   Reads UXD files with multiple ranges (converted using XCH v1.0)
+   ##   Reads UXD files with one or multiple ranges (converted using XCH v1.0)
   ##   Extracts both data (thth, intensity) and parameters
-   ##   Also automatically calculates cps is counts are present, and vice versa
+   ##   Also automatically calculates cps if counts are present, and vice versa
   ##   (note that this depends on specific strings in the UXD format).
   ## Usage:
   ##   muxd2df(uxdfile)
   ## Arguments:
   ##   uxdfile: text string with full path to UXD file
+   ##            containing single or multiple data ranges
   ## Value:
   ##   Dataframe with the following columns:
   ##   $ sampleid        : chr
@ -50,10 +51,14 @@ muxd2df <- function(uxdfile) {
   range.header.end.rows <- which(regexpr(range.header.end.rexp, f) == 1)
   
   # Calculate number of ranges
-   ranges.total <- ifelse(length(range.header.start.rows) == length(range.header.end.rows), length(range.header.start.rows), NA)
+   ranges.total <- 
+      ifelse(length(range.header.start.rows) == length(range.header.end.rows), 
+             length(range.header.start.rows),
+             NA) #why would they not be equal?
   if (is.na(ranges.total)) {
      # Obviously something bad happened.
      # Do something about it. echo an error message perhaps.
+      # But why would they not be equal?
      
   }
         
@ -79,7 +84,15 @@ muxd2df <- function(uxdfile) {
   # Data always start on the row after header end
   range.data.start.rows <- range.header.end.rows + 1
   # Data end rows precedes header with one row, except for the first range
-   range.data.end.rows <- c(range.header.start.rows[2:length(range.header.start.rows)] - 1, length(f))
+   # That only holds if the data contain more than one range, so check for that explicitly
+   if (ranges.total > 1) {
+      range.data.end.rows <- c(range.header.start.rows[2:length(range.header.start.rows)] - 1, length(f))
+   } else {
+      # Data in fact only contains one range
+      range.data.end.rows <- length(f)
+   }
+   
+   ####
   
   # Extract data (as-is) and put it an list (by range)
   data.raw <- list()
@ -113,7 +126,8 @@ muxd2df <- function(uxdfile) {
            as.numeric(strsplit(headers.raw[[range]][which(regexpr(unname(header.param.rexp[param]), 
               headers.raw[[range]]) == 1)], "=")[[1]][2]))
      }
-      names(data[[range]]) <- c("sampleid", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp))
+      names(data[[range]]) <- 
+         c("sampleid", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp))
   }
                                                                   
   # Calculate the other of the pair counts <-> cps
@ -130,69 +144,11 @@ muxd2df <- function(uxdfile) {
      
   # Return a unified dataframe
   data.df <- data[[1]]
-   for (range in 2:ranges.total) {
-      data.df <- rbind(data.df, data[[range]])
+   if (ranges.total > 1) {
+      for (range in 2:ranges.total) {
+         data.df <- rbind(data.df, data[[range]])
+      }
   }
   
   return(data.df)
 }
-
-
-
-
-
-#### OLD VERSION - DEPRECATE
-##################################################
-################### muxd2df ######################
-##################################################
-muxd2df.old <- function(uxdfile, range.descriptor) {
-   # Function that reads an UXD file which contains several ranges
-   # (created in a programmed run, for example)
-   # Arguments
-   # :: uxdfile (filename with extension)
-   # :: range.descriptor (an array with as many elements as
-   #    there are ranges in the uxdfile)
-   # Returns: dataframe with 3 columns
-   
-   cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD
-   cdata <- "[0-9]" #regexpr matching one character of any digit
-   # Create filenames for the output # no longer used, return dataframe instead
-   #datafile <- paste(uxdfile,"-",range.descriptor,".data",sep="")
-   
-   # Read the input multirange file
-   ufile <- file(uxdfile, "r")
-   f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
-   close(ufile)
-   
-   # This way we identify data rows by looking for numeric characters.
-   #wh <- regexpr("[0-9]", f)
-   # This way we identify header rows
-   # Later we will assume that all other rows are data
-   wh <- regexpr(cchar, f)
-   
-   mh <- wh[1:length(wh)] # this gives you the corresponding index vector
-   # the value of each element corresponds to the position of the regexp match.
-   # value = 1 means the first character of the row is cchar (row is header)
-   # value =-1 means no cchar occur on the row (row is data)
-   
-   #length(mh[mh == -1]) #total number of datarows in uxdfile
-   #mh[mh > 1 | mh < 0] <- 0 #set all header-rows to zero (just to make things easier)
-   
-   i <- seq(1, length(mh) - 1, 1)
-   j <- seq(2, length(mh), 1)
-   starts <- which(mh[i] == 1 & mh[j] != 1) + 1 #start indices
-   ends   <- which(mh[i] != 1 & mh[j] == 1) #end indices, except the last
-   ends   <- c(ends, length(mh)) #fixed the last index of ends   
-   
-   ff <- data.frame(NULL)
-   for (s in 1:length(range.descriptor)) {
-      zz <- textConnection(f[starts[s]:ends[s]], "r")
-      ff <- rbind(ff, data.frame(range.descriptor[s],
-            matrix(scan(zz, what = numeric()), ncol=2, byrow=T)))
-      close(zz)
-   }
-   names(ff) <- c("sampleid", "angle", "intensity")
-   
-   # Return dataframe
-   ff
-}
--- a/XRD-TF/uxd2df.R
+++ b/XRD-TF/uxd2df.R
@ -1,63 +0,0 @@
-##################################################
-#################### uxd2df ######################
-##################################################
-uxd2df <- function(uxdfile) {
-   # Function for reading UXD files  # Assumptions: data in two columns
-   # Args: uxdfile (filename with extension)
-   # Returns: dataframe with three columns
-   
-   cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD
-   cdata <- "[0-9]" #regexpr matching one character of any digit
-   
-   # A new file (datafile) containing only data will be created,
-   # extension ".data" appended to uxdfile
-   #datafile <- paste(uxdfile,".data",sep="")
-   
-   ufile <- file(uxdfile, "r")
-   f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
-   close(ufile)
-   
-   # This way we identify data rows by looking for numeric characters.
-   #wh <- regexpr("[0-9]", f)
-   # This way we identify header rows
-   # We assume that all other rows are data
-   wh <- regexpr(cchar, f)
-   
-   mh <- wh[1:length(wh)] # this gives you the corresponding index vector
-   # the value of each element corresponds to the position of the regexp match.
-   # value = 1 means the first character of the row is cchar (row is header)
-   # value =-1 means no cchar occur on the row (row is data)
-   
-   i <- seq(1, length(mh) - 1, 1)
-   j <- seq(2, length(mh), 1)
-   
-   starts <- which(mh[i] == 1 & mh[j] != 1) + 1
-   ends   <- length(mh)
-   f <- f[starts:ends]
-   
-   rgxp.sampleid <- "[^/]*(?=\\.\\w*)" ## THIS REQUIRES perl=TRUE
-   # Regular expression that extracts the filename out of a full path.
-   # Matches and extracts everything from the last forward slash (assuming Unix slashes)
-   # up until a dot folllowed by an arbitrary number of alphanumeric characters.
-   sampleidmtch <- regexpr(rgxp.sampleid, uxdfile, perl=TRUE)
-   # Check that there was a match
-   if (sampleidmtch < 0) {
-      # -1 means no match
-      sampleid <- uxdfile
-      # If match was unsuccessful we use the argument as passed to this function as sampleid
-   }
-   sampleid <- substr(uxdfile, sampleidmtch, (sampleidmtch + attr(sampleidmtch, "match.length") - 1))
-   
-   zz <- textConnection(f, "r")
-   ff <- data.frame(sampleid, matrix(scan(zz,
-         what = numeric()), ncol=2, byrow=T))
-   names(ff) <- c("sampleid", "angle", "intensity")
-   close(zz)
-      
-   #zz <- file(datafile, "w") #open connection to datafile
-   #write.table(ff, file=datafile, row.names=F, sep=",")
-   #close(zz)
-
-   # Return dataframe
-   ff
-}
--- a/XRD-TF/xrdpk.R
+++ b/XRD-TF/xrdpk.R
@ -67,6 +67,7 @@ xrdpk <-
                       height = xrd.fit[[s]]$parpks[kernel, "height"],
                       area   = xrd.fit[[s]]$parpks[kernel, "intens"],
                       fwhm   = xrd.fit[[s]]$parpks[kernel, "FWHM"],
+                       beta   = xrd.fit[[s]]$parpks[kernel, "intens"] / xrd.fit[[s]]$parpks[kernel, "height"],
                       m      = xrd.fit[[s]]$parpks[kernel, "m"],
                       accept = xrd.fit[[s]]$accept))
         xrd.fit.fitpk <- rbind(xrd.fit.fitpk, 
--- a/XRD-TF/xrdpkWrapper.R
+++ b/XRD-TF/xrdpkWrapper.R
@ -39,22 +39,39 @@ xrdpkWrapper <-
      
      return(xrdres)
   } else {
+      # File does not exist
+      # OR override is TRUE
      
      print("... Started else-clause 1")
      
-      if (!exists("xrdres")) {
+      # If file does not exist at all, run all necessary code to re-create it
+      if (!file.exists(xrddatafile)) {
         xrdres <- list()
         print("... xrdres list created")
-      }   
-      
-      # Need to call xrdpk() and save its results to file as above
-      xrdres[[run]] <- xrdpk(data.exp, 
-                             kerpk = kerpk, 
-                             fitmaxiter = fitmaxiter, 
-                             gam = gam, 
-                             scl.factor = scl.factor,
-                             maxwdth = maxwdth)
-      save(xrdres, file = xrddatafile)
+         
+         xrdres[[run]] <- 
+            xrdpk(data.exp, 
+                  kerpk = kerpk, 
+                  fitmaxiter = fitmaxiter, 
+                  gam = gam, 
+                  scl.factor = scl.factor,
+                  maxwdth = maxwdth)
+         
+         save(xrdres, file = xrddatafile)
+      } else {
+         # File already exists, but override is TRUE
+         load(file = xrddatafile)
+         
+         xrdres[[run]] <- 
+            xrdpk(data.exp, 
+                  kerpk = kerpk, 
+                  fitmaxiter = fitmaxiter, 
+                  gam = gam, 
+                  scl.factor = scl.factor,
+                  maxwdth = maxwdth)
+         
+         save(xrdres, file = xrddatafile)
+      }
      
      print("... Ended else-clause 1")
      
--- a/common/roundup.R
+++ b/common/roundup.R
@ -0,0 +1,6 @@
+# Function that rounds max(x) + 1 UP to a multiple of "nearest"; returns a single value even if x is a vector
+# http://stackoverflow.com/questions/6461209/how-to-round-up-to-the-nearest-10-or-100-or-x
+
+roundup <- function(x, nearest=1000) {
+   ceiling(max(x+10^-9)/nearest + 1/nearest)*nearest
+}