From 70c39b914529874f3de957f2d42e69aec19afdb1 Mon Sep 17 00:00:00 2001
From: Taha Ahmed <taha@asks2>
Date: Sat, 24 Sep 2011 12:30:37 +0200
Subject: [PATCH] xrdpkWrapper.R now correctly handles situations where file
 already exists,

override = TRUE, and run > 1. Previously only the latest run was actually saved,
although the created list had the right length.
Works correctly and as intended now, as far as I can tell.
---
 .gitignore            |  1 +
 XRD-TF/matchpdf.R     |  6 ++-
 XRD-TF/muxd2df.R      | 90 +++++++++++--------------------------------
 XRD-TF/uxd2df.R       | 63 ------------------------------
 XRD-TF/xrdpk.R        |  1 +
 XRD-TF/xrdpkWrapper.R | 39 +++++++++++++------
 common/roundup.R      |  6 +++
 7 files changed, 64 insertions(+), 142 deletions(-)
 delete mode 100644 XRD-TF/uxd2df.R
 create mode 100644 common/roundup.R

diff --git a/.gitignore b/.gitignore
index 57c2127..d6969e5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 *.RData
+*.Rdeprecated
 *.Rhistory
 *.Rhistory.save
 *.ROLD
diff --git a/XRD-TF/matchpdf.R b/XRD-TF/matchpdf.R
index 6eb36dc..54eef60 100644
--- a/XRD-TF/matchpdf.R
+++ b/XRD-TF/matchpdf.R
@@ -63,7 +63,11 @@ matchpdf <- function(expcol, pdfrow) {
    if (sum(rowSums(diff.indx)) == sum(colSums(diff.indx))) {
       # Reset mtch
       mtch <- list()
-      mtch <- list(csums = colSums(diff.indx), rsums = rowSums(diff.indx), expthth = expcol[colSums(diff.indx) != 0], pdfthth = pdfrow[rowSums(diff.indx) != 0], deltathth = expcol[colSums(diff.indx) != 0] - pdfrow[rowSums(diff.indx) != 0])
+      mtch <- list(csums = colSums(diff.indx), 
+                   rsums = rowSums(diff.indx), 
+                   expthth = expcol[colSums(diff.indx) != 0], 
+                   pdfthth = pdfrow[rowSums(diff.indx) != 0], 
+                   deltathth = expcol[colSums(diff.indx) != 0] - pdfrow[rowSums(diff.indx) != 0])
       # List of 5
       # $ csums     : num - consisting of ones and zeroes. Shows you which positions of expcol matched.
       # $ rsums     : num - consisting of ones and zeroes. Shows you which positions of pdfrow matched.
diff --git a/XRD-TF/muxd2df.R b/XRD-TF/muxd2df.R
index a67439a..9015530 100644
--- a/XRD-TF/muxd2df.R
+++ b/XRD-TF/muxd2df.R
@@ -6,14 +6,15 @@ source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")
 ##################################################
 muxd2df <- function(uxdfile) {
    ## Description:
-   ##   Reads UXD files with multiple ranges (converted using XCH v1.0)
+   ##   Reads UXD files with one or multiple ranges (converted using XCH v1.0)
    ##   Extracts both data (thth, intensity) and parameters
-   ##   Also automatically calculates cps is counts are present, and vice versa
+   ##   Also automatically calculates cps if counts are present, and vice versa
    ##   (note that this depends on specific strings in the UXD format).
    ## Usage:
    ##   muxd2df(uxdfile)
    ## Arguments:
    ##   uxdfile: text string with full path to UXD file
+   ##            containing single or multiple data ranges
    ## Value:
    ##   Dataframe with the following columns:
    ##   $ sampleid        : chr
@@ -50,10 +51,14 @@ muxd2df <- function(uxdfile) {
    range.header.end.rows <- which(regexpr(range.header.end.rexp, f) == 1)
    
    # Calculate number of ranges
-   ranges.total <- ifelse(length(range.header.start.rows) == length(range.header.end.rows), length(range.header.start.rows), NA)
+   ranges.total <- 
+      ifelse(length(range.header.start.rows) == length(range.header.end.rows), 
+             length(range.header.start.rows),
+             NA) #why would they not be equal?
    if (is.na(ranges.total)) {
       # Obviously something bad happened.
       # Do something about it. echo an error message perhaps.
+      # But why would they not be equal?
       
    }
          
@@ -79,7 +84,15 @@ muxd2df <- function(uxdfile) {
    # Data always start on the row after header end
    range.data.start.rows <- range.header.end.rows + 1
    # Data end rows precedes header with one row, except for the first range
-   range.data.end.rows <- c(range.header.start.rows[2:length(range.header.start.rows)] - 1, length(f))
+   # But only if data contained more than one range, obviously. Let's make the code check for that
+   if (ranges.total > 1) {
+      range.data.end.rows <- c(range.header.start.rows[2:length(range.header.start.rows)] - 1, length(f))
+   } else {
+      # Data in fact only contains one range
+      range.data.end.rows <- length(f)
+   }
+   
+   ####
    
    # Extract data (as-is) and put it an list (by range)
    data.raw <- list()
@@ -113,7 +126,8 @@ muxd2df <- function(uxdfile) {
             as.numeric(strsplit(headers.raw[[range]][which(regexpr(unname(header.param.rexp[param]), 
                headers.raw[[range]]) == 1)], "=")[[1]][2]))
       }
-      names(data[[range]]) <- c("sampleid", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp))
+      names(data[[range]]) <- 
+         c("sampleid", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp))
    }
                                                                    
    # Calculate the other of the pair counts <-> cps
@@ -130,69 +144,11 @@ muxd2df <- function(uxdfile) {
       
    # Return a unified dataframe
    data.df <- data[[1]]
-   for (range in 2:ranges.total) {
-      data.df <- rbind(data.df, data[[range]])
+   if (ranges.total > 1) {
+      for (range in 2:ranges.total) {
+         data.df <- rbind(data.df, data[[range]])
+      }
    }
    
    return(data.df)
 }
-
-
-
-
-
-#### OLD VERSION - DEPRECATE
-##################################################
-################### muxd2df ######################
-##################################################
-muxd2df.old <- function(uxdfile, range.descriptor) {
-   # Function that reads an UXD file which contains several ranges
-   # (created in a programmed run, for example)
-   # Arguments
-   # :: uxdfile (filename with extension)
-   # :: range.descriptor (an array with as many elements as
-   #    there are ranges in the uxdfile)
-   # Returns: dataframe with 3 columns
-   
-   cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD
-   cdata <- "[0-9]" #regexpr matching one character of any digit
-   # Create filenames for the output # no longer used, return dataframe instead
-   #datafile <- paste(uxdfile,"-",range.descriptor,".data",sep="")
-   
-   # Read the input multirange file
-   ufile <- file(uxdfile, "r")
-   f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
-   close(ufile)
-   
-   # This way we identify data rows by looking for numeric characters.
-   #wh <- regexpr("[0-9]", f)
-   # This way we identify header rows
-   # Later we will assume that all other rows are data
-   wh <- regexpr(cchar, f)
-   
-   mh <- wh[1:length(wh)] # this gives you the corresponding index vector
-   # the value of each element corresponds to the position of the regexp match.
-   # value = 1 means the first character of the row is cchar (row is header)
-   # value =-1 means no cchar occur on the row (row is data)
-   
-   #length(mh[mh == -1]) #total number of datarows in uxdfile
-   #mh[mh > 1 | mh < 0] <- 0 #set all header-rows to zero (just to make things easier)
-   
-   i <- seq(1, length(mh) - 1, 1)
-   j <- seq(2, length(mh), 1)
-   starts <- which(mh[i] == 1 & mh[j] != 1) + 1 #start indices
-   ends   <- which(mh[i] != 1 & mh[j] == 1) #end indices, except the last
-   ends   <- c(ends, length(mh)) #fixed the last index of ends   
-   
-   ff <- data.frame(NULL)
-   for (s in 1:length(range.descriptor)) {
-      zz <- textConnection(f[starts[s]:ends[s]], "r")
-      ff <- rbind(ff, data.frame(range.descriptor[s],
-            matrix(scan(zz, what = numeric()), ncol=2, byrow=T)))
-      close(zz)
-   }
-   names(ff) <- c("sampleid", "angle", "intensity")
-   
-   # Return dataframe
-   ff
-}
diff --git a/XRD-TF/uxd2df.R b/XRD-TF/uxd2df.R
deleted file mode 100644
index 8d6fd5e..0000000
--- a/XRD-TF/uxd2df.R
+++ /dev/null
@@ -1,63 +0,0 @@
-##################################################
-#################### uxd2df ######################
-##################################################
-uxd2df <- function(uxdfile) {
-   # Function for reading UXD files  # Assumptions: data in two columns
-   # Args: uxdfile (filename with extension)
-   # Returns: dataframe with three columns
-   
-   cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD
-   cdata <- "[0-9]" #regexpr matching one character of any digit
-   
-   # A new file (datafile) containing only data will be created,
-   # extension ".data" appended to uxdfile
-   #datafile <- paste(uxdfile,".data",sep="")
-   
-   ufile <- file(uxdfile, "r")
-   f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
-   close(ufile)
-   
-   # This way we identify data rows by looking for numeric characters.
-   #wh <- regexpr("[0-9]", f)
-   # This way we identify header rows
-   # We assume that all other rows are data
-   wh <- regexpr(cchar, f)
-   
-   mh <- wh[1:length(wh)] # this gives you the corresponding index vector
-   # the value of each element corresponds to the position of the regexp match.
-   # value = 1 means the first character of the row is cchar (row is header)
-   # value =-1 means no cchar occur on the row (row is data)
-   
-   i <- seq(1, length(mh) - 1, 1)
-   j <- seq(2, length(mh), 1)
-   
-   starts <- which(mh[i] == 1 & mh[j] != 1) + 1
-   ends   <- length(mh)
-   f <- f[starts:ends]
-   
-   rgxp.sampleid <- "[^/]*(?=\\.\\w*)" ## THIS REQUIRES perl=TRUE
-   # Regular expression that extracts the filename out of a full path.
-   # Matches and extracts everything from the last forward slash (assuming Unix slashes)
-   # up until a dot folllowed by an arbitrary number of alphanumeric characters.
-   sampleidmtch <- regexpr(rgxp.sampleid, uxdfile, perl=TRUE)
-   # Check that there was a match
-   if (sampleidmtch < 0) {
-      # -1 means no match
-      sampleid <- uxdfile
-      # If match was unsuccessful we use the argument as passed to this function as sampleid
-   }
-   sampleid <- substr(uxdfile, sampleidmtch, (sampleidmtch + attr(sampleidmtch, "match.length") - 1))
-   
-   zz <- textConnection(f, "r")
-   ff <- data.frame(sampleid, matrix(scan(zz,
-         what = numeric()), ncol=2, byrow=T))
-   names(ff) <- c("sampleid", "angle", "intensity")
-   close(zz)
-      
-   #zz <- file(datafile, "w") #open connection to datafile
-   #write.table(ff, file=datafile, row.names=F, sep=",")
-   #close(zz)
-
-   # Return dataframe
-   ff
-}
diff --git a/XRD-TF/xrdpk.R b/XRD-TF/xrdpk.R
index 7d1f2f3..0401248 100644
--- a/XRD-TF/xrdpk.R
+++ b/XRD-TF/xrdpk.R
@@ -67,6 +67,7 @@ xrdpk <-
                        height = xrd.fit[[s]]$parpks[kernel, "height"],
                        area   = xrd.fit[[s]]$parpks[kernel, "intens"],
                        fwhm   = xrd.fit[[s]]$parpks[kernel, "FWHM"],
+                       beta   = xrd.fit[[s]]$parpks[kernel, "intens"] / xrd.fit[[s]]$parpks[kernel, "height"],
                        m      = xrd.fit[[s]]$parpks[kernel, "m"],
                        accept = xrd.fit[[s]]$accept))
          xrd.fit.fitpk <- rbind(xrd.fit.fitpk, 
diff --git a/XRD-TF/xrdpkWrapper.R b/XRD-TF/xrdpkWrapper.R
index c6fb0f6..2df5f1b 100644
--- a/XRD-TF/xrdpkWrapper.R
+++ b/XRD-TF/xrdpkWrapper.R
@@ -39,22 +39,39 @@ xrdpkWrapper <-
       
       return(xrdres)
    } else {
+      # File does not exist
+      # OR override is TRUE
       
       print("... Started else-clause 1")
       
-      if (!exists("xrdres")) {
+      # If file does not exist at all, run all necessary code to re-create it
+      if (!file.exists(xrddatafile)) {
          xrdres <- list()
          print("... xrdres list created")
-      }   
-      
-      # Need to call xrdpk() and save its results to file as above
-      xrdres[[run]] <- xrdpk(data.exp, 
-                             kerpk = kerpk, 
-                             fitmaxiter = fitmaxiter, 
-                             gam = gam, 
-                             scl.factor = scl.factor,
-                             maxwdth = maxwdth)
-      save(xrdres, file = xrddatafile)
+         
+         xrdres[[run]] <- 
+            xrdpk(data.exp, 
+                  kerpk = kerpk, 
+                  fitmaxiter = fitmaxiter, 
+                  gam = gam, 
+                  scl.factor = scl.factor,
+                  maxwdth = maxwdth)
+         
+         save(xrdres, file = xrddatafile)
+      } else {
+         # File already exists, but override is TRUE
+         load(file = xrddatafile)
+         
+         xrdres[[run]] <- 
+            xrdpk(data.exp, 
+                  kerpk = kerpk, 
+                  fitmaxiter = fitmaxiter, 
+                  gam = gam, 
+                  scl.factor = scl.factor,
+                  maxwdth = maxwdth)
+         
+         save(xrdres, file = xrddatafile)
+      }
       
       print("... Ended else-clause 1")
       
diff --git a/common/roundup.R b/common/roundup.R
new file mode 100644
index 0000000..21c02ca
--- /dev/null
+++ b/common/roundup.R
@@ -0,0 +1,6 @@
+# Function that rounds UP to the nearest interval specified by "nearest"
+# http://stackoverflow.com/questions/6461209/how-to-round-up-to-the-nearest-10-or-100-or-x
+
+roundup <- function(x, nearest=1000) {
+   ceiling(max(x+10^-9)/nearest + 1/nearest)*nearest
+}
\ No newline at end of file