From b55af5d47827547f9ed1a0d0967fc9cb7a2727c6 Mon Sep 17 00:00:00 2001
From: Taha Ahmed <taha@mkem150>
Date: Wed, 3 Aug 2011 15:33:01 +0200
Subject: [PATCH] Re-wrote muxd2df() from scratch.

To make it able to include parameters as well as data in the returned df.
muxd2df() now works satisfactorily, outputs parameters such as steptime,
theta, and cps as well as thth and counts.
Other minor changes mostly updates of source file pointers.
---
 AUTOLAB/amp2df.R        |   2 +-
 XRD-TF/muxd2df.R        | 146 +++++++++++++++++++++++++++++++++++++++-
 XRD-TF/pdf2df.R         |   4 +-
 XRD-TF/scherrer.R       |   4 +-
 common/ConvertRefPot.R  |   6 +-
 common/is.wholenumber.R |   5 ++
 6 files changed, 159 insertions(+), 8 deletions(-)
 create mode 100644 common/is.wholenumber.R

diff --git a/AUTOLAB/amp2df.R b/AUTOLAB/amp2df.R
index 0eeaac9..557ba97 100644
--- a/AUTOLAB/amp2df.R
+++ b/AUTOLAB/amp2df.R
@@ -1,4 +1,4 @@
-source("/home/taha/chepec/chetex/common/R/common.R")
+source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")
 
 ##################################################
 #################### amp2df ######################
diff --git a/XRD-TF/muxd2df.R b/XRD-TF/muxd2df.R
index 807132e..a67439a 100644
--- a/XRD-TF/muxd2df.R
+++ b/XRD-TF/muxd2df.R
@@ -1,7 +1,151 @@
+source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")
+
+
 ##################################################
 ################### muxd2df ######################
 ##################################################
-muxd2df <- function(uxdfile, range.descriptor) {
+muxd2df <- function(uxdfile) {
+   ## Description:
+   ##   Reads UXD files with multiple ranges (converted using XCH v1.0)
+   ##   Extracts both data (thth, intensity) and parameters
+   ##   Also automatically calculates cps is counts are present, and vice versa
+   ##   (note that this depends on specific strings in the UXD format).
+   ## Usage:
+   ##   muxd2df(uxdfile)
+   ## Arguments:
+   ##   uxdfile: text string with full path to UXD file
+   ## Value:
+   ##   Dataframe with the following columns:
+   ##   $ sampleid        : chr
+   ##   $ thth            : num
+   ##   $ counts (or cps) : num
+   ##   $ steptime        : num
+   ##   $ stepsize        : num
+   ##   $ theta           : num
+   ##   $ khi             : num
+   ##   $ phi             : num
+   ##   $ x               : num
+   ##   $ y               : num
+   ##   $ z               : num
+   ##   $ divergence      : num
+   ##   $ antiscatter     : num
+   ##   $ cps (or counts) : num
+   #
+   range.header.start.rexp <- "^; \\(Data for Range" #regexp
+   range.header.end.rexp <- "^_2THETA[^=]" #regexp
+   
+   # Read the input multirange file
+   ufile <- file(uxdfile, "r")
+   # Note that readLines apparently completely skips empty lines. 
+   # In that case line numbers do not match between source and f.
+   f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
+   close(ufile)
+   
+   # Fetch a sampleid for the current job
+   sampleid <- ProvideSampleId(uxdfile)
+   
+   # Look for header start rows
+   range.header.start.rows <- which(regexpr(range.header.start.rexp, f) == 1)
+   # Look for header end rows
+   range.header.end.rows <- which(regexpr(range.header.end.rexp, f) == 1)
+   
+   # Calculate number of ranges
+   ranges.total <- ifelse(length(range.header.start.rows) == length(range.header.end.rows), length(range.header.start.rows), NA)
+   if (is.na(ranges.total)) {
+      # Obviously something bad happened.
+      # Do something about it. echo an error message perhaps.
+      
+   }
+         
+   # Determine whether we have COUNTS or COUNTS PER SECOND in current UXD-file
+   # Assuming it is the same for all ranges in this job (a safe assumption).
+   if (f[range.header.end.rows][1] == "_2THETACOUNTS") {
+      # we got counts
+      counts.flag <- TRUE
+      cps.flag <- FALSE
+   }
+   if (f[range.header.end.rows][1] == "_2THETACPS") {
+      # we got counts per second
+      counts.flag <-FALSE
+      cps.flag <- TRUE
+   }
+   
+   # Extract headers (as-is) and put them in a list (by range)
+   headers.raw <- list()
+   for (range in 1:ranges.total) {
+      headers.raw[[range]] <- f[range.header.start.rows[range]:range.header.end.rows[range]]
+   }
+
+   # Data always start on the row after header end
+   range.data.start.rows <- range.header.end.rows + 1
+   # Data end rows precedes header with one row, except for the first range
+   range.data.end.rows <- c(range.header.start.rows[2:length(range.header.start.rows)] - 1, length(f))
+   
+   # Extract data (as-is) and put it an list (by range)
+   data.raw <- list()
+   for (range in 1:ranges.total) {
+      data.raw[[range]] <- f[range.data.start.rows[range]:range.data.end.rows[range]]
+   }
+   
+   # Specify header parameters to include in dataframe
+   header.param.rexp <- c(steptime = "^_STEPTIME=", 
+                          stepsize = "^_STEPSIZE=", 
+                          theta = "^_THETA=",
+                          khi = "^_KHI=",
+                          phi = "^_PHI=",
+                          x = "^_X=",
+                          y = "^_Y=",
+                          z = "^_Z=",
+                          divergence = "^_DIVERGENCE=",
+                          antiscatter = "^_ANTISCATTER=")
+   
+   # Collect data and header parameters in dataframes, by range in a list
+   data <- list()
+   for (range in 1:ranges.total) {
+      zz <- textConnection(data.raw[[range]], "r")
+      data[[range]] <- data.frame(stringsAsFactors = F,
+                                  sampleid,
+                                  matrix(scan(zz, what = numeric()), ncol = 2, byrow = T))
+      close(zz)
+      # Collect header parameters
+      for (param in 1:length(header.param.rexp)) {
+         data[[range]] <- cbind(data[[range]], 
+            as.numeric(strsplit(headers.raw[[range]][which(regexpr(unname(header.param.rexp[param]), 
+               headers.raw[[range]]) == 1)], "=")[[1]][2]))
+      }
+      names(data[[range]]) <- c("sampleid", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp))
+   }
+                                                                   
+   # Calculate the other of the pair counts <-> cps
+   if (counts.flag) {
+      for (range in 1:ranges.total) {
+         data[[range]] <- cbind(data[[range]], cps = data[[range]]$counts / data[[range]]$steptime)
+      }
+   }
+   if (cps.flag) {
+      for (range in 1:ranges.total) {
+         data[[range]] <- cbind(data[[range]], counts = data[[range]]$cps * data[[range]]$steptime)
+      }
+   }
+      
+   # Return a unified dataframe
+   data.df <- data[[1]]
+   for (range in 2:ranges.total) {
+      data.df <- rbind(data.df, data[[range]])
+   }
+   
+   return(data.df)
+}
+
+
+
+
+
+#### OLD VERSION - DEPRECATE
+##################################################
+################### muxd2df ######################
+##################################################
+muxd2df.old <- function(uxdfile, range.descriptor) {
    # Function that reads an UXD file which contains several ranges
    # (created in a programmed run, for example)
    # Arguments
diff --git a/XRD-TF/pdf2df.R b/XRD-TF/pdf2df.R
index ab5a15c..17369f9 100644
--- a/XRD-TF/pdf2df.R
+++ b/XRD-TF/pdf2df.R
@@ -5,8 +5,8 @@ pdf2df <- function(pdffile) {
    # Function for extracting information from ICDD PDF XML-files
    # For example the PDF files produced by the PDF database at Angstrom's X-ray lab
    # NOTE: sometimes intensity values are specified as less than some value.
-   #       In those cases, this function simply strips the less-than character.
-   #       (Perhaps not true, see the int.Tex column)
+   #       In those cases, the lt sign will be preserved in the column int.Tex.
+   #       The intensity column, on the other hand, is numeric and so strips off the lt sign.
    # ARGS: pdffile (complete path and filename to PDF file)
    # VALUE: dataframe with 9 columns:
    #        thth angles (numeric),
diff --git a/XRD-TF/scherrer.R b/XRD-TF/scherrer.R
index 3fb60dc..eabce34 100644
--- a/XRD-TF/scherrer.R
+++ b/XRD-TF/scherrer.R
@@ -1,3 +1,5 @@
+source("/home/taha/chepec/chetex/common/R/common/as.radians.R")
+
 ##################################################
 ################## scherrer ######################
 ##################################################
@@ -8,7 +10,7 @@ scherrer <- function(integralbreadth, thth, wavelength = 1.54056, shapeconstant
    #       wavelength      - X-ray wavelength used (default 1.54056 A, Cu Ka)
    #       shapeconstant   - Scherrer constant (default spherical, ~0.9)
    # VALUE: vector with size parameters
-   ## REQUIRES: as.radians(), source("/home/taha/chepec/chetex/common/R/common.R")
+   ## REQUIRES: as.radians()
    D <- (shapeconstant * wavelength) / (as.radians(integralbreadth) * cos(as.radians(thth)))
    # cos() - angles must be in radians, not degrees!
    return(D) #units of angstrom
diff --git a/common/ConvertRefPot.R b/common/ConvertRefPot.R
index 1143d63..9c6391c 100644
--- a/common/ConvertRefPot.R
+++ b/common/ConvertRefPot.R
@@ -1,6 +1,6 @@
-source(SHE2AVS.R)
-source(AVS2SHE.R)
-source(ConvertRefPotEC.R)
+source("/home/taha/chepec/chetex/common/R/common/SHE2AVS.R")
+source("/home/taha/chepec/chetex/common/R/common/AVS2SHE.R")
+source("/home/taha/chepec/chetex/common/R/common/ConvertRefPotEC.R")
 
 ##################################################
 ################# ConvertRefPot ##################
diff --git a/common/is.wholenumber.R b/common/is.wholenumber.R
new file mode 100644
index 0000000..ff8093a
--- /dev/null
+++ b/common/is.wholenumber.R
@@ -0,0 +1,5 @@
+# This function was copied from R's documentation (see ?is.integer).
+
+is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) {
+   abs(x - round(x)) < tol
+}
\ No newline at end of file