Re-wrote muxd2df() from scratch.

To make it able to include parameters as well as data in the returned df. muxd2df() now works satisfactorily, outputs parameters such as steptime, theta, and cps as well as thth and counts. Other minor changes mostly updates of source file pointers.
15 years ago · b55af5d478
parent b976bd3fa4
commit b55af5d478
6 changed files with 159 additions and 8 deletions
--- a/AUTOLAB/amp2df.R
+++ b/AUTOLAB/amp2df.R
@ -1,4 +1,4 @@
-source("/home/taha/chepec/chetex/common/R/common.R")
+source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")

 ##################################################
 #################### amp2df ######################
--- a/XRD-TF/muxd2df.R
+++ b/XRD-TF/muxd2df.R
@ -1,7 +1,151 @@
+source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")
+
+
 ##################################################
 ################### muxd2df ######################
 ##################################################
-muxd2df <- function(uxdfile, range.descriptor) {
+muxd2df <- function(uxdfile) {
+   ## Description:
+   ##   Reads UXD files with multiple ranges (converted using XCH v1.0)
+   ##   Extracts both data (thth, intensity) and parameters
+   ##   Also automatically calculates cps is counts are present, and vice versa
+   ##   (note that this depends on specific strings in the UXD format).
+   ## Usage:
+   ##   muxd2df(uxdfile)
+   ## Arguments:
+   ##   uxdfile: text string with full path to UXD file
+   ## Value:
+   ##   Dataframe with the following columns:
+   ##   $ sampleid        : chr
+   ##   $ thth            : num
+   ##   $ counts (or cps) : num
+   ##   $ steptime        : num
+   ##   $ stepsize        : num
+   ##   $ theta           : num
+   ##   $ khi             : num
+   ##   $ phi             : num
+   ##   $ x               : num
+   ##   $ y               : num
+   ##   $ z               : num
+   ##   $ divergence      : num
+   ##   $ antiscatter     : num
+   ##   $ cps (or counts) : num
+   #
+   range.header.start.rexp <- "^; \\(Data for Range" #regexp
+   range.header.end.rexp <- "^_2THETA[^=]" #regexp
+   
+   # Read the input multirange file
+   ufile <- file(uxdfile, "r")
+   # Note that readLines apparently completely skips empty lines. 
+   # In that case line numbers do not match between source and f.
+   f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
+   close(ufile)
+   
+   # Fetch a sampleid for the current job
+   sampleid <- ProvideSampleId(uxdfile)
+   
+   # Look for header start rows
+   range.header.start.rows <- which(regexpr(range.header.start.rexp, f) == 1)
+   # Look for header end rows
+   range.header.end.rows <- which(regexpr(range.header.end.rexp, f) == 1)
+   
+   # Calculate number of ranges
+   ranges.total <- ifelse(length(range.header.start.rows) == length(range.header.end.rows), length(range.header.start.rows), NA)
+   if (is.na(ranges.total)) {
+      # Obviously something bad happened.
+      # Do something about it. echo an error message perhaps.
+      
+   }
+         
+   # Determine whether we have COUNTS or COUNTS PER SECOND in current UXD-file
+   # Assuming it is the same for all ranges in this job (a safe assumption).
+   if (f[range.header.end.rows][1] == "_2THETACOUNTS") {
+      # we got counts
+      counts.flag <- TRUE
+      cps.flag <- FALSE
+   }
+   if (f[range.header.end.rows][1] == "_2THETACPS") {
+      # we got counts per second
+      counts.flag <-FALSE
+      cps.flag <- TRUE
+   }
+   
+   # Extract headers (as-is) and put them in a list (by range)
+   headers.raw <- list()
+   for (range in 1:ranges.total) {
+      headers.raw[[range]] <- f[range.header.start.rows[range]:range.header.end.rows[range]]
+   }
+
+   # Data always start on the row after header end
+   range.data.start.rows <- range.header.end.rows + 1
+   # Data end rows precedes header with one row, except for the first range
+   range.data.end.rows <- c(range.header.start.rows[2:length(range.header.start.rows)] - 1, length(f))
+   
+   # Extract data (as-is) and put it an list (by range)
+   data.raw <- list()
+   for (range in 1:ranges.total) {
+      data.raw[[range]] <- f[range.data.start.rows[range]:range.data.end.rows[range]]
+   }
+   
+   # Specify header parameters to include in dataframe
+   header.param.rexp <- c(steptime = "^_STEPTIME=", 
+                          stepsize = "^_STEPSIZE=", 
+                          theta = "^_THETA=",
+                          khi = "^_KHI=",
+                          phi = "^_PHI=",
+                          x = "^_X=",
+                          y = "^_Y=",
+                          z = "^_Z=",
+                          divergence = "^_DIVERGENCE=",
+                          antiscatter = "^_ANTISCATTER=")
+   
+   # Collect data and header parameters in dataframes, by range in a list
+   data <- list()
+   for (range in 1:ranges.total) {
+      zz <- textConnection(data.raw[[range]], "r")
+      data[[range]] <- data.frame(stringsAsFactors = F,
+                                  sampleid,
+                                  matrix(scan(zz, what = numeric()), ncol = 2, byrow = T))
+      close(zz)
+      # Collect header parameters
+      for (param in 1:length(header.param.rexp)) {
+         data[[range]] <- cbind(data[[range]], 
+            as.numeric(strsplit(headers.raw[[range]][which(regexpr(unname(header.param.rexp[param]), 
+               headers.raw[[range]]) == 1)], "=")[[1]][2]))
+      }
+      names(data[[range]]) <- c("sampleid", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp))
+   }
+                                                                   
+   # Calculate the other of the pair counts <-> cps
+   if (counts.flag) {
+      for (range in 1:ranges.total) {
+         data[[range]] <- cbind(data[[range]], cps = data[[range]]$counts / data[[range]]$steptime)
+      }
+   }
+   if (cps.flag) {
+      for (range in 1:ranges.total) {
+         data[[range]] <- cbind(data[[range]], counts = data[[range]]$cps * data[[range]]$steptime)
+      }
+   }
+      
+   # Return a unified dataframe
+   data.df <- data[[1]]
+   for (range in 2:ranges.total) {
+      data.df <- rbind(data.df, data[[range]])
+   }
+   
+   return(data.df)
+}
+
+
+
+
+
+#### OLD VERSION - DEPRECATE
+##################################################
+################### muxd2df ######################
+##################################################
+muxd2df.old <- function(uxdfile, range.descriptor) {
   # Function that reads an UXD file which contains several ranges
   # (created in a programmed run, for example)
   # Arguments
--- a/XRD-TF/pdf2df.R
+++ b/XRD-TF/pdf2df.R
@ -5,8 +5,8 @@ pdf2df <- function(pdffile) {
   # Function for extracting information from ICDD PDF XML-files
   # For example the PDF files produced by the PDF database at Angstrom's X-ray lab
   # NOTE: sometimes intensity values are specified as less than some value.
-   #       In those cases, this function simply strips the less-than character.
-   #       (Perhaps not true, see the int.Tex column)
+   #       In those cases, the lt sign will be preserved in the column int.Tex.
+   #       The intensity column, on the other hand, is numeric and so strips off the lt sign.
   # ARGS: pdffile (complete path and filename to PDF file)
   # VALUE: dataframe with 9 columns:
   #        thth angles (numeric),
--- a/XRD-TF/scherrer.R
+++ b/XRD-TF/scherrer.R
@ -1,3 +1,5 @@
+source("/home/taha/chepec/chetex/common/R/common/as.radians.R")
+
 ##################################################
 ################## scherrer ######################
 ##################################################
@ -8,7 +10,7 @@ scherrer <- function(integralbreadth, thth, wavelength = 1.54056, shapeconstant
   #       wavelength      - X-ray wavelength used (default 1.54056 A, Cu Ka)
   #       shapeconstant   - Scherrer constant (default spherical, ~0.9)
   # VALUE: vector with size parameters
-   ## REQUIRES: as.radians(), source("/home/taha/chepec/chetex/common/R/common.R")
+   ## REQUIRES: as.radians()
   D <- (shapeconstant * wavelength) / (as.radians(integralbreadth) * cos(as.radians(thth)))
   # cos() - angles must be in radians, not degrees!
   return(D) #units of angstrom
--- a/common/ConvertRefPot.R
+++ b/common/ConvertRefPot.R
@ -1,6 +1,6 @@
-source(SHE2AVS.R)
-source(AVS2SHE.R)
-source(ConvertRefPotEC.R)
+source("/home/taha/chepec/chetex/common/R/common/SHE2AVS.R")
+source("/home/taha/chepec/chetex/common/R/common/AVS2SHE.R")
+source("/home/taha/chepec/chetex/common/R/common/ConvertRefPotEC.R")

 ##################################################
 ################# ConvertRefPot ##################
--- a/common/is.wholenumber.R
+++ b/common/is.wholenumber.R
@ -0,0 +1,5 @@
+# This function was copied from R's documentation (see ?is.integer).
+
+is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) {
+   abs(x - round(x)) < tol
+}