From f0ed360c0d1508ea40d72d069530221d0d061f2d Mon Sep 17 00:00:00 2001
From: Taha Ahmed <taha@mkem150>
Date: Wed, 7 Dec 2011 17:02:54 +0100
Subject: [PATCH] Ok, no commits since a while. Last change was remake of
 init.R to improve readability.

---
 AUTOLAB/amp2df.R            |   2 +-
 AUTOLAB/ocp2df.R            |   2 +-
 INCA/eds2df.R               |   3 +
 OOHR2000/OO2df.R            | 157 ++++++++++++++++++++++++++++++++++++
 XRD-TF/mraw2df.R            | 154 +++++++++++++++++++++++++++++++++++
 XRD-TF/muxd2df.R            |   6 +-
 common/AVS2SHE.R            |   2 +-
 common/Celsius2Kelvin.R     |   2 +-
 common/ConvertRefPot.R      |   2 +-
 common/ConvertRefPotEC.R    |   2 +-
 common/Kelvin2Celsius.R     |   2 +-
 common/LoadRData2Variable.R |   2 +-
 common/ProvideSampleId.R    |   2 +-
 common/SHE2AVS.R            |   2 +-
 common/as.degrees.R         |   2 +-
 common/as.radians.R         |   2 +-
 common/int2padstr.R         |   2 +-
 common/is.wholenumber.R     |   2 +-
 common/molarity2mass.R      |   2 +-
 common/roundup.R            |   2 +-
 init.R                      |  15 ++--
 21 files changed, 340 insertions(+), 27 deletions(-)
 create mode 100644 OOHR2000/OO2df.R
 create mode 100644 XRD-TF/mraw2df.R

diff --git a/AUTOLAB/amp2df.R b/AUTOLAB/amp2df.R
index 557ba97..819c61e 100644
--- a/AUTOLAB/amp2df.R
+++ b/AUTOLAB/amp2df.R
@@ -86,4 +86,4 @@ amp2df <- function(datafilename, wearea = 1) {
       chargedensity = chargedensity)
    #
    return(ff)
-}
\ No newline at end of file
+}
diff --git a/AUTOLAB/ocp2df.R b/AUTOLAB/ocp2df.R
index 38e6dae..b34e8c0 100644
--- a/AUTOLAB/ocp2df.R
+++ b/AUTOLAB/ocp2df.R
@@ -58,4 +58,4 @@ ocp2df <- function(datafilename) {
    names(ff) <- c("sampleid", "time", "potential")
    #
    return(ff)
-}
\ No newline at end of file
+}
diff --git a/INCA/eds2df.R b/INCA/eds2df.R
index 4c42ff3..85de629 100644
--- a/INCA/eds2df.R
+++ b/INCA/eds2df.R
@@ -1,3 +1,6 @@
+## This function should be renamed to EMSA2df(). The file it reads is a so-called EMSA spectral data file.
+# /TA, 111110
+
 eds2df <- function(edstxtfile) {
    ## Description:
    ##   Reads EDS textfile from INCA EDS.
diff --git a/OOHR2000/OO2df.R b/OOHR2000/OO2df.R
new file mode 100644
index 0000000..383689f
--- /dev/null
+++ b/OOHR2000/OO2df.R
@@ -0,0 +1,157 @@
+source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")
+source("/home/taha/chepec/chetex/common/R/common/int2padstr.R")
+
+
+##################################################
+#################### OO2df #######################
+##################################################
+OO2df <- function(datafile) {
+   ## Description:
+   ##   
+   ##   
+   ##   
+   ##   
+   ## Usage:
+   ##   OO2df(datafile)
+   ## Arguments:
+   ##   datafile: text string with full path to TXT file
+   ##             containing single or multiple data ranges
+   ## Value:
+   ##   Dataframe with the following columns:
+   ##   $ sampleid        : chr
+   ##   $ wavelength      : num
+   ##   $ counts          : num
+   ##   $ 
+   ##   $ 
+   ##   $ 
+   ##   $ 
+   ##   $ 
+   ##   $ 
+   ##   $ 
+   ##   $ 
+   ##   $ 
+   ##   $ 
+   ##   $ cps ?
+   #
+#    range.header.start.rexp <- "^; \\(Data for Range" #regexp
+#    range.header.end.rexp <- "^_2THETA[^=]" #regexp
+   
+   range.data.start.rexp <- ">+Begin[\\s\\w]*<+"
+   range.data.end.rexp <- ">+End[\\s\\w]*<+"
+   
+   # Read the input file
+   dfile <- file(datafile, "r")
+   # Note that readLines apparently completely skips empty lines. 
+   # That causes line numbers to not match between source and f vector.
+   f <- readLines(dfile, n=-1) # read _all_ lines from data file
+   close(dfile)
+   
+   # Fetch a sampleid for the current job
+   sampleid <- ProvideSampleId(datafile)
+   
+#    # Look for header start rows
+#    range.header.start.rows <- which(regexpr(range.header.start.rexp, f) == 1)
+#    # Look for header end rows
+#    range.header.end.rows <- which(regexpr(range.header.end.rexp, f) == 1)
+   
+   # Look for data start marker line
+   range.data.start.rows <- which(regexpr(range.data.start.rexp, f, perl = TRUE) == 1) + 1
+   # Look for data end marker line
+   range.data.end.rows <- which(regexpr(range.data.end.rexp, f, perl = TRUE) == 1) - 1
+   
+   # Calculate number of ranges
+   ranges.total <- 
+      ifelse(length(range.data.start.rows) == length(range.data.end.rows), 
+             length(range.data.start.rows),
+             NA) #why would they not be equal?
+   if (is.na(ranges.total)) {
+      # Obviously something bad happened.
+      # Do something about it. echo an error message perhaps.
+      # But why would they not be equal?
+      
+   }
+   
+   
+   
+   # Header always precedes start of data
+   range.header.end.rows <- range.data.start.rows - 2
+   if (ranges.total > 1) {
+      range.header.start.rows <- c(1, range.data.end.rows[2:length(range.data.end.rows)])
+   } else {
+      # Data in fact only contains one range
+      range.header.start.rows <- 1
+   }
+   
+   # Extract headers (as-is) and put them in a list (by range)
+   headers.raw <- list()
+   for (range in 1:ranges.total) {
+      headers.raw[[range]] <- f[range.header.start.rows[range]:range.header.end.rows[range]]
+   }
+
+   ####
+   
+   # Extract data (as-is) and put it an list (by range)
+   data.raw <- list()
+   for (range in 1:ranges.total) {
+      data.raw[[range]] <- f[range.data.start.rows[range]:range.data.end.rows[range]]
+      # Replace commas by dots
+      data.raw[[range]] <- gsub(",", "\\.", data.raw[[range]])
+   }   
+   
+   
+   # Specify header parameters to include in dataframe
+   header.param.rexp <- 
+      c(DateTime           = "^Date:", 
+        IntegrationTime    = "^Integration Time \\(usec\\):", 
+        n_Averaged         = "^Spectra Averaged:",
+        Boxcar             = "^Boxcar Smoothing:",
+        CorrElectricDark   = "^Correct for Electrical Dark:",
+        StrobeLampEnabled  = "^Strobe/Lamp Enabled:",
+        CorrDetectorNonLin = "^Correct for Detector Non-linearity:",
+        CorrStrayLight     = "^Correct for Stray Light:",
+        n_Pixels           = "^Number of Pixels")
+   
+   # Collect data and header parameters in dataframes, by range in a list
+   data <- list()
+   for (range in 1:ranges.total) {
+      zz <- textConnection(data.raw[[range]], "r")
+      data[[range]] <- data.frame(stringsAsFactors = FALSE,
+                                  sampleid,
+                                  int2padstr(range, "0", 3),
+                                  matrix(scan(zz, what = character()), ncol = 2, byrow = T))
+      close(zz)
+      # Collect header parameters
+      for (param in 1:length(header.param.rexp)) {
+         data[[range]] <- 
+            cbind(stringsAsFactors = FALSE, 
+                  data[[range]], 
+                  # Matches any word, digit, plus, or minus character 
+                  # surrounded by parentheses at the end of the string
+                  sub("\\s\\([\\w\\d\\+\\-]+\\)$", "",
+                      strsplit(headers.raw[[range]][which(regexpr(unname(header.param.rexp[param]), 
+                               headers.raw[[range]]) == 1)], ": ")[[1]][2], 
+                      perl = TRUE))
+      }
+      names(data[[range]]) <- 
+         c("sampleid", "range", "wavelength", "intensity", names(header.param.rexp))
+   }
+                                                                   
+   # Create a unified dataframe
+   data.df <- data[[1]]
+   if (ranges.total > 1) {
+      for (range in 2:ranges.total) {
+         data.df <- rbind(data.df, data[[range]])
+      }
+   }
+
+   # Convert the DateTime column to more legibly (and compact) format
+   data.df$DateTime <- 
+      format(as.POSIXct(gsub("\\s[A-Z]{4}\\s", " ", data.df$Date), 
+                        format = "%a %b %d %H:%M:%S %Y"), 
+             format = "%Y-%m-%d %H:%M:%S")
+   # Convert wavelength and intensity to numeric format
+   mode(data.df$wavelength) <- "numeric"
+   mode(data.df$intensity) <- "numeric"
+   
+   return(data.df)
+}
\ No newline at end of file
diff --git a/XRD-TF/mraw2df.R b/XRD-TF/mraw2df.R
new file mode 100644
index 0000000..fd7631f
--- /dev/null
+++ b/XRD-TF/mraw2df.R
@@ -0,0 +1,154 @@
+source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")
+source("/home/taha/chepec/chetex/common/R/common/int2padstr.R")
+
+
+##################################################
+#!!!!!!!!!!!!!!!!!# mraw2df #!!!!!!!!!!!!!!!!!!!!#
+##################################################
+mraw2df <- function(txtfile) {
+   ## Description:
+   ##   Reads TXT files with one or multiple ranges (XRD commander "save as txt")
+   ##   Extracts both data (thth, intensity) and parameters
+   ## Usage:
+   ##   mraw2df(txtfile)
+   ## Arguments:
+   ##   txtfile: text string with full path to TXT file, which may
+   ##            containing single or multiple data ranges
+   ## Value:
+   ##   Dataframe with the following columns:
+   ##   $ sampleid        : chr
+   ##   $ thth            : num
+   ##   $ counts (or cps) : num
+   ##   $ steptime        : num
+   ##   $ stepsize        : num
+   ##   $ theta           : num
+   ##   $ khi             : num
+   ##   $ phi             : num
+   ##   $ x               : num
+   ##   $ y               : num
+   ##   $ z               : num
+   ##   $ divergence      : num
+   ##   $ antiscatter     : num
+   ##   $ cps (or counts) : num
+   #
+   range.header.start.rexp <- "^; \\(Data for Range" #regexp
+   range.header.end.rexp <- "^_2THETA[^=]" #regexp
+   
+   # Read the input multirange file
+   ufile <- file(uxdfile, "r")
+   # Note that readLines apparently completely skips empty lines. 
+   # In that case line numbers do not match between source and f.
+   f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
+   close(ufile)
+   
+   # Fetch a sampleid for the current job
+   sampleid <- ProvideSampleId(uxdfile)
+   
+   # Look for header start rows
+   range.header.start.rows <- which(regexpr(range.header.start.rexp, f) == 1)
+   # Look for header end rows
+   range.header.end.rows <- which(regexpr(range.header.end.rexp, f) == 1)
+   
+   # Calculate number of ranges
+   ranges.total <- 
+      ifelse(length(range.header.start.rows) == length(range.header.end.rows), 
+             length(range.header.start.rows),
+             NA) #why would they not be equal?
+   if (is.na(ranges.total)) {
+      # Obviously something bad happened.
+      # Do something about it. echo an error message perhaps.
+      # But why would they not be equal?
+      
+   }
+         
+   # Determine whether we have COUNTS or COUNTS PER SECOND in current UXD-file
+   # Assuming it is the same for all ranges in this job (a safe assumption).
+   if (f[range.header.end.rows][1] == "_2THETACOUNTS") {
+      # we got counts
+      counts.flag <- TRUE
+      cps.flag <- FALSE
+   }
+   if (f[range.header.end.rows][1] == "_2THETACPS") {
+      # we got counts per second
+      counts.flag <-FALSE
+      cps.flag <- TRUE
+   }
+   
+   # Extract headers (as-is) and put them in a list (by range)
+   headers.raw <- list()
+   for (range in 1:ranges.total) {
+      headers.raw[[range]] <- f[range.header.start.rows[range]:range.header.end.rows[range]]
+   }
+
+   # Data always start on the row after header end
+   range.data.start.rows <- range.header.end.rows + 1
+   # Data end rows precedes header with one row, except for the first range
+   # But only if data contained more than one range, obviously. Let's make the code check for that
+   if (ranges.total > 1) {
+      range.data.end.rows <- c(range.header.start.rows[2:length(range.header.start.rows)] - 1, length(f))
+   } else {
+      # Data in fact only contains one range
+      range.data.end.rows <- length(f)
+   }
+   
+   ####
+   
+   # Extract data (as-is) and put it an list (by range)
+   data.raw <- list()
+   for (range in 1:ranges.total) {
+      data.raw[[range]] <- f[range.data.start.rows[range]:range.data.end.rows[range]]
+   }
+   
+   # Specify header parameters to include in dataframe
+   header.param.rexp <- c(steptime = "^_STEPTIME=", 
+                          stepsize = "^_STEPSIZE=", 
+                          theta = "^_THETA=",
+                          khi = "^_KHI=",
+                          phi = "^_PHI=",
+                          x = "^_X=",
+                          y = "^_Y=",
+                          z = "^_Z=",
+                          divergence = "^_DIVERGENCE=",
+                          antiscatter = "^_ANTISCATTER=")
+   
+   # Collect data and header parameters in dataframes, by range in a list
+   data <- list()
+   for (range in 1:ranges.total) {
+      zz <- textConnection(data.raw[[range]], "r")
+      data[[range]] <- data.frame(stringsAsFactors = F,
+                                  sampleid,
+                                  int2padstr(range, "0", 3),
+                                  matrix(scan(zz, what = numeric()), ncol = 2, byrow = T))
+      close(zz)
+      # Collect header parameters
+      for (param in 1:length(header.param.rexp)) {
+         data[[range]] <- cbind(data[[range]], 
+            as.numeric(strsplit(headers.raw[[range]][which(regexpr(unname(header.param.rexp[param]), 
+               headers.raw[[range]]) == 1)], "=")[[1]][2]))
+      }
+      names(data[[range]]) <- 
+         c("sampleid", "range", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp))
+   }
+                                                                   
+   # Calculate the other of the pair counts <-> cps
+   if (counts.flag) {
+      for (range in 1:ranges.total) {
+         data[[range]] <- cbind(data[[range]], cps = data[[range]]$counts / data[[range]]$steptime)
+      }
+   }
+   if (cps.flag) {
+      for (range in 1:ranges.total) {
+         data[[range]] <- cbind(data[[range]], counts = data[[range]]$cps * data[[range]]$steptime)
+      }
+   }
+      
+   # Return a unified dataframe
+   data.df <- data[[1]]
+   if (ranges.total > 1) {
+      for (range in 2:ranges.total) {
+         data.df <- rbind(data.df, data[[range]])
+      }
+   }
+   
+   return(data.df)
+}
diff --git a/XRD-TF/muxd2df.R b/XRD-TF/muxd2df.R
index 9015530..896f03c 100644
--- a/XRD-TF/muxd2df.R
+++ b/XRD-TF/muxd2df.R
@@ -1,4 +1,5 @@
 source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")
+source("/home/taha/chepec/chetex/common/R/common/int2padstr.R")
 
 
 ##################################################
@@ -13,7 +14,7 @@ muxd2df <- function(uxdfile) {
    ## Usage:
    ##   muxd2df(uxdfile)
    ## Arguments:
-   ##   uxdfile: text string with full path to UXD file
+   ##   uxdfile: text string with full path to UXD file, which may
    ##            containing single or multiple data ranges
    ## Value:
    ##   Dataframe with the following columns:
@@ -118,6 +119,7 @@ muxd2df <- function(uxdfile) {
       zz <- textConnection(data.raw[[range]], "r")
       data[[range]] <- data.frame(stringsAsFactors = F,
                                   sampleid,
+                                  int2padstr(range, "0", 3),
                                   matrix(scan(zz, what = numeric()), ncol = 2, byrow = T))
       close(zz)
       # Collect header parameters
@@ -127,7 +129,7 @@ muxd2df <- function(uxdfile) {
                headers.raw[[range]]) == 1)], "=")[[1]][2]))
       }
       names(data[[range]]) <- 
-         c("sampleid", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp))
+         c("sampleid", "range", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp))
    }
                                                                    
    # Calculate the other of the pair counts <-> cps
diff --git a/common/AVS2SHE.R b/common/AVS2SHE.R
index 6fc2d06..f23d849 100644
--- a/common/AVS2SHE.R
+++ b/common/AVS2SHE.R
@@ -5,4 +5,4 @@ AVS2SHE <- function(avs) {
    # Converts from absolute vacuum scale (AVS) to SHE scale
    she <- -(4.5 + avs)
    return(she)
-}
\ No newline at end of file
+}
diff --git a/common/Celsius2Kelvin.R b/common/Celsius2Kelvin.R
index 4c06661..2ab5a4c 100644
--- a/common/Celsius2Kelvin.R
+++ b/common/Celsius2Kelvin.R
@@ -11,4 +11,4 @@ Celsius2Kelvin <- function(Celsius) {
    }
    Kelvin <- Celsius + 273.15
    return(Kelvin)
-}
\ No newline at end of file
+}
diff --git a/common/ConvertRefPot.R b/common/ConvertRefPot.R
index 9c6391c..f8f1427 100644
--- a/common/ConvertRefPot.R
+++ b/common/ConvertRefPot.R
@@ -71,4 +71,4 @@ ConvertRefPot <- function(argpotential, argrefscale, valuerefscale) {
          scale.names[[decision.vector["valueref"]]][1])
    }
    return(rnpotential)
-}
\ No newline at end of file
+}
diff --git a/common/ConvertRefPotEC.R b/common/ConvertRefPotEC.R
index 4b4ec54..7b28d43 100644
--- a/common/ConvertRefPotEC.R
+++ b/common/ConvertRefPotEC.R
@@ -45,4 +45,4 @@ ConvertRefPotEC <- function(argpotential, argrefscale, valuerefscale) {
       valuepotential <- NA
    }
    return(valuepotential)
-}
\ No newline at end of file
+}
diff --git a/common/Kelvin2Celsius.R b/common/Kelvin2Celsius.R
index 2c688b4..ddb1dee 100644
--- a/common/Kelvin2Celsius.R
+++ b/common/Kelvin2Celsius.R
@@ -11,4 +11,4 @@ Kelvin2Celsius <- function(Kelvin) {
    }
    Celsius <- Kelvin - 273.15
    return(Celsius)
-}
\ No newline at end of file
+}
diff --git a/common/LoadRData2Variable.R b/common/LoadRData2Variable.R
index ee2795e..3e8cd9f 100644
--- a/common/LoadRData2Variable.R
+++ b/common/LoadRData2Variable.R
@@ -3,4 +3,4 @@
 # NOT TESTED for when the R-data file contains many variables
 LoadRData2Variable <- function(FullPathToRData) {
    return(eval(parse(text = load(FullPathToRData))))
-}
\ No newline at end of file
+}
diff --git a/common/ProvideSampleId.R b/common/ProvideSampleId.R
index 6f1449e..24a9840 100644
--- a/common/ProvideSampleId.R
+++ b/common/ProvideSampleId.R
@@ -31,4 +31,4 @@ ProvideSampleId <- function (pathexpfile) {
    ####
    
    return(sampleid)
-}
\ No newline at end of file
+}
diff --git a/common/SHE2AVS.R b/common/SHE2AVS.R
index 31c7e48..f343619 100644
--- a/common/SHE2AVS.R
+++ b/common/SHE2AVS.R
@@ -5,4 +5,4 @@ SHE2AVS <- function(she) {
    # Converts from SHE scale to absolute vacuum (AVS) scale
    avs <- -(4.5 + she)
    return(avs)
-}
\ No newline at end of file
+}
diff --git a/common/as.degrees.R b/common/as.degrees.R
index 3a80188..fad9362 100644
--- a/common/as.degrees.R
+++ b/common/as.degrees.R
@@ -5,4 +5,4 @@ as.degrees <- function(radians) {
    # Converts from radians to degrees
    degrees <- radians * (180 / pi)
    return(degrees)
-}
\ No newline at end of file
+}
diff --git a/common/as.radians.R b/common/as.radians.R
index 7f643cc..a02fcae 100644
--- a/common/as.radians.R
+++ b/common/as.radians.R
@@ -5,4 +5,4 @@ as.radians <- function(degrees) {
    # Converts from degrees to radians
    radians <- degrees * (pi / 180)
    return(radians)
-}
\ No newline at end of file
+}
diff --git a/common/int2padstr.R b/common/int2padstr.R
index aa9aba4..2a1c1c5 100644
--- a/common/int2padstr.R
+++ b/common/int2padstr.R
@@ -18,4 +18,4 @@ int2padstr <- function (ii, pchr, w) {
    ## Value:
    ##   A character string or a vector of character strings
    gsub(" ", pchr, formatC(ii, format="s", mode="character", width = w))
-}
\ No newline at end of file
+}
diff --git a/common/is.wholenumber.R b/common/is.wholenumber.R
index ff8093a..d742618 100644
--- a/common/is.wholenumber.R
+++ b/common/is.wholenumber.R
@@ -2,4 +2,4 @@
 
 is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) {
    abs(x - round(x)) < tol
-}
\ No newline at end of file
+}
diff --git a/common/molarity2mass.R b/common/molarity2mass.R
index 5eb8d49..4277849 100644
--- a/common/molarity2mass.R
+++ b/common/molarity2mass.R
@@ -13,4 +13,4 @@ molarity2mass <- function(formulamass, volume, molarity) {
    # Unit check:
    # [g * mol-1] * [liter] * [mole * liter-1] = [g]
    return(mass)
-}
\ No newline at end of file
+}
diff --git a/common/roundup.R b/common/roundup.R
index 21c02ca..8fa1b50 100644
--- a/common/roundup.R
+++ b/common/roundup.R
@@ -3,4 +3,4 @@
 
 roundup <- function(x, nearest=1000) {
    ceiling(max(x+10^-9)/nearest + 1/nearest)*nearest
-}
\ No newline at end of file
+}
diff --git a/init.R b/init.R
index 1024552..967c5c5 100644
--- a/init.R
+++ b/init.R
@@ -1,13 +1,10 @@
 # To source a bunch of files in the same directory
 
 sourceDir <- function(path, trace = TRUE) {
-   for (nm in list.files(path, pattern = "\\.[Rr]$")) {
-      if(trace) {
-         cat(nm,":")
-      }
-      source(file.path(path, nm))
-      if(trace) {
-         cat("\n")
-      }
+   lsDir <- list.files(path, pattern = "\\.[Rr]$")
+   for (i in lsDir) {
+      if(trace) {cat(i, ":")}
+      source(file.path(path, i))
+      if(trace) {cat("\n")}
    }
-}
\ No newline at end of file
+}