Ok, no commits since a while. Last change was remake of init.R to improve readability.

15 years ago · f0ed360c0d
parent 70c39b9145
commit f0ed360c0d
21 changed files with 340 additions and 27 deletions
--- a/INCA/eds2df.R
+++ b/INCA/eds2df.R
@ -1,3 +1,6 @@
 ## This function should be renamed to EMSA2df(). The file it reads is a so-called EMSA spectral data file.
 # /TA, 111110
 eds2df <- function(edstxtfile) {
   ## Description:
   ##   Reads EDS textfile from INCA EDS.
--- a/OOHR2000/OO2df.R
+++ b/OOHR2000/OO2df.R
@ -0,0 +1,157 @@
 source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")
 source("/home/taha/chepec/chetex/common/R/common/int2padstr.R")
 ##################################################
 #################### OO2df #######################
 ##################################################
 OO2df <- function(datafile) {
   ## Description:
   ##   
   ##   
   ##   
   ##   
   ## Usage:
   ##   OO2df(datafile)
   ## Arguments:
   ##   datafile: text string with full path to TXT file
   ##             containing single or multiple data ranges
   ## Value:
   ##   Dataframe with the following columns:
   ##   $ sampleid        : chr
   ##   $ wavelength      : num
   ##   $ counts          : num
   ##   $ 
   ##   $ 
   ##   $ 
   ##   $ 
   ##   $ 
   ##   $ 
   ##   $ 
   ##   $ 
   ##   $ 
   ##   $ 
   ##   $ cps ?
   #
 #    range.header.start.rexp <- "^; \\(Data for Range" #regexp
 #    range.header.end.rexp <- "^_2THETA[^=]" #regexp
   range.data.start.rexp <- ">+Begin[\\s\\w]*<+"
   range.data.end.rexp <- ">+End[\\s\\w]*<+"
   # Read the input file
   dfile <- file(datafile, "r")
   # Note that readLines apparently completely skips empty lines. 
   # That causes line numbers to not match between source and f vector.
   f <- readLines(dfile, n=-1) # read _all_ lines from data file
   close(dfile)
   # Fetch a sampleid for the current job
   sampleid <- ProvideSampleId(datafile)
 #    # Look for header start rows
 #    range.header.start.rows <- which(regexpr(range.header.start.rexp, f) == 1)
 #    # Look for header end rows
 #    range.header.end.rows <- which(regexpr(range.header.end.rexp, f) == 1)
   # Look for data start marker line
   range.data.start.rows <- which(regexpr(range.data.start.rexp, f, perl = TRUE) == 1) + 1
   # Look for data end marker line
   range.data.end.rows <- which(regexpr(range.data.end.rexp, f, perl = TRUE) == 1) - 1
   # Calculate number of ranges
   ranges.total <- 
      ifelse(length(range.data.start.rows) == length(range.data.end.rows), 
             length(range.data.start.rows),
             NA) #why would they not be equal?
   if (is.na(ranges.total)) {
      # Obviously something bad happened.
      # Do something about it. echo an error message perhaps.
      # But why would they not be equal?
   }
   # Header always precedes start of data
   range.header.end.rows <- range.data.start.rows - 2
   if (ranges.total > 1) {
      range.header.start.rows <- c(1, range.data.end.rows[2:length(range.data.end.rows)])
   } else {
      # Data in fact only contains one range
      range.header.start.rows <- 1
   }
   # Extract headers (as-is) and put them in a list (by range)
   headers.raw <- list()
   for (range in 1:ranges.total) {
      headers.raw[[range]] <- f[range.header.start.rows[range]:range.header.end.rows[range]]
   }
   ####
   # Extract data (as-is) and put it an list (by range)
   data.raw <- list()
   for (range in 1:ranges.total) {
      data.raw[[range]] <- f[range.data.start.rows[range]:range.data.end.rows[range]]
      # Replace commas by dots
      data.raw[[range]] <- gsub(",", "\\.", data.raw[[range]])
   }   
   # Specify header parameters to include in dataframe
   header.param.rexp <- 
      c(DateTime           = "^Date:", 
        IntegrationTime    = "^Integration Time \\(usec\\):", 
        n_Averaged         = "^Spectra Averaged:",
        Boxcar             = "^Boxcar Smoothing:",
        CorrElectricDark   = "^Correct for Electrical Dark:",
        StrobeLampEnabled  = "^Strobe/Lamp Enabled:",
        CorrDetectorNonLin = "^Correct for Detector Non-linearity:",
        CorrStrayLight     = "^Correct for Stray Light:",
        n_Pixels           = "^Number of Pixels")
   # Collect data and header parameters in dataframes, by range in a list
   data <- list()
   for (range in 1:ranges.total) {
      zz <- textConnection(data.raw[[range]], "r")
      data[[range]] <- data.frame(stringsAsFactors = FALSE,
                                  sampleid,
                                  int2padstr(range, "0", 3),
                                  matrix(scan(zz, what = character()), ncol = 2, byrow = T))
      close(zz)
      # Collect header parameters
      for (param in 1:length(header.param.rexp)) {
         data[[range]] <- 
            cbind(stringsAsFactors = FALSE, 
                  data[[range]], 
                  # Matches any word, digit, plus, or minus character 
                  # surrounded by parentheses at the end of the string
                  sub("\\s\\([\\w\\d\\+\\-]+\\)$", "",
                      strsplit(headers.raw[[range]][which(regexpr(unname(header.param.rexp[param]), 
                               headers.raw[[range]]) == 1)], ": ")[[1]][2], 
                      perl = TRUE))
      }
      names(data[[range]]) <- 
         c("sampleid", "range", "wavelength", "intensity", names(header.param.rexp))
   }
   # Create a unified dataframe
   data.df <- data[[1]]
   if (ranges.total > 1) {
      for (range in 2:ranges.total) {
         data.df <- rbind(data.df, data[[range]])
      }
   }
   # Convert the DateTime column to more legibly (and compact) format
   data.df$DateTime <- 
      format(as.POSIXct(gsub("\\s[A-Z]{4}\\s", " ", data.df$Date), 
                        format = "%a %b %d %H:%M:%S %Y"), 
             format = "%Y-%m-%d %H:%M:%S")
   # Convert wavelength and intensity to numeric format
   mode(data.df$wavelength) <- "numeric"
   mode(data.df$intensity) <- "numeric"
   return(data.df)
 }
--- a/XRD-TF/mraw2df.R
+++ b/XRD-TF/mraw2df.R
@ -0,0 +1,154 @@
 source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")
 source("/home/taha/chepec/chetex/common/R/common/int2padstr.R")
 ##################################################
 #!!!!!!!!!!!!!!!!!# mraw2df #!!!!!!!!!!!!!!!!!!!!#
 ##################################################
 mraw2df <- function(txtfile) {
   ## Description:
   ##   Reads TXT files with one or multiple ranges (XRD commander "save as txt")
   ##   Extracts both data (thth, intensity) and parameters
   ## Usage:
   ##   mraw2df(txtfile)
   ## Arguments:
   ##   txtfile: text string with full path to TXT file, which may
   ##            containing single or multiple data ranges
   ## Value:
   ##   Dataframe with the following columns:
   ##   $ sampleid        : chr
   ##   $ thth            : num
   ##   $ counts (or cps) : num
   ##   $ steptime        : num
   ##   $ stepsize        : num
   ##   $ theta           : num
   ##   $ khi             : num
   ##   $ phi             : num
   ##   $ x               : num
   ##   $ y               : num
   ##   $ z               : num
   ##   $ divergence      : num
   ##   $ antiscatter     : num
   ##   $ cps (or counts) : num
   #
   range.header.start.rexp <- "^; \\(Data for Range" #regexp
   range.header.end.rexp <- "^_2THETA[^=]" #regexp
   # Read the input multirange file
   ufile <- file(uxdfile, "r")
   # Note that readLines apparently completely skips empty lines. 
   # In that case line numbers do not match between source and f.
   f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
   close(ufile)
   # Fetch a sampleid for the current job
   sampleid <- ProvideSampleId(uxdfile)
   # Look for header start rows
   range.header.start.rows <- which(regexpr(range.header.start.rexp, f) == 1)
   # Look for header end rows
   range.header.end.rows <- which(regexpr(range.header.end.rexp, f) == 1)
   # Calculate number of ranges
   ranges.total <- 
      ifelse(length(range.header.start.rows) == length(range.header.end.rows), 
             length(range.header.start.rows),
             NA) #why would they not be equal?
   if (is.na(ranges.total)) {
      # Obviously something bad happened.
      # Do something about it. echo an error message perhaps.
      # But why would they not be equal?
   }
   # Determine whether we have COUNTS or COUNTS PER SECOND in current UXD-file
   # Assuming it is the same for all ranges in this job (a safe assumption).
   if (f[range.header.end.rows][1] == "_2THETACOUNTS") {
      # we got counts
      counts.flag <- TRUE
      cps.flag <- FALSE
   }
   if (f[range.header.end.rows][1] == "_2THETACPS") {
      # we got counts per second
      counts.flag <-FALSE
      cps.flag <- TRUE
   }
   # Extract headers (as-is) and put them in a list (by range)
   headers.raw <- list()
   for (range in 1:ranges.total) {
      headers.raw[[range]] <- f[range.header.start.rows[range]:range.header.end.rows[range]]
   }
   # Data always start on the row after header end
   range.data.start.rows <- range.header.end.rows + 1
   # Data end rows precedes header with one row, except for the first range
   # But only if data contained more than one range, obviously. Let's make the code check for that
   if (ranges.total > 1) {
      range.data.end.rows <- c(range.header.start.rows[2:length(range.header.start.rows)] - 1, length(f))
   } else {
      # Data in fact only contains one range
      range.data.end.rows <- length(f)
   }
   ####
   # Extract data (as-is) and put it an list (by range)
   data.raw <- list()
   for (range in 1:ranges.total) {
      data.raw[[range]] <- f[range.data.start.rows[range]:range.data.end.rows[range]]
   }
   # Specify header parameters to include in dataframe
   header.param.rexp <- c(steptime = "^_STEPTIME=", 
                          stepsize = "^_STEPSIZE=", 
                          theta = "^_THETA=",
                          khi = "^_KHI=",
                          phi = "^_PHI=",
                          x = "^_X=",
                          y = "^_Y=",
                          z = "^_Z=",
                          divergence = "^_DIVERGENCE=",
                          antiscatter = "^_ANTISCATTER=")
   # Collect data and header parameters in dataframes, by range in a list
   data <- list()
   for (range in 1:ranges.total) {
      zz <- textConnection(data.raw[[range]], "r")
      data[[range]] <- data.frame(stringsAsFactors = F,
                                  sampleid,
                                  int2padstr(range, "0", 3),
                                  matrix(scan(zz, what = numeric()), ncol = 2, byrow = T))
      close(zz)
      # Collect header parameters
      for (param in 1:length(header.param.rexp)) {
         data[[range]] <- cbind(data[[range]], 
            as.numeric(strsplit(headers.raw[[range]][which(regexpr(unname(header.param.rexp[param]), 
               headers.raw[[range]]) == 1)], "=")[[1]][2]))
      }
      names(data[[range]]) <- 
         c("sampleid", "range", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp))
   }
   # Calculate the other of the pair counts <-> cps
   if (counts.flag) {
      for (range in 1:ranges.total) {
         data[[range]] <- cbind(data[[range]], cps = data[[range]]$counts / data[[range]]$steptime)
      }
   }
   if (cps.flag) {
      for (range in 1:ranges.total) {
         data[[range]] <- cbind(data[[range]], counts = data[[range]]$cps * data[[range]]$steptime)
      }
   }
   # Return a unified dataframe
   data.df <- data[[1]]
   if (ranges.total > 1) {
      for (range in 2:ranges.total) {
         data.df <- rbind(data.df, data[[range]])
      }
   }
   return(data.df)
 }
--- a/XRD-TF/muxd2df.R
+++ b/XRD-TF/muxd2df.R
@ -1,4 +1,5 @@
 source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")
 source("/home/taha/chepec/chetex/common/R/common/int2padstr.R")
 ##################################################
@ -13,7 +14,7 @@ muxd2df <- function(uxdfile) {
   ## Usage:
   ##   muxd2df(uxdfile)
   ## Arguments:
-   ##   uxdfile: text string with full path to UXD file
+   ##   uxdfile: text string with full path to UXD file, which may
   ##            containing single or multiple data ranges
   ## Value:
   ##   Dataframe with the following columns:
@ -118,6 +119,7 @@ muxd2df <- function(uxdfile) {
      zz <- textConnection(data.raw[[range]], "r")
      data[[range]] <- data.frame(stringsAsFactors = F,
                                  sampleid,
                                  int2padstr(range, "0", 3),
                                  matrix(scan(zz, what = numeric()), ncol = 2, byrow = T))
      close(zz)
      # Collect header parameters
@ -127,7 +129,7 @@ muxd2df <- function(uxdfile) {
               headers.raw[[range]]) == 1)], "=")[[1]][2]))
      }
      names(data[[range]]) <- 
-         c("sampleid", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp))
+         c("sampleid", "range", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp))
   }
   # Calculate the other of the pair counts <-> cps
--- a/init.R
+++ b/init.R
@ -1,13 +1,10 @@
 # To source a bunch of files in the same directory
 sourceDir <- function(path, trace = TRUE) {
-   for (nm in list.files(path, pattern = "\\.[Rr]$")) {
+   lsDir <- list.files(path, pattern = "\\.[Rr]$")
-      if(trace) {
+   for (i in lsDir) {
-         cat(nm,":")
+      if(trace) {cat(i, ":")}
-      }
+      source(file.path(path, i))
-      source(file.path(path, nm))
+      if(trace) {cat("\n")}
      if(trace) {
         cat("\n")
      }
   }
 }