# R-common/XRD-TF/muxd2df.R

source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")
source("/home/taha/chepec/chetex/common/R/common/int2padstr.R")


##################################################
################### muxd2df ######################
##################################################
muxd2df <- function(uxdfile) {
   ## Description:
   ##   Reads UXD files with one or multiple ranges (converted using XCH v1.0).
   ##   Extracts both data (thth, intensity) and header parameters.
   ##   Also calculates cps if counts are present, and vice versa
   ##   (note that this depends on specific strings in the UXD format).
   ## Usage:
   ##   muxd2df(uxdfile)
   ## Arguments:
   ##   uxdfile: text string with full path to UXD file, which may
   ##            contain single or multiple data ranges
   ## Value:
   ##   Dataframe (all ranges stacked row-wise) with the following columns:
   ##   $ sampleid        : chr  (as returned by ProvideSampleId(uxdfile))
   ##   $ range           : range number, formatted by int2padstr(i, "0", 3)
   ##   $ thth            : num
   ##   $ counts (or cps) : num  (whichever the file contains)
   ##   $ steptime        : num
   ##   $ stepsize        : num
   ##   $ theta           : num
   ##   $ khi             : num
   ##   $ phi             : num
   ##   $ x               : num
   ##   $ y               : num
   ##   $ z               : num
   ##   $ divergence      : num
   ##   $ antiscatter     : num
   ##   $ cps (or counts) : num  (calculated from the other one and steptime)
   ##   A header parameter that is absent from a range header is returned as NA.
   ## Errors:
   ##   Stops with an explanatory message if the number of range header starts
   ##   and ends differ, if the file contains no ranges, if a range has no data
   ##   rows, or if the intensity column is neither COUNTS nor CPS.
   #
   range.header.start.rexp <- "^; \\(Data for Range" #regexp
   range.header.end.rexp <- "^_2THETA[^=]" #regexp

   # Read _all_ lines of the input (multirange) file.
   # Passing the path lets readLines open and close the connection itself,
   # so nothing is left open if reading fails.
   # Blank lines are kept as empty strings, so indices into f correspond
   # to line numbers in the file.
   f <- readLines(uxdfile, n = -1)

   # Fetch a sampleid for the current job
   sampleid <- ProvideSampleId(uxdfile)

   # Both patterns are anchored at line start, so a plain grep suffices
   range.header.start.rows <- grep(range.header.start.rexp, f)
   range.header.end.rows <- grep(range.header.end.rexp, f)

   # Every range must have exactly one header start and one header end
   ranges.total <- length(range.header.start.rows)
   if (ranges.total != length(range.header.end.rows)) {
      stop("Malformed UXD file '", uxdfile, "': found ", ranges.total,
           " range header start line(s) but ",
           length(range.header.end.rows), " header end line(s)",
           call. = FALSE)
   }
   if (ranges.total == 0) {
      stop("No data ranges found in UXD file '", uxdfile, "'", call. = FALSE)
   }

   # Determine whether we have COUNTS or COUNTS PER SECOND in current UXD-file
   # Assuming it is the same for all ranges in this job.
   # Trailing whitespace is stripped so that it cannot defeat the comparison.
   intensity.header <- sub("[[:space:]]+$", "", f[range.header.end.rows[1]])
   if (intensity.header == "_2THETACOUNTS") {
      counts.flag <- TRUE
   } else if (intensity.header == "_2THETACPS") {
      counts.flag <- FALSE
   } else {
      stop("Unrecognised intensity column header '", intensity.header,
           "' in UXD file '", uxdfile,
           "': expected _2THETACOUNTS or _2THETACPS", call. = FALSE)
   }
   intensity.name <- if (counts.flag) "counts" else "cps"

   # Data always start on the row after header end
   range.data.start.rows <- range.header.end.rows + 1
   # Data of a range end on the row before the next range header starts;
   # the last (or only) range runs to the end of the file.
   range.data.end.rows <- c(range.header.start.rows[-1] - 1, length(f))

   # Specify header parameters to include in dataframe
   header.param.rexp <- c(steptime = "^_STEPTIME=",
                          stepsize = "^_STEPSIZE=",
                          theta = "^_THETA=",
                          khi = "^_KHI=",
                          phi = "^_PHI=",
                          x = "^_X=",
                          y = "^_Y=",
                          z = "^_Z=",
                          divergence = "^_DIVERGENCE=",
                          antiscatter = "^_ANTISCATTER=")

   # Build one dataframe per range (data plus header parameters)
   range.frames <- lapply(seq_len(ranges.total), function(i) {
      header.lines <- f[range.header.start.rows[i]:range.header.end.rows[i]]

      if (range.data.start.rows[i] > range.data.end.rows[i]) {
         stop("Range ", i, " in UXD file '", uxdfile,
              "' contains no data rows", call. = FALSE)
      }
      data.lines <- f[range.data.start.rows[i]:range.data.end.rows[i]]

      # Parse the whitespace-separated (thth, intensity) pairs
      zz <- textConnection(data.lines, "r")
      on.exit(close(zz), add = TRUE)
      values <- matrix(scan(zz, what = numeric()), ncol = 2, byrow = TRUE)

      range.df <- data.frame(sampleid = sampleid,
                             range = int2padstr(i, "0", 3),
                             thth = values[, 1],
                             stringsAsFactors = FALSE)
      range.df[[intensity.name]] <- values[, 2]

      # Collect header parameters: the value is whatever follows the "="
      # on the first matching header line, or NA if no line matches
      param.values <- vapply(header.param.rexp, function(rexp) {
         hits <- grep(rexp, header.lines, value = TRUE)
         if (length(hits) == 0) {
            return(NA_real_)
         }
         as.numeric(strsplit(hits[1], "=")[[1]][2])
      }, numeric(1))
      for (param in names(param.values)) {
         range.df[[param]] <- param.values[[param]]
      }

      # Calculate the other of the pair counts <-> cps
      if (counts.flag) {
         range.df[["cps"]] <- range.df[["counts"]] / range.df[["steptime"]]
      } else {
         range.df[["counts"]] <- range.df[["cps"]] * range.df[["steptime"]]
      }

      range.df
   })

   # Return a unified dataframe
   data.df <- do.call(rbind, range.frames)

   return(data.df)
}
Re-wrote muxd2df() from scratch. To make it able to include parameters as well as data in the returned df. muxd2df() now works satisfactorily, outputs parameters such as steptime, theta, and cps as well as thth and counts. Other minor changes mostly updates of source file pointers. 14 years ago			`source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")`
Ok, no commits since a while. Last change was remake of init.R to improve readability. 13 years ago			`source("/home/taha/chepec/chetex/common/R/common/int2padstr.R")`
Re-wrote muxd2df() from scratch. To make it able to include parameters as well as data in the returned df. muxd2df() now works satisfactorily, outputs parameters such as steptime, theta, and cps as well as thth and counts. Other minor changes mostly updates of source file pointers. 14 years ago

Split all files into their separate functions, put each former file in a directory. The idea is to better expose each function, since some functions tended to drown in the larger files. Note that dependencies are not resolved yet. Many functions are probably broken right now, especially those that point to common.R. 14 years ago			`##################################################`
			`################### muxd2df ######################`
			`##################################################`
Re-wrote muxd2df() from scratch. To make it able to include parameters as well as data in the returned df. muxd2df() now works satisfactorily, outputs parameters such as steptime, theta, and cps as well as thth and counts. Other minor changes mostly updates of source file pointers. 14 years ago			`muxd2df <- function(uxdfile) {`
			`## Description:`
xrdpkWrapper.R now correctly handles situations where file already exists, override = TRUE, and run > 1. Previously only the latest run was actually saved, although the created list had the right length. Works correctly and as intended now, as far as I can tell. 13 years ago			`## Reads UXD files with one or multiple ranges (converted using XCH v1.0)`
Re-wrote muxd2df() from scratch. To make it able to include parameters as well as data in the returned df. muxd2df() now works satisfactorily, outputs parameters such as steptime, theta, and cps as well as thth and counts. Other minor changes mostly updates of source file pointers. 14 years ago			`## Extracts both data (thth, intensity) and parameters`
xrdpkWrapper.R now correctly handles situations where file already exists, override = TRUE, and run > 1. Previously only the latest run was actually saved, although the created list had the right length. Works correctly and as intended now, as far as I can tell. 13 years ago			`## Also automatically calculates cps if counts are present, and vice versa`
Re-wrote muxd2df() from scratch. To make it able to include parameters as well as data in the returned df. muxd2df() now works satisfactorily, outputs parameters such as steptime, theta, and cps as well as thth and counts. Other minor changes mostly updates of source file pointers. 14 years ago			`## (note that this depends on specific strings in the UXD format).`
			`## Usage:`
			`## muxd2df(uxdfile)`
			`## Arguments:`
Ok, no commits since a while. Last change was remake of init.R to improve readability. 13 years ago			`## uxdfile: text string with full path to UXD file, which may`
xrdpkWrapper.R now correctly handles situations where file already exists, override = TRUE, and run > 1. Previously only the latest run was actually saved, although the created list had the right length. Works correctly and as intended now, as far as I can tell. 13 years ago			`## containing single or multiple data ranges`
Re-wrote muxd2df() from scratch. To make it able to include parameters as well as data in the returned df. muxd2df() now works satisfactorily, outputs parameters such as steptime, theta, and cps as well as thth and counts. Other minor changes mostly updates of source file pointers. 14 years ago			`## Value:`
			`## Dataframe with the following columns:`
			`## $ sampleid : chr`
			`## $ thth : num`
			`## $ counts (or cps) : num`
			`## $ steptime : num`
			`## $ stepsize : num`
			`## $ theta : num`
			`## $ khi : num`
			`## $ phi : num`
			`## $ x : num`
			`## $ y : num`
			`## $ z : num`
			`## $ divergence : num`
			`## $ antiscatter : num`
			`## $ cps (or counts) : num`
			`#`
			`range.header.start.rexp <- "^; \\(Data for Range" #regexp`
			`range.header.end.rexp <- "^_2THETA[^=]" #regexp`

			`# Read the input multirange file`
			`ufile <- file(uxdfile, "r")`
			`# Note that readLines apparently completely skips empty lines.`
			`# In that case line numbers do not match between source and f.`
			`f <- readLines(ufile, n=-1) #read _all_ lines from UXD file`
			`close(ufile)`

			`# Fetch a sampleid for the current job`
			`sampleid <- ProvideSampleId(uxdfile)`

			`# Look for header start rows`
			`range.header.start.rows <- which(regexpr(range.header.start.rexp, f) == 1)`
			`# Look for header end rows`
			`range.header.end.rows <- which(regexpr(range.header.end.rexp, f) == 1)`

			`# Calculate number of ranges`
xrdpkWrapper.R now correctly handles situations where file already exists, override = TRUE, and run > 1. Previously only the latest run was actually saved, although the created list had the right length. Works correctly and as intended now, as far as I can tell. 13 years ago			`ranges.total <-`
			`ifelse(length(range.header.start.rows) == length(range.header.end.rows),`
			`length(range.header.start.rows),`
			`NA) #why would they not be equal?`
Re-wrote muxd2df() from scratch. To make it able to include parameters as well as data in the returned df. muxd2df() now works satisfactorily, outputs parameters such as steptime, theta, and cps as well as thth and counts. Other minor changes mostly updates of source file pointers. 14 years ago			`if (is.na(ranges.total)) {`
			`# Obviously something bad happened.`
			`# Do something about it. echo an error message perhaps.`
xrdpkWrapper.R now correctly handles situations where file already exists, override = TRUE, and run > 1. Previously only the latest run was actually saved, although the created list had the right length. Works correctly and as intended now, as far as I can tell. 13 years ago			`# But why would they not be equal?`
Re-wrote muxd2df() from scratch. To make it able to include parameters as well as data in the returned df. muxd2df() now works satisfactorily, outputs parameters such as steptime, theta, and cps as well as thth and counts. Other minor changes mostly updates of source file pointers. 14 years ago
			`}`

			`# Determine whether we have COUNTS or COUNTS PER SECOND in current UXD-file`
			`# Assuming it is the same for all ranges in this job (a safe assumption).`
			`if (f[range.header.end.rows][1] == "_2THETACOUNTS") {`
			`# we got counts`
			`counts.flag <- TRUE`
			`cps.flag <- FALSE`
			`}`
			`if (f[range.header.end.rows][1] == "_2THETACPS") {`
			`# we got counts per second`
			`counts.flag <-FALSE`
			`cps.flag <- TRUE`
			`}`

			`# Extract headers (as-is) and put them in a list (by range)`
			`headers.raw <- list()`
			`for (range in 1:ranges.total) {`
			`headers.raw[[range]] <- f[range.header.start.rows[range]:range.header.end.rows[range]]`
			`}`

			`# Data always start on the row after header end`
			`range.data.start.rows <- range.header.end.rows + 1`
			`# Data end rows precedes header with one row, except for the first range`
xrdpkWrapper.R now correctly handles situations where file already exists, override = TRUE, and run > 1. Previously only the latest run was actually saved, although the created list had the right length. Works correctly and as intended now, as far as I can tell. 13 years ago			`# But only if data contained more than one range, obviously. Let's make the code check for that`
			`if (ranges.total > 1) {`
			`range.data.end.rows <- c(range.header.start.rows[2:length(range.header.start.rows)] - 1, length(f))`
			`} else {`
			`# Data in fact only contains one range`
			`range.data.end.rows <- length(f)`
			`}`

			`####`
Re-wrote muxd2df() from scratch. To make it able to include parameters as well as data in the returned df. muxd2df() now works satisfactorily, outputs parameters such as steptime, theta, and cps as well as thth and counts. Other minor changes mostly updates of source file pointers. 14 years ago
			`# Extract data (as-is) and put it an list (by range)`
			`data.raw <- list()`
			`for (range in 1:ranges.total) {`
			`data.raw[[range]] <- f[range.data.start.rows[range]:range.data.end.rows[range]]`
			`}`

			`# Specify header parameters to include in dataframe`
			`header.param.rexp <- c(steptime = "^_STEPTIME=",`
			`stepsize = "^_STEPSIZE=",`
			`theta = "^_THETA=",`
			`khi = "^_KHI=",`
			`phi = "^_PHI=",`
			`x = "^_X=",`
			`y = "^_Y=",`
			`z = "^_Z=",`
			`divergence = "^_DIVERGENCE=",`
			`antiscatter = "^_ANTISCATTER=")`

			`# Collect data and header parameters in dataframes, by range in a list`
			`data <- list()`
			`for (range in 1:ranges.total) {`
			`zz <- textConnection(data.raw[[range]], "r")`
			`data[[range]] <- data.frame(stringsAsFactors = F,`
			`sampleid,`
Ok, no commits since a while. Last change was remake of init.R to improve readability. 13 years ago			`int2padstr(range, "0", 3),`
Re-wrote muxd2df() from scratch. To make it able to include parameters as well as data in the returned df. muxd2df() now works satisfactorily, outputs parameters such as steptime, theta, and cps as well as thth and counts. Other minor changes mostly updates of source file pointers. 14 years ago			`matrix(scan(zz, what = numeric()), ncol = 2, byrow = T))`
			`close(zz)`
			`# Collect header parameters`
			`for (param in 1:length(header.param.rexp)) {`
			`data[[range]] <- cbind(data[[range]],`
			`as.numeric(strsplit(headers.raw[[range]][which(regexpr(unname(header.param.rexp[param]),`
			`headers.raw[[range]]) == 1)], "=")[[1]][2]))`
			`}`
xrdpkWrapper.R now correctly handles situations where file already exists, override = TRUE, and run > 1. Previously only the latest run was actually saved, although the created list had the right length. Works correctly and as intended now, as far as I can tell. 13 years ago			`names(data[[range]]) <-`
Ok, no commits since a while. Last change was remake of init.R to improve readability. 13 years ago			`c("sampleid", "range", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp))`
Re-wrote muxd2df() from scratch. To make it able to include parameters as well as data in the returned df. muxd2df() now works satisfactorily, outputs parameters such as steptime, theta, and cps as well as thth and counts. Other minor changes mostly updates of source file pointers. 14 years ago			`}`

			`# Calculate the other of the pair counts <-> cps`
			`if (counts.flag) {`
			`for (range in 1:ranges.total) {`
			`data[[range]] <- cbind(data[[range]], cps = data[[range]]$counts / data[[range]]$steptime)`
			`}`
			`}`
			`if (cps.flag) {`
			`for (range in 1:ranges.total) {`
			`data[[range]] <- cbind(data[[range]], counts = data[[range]]$cps * data[[range]]$steptime)`
			`}`
			`}`

			`# Return a unified dataframe`
			`data.df <- data[[1]]`
xrdpkWrapper.R now correctly handles situations where file already exists, override = TRUE, and run > 1. Previously only the latest run was actually saved, although the created list had the right length. Works correctly and as intended now, as far as I can tell. 13 years ago			`if (ranges.total > 1) {`
			`for (range in 2:ranges.total) {`
			`data.df <- rbind(data.df, data[[range]])`
			`}`
Re-wrote muxd2df() from scratch. To make it able to include parameters as well as data in the returned df. muxd2df() now works satisfactorily, outputs parameters such as steptime, theta, and cps as well as thth and counts. Other minor changes mostly updates of source file pointers. 14 years ago			`}`

			`return(data.df)`
			`}`