From f0ed360c0d1508ea40d72d069530221d0d061f2d Mon Sep 17 00:00:00 2001 From: Taha Ahmed Date: Wed, 7 Dec 2011 17:02:54 +0100 Subject: [PATCH] Ok, no commits since a while. Last change was remake of init.R to improve readability. --- AUTOLAB/amp2df.R | 2 +- AUTOLAB/ocp2df.R | 2 +- INCA/eds2df.R | 3 + OOHR2000/OO2df.R | 157 ++++++++++++++++++++++++++++++++++++ XRD-TF/mraw2df.R | 154 +++++++++++++++++++++++++++++++++++ XRD-TF/muxd2df.R | 6 +- common/AVS2SHE.R | 2 +- common/Celsius2Kelvin.R | 2 +- common/ConvertRefPot.R | 2 +- common/ConvertRefPotEC.R | 2 +- common/Kelvin2Celsius.R | 2 +- common/LoadRData2Variable.R | 2 +- common/ProvideSampleId.R | 2 +- common/SHE2AVS.R | 2 +- common/as.degrees.R | 2 +- common/as.radians.R | 2 +- common/int2padstr.R | 2 +- common/is.wholenumber.R | 2 +- common/molarity2mass.R | 2 +- common/roundup.R | 2 +- init.R | 15 ++-- 21 files changed, 340 insertions(+), 27 deletions(-) create mode 100644 OOHR2000/OO2df.R create mode 100644 XRD-TF/mraw2df.R diff --git a/AUTOLAB/amp2df.R b/AUTOLAB/amp2df.R index 557ba97..819c61e 100644 --- a/AUTOLAB/amp2df.R +++ b/AUTOLAB/amp2df.R @@ -86,4 +86,4 @@ amp2df <- function(datafilename, wearea = 1) { chargedensity = chargedensity) # return(ff) -} \ No newline at end of file +} diff --git a/AUTOLAB/ocp2df.R b/AUTOLAB/ocp2df.R index 38e6dae..b34e8c0 100644 --- a/AUTOLAB/ocp2df.R +++ b/AUTOLAB/ocp2df.R @@ -58,4 +58,4 @@ ocp2df <- function(datafilename) { names(ff) <- c("sampleid", "time", "potential") # return(ff) -} \ No newline at end of file +} diff --git a/INCA/eds2df.R b/INCA/eds2df.R index 4c42ff3..85de629 100644 --- a/INCA/eds2df.R +++ b/INCA/eds2df.R @@ -1,3 +1,6 @@ +## This function should be renamed to EMSA2df(). The file it reads is a so-called EMSA spectral data file. +# /TA, 111110 + eds2df <- function(edstxtfile) { ## Description: ## Reads EDS textfile from INCA EDS. diff --git a/OOHR2000/OO2df.R b/OOHR2000/OO2df.R new file mode 100644 index 0000000..383689f --- /dev/null +++ b/OOHR2000/OO2df.R @@ -0,0 +1,157 @@ +source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R") +source("/home/taha/chepec/chetex/common/R/common/int2padstr.R") + + +################################################## +#################### OO2df ####################### +################################################## +OO2df <- function(datafile) { + ## Description: + ## + ## + ## + ## + ## Usage: + ## OO2df(datafile) + ## Arguments: + ## datafile: text string with full path to TXT file + ## containing single or multiple data ranges + ## Value: + ## Dataframe with the following columns: + ## $ sampleid : chr + ## $ wavelength : num + ## $ counts : num + ## $ + ## $ + ## $ + ## $ + ## $ + ## $ + ## $ + ## $ + ## $ + ## $ + ## $ cps ? + # +# range.header.start.rexp <- "^; \\(Data for Range" #regexp +# range.header.end.rexp <- "^_2THETA[^=]" #regexp + + range.data.start.rexp <- ">+Begin[\\s\\w]*<+" + range.data.end.rexp <- ">+End[\\s\\w]*<+" + + # Read the input file + dfile <- file(datafile, "r") + # Note that readLines apparently completely skips empty lines. + # That causes line numbers to not match between source and f vector. + f <- readLines(dfile, n=-1) # read _all_ lines from data file + close(dfile) + + # Fetch a sampleid for the current job + sampleid <- ProvideSampleId(datafile) + +# # Look for header start rows +# range.header.start.rows <- which(regexpr(range.header.start.rexp, f) == 1) +# # Look for header end rows +# range.header.end.rows <- which(regexpr(range.header.end.rexp, f) == 1) + + # Look for data start marker line + range.data.start.rows <- which(regexpr(range.data.start.rexp, f, perl = TRUE) == 1) + 1 + # Look for data end marker line + range.data.end.rows <- which(regexpr(range.data.end.rexp, f, perl = TRUE) == 1) - 1 + + # Calculate number of ranges + ranges.total <- + ifelse(length(range.data.start.rows) == length(range.data.end.rows), + length(range.data.start.rows), + NA) #why would they not be equal? + if (is.na(ranges.total)) { + # Obviously something bad happened. + # Do something about it. echo an error message perhaps. + # But why would they not be equal? + + } + + + + # Header always precedes start of data + range.header.end.rows <- range.data.start.rows - 2 + if (ranges.total > 1) { + range.header.start.rows <- c(1, range.data.end.rows[2:length(range.data.end.rows)]) + } else { + # Data in fact only contains one range + range.header.start.rows <- 1 + } + + # Extract headers (as-is) and put them in a list (by range) + headers.raw <- list() + for (range in 1:ranges.total) { + headers.raw[[range]] <- f[range.header.start.rows[range]:range.header.end.rows[range]] + } + + #### + + # Extract data (as-is) and put it an list (by range) + data.raw <- list() + for (range in 1:ranges.total) { + data.raw[[range]] <- f[range.data.start.rows[range]:range.data.end.rows[range]] + # Replace commas by dots + data.raw[[range]] <- gsub(",", "\\.", data.raw[[range]]) + } + + + # Specify header parameters to include in dataframe + header.param.rexp <- + c(DateTime = "^Date:", + IntegrationTime = "^Integration Time \\(usec\\):", + n_Averaged = "^Spectra Averaged:", + Boxcar = "^Boxcar Smoothing:", + CorrElectricDark = "^Correct for Electrical Dark:", + StrobeLampEnabled = "^Strobe/Lamp Enabled:", + CorrDetectorNonLin = "^Correct for Detector Non-linearity:", + CorrStrayLight = "^Correct for Stray Light:", + n_Pixels = "^Number of Pixels") + + # Collect data and header parameters in dataframes, by range in a list + data <- list() + for (range in 1:ranges.total) { + zz <- textConnection(data.raw[[range]], "r") + data[[range]] <- data.frame(stringsAsFactors = FALSE, + sampleid, + int2padstr(range, "0", 3), + matrix(scan(zz, what = character()), ncol = 2, byrow = T)) + close(zz) + # Collect header parameters + for (param in 1:length(header.param.rexp)) { + data[[range]] <- + cbind(stringsAsFactors = FALSE, + data[[range]], + # Matches any word, digit, plus, or minus character + # surrounded by parentheses at the end of the string + sub("\\s\\([\\w\\d\\+\\-]+\\)$", "", + strsplit(headers.raw[[range]][which(regexpr(unname(header.param.rexp[param]), + headers.raw[[range]]) == 1)], ": ")[[1]][2], + perl = TRUE)) + } + names(data[[range]]) <- + c("sampleid", "range", "wavelength", "intensity", names(header.param.rexp)) + } + + # Create a unified dataframe + data.df <- data[[1]] + if (ranges.total > 1) { + for (range in 2:ranges.total) { + data.df <- rbind(data.df, data[[range]]) + } + } + + # Convert the DateTime column to more legibly (and compact) format + data.df$DateTime <- + format(as.POSIXct(gsub("\\s[A-Z]{4}\\s", " ", data.df$Date), + format = "%a %b %d %H:%M:%S %Y"), + format = "%Y-%m-%d %H:%M:%S") + # Convert wavelength and intensity to numeric format + mode(data.df$wavelength) <- "numeric" + mode(data.df$intensity) <- "numeric" + + return(data.df) +} \ No newline at end of file diff --git a/XRD-TF/mraw2df.R b/XRD-TF/mraw2df.R new file mode 100644 index 0000000..fd7631f --- /dev/null +++ b/XRD-TF/mraw2df.R @@ -0,0 +1,154 @@ +source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R") +source("/home/taha/chepec/chetex/common/R/common/int2padstr.R") + + +################################################## +#!!!!!!!!!!!!!!!!!# mraw2df #!!!!!!!!!!!!!!!!!!!!# +################################################## +mraw2df <- function(txtfile) { + ## Description: + ## Reads TXT files with one or multiple ranges (XRD commander "save as txt") + ## Extracts both data (thth, intensity) and parameters + ## Usage: + ## mraw2df(txtfile) + ## Arguments: + ## txtfile: text string with full path to TXT file, which may + ## containing single or multiple data ranges + ## Value: + ## Dataframe with the following columns: + ## $ sampleid : chr + ## $ thth : num + ## $ counts (or cps) : num + ## $ steptime : num + ## $ stepsize : num + ## $ theta : num + ## $ khi : num + ## $ phi : num + ## $ x : num + ## $ y : num + ## $ z : num + ## $ divergence : num + ## $ antiscatter : num + ## $ cps (or counts) : num + # + range.header.start.rexp <- "^; \\(Data for Range" #regexp + range.header.end.rexp <- "^_2THETA[^=]" #regexp + + # Read the input multirange file + ufile <- file(uxdfile, "r") + # Note that readLines apparently completely skips empty lines. + # In that case line numbers do not match between source and f. + f <- readLines(ufile, n=-1) #read _all_ lines from UXD file + close(ufile) + + # Fetch a sampleid for the current job + sampleid <- ProvideSampleId(uxdfile) + + # Look for header start rows + range.header.start.rows <- which(regexpr(range.header.start.rexp, f) == 1) + # Look for header end rows + range.header.end.rows <- which(regexpr(range.header.end.rexp, f) == 1) + + # Calculate number of ranges + ranges.total <- + ifelse(length(range.header.start.rows) == length(range.header.end.rows), + length(range.header.start.rows), + NA) #why would they not be equal? + if (is.na(ranges.total)) { + # Obviously something bad happened. + # Do something about it. echo an error message perhaps. + # But why would they not be equal? + + } + + # Determine whether we have COUNTS or COUNTS PER SECOND in current UXD-file + # Assuming it is the same for all ranges in this job (a safe assumption). + if (f[range.header.end.rows][1] == "_2THETACOUNTS") { + # we got counts + counts.flag <- TRUE + cps.flag <- FALSE + } + if (f[range.header.end.rows][1] == "_2THETACPS") { + # we got counts per second + counts.flag <-FALSE + cps.flag <- TRUE + } + + # Extract headers (as-is) and put them in a list (by range) + headers.raw <- list() + for (range in 1:ranges.total) { + headers.raw[[range]] <- f[range.header.start.rows[range]:range.header.end.rows[range]] + } + + # Data always start on the row after header end + range.data.start.rows <- range.header.end.rows + 1 + # Data end rows precedes header with one row, except for the first range + # But only if data contained more than one range, obviously. Let's make the code check for that + if (ranges.total > 1) { + range.data.end.rows <- c(range.header.start.rows[2:length(range.header.start.rows)] - 1, length(f)) + } else { + # Data in fact only contains one range + range.data.end.rows <- length(f) + } + + #### + + # Extract data (as-is) and put it an list (by range) + data.raw <- list() + for (range in 1:ranges.total) { + data.raw[[range]] <- f[range.data.start.rows[range]:range.data.end.rows[range]] + } + + # Specify header parameters to include in dataframe + header.param.rexp <- c(steptime = "^_STEPTIME=", + stepsize = "^_STEPSIZE=", + theta = "^_THETA=", + khi = "^_KHI=", + phi = "^_PHI=", + x = "^_X=", + y = "^_Y=", + z = "^_Z=", + divergence = "^_DIVERGENCE=", + antiscatter = "^_ANTISCATTER=") + + # Collect data and header parameters in dataframes, by range in a list + data <- list() + for (range in 1:ranges.total) { + zz <- textConnection(data.raw[[range]], "r") + data[[range]] <- data.frame(stringsAsFactors = F, + sampleid, + int2padstr(range, "0", 3), + matrix(scan(zz, what = numeric()), ncol = 2, byrow = T)) + close(zz) + # Collect header parameters + for (param in 1:length(header.param.rexp)) { + data[[range]] <- cbind(data[[range]], + as.numeric(strsplit(headers.raw[[range]][which(regexpr(unname(header.param.rexp[param]), + headers.raw[[range]]) == 1)], "=")[[1]][2])) + } + names(data[[range]]) <- + c("sampleid", "range", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp)) + } + + # Calculate the other of the pair counts <-> cps + if (counts.flag) { + for (range in 1:ranges.total) { + data[[range]] <- cbind(data[[range]], cps = data[[range]]$counts / data[[range]]$steptime) + } + } + if (cps.flag) { + for (range in 1:ranges.total) { + data[[range]] <- cbind(data[[range]], counts = data[[range]]$cps * data[[range]]$steptime) + } + } + + # Return a unified dataframe + data.df <- data[[1]] + if (ranges.total > 1) { + for (range in 2:ranges.total) { + data.df <- rbind(data.df, data[[range]]) + } + } + + return(data.df) +} diff --git a/XRD-TF/muxd2df.R b/XRD-TF/muxd2df.R index 9015530..896f03c 100644 --- a/XRD-TF/muxd2df.R +++ b/XRD-TF/muxd2df.R @@ -1,4 +1,5 @@ source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R") +source("/home/taha/chepec/chetex/common/R/common/int2padstr.R") ################################################## @@ -13,7 +14,7 @@ muxd2df <- function(uxdfile) { ## Usage: ## muxd2df(uxdfile) ## Arguments: - ## uxdfile: text string with full path to UXD file + ## uxdfile: text string with full path to UXD file, which may ## containing single or multiple data ranges ## Value: ## Dataframe with the following columns: @@ -118,6 +119,7 @@ muxd2df <- function(uxdfile) { zz <- textConnection(data.raw[[range]], "r") data[[range]] <- data.frame(stringsAsFactors = F, sampleid, + int2padstr(range, "0", 3), matrix(scan(zz, what = numeric()), ncol = 2, byrow = T)) close(zz) # Collect header parameters @@ -127,7 +129,7 @@ muxd2df <- function(uxdfile) { headers.raw[[range]]) == 1)], "=")[[1]][2])) } names(data[[range]]) <- - c("sampleid", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp)) + c("sampleid", "range", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp)) } # Calculate the other of the pair counts <-> cps diff --git a/common/AVS2SHE.R b/common/AVS2SHE.R index 6fc2d06..f23d849 100644 --- a/common/AVS2SHE.R +++ b/common/AVS2SHE.R @@ -5,4 +5,4 @@ AVS2SHE <- function(avs) { # Converts from absolute vacuum scale (AVS) to SHE scale she <- -(4.5 + avs) return(she) -} \ No newline at end of file +} diff --git a/common/Celsius2Kelvin.R b/common/Celsius2Kelvin.R index 4c06661..2ab5a4c 100644 --- a/common/Celsius2Kelvin.R +++ b/common/Celsius2Kelvin.R @@ -11,4 +11,4 @@ Celsius2Kelvin <- function(Celsius) { } Kelvin <- Celsius + 273.15 return(Kelvin) -} \ No newline at end of file +} diff --git a/common/ConvertRefPot.R b/common/ConvertRefPot.R index 9c6391c..f8f1427 100644 --- a/common/ConvertRefPot.R +++ b/common/ConvertRefPot.R @@ -71,4 +71,4 @@ ConvertRefPot <- function(argpotential, argrefscale, valuerefscale) { scale.names[[decision.vector["valueref"]]][1]) } return(rnpotential) -} \ No newline at end of file +} diff --git a/common/ConvertRefPotEC.R b/common/ConvertRefPotEC.R index 4b4ec54..7b28d43 100644 --- a/common/ConvertRefPotEC.R +++ b/common/ConvertRefPotEC.R @@ -45,4 +45,4 @@ ConvertRefPotEC <- function(argpotential, argrefscale, valuerefscale) { valuepotential <- NA } return(valuepotential) -} \ No newline at end of file +} diff --git a/common/Kelvin2Celsius.R b/common/Kelvin2Celsius.R index 2c688b4..ddb1dee 100644 --- a/common/Kelvin2Celsius.R +++ b/common/Kelvin2Celsius.R @@ -11,4 +11,4 @@ Kelvin2Celsius <- function(Kelvin) { } Celsius <- Kelvin - 273.15 return(Celsius) -} \ No newline at end of file +} diff --git a/common/LoadRData2Variable.R b/common/LoadRData2Variable.R index ee2795e..3e8cd9f 100644 --- a/common/LoadRData2Variable.R +++ b/common/LoadRData2Variable.R @@ -3,4 +3,4 @@ # NOT TESTED for when the R-data file contains many variables LoadRData2Variable <- function(FullPathToRData) { return(eval(parse(text = load(FullPathToRData)))) -} \ No newline at end of file +} diff --git a/common/ProvideSampleId.R b/common/ProvideSampleId.R index 6f1449e..24a9840 100644 --- a/common/ProvideSampleId.R +++ b/common/ProvideSampleId.R @@ -31,4 +31,4 @@ ProvideSampleId <- function (pathexpfile) { #### return(sampleid) -} \ No newline at end of file +} diff --git a/common/SHE2AVS.R b/common/SHE2AVS.R index 31c7e48..f343619 100644 --- a/common/SHE2AVS.R +++ b/common/SHE2AVS.R @@ -5,4 +5,4 @@ SHE2AVS <- function(she) { # Converts from SHE scale to absolute vacuum (AVS) scale avs <- -(4.5 + she) return(avs) -} \ No newline at end of file +} diff --git a/common/as.degrees.R b/common/as.degrees.R index 3a80188..fad9362 100644 --- a/common/as.degrees.R +++ b/common/as.degrees.R @@ -5,4 +5,4 @@ as.degrees <- function(radians) { # Converts from radians to degrees degrees <- radians * (180 / pi) return(degrees) -} \ No newline at end of file +} diff --git a/common/as.radians.R b/common/as.radians.R index 7f643cc..a02fcae 100644 --- a/common/as.radians.R +++ b/common/as.radians.R @@ -5,4 +5,4 @@ as.radians <- function(degrees) { # Converts from degrees to radians radians <- degrees * (pi / 180) return(radians) -} \ No newline at end of file +} diff --git a/common/int2padstr.R b/common/int2padstr.R index aa9aba4..2a1c1c5 100644 --- a/common/int2padstr.R +++ b/common/int2padstr.R @@ -18,4 +18,4 @@ int2padstr <- function (ii, pchr, w) { ## Value: ## A character string or a vector of character strings gsub(" ", pchr, formatC(ii, format="s", mode="character", width = w)) -} \ No newline at end of file +} diff --git a/common/is.wholenumber.R b/common/is.wholenumber.R index ff8093a..d742618 100644 --- a/common/is.wholenumber.R +++ b/common/is.wholenumber.R @@ -2,4 +2,4 @@ is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) { abs(x - round(x)) < tol -} \ No newline at end of file +} diff --git a/common/molarity2mass.R b/common/molarity2mass.R index 5eb8d49..4277849 100644 --- a/common/molarity2mass.R +++ b/common/molarity2mass.R @@ -13,4 +13,4 @@ molarity2mass <- function(formulamass, volume, molarity) { # Unit check: # [g * mol-1] * [liter] * [mole * liter-1] = [g] return(mass) -} \ No newline at end of file +} diff --git a/common/roundup.R b/common/roundup.R index 21c02ca..8fa1b50 100644 --- a/common/roundup.R +++ b/common/roundup.R @@ -3,4 +3,4 @@ roundup <- function(x, nearest=1000) { ceiling(max(x+10^-9)/nearest + 1/nearest)*nearest -} \ No newline at end of file +} diff --git a/init.R b/init.R index 1024552..967c5c5 100644 --- a/init.R +++ b/init.R @@ -1,13 +1,10 @@ # To source a bunch of files in the same directory sourceDir <- function(path, trace = TRUE) { - for (nm in list.files(path, pattern = "\\.[Rr]$")) { - if(trace) { - cat(nm,":") - } - source(file.path(path, nm)) - if(trace) { - cat("\n") - } + lsDir <- list.files(path, pattern = "\\.[Rr]$") + for (i in lsDir) { + if(trace) {cat(i, ":")} + source(file.path(path, i)) + if(trace) {cat("\n")} } -} \ No newline at end of file +}