You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			199 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			R
		
	
			
		
		
	
	
			199 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			R
		
	
source("/home/taha/chepec/chetex/common/R/common/ProvideSampleId.R")
 | 
						|
 | 
						|
 | 
						|
##################################################
 | 
						|
################### muxd2df ######################
 | 
						|
##################################################
 | 
						|
muxd2df <- function(uxdfile) {
 | 
						|
   ## Description:
 | 
						|
   ##   Reads UXD files with multiple ranges (converted using XCH v1.0)
 | 
						|
   ##   Extracts both data (thth, intensity) and parameters
 | 
						|
   ##   Also automatically calculates cps is counts are present, and vice versa
 | 
						|
   ##   (note that this depends on specific strings in the UXD format).
 | 
						|
   ## Usage:
 | 
						|
   ##   muxd2df(uxdfile)
 | 
						|
   ## Arguments:
 | 
						|
   ##   uxdfile: text string with full path to UXD file
 | 
						|
   ## Value:
 | 
						|
   ##   Dataframe with the following columns:
 | 
						|
   ##   $ sampleid        : chr
 | 
						|
   ##   $ thth            : num
 | 
						|
   ##   $ counts (or cps) : num
 | 
						|
   ##   $ steptime        : num
 | 
						|
   ##   $ stepsize        : num
 | 
						|
   ##   $ theta           : num
 | 
						|
   ##   $ khi             : num
 | 
						|
   ##   $ phi             : num
 | 
						|
   ##   $ x               : num
 | 
						|
   ##   $ y               : num
 | 
						|
   ##   $ z               : num
 | 
						|
   ##   $ divergence      : num
 | 
						|
   ##   $ antiscatter     : num
 | 
						|
   ##   $ cps (or counts) : num
 | 
						|
   #
 | 
						|
   range.header.start.rexp <- "^; \\(Data for Range" #regexp
 | 
						|
   range.header.end.rexp <- "^_2THETA[^=]" #regexp
 | 
						|
   
 | 
						|
   # Read the input multirange file
 | 
						|
   ufile <- file(uxdfile, "r")
 | 
						|
   # Note that readLines apparently completely skips empty lines. 
 | 
						|
   # In that case line numbers do not match between source and f.
 | 
						|
   f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
 | 
						|
   close(ufile)
 | 
						|
   
 | 
						|
   # Fetch a sampleid for the current job
 | 
						|
   sampleid <- ProvideSampleId(uxdfile)
 | 
						|
   
 | 
						|
   # Look for header start rows
 | 
						|
   range.header.start.rows <- which(regexpr(range.header.start.rexp, f) == 1)
 | 
						|
   # Look for header end rows
 | 
						|
   range.header.end.rows <- which(regexpr(range.header.end.rexp, f) == 1)
 | 
						|
   
 | 
						|
   # Calculate number of ranges
 | 
						|
   ranges.total <- ifelse(length(range.header.start.rows) == length(range.header.end.rows), length(range.header.start.rows), NA)
 | 
						|
   if (is.na(ranges.total)) {
 | 
						|
      # Obviously something bad happened.
 | 
						|
      # Do something about it. echo an error message perhaps.
 | 
						|
      
 | 
						|
   }
 | 
						|
         
 | 
						|
   # Determine whether we have COUNTS or COUNTS PER SECOND in current UXD-file
 | 
						|
   # Assuming it is the same for all ranges in this job (a safe assumption).
 | 
						|
   if (f[range.header.end.rows][1] == "_2THETACOUNTS") {
 | 
						|
      # we got counts
 | 
						|
      counts.flag <- TRUE
 | 
						|
      cps.flag <- FALSE
 | 
						|
   }
 | 
						|
   if (f[range.header.end.rows][1] == "_2THETACPS") {
 | 
						|
      # we got counts per second
 | 
						|
      counts.flag <-FALSE
 | 
						|
      cps.flag <- TRUE
 | 
						|
   }
 | 
						|
   
 | 
						|
   # Extract headers (as-is) and put them in a list (by range)
 | 
						|
   headers.raw <- list()
 | 
						|
   for (range in 1:ranges.total) {
 | 
						|
      headers.raw[[range]] <- f[range.header.start.rows[range]:range.header.end.rows[range]]
 | 
						|
   }
 | 
						|
 | 
						|
   # Data always start on the row after header end
 | 
						|
   range.data.start.rows <- range.header.end.rows + 1
 | 
						|
   # Data end rows precedes header with one row, except for the first range
 | 
						|
   range.data.end.rows <- c(range.header.start.rows[2:length(range.header.start.rows)] - 1, length(f))
 | 
						|
   
 | 
						|
   # Extract data (as-is) and put it an list (by range)
 | 
						|
   data.raw <- list()
 | 
						|
   for (range in 1:ranges.total) {
 | 
						|
      data.raw[[range]] <- f[range.data.start.rows[range]:range.data.end.rows[range]]
 | 
						|
   }
 | 
						|
   
 | 
						|
   # Specify header parameters to include in dataframe
 | 
						|
   header.param.rexp <- c(steptime = "^_STEPTIME=", 
 | 
						|
                          stepsize = "^_STEPSIZE=", 
 | 
						|
                          theta = "^_THETA=",
 | 
						|
                          khi = "^_KHI=",
 | 
						|
                          phi = "^_PHI=",
 | 
						|
                          x = "^_X=",
 | 
						|
                          y = "^_Y=",
 | 
						|
                          z = "^_Z=",
 | 
						|
                          divergence = "^_DIVERGENCE=",
 | 
						|
                          antiscatter = "^_ANTISCATTER=")
 | 
						|
   
 | 
						|
   # Collect data and header parameters in dataframes, by range in a list
 | 
						|
   data <- list()
 | 
						|
   for (range in 1:ranges.total) {
 | 
						|
      zz <- textConnection(data.raw[[range]], "r")
 | 
						|
      data[[range]] <- data.frame(stringsAsFactors = F,
 | 
						|
                                  sampleid,
 | 
						|
                                  matrix(scan(zz, what = numeric()), ncol = 2, byrow = T))
 | 
						|
      close(zz)
 | 
						|
      # Collect header parameters
 | 
						|
      for (param in 1:length(header.param.rexp)) {
 | 
						|
         data[[range]] <- cbind(data[[range]], 
 | 
						|
            as.numeric(strsplit(headers.raw[[range]][which(regexpr(unname(header.param.rexp[param]), 
 | 
						|
               headers.raw[[range]]) == 1)], "=")[[1]][2]))
 | 
						|
      }
 | 
						|
      names(data[[range]]) <- c("sampleid", "thth", ifelse(counts.flag, "counts", "cps"), names(header.param.rexp))
 | 
						|
   }
 | 
						|
                                                                   
 | 
						|
   # Calculate the other of the pair counts <-> cps
 | 
						|
   if (counts.flag) {
 | 
						|
      for (range in 1:ranges.total) {
 | 
						|
         data[[range]] <- cbind(data[[range]], cps = data[[range]]$counts / data[[range]]$steptime)
 | 
						|
      }
 | 
						|
   }
 | 
						|
   if (cps.flag) {
 | 
						|
      for (range in 1:ranges.total) {
 | 
						|
         data[[range]] <- cbind(data[[range]], counts = data[[range]]$cps * data[[range]]$steptime)
 | 
						|
      }
 | 
						|
   }
 | 
						|
      
 | 
						|
   # Return a unified dataframe
 | 
						|
   data.df <- data[[1]]
 | 
						|
   for (range in 2:ranges.total) {
 | 
						|
      data.df <- rbind(data.df, data[[range]])
 | 
						|
   }
 | 
						|
   
 | 
						|
   return(data.df)
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
 | 
						|
#### OLD VERSION - DEPRECATE
 | 
						|
##################################################
 | 
						|
################### muxd2df ######################
 | 
						|
##################################################
 | 
						|
muxd2df.old <- function(uxdfile, range.descriptor) {
 | 
						|
   # Function that reads an UXD file which contains several ranges
 | 
						|
   # (created in a programmed run, for example)
 | 
						|
   # Arguments
 | 
						|
   # :: uxdfile (filename with extension)
 | 
						|
   # :: range.descriptor (an array with as many elements as
 | 
						|
   #    there are ranges in the uxdfile)
 | 
						|
   # Returns: dataframe with 3 columns
 | 
						|
   
 | 
						|
   cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD
 | 
						|
   cdata <- "[0-9]" #regexpr matching one character of any digit
 | 
						|
   # Create filenames for the output # no longer used, return dataframe instead
 | 
						|
   #datafile <- paste(uxdfile,"-",range.descriptor,".data",sep="")
 | 
						|
   
 | 
						|
   # Read the input multirange file
 | 
						|
   ufile <- file(uxdfile, "r")
 | 
						|
   f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
 | 
						|
   close(ufile)
 | 
						|
   
 | 
						|
   # This way we identify data rows by looking for numeric characters.
 | 
						|
   #wh <- regexpr("[0-9]", f)
 | 
						|
   # This way we identify header rows
 | 
						|
   # Later we will assume that all other rows are data
 | 
						|
   wh <- regexpr(cchar, f)
 | 
						|
   
 | 
						|
   mh <- wh[1:length(wh)] # this gives you the corresponding index vector
 | 
						|
   # the value of each element corresponds to the position of the regexp match.
 | 
						|
   # value = 1 means the first character of the row is cchar (row is header)
 | 
						|
   # value =-1 means no cchar occur on the row (row is data)
 | 
						|
   
 | 
						|
   #length(mh[mh == -1]) #total number of datarows in uxdfile
 | 
						|
   #mh[mh > 1 | mh < 0] <- 0 #set all header-rows to zero (just to make things easier)
 | 
						|
   
 | 
						|
   i <- seq(1, length(mh) - 1, 1)
 | 
						|
   j <- seq(2, length(mh), 1)
 | 
						|
   starts <- which(mh[i] == 1 & mh[j] != 1) + 1 #start indices
 | 
						|
   ends   <- which(mh[i] != 1 & mh[j] == 1) #end indices, except the last
 | 
						|
   ends   <- c(ends, length(mh)) #fixed the last index of ends   
 | 
						|
   
 | 
						|
   ff <- data.frame(NULL)
 | 
						|
   for (s in 1:length(range.descriptor)) {
 | 
						|
      zz <- textConnection(f[starts[s]:ends[s]], "r")
 | 
						|
      ff <- rbind(ff, data.frame(range.descriptor[s],
 | 
						|
            matrix(scan(zz, what = numeric()), ncol=2, byrow=T)))
 | 
						|
      close(zz)
 | 
						|
   }
 | 
						|
   names(ff) <- c("sampleid", "angle", "intensity")
 | 
						|
   
 | 
						|
   # Return dataframe
 | 
						|
   ff
 | 
						|
}
 |