Contains my own R functions. First commit.

16 years ago · 996a79787c
commit 996a79787c
2 changed files with 575 additions and 0 deletions
--- a/common.R
+++ b/common.R
@ -0,0 +1,7 @@
+as.radians <- function(degrees) {
+   radians <- degrees * (pi / 180)
+}
+
+as.degrees <- function(radians) {
+   radians <- radians * (180 / pi)
+}
--- a/xrdtf.R
+++ b/xrdtf.R
@ -0,0 +1,568 @@
+# Collection of R functions for analysis of thin-film X-ray diffraction patterns
+# Maintainer: Taha Ahmed
+
+# CONTENTS
+# >>>> matchpdf
+# >>>> scherrer
+# >>>> pdf2df
+# >>>> uxd2mtx
+# >>>> uxd2df
+# >>>> muxd2df
+# >>>> muxd2mtx
+# >>>> muxd2ls
+#    - REPAIR SHOP
+#    - print.xtable.booktabs
+#    - split.muxd
+#    - strip.ka2
+#    - pearson.beta
+
+
+
+
+##################################################
+################## matchpdf ######################
+##################################################
+matchpdf <- function(expcol, pdfrow) {
+   # Function for matching each element of two numeric vectors
+   # to the closest-lying element (numerically) in the other
+   # vector. For example for matching 2th values of PDF to
+   # 2th values of recorded diffractogram.
+   ## Args: two vectors to be matched (any special considerations on vector lengths?)
+   ## Values: A list of 5 numeric vectors. See description inside function.
+   # A word of caution. This function creates a matrix with dimensions pdfrow * expcol.
+   # Usually both of these vectors are rather short. But watch out if large vectors
+   # are submitted --- the looping below could make this function very slow.
+#
+   diff.thth <- matrix(NA, length(pdfrow), length(expcol))
+   diff.indx <- matrix(NA, length(pdfrow), length(expcol))
+   
+   for (pdf.row in 1:length(pdfrow)) {
+      # For every row, calculate differences between that pdf value and all exp values
+      diff.thth[pdf.row, ] <- expcol - pdfrow[pdf.row]
+   }
+#   
+   # Now we go on to find the minimum along each row of diff.thth
+   # and set all other values to some arbitrary value
+   diff.rmin <- diff.thth
+   for (pdf.row in 1:dim(diff.thth)[1]) {
+      for (exp.col in 1:dim(diff.thth)[2]) {
+         if (abs(diff.thth[pdf.row, exp.col]) != min(abs(diff.thth[pdf.row, ]))) {
+            diff.rmin[pdf.row, exp.col] <- Inf
+         }
+      }
+   }
+#   
+   # We likewise find the minimum along each column and set any other,
+   # non-Inf values to Inf
+   diff.cmin <- diff.rmin
+   for (exp.col in 1:dim(diff.rmin)[2]) {
+      for (pdf.row in 1:dim(diff.rmin)[1]) {
+         if (abs(diff.rmin[pdf.row, exp.col]) != min(abs(diff.rmin[, exp.col]))) {
+            diff.cmin[pdf.row, exp.col] <- Inf
+         }
+      }
+   }
+#   
+   # The matrix now contains at most one non-Inf value per row and column.
+   # To make the use of colSums and rowSums possible (next step) all Inf are set zero,
+   # and all matches are set to 1.
+   for (pdf.row in 1:dim(diff.cmin)[1]) {
+      for (exp.col in 1:dim(diff.cmin)[2]) {
+         if ((diff.cmin[pdf.row, exp.col]) != Inf) {
+            diff.indx[pdf.row, exp.col] <- 1
+         } else {
+            diff.indx[pdf.row, exp.col] <- 0
+         }
+      }
+   }
+#   
+   # Attach error message to return list if sum(rowSums(diff.indx)) == sum(colSums(diff.indx))
+   matchpdf.err.msg <-"matchpdf() failed to complete. It seems rowSums != colSums."
+   mtch <- matchpdf.err.msg
+#   
+   # Check that sum(rowSums(diff.indx)) == sum(colSums(diff.indx))
+   if (sum(rowSums(diff.indx)) == sum(colSums(diff.indx))) {
+      # Reset mtch
+      mtch <- list()
+      mtch <- list(csums = colSums(diff.indx), rsums = rowSums(diff.indx), expthth = expcol[colSums(diff.indx) != 0], pdfthth = pdfrow[rowSums(diff.indx) != 0], deltathth = expcol[colSums(diff.indx) != 0] - pdfrow[rowSums(diff.indx) != 0])
+      # List of 5
+      # $ csums     : num - consisting of ones and zeroes. Shows you which positions of expcol matched.
+      # $ rsums     : num - consisting of ones and zeroes. Shows you which positions of pdfrow matched.
+      # $ expthth   : num - consisting of the matching elements of expcol.
+      # $ pdfthth   : num - consisting of the matching elements of pdfrow.
+      # $ deltathth : num - element-wise difference of expcol and pdfrow.
+   }
+#
+   return(mtch)
+}
+
+
+
+
+##################################################
+################## scherrer ######################
+##################################################
+scherrer <- function(integralbreadth, thth, wavelength = 1.54056, shapeconstant = ((4/3)*(pi/6))^(1/3)) {
+   # Function for calculating crystallite grain size from reflection data
+   # ARGS: integralbreadth - vector with integral breadth of reflections (in degrees)
+   #       thth            - vector with 2theta values of reflections (in degrees)
+   #       wavelength      - X-ray wavelength used (default 1.54056 Å, Cu Ka)
+   #       shapeconstant   - Scherrer constant (default spherical, ~0.9)
+   # VALUE: vector with size parameters
+   ## REQUIRES: as.radians(), source("/home/taha/chepec/chetex/common/R/common.R")
+   D <- (shapeconstant * wavelength) / (as.radians(integralbreadth) * cos(as.radians(thth)))
+   # cos() - angles must be in radians, not degrees!
+   return(D) #units of angstrom
+}
+
+
+
+
+##################################################
+################### pdf2df #######################
+##################################################
+pdf2df <- function(pdffile) {
+   # Function for extracting information from ICDD PDF XML-files
+   # For example the PDF files produced by the PDF database at Ångström's X-ray lab
+   # NOTE: sometimes intensity values are specified as less than some value.
+   #       In those cases, this function simply strips the less-than character.
+   # ARGS: pdffile (complete path and filename to PDF file)
+   # VALUE: dataframe with 9 columns:
+   #        thth angles (numeric),
+   #        d (numeric),
+   #        h index (numeric),
+   #        k index (numeric),
+   #        l index (numeric),
+   #        hkl indices (factor of strings),
+   #        hkl.TeX indices formatted for LaTeX (factor of strings),
+   #        intensity (numeric),
+   #        int.TeX intensity formatted for LaTeX (factor of strings)
+   # attr:  This function sets the following attributes:
+   #        ApplicationName,
+   #        ApplicationVersion,
+   #        pdfNumber,
+   #        chemicalformula,
+   #        wavelength
+   #
+   require(XML)
+   doc <- xmlTreeParse(pdffile)
+   pdf <- xmlRoot(doc)
+   rmchar <- "[^0-9]"
+   #
+   angles <- data.frame(NULL)
+   for (i in 1:length(pdf[["graphs"]][["stick_series"]])) {
+   angles <- rbind(angles, data.frame(#
+      thth      = as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["theta"]])),
+      d         = as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["da"]])),
+      h         = as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["h"]])),
+      k         = as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["k"]])),
+      l         = as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["l"]])),
+      hkl       = paste(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["h"]]),
+                     xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["k"]]),
+                     xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["l"]]), sep = ""),
+      hkl.TeX   = paste("\\mbox{$", ifelse(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["h"]])) < 0,
+                     paste("\\bar{", abs(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["h"]]))),
+                        "}", sep = ""),
+                     xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["h"]])),
+                  "\\,", ifelse(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["k"]])) < 0,
+                     paste("\\bar{", abs(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["k"]]))),
+                        "}", sep = ""),
+                     xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["k"]])),
+                  "\\,", ifelse(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["l"]])) < 0,
+                     paste("\\bar{", abs(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["l"]]))),
+                        "}", sep = ""),
+                     xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["l"]])),
+                  "$}", sep = "", collapse = ""),
+      intensity = as.numeric(gsub(rmchar, "", xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["intensity"]]))),
+      int.TeX   = paste("{", xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["intensity"]]), "}", sep = "")
+      ))
+   }
+   #
+   attr(angles, "ApplicationName") <- xmlAttrs(pdf)[[1]]
+   attr(angles, "ApplicationVersion") <- xmlAttrs(pdf)[[2]]
+   attr(angles, "pdfNumber") <- xmlValue(pdf[["pdf_data"]][["pdf_number"]])
+   attr(angles, "chemicalformula") <- gsub("[ ]", "", xmlValue(pdf[["pdf_data"]][["chemical_formula"]]))
+   attr(angles, "wavelength") <- as.numeric(xmlValue(pdf[["graphs"]][["wave_length"]]))
+   # Caution: Do not subset. Subsetting causes all attributes to be lost.
+   return(angles)
+}
+
+
+
+
+##################################################
+################### uxd2mtx ######################
+##################################################
+uxd2mtx <- function(uxdfile) {
+   # Function for reading UXD files 
+   # Assumptions: data in two columns
+   # Args: uxdfile (filename with extension)
+   # Return value: matrix with two columns
+   
+   cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD
+   cdata <- "[0-9]" #regexpr matching one character of any digit
+   
+   # A new file (datafile) containing only data will be created,
+   # extension ".data" appended to uxdfile
+   #datafile <- paste(uxdfile,".data",sep="")
+   
+   ufile <- file(uxdfile, "r")
+   f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
+   close(ufile)
+   
+   # This way we identify data rows by looking for numeric characters.
+   #wh <- regexpr("[0-9]", f)
+   # This way we identify header rows
+   # We assume that all other rows are data
+   wh <- regexpr(cchar, f)
+   
+   mh <- wh[1:length(wh)] # this gives you the corresponding index vector
+   # the value of each element corresponds to the position of the regexp match.
+   # value = 1 means the first character of the row is cchar (row is header)
+   # value =-1 means no cchar occur on the row (row is data)
+   
+   i <- seq(1, length(mh) - 1, 1)
+   j <- seq(2, length(mh), 1)
+   
+   starts <- which(mh[i] == 1 & mh[j] != 1) + 1
+   ends   <- length(mh)
+   f <- f[starts:ends]
+   
+   zz <- textConnection(f, "r")
+   ff <- matrix(scan(zz, what = numeric()), ncol=2, byrow=T)
+   close(zz)
+      
+   #zz <- file(datafile, "w") #open connection to datafile
+   #write.table(ff, file=datafile, row.names=F, sep=",")
+   #close(zz)
+
+   # Return matrix
+   ff
+}
+
+
+
+
+
+##################################################
+#################### uxd2df ######################
+##################################################
+uxd2df <- function(uxdfile) {
+   # Function for reading UXD files  # Assumptions: data in two columns
+   # Args: uxdfile (filename with extension)
+   # Returns: dataframe with three columns
+   
+   cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD
+   cdata <- "[0-9]" #regexpr matching one character of any digit
+   
+   # A new file (datafile) containing only data will be created,
+   # extension ".data" appended to uxdfile
+   #datafile <- paste(uxdfile,".data",sep="")
+   
+   ufile <- file(uxdfile, "r")
+   f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
+   close(ufile)
+   
+   # This way we identify data rows by looking for numeric characters.
+   #wh <- regexpr("[0-9]", f)
+   # This way we identify header rows
+   # We assume that all other rows are data
+   wh <- regexpr(cchar, f)
+   
+   mh <- wh[1:length(wh)] # this gives you the corresponding index vector
+   # the value of each element corresponds to the position of the regexp match.
+   # value = 1 means the first character of the row is cchar (row is header)
+   # value =-1 means no cchar occur on the row (row is data)
+   
+   i <- seq(1, length(mh) - 1, 1)
+   j <- seq(2, length(mh), 1)
+   
+   starts <- which(mh[i] == 1 & mh[j] != 1) + 1
+   ends   <- length(mh)
+   f <- f[starts:ends]
+   
+   zz <- textConnection(f, "r")
+   ff <- data.frame(uxdfile, matrix(scan(zz,
+         what = numeric()), ncol=2, byrow=T))
+   names(ff) <- c("sampleid", "angle", "intensity")
+   close(zz)
+      
+   #zz <- file(datafile, "w") #open connection to datafile
+   #write.table(ff, file=datafile, row.names=F, sep=",")
+   #close(zz)
+
+   # Return dataframe
+   ff
+}
+
+
+
+
+##################################################
+################### muxd2df ######################
+##################################################
+muxd2df <- function(uxdfile, range.descriptor) {
+   # Function that reads an UXD file which contains several ranges
+   # (created in a programmed run, for example)
+   # Arguments
+   # :: uxdfile (filename with extension)
+   # :: range.descriptor (an array with as many elements as
+   #    there are ranges in the uxdfile)
+   # Returns: dataframe with 3 columns
+   
+   cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD
+   cdata <- "[0-9]" #regexpr matching one character of any digit
+   # Create filenames for the output # no longer used, return dataframe instead
+   #datafile <- paste(uxdfile,"-",range.descriptor,".data",sep="")
+   
+   # Read the input multirange file
+   ufile <- file(uxdfile, "r")
+   f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
+   close(ufile)
+   
+   # This way we identify data rows by looking for numeric characters.
+   #wh <- regexpr("[0-9]", f)
+   # This way we identify header rows
+   # Later we will assume that all other rows are data
+   wh <- regexpr(cchar, f)
+   
+   mh <- wh[1:length(wh)] # this gives you the corresponding index vector
+   # the value of each element corresponds to the position of the regexp match.
+   # value = 1 means the first character of the row is cchar (row is header)
+   # value =-1 means no cchar occur on the row (row is data)
+   
+   #length(mh[mh == -1]) #total number of datarows in uxdfile
+   #mh[mh > 1 | mh < 0] <- 0 #set all header-rows to zero (just to make things easier)
+   
+   i <- seq(1, length(mh) - 1, 1)
+   j <- seq(2, length(mh), 1)
+   starts <- which(mh[i] == 1 & mh[j] != 1) + 1 #start indices
+   ends   <- which(mh[i] != 1 & mh[j] == 1) #end indices, except the last
+   ends   <- c(ends, length(mh)) #fixed the last index of ends   
+   
+   ff <- data.frame(NULL)
+   for (s in 1:length(range.descriptor)) {
+      zz <- textConnection(f[starts[s]:ends[s]], "r")
+      ff <- rbind(ff, data.frame(range.descriptor[s],
+            matrix(scan(zz, what = numeric()), ncol=2, byrow=T)))
+      close(zz)
+   }
+   names(ff) <- c("sampleid", "angle", "intensity")
+   
+   # Return dataframe
+   ff
+}
+
+
+
+
+##################################################
+################### muxd2mtx #####################
+##################################################
+muxd2mtx <- function(uxdfile, range) {
+   # Function that reads an UXD file which contains several ranges
+   # (created in a programmed run, for example)
+   # Arguments
+   # :: uxdfile (filename with extension)
+   # :: range (an integer, the number of ranges in the uxdfile)
+   # Returns: matrix with 2 columns
+   
+   cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD
+   cdata <- "[0-9]" #regexpr matching one character of any digit
+   # Create filenames for the output # no longer used, return dataframe instead
+   #datafile <- paste(uxdfile,"-",range.descriptor,".data",sep="")
+   
+   # Read the input multirange file
+   ufile <- file(uxdfile, "r")
+   f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
+   close(ufile)
+   
+   # We identify header rows. We will assume that all other rows are data
+   wh <- regexpr(cchar, f)
+   # length(wh) equals length(f)
+   # wh has either 1 or -1 for each element
+   # value = 1 means the first character of that row is cchar (row is header)
+   # value =-1 means absence of cchar on that row (row is data)
+   
+   # Since wh contains some attributes (given by regexpr function), we strip out everything
+   # but the index vector and assign it to the new vector mh
+   mh <- wh[1:length(wh)] # this gives you the corresponding index vector
+   
+   #length(mh[mh == -1]) #total number of datarows in uxdfile
+   #mh[mh > 1 | mh < 0] <- 0 #set all header-rows to zero (just to make things easier)
+   
+   # Set counters i and j used in assignment below
+   i <- seq(1, length(mh) - 1, 1)
+   j <- seq(2, length(mh), 1)
+   
+   starts <- which(mh[i] == 1 & mh[j] != 1) + 1 #start indices
+   ends   <- which(mh[i] != 1 & mh[j] == 1) #end indices, except the last
+   ends   <- c(ends, length(mh)) #fixes the last index of ends
+   # note that length of starts (or ends) gives the number of ranges
+   
+   ff <- matrix(NA, length(f), 2)
+   for (s in 1:range) {
+      zz <- textConnection(f[starts[s]:ends[s]], "r")
+      ff <- rbind(ff, matrix(scan(zz, what = numeric()), ncol=2, byrow=T))
+      close(zz)
+   }
+   
+   # Clean up matrix: remove extra rows
+   ff[apply(ff,1,function(x)any(!is.na(x))), ]
+
+   # Return matrix
+   ff
+}
+
+
+
+##################################################
+################### muxd2ls ######################
+##################################################
+muxd2ls <- function(uxdfile) {
+   # Function that reads an UXD file which contains several ranges
+   # (created in a programmed run, for example)
+   # Arguments
+   # :: uxdfile (filename with extension)
+   # Returns: List of matrices, as many as there were ranges
+   # Requires: ??
+   # See extensive comments in muxd2mtx()
+   
+   cchar <- "[;_]" #comment characters used in Bruker's UXD
+   cdata <- "[0-9]"  
+   
+   ufile <- file(uxdfile, "r")
+   f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
+   close(ufile)
+   
+   wh <- regexpr(cchar, f)
+   mh <- wh[1:length(wh)]
+   
+   i <- seq(1, length(mh) - 1, 1)
+   j <- seq(2, length(mh), 1)
+   starts <- which(mh[i] == 1 & mh[j] != 1) + 1
+   ends   <- which(mh[i] != 1 & mh[j] == 1)
+   ends   <- c(ends, length(mh))
+   
+   ff <- list()
+   for (s in 1:length(starts)) {
+      zz <- textConnection(f[starts[s]:ends[s]], "r")
+      ms <- matrix(scan(zz, what = numeric()), ncol = 2, byrow = T)
+      close(zz)
+      ff[[s]] <- ms
+   }
+   # Return list of matrices
+   return(ff)
+}
+
+
+
+
+
+
+
+
+# -------- ##################### -------- #
+# -------- #### REPAIR SHOP #### -------- #
+# -------- ##################### -------- #
+
+
+##################################################
+################ pearson.beta ####################
+##################################################
+pearson.beta <- function(m, w) {
+   # Function for calculating integral breadth, \beta, of
+   # an X-ray reflection using the half-width of FWHM (w)
+   # and m parameter from a Pearson fit.
+   ## Args: m - shape parameter from Pearson fit (numeric)
+   ##       w - half the FWHM for the reflection (numeric)
+   ## **** NOTE: gamma(x) out of range for x > 171
+   ## Values: The integral breadth of the reflection (numeric)
+   # Ref: Birkholz2006, page 96 (Eq. 3.5)
+   beta <- ((pi * 2^(2 * (1 - m)) * gamma(2 * m - 1)) /
+      ((2^(1 / m) - 1) * (gamma(m))^2)) * w
+}
+
+
+
+##################################################
+################## strip.ka2 #####################
+##################################################
+strip.ka <- function(angle, intensity) {
+   # Function that strips (removes) the Cu Ka2 contribution
+   # from an experimental diffractogram (2Th, I(2Th))
+   # using the Rachinger algorithm.
+   ## Args: 2theta vector
+   ##       intensity vector
+   ## Values: intensity vector
+   # Ref: Birkholz2006, page 99 (Eq. 3.12), and page 31
+   Cu.Kai  <- 0.154059 # nanometre
+   Cu.Kaii <- 0.154441 # nanometre
+   alpha.int.ratio <- 0.5
+   int.stripped <- intensity - alpha.int.ratio*intensity
+   return(int.stripped)
+}
+
+
+
+
+print.xtable.booktabs <- function(x){
+   # Make xtable print booktabs tables
+   # ARGS: x (matrix)
+   require(xtable)
+   print(xtable(x), floating=F, hline.after=NULL,
+    add.to.row=list(pos=list(-1,0, nrow(x)),
+    command=c("\\toprule ", "\\midrule ", "\\bottomrule ")))
+}
+
+
+split.muxd <- function(uxdfile, range.descriptor) {
+   # Fix this some other day!!
+   # Function that reads an UXD file which contains several ranges
+   # (created in a programmed run, for example) and splits it into
+   # its ranges and writes to that number of files
+   # Arguments
+   # :: uxdfile (filename with extension)
+   # :: range.descriptor (an array with as many elements as
+   #    there are ranges in the uxdfile)
+   # Returns: nothing. Writes files to HDD.
+   
+   cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD
+   cdata <- "[0-9]" #regexpr matching one character of any digit
+   # Create filenames for the output # no longer used, return dataframe instead
+   datafile <- paste(uxdfile,"-",range.descriptor,".data",sep="")
+   
+   # Read the input multirange file
+   f <- readLines(uxdfile, n=-1)
+   
+   
+   # This way we identify data rows by looking for numeric characters.
+   #wh <- regexpr("[0-9]", f)
+   # This way we identify header rows
+   # Later we will assume that all other rows are data
+   wh <- regexpr(cchar, f)
+   
+   mh <- wh[1:length(wh)] # this gives you the corresponding index vector
+   # the value of each element corresponds to the position of the regexp match.
+   # value = 1 means the first character of the row is cchar (row is header)
+   # value =-1 means no cchar occur on the row (row is data)
+   
+   #length(mh[mh == -1]) #total number of datarows in uxdfile
+   #mh[mh > 1 | mh < 0] <- 0 #set all header-rows to zero (just to make things easier)
+   
+   i <- seq(1, length(mh) - 1, 1)
+   j <- seq(2, length(mh), 1)
+   starts <- which(mh[i] == 1 & mh[j] != 1) + 1 #start indices
+   ends   <- which(mh[i] != 1 & mh[j] == 1) #end indices, except the last
+   ends   <- c(ends, length(mh)) #fixed the last index of ends
+   
+   #ff <- data.frame(NULL)
+   for (s in 1:length(range.descriptor)) {
+      matrix(scan(file=textConnection(f[starts[s]:ends[s]]),
+            what = numeric()), ncol=2, byrow=T)
+   }
+   names(ff) <- c("sampleid", "angle", "intensity")
+   
+   # Return dataframe
+   ff
+}