From 996a79787c984016462e82822944ac05c4ac8627 Mon Sep 17 00:00:00 2001 From: Taha Ahmed Date: Sun, 16 Jan 2011 18:49:12 +0100 Subject: [PATCH] Contains my own R functions. First commit. --- common.R | 7 + xrdtf.R | 568 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 575 insertions(+) create mode 100755 common.R create mode 100755 xrdtf.R diff --git a/common.R b/common.R new file mode 100755 index 0000000..74b1909 --- /dev/null +++ b/common.R @@ -0,0 +1,7 @@ +as.radians <- function(degrees) { + radians <- degrees * (pi / 180) +} + +as.degrees <- function(radians) { + radians <- radians * (180 / pi) +} diff --git a/xrdtf.R b/xrdtf.R new file mode 100755 index 0000000..85ad2a6 --- /dev/null +++ b/xrdtf.R @@ -0,0 +1,568 @@ +# Collection of R functions for analysis of thin-film X-ray diffraction patterns +# Maintainer: Taha Ahmed + +# CONTENTS +# >>>> matchpdf +# >>>> scherrer +# >>>> pdf2df +# >>>> uxd2mtx +# >>>> uxd2df +# >>>> muxd2df +# >>>> muxd2mtx +# >>>> muxd2ls +# - REPAIR SHOP +# - print.xtable.booktabs +# - split.muxd +# - strip.ka2 +# - pearson.beta + + + + +################################################## +################## matchpdf ###################### +################################################## +matchpdf <- function(expcol, pdfrow) { + # Function for matching each element of two numeric vectors + # to the closest-lying element (numerically) in the other + # vector. For example for matching 2th values of PDF to + # 2th values of recorded diffractogram. + ## Args: two vectors to be matched (any special considerations on vector lengths?) + ## Values: A list of 5 numeric vectors. See description inside function. + # A word of caution. This function creates a matrix with dimensions pdfrow * expcol. + # Usually both of these vectors are rather short. But watch out if large vectors + # are submitted --- the looping below could make this function very slow. +# + diff.thth <- matrix(NA, length(pdfrow), length(expcol)) + diff.indx <- matrix(NA, length(pdfrow), length(expcol)) + + for (pdf.row in 1:length(pdfrow)) { + # For every row, calculate differences between that pdf value and all exp values + diff.thth[pdf.row, ] <- expcol - pdfrow[pdf.row] + } +# + # Now we go on to find the minimum along each row of diff.thth + # and set all other values to some arbitrary value + diff.rmin <- diff.thth + for (pdf.row in 1:dim(diff.thth)[1]) { + for (exp.col in 1:dim(diff.thth)[2]) { + if (abs(diff.thth[pdf.row, exp.col]) != min(abs(diff.thth[pdf.row, ]))) { + diff.rmin[pdf.row, exp.col] <- Inf + } + } + } +# + # We likewise find the minimum along each column and set any other, + # non-Inf values to Inf + diff.cmin <- diff.rmin + for (exp.col in 1:dim(diff.rmin)[2]) { + for (pdf.row in 1:dim(diff.rmin)[1]) { + if (abs(diff.rmin[pdf.row, exp.col]) != min(abs(diff.rmin[, exp.col]))) { + diff.cmin[pdf.row, exp.col] <- Inf + } + } + } +# + # The matrix now contains at most one non-Inf value per row and column. + # To make the use of colSums and rowSums possible (next step) all Inf are set zero, + # and all matches are set to 1. + for (pdf.row in 1:dim(diff.cmin)[1]) { + for (exp.col in 1:dim(diff.cmin)[2]) { + if ((diff.cmin[pdf.row, exp.col]) != Inf) { + diff.indx[pdf.row, exp.col] <- 1 + } else { + diff.indx[pdf.row, exp.col] <- 0 + } + } + } +# + # Attach error message to return list if sum(rowSums(diff.indx)) == sum(colSums(diff.indx)) + matchpdf.err.msg <-"matchpdf() failed to complete. It seems rowSums != colSums." + mtch <- matchpdf.err.msg +# + # Check that sum(rowSums(diff.indx)) == sum(colSums(diff.indx)) + if (sum(rowSums(diff.indx)) == sum(colSums(diff.indx))) { + # Reset mtch + mtch <- list() + mtch <- list(csums = colSums(diff.indx), rsums = rowSums(diff.indx), expthth = expcol[colSums(diff.indx) != 0], pdfthth = pdfrow[rowSums(diff.indx) != 0], deltathth = expcol[colSums(diff.indx) != 0] - pdfrow[rowSums(diff.indx) != 0]) + # List of 5 + # $ csums : num - consisting of ones and zeroes. Shows you which positions of expcol matched. + # $ rsums : num - consisting of ones and zeroes. Shows you which positions of pdfrow matched. + # $ expthth : num - consisting of the matching elements of expcol. + # $ pdfthth : num - consisting of the matching elements of pdfrow. + # $ deltathth : num - element-wise difference of expcol and pdfrow. + } +# + return(mtch) +} + + + + +################################################## +################## scherrer ###################### +################################################## +scherrer <- function(integralbreadth, thth, wavelength = 1.54056, shapeconstant = ((4/3)*(pi/6))^(1/3)) { + # Function for calculating crystallite grain size from reflection data + # ARGS: integralbreadth - vector with integral breadth of reflections (in degrees) + # thth - vector with 2theta values of reflections (in degrees) + # wavelength - X-ray wavelength used (default 1.54056 Å, Cu Ka) + # shapeconstant - Scherrer constant (default spherical, ~0.9) + # VALUE: vector with size parameters + ## REQUIRES: as.radians(), source("/home/taha/chepec/chetex/common/R/common.R") + D <- (shapeconstant * wavelength) / (as.radians(integralbreadth) * cos(as.radians(thth))) + # cos() - angles must be in radians, not degrees! + return(D) #units of angstrom +} + + + + +################################################## +################### pdf2df ####################### +################################################## +pdf2df <- function(pdffile) { + # Function for extracting information from ICDD PDF XML-files + # For example the PDF files produced by the PDF database at Ångström's X-ray lab + # NOTE: sometimes intensity values are specified as less than some value. + # In those cases, this function simply strips the less-than character. + # ARGS: pdffile (complete path and filename to PDF file) + # VALUE: dataframe with 9 columns: + # thth angles (numeric), + # d (numeric), + # h index (numeric), + # k index (numeric), + # l index (numeric), + # hkl indices (factor of strings), + # hkl.TeX indices formatted for LaTeX (factor of strings), + # intensity (numeric), + # int.TeX intensity formatted for LaTeX (factor of strings) + # attr: This function sets the following attributes: + # ApplicationName, + # ApplicationVersion, + # pdfNumber, + # chemicalformula, + # wavelength + # + require(XML) + doc <- xmlTreeParse(pdffile) + pdf <- xmlRoot(doc) + rmchar <- "[^0-9]" + # + angles <- data.frame(NULL) + for (i in 1:length(pdf[["graphs"]][["stick_series"]])) { + angles <- rbind(angles, data.frame(# + thth = as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["theta"]])), + d = as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["da"]])), + h = as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["h"]])), + k = as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["k"]])), + l = as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["l"]])), + hkl = paste(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["h"]]), + xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["k"]]), + xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["l"]]), sep = ""), + hkl.TeX = paste("\\mbox{$", ifelse(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["h"]])) < 0, + paste("\\bar{", abs(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["h"]]))), + "}", sep = ""), + xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["h"]])), + "\\,", ifelse(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["k"]])) < 0, + paste("\\bar{", abs(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["k"]]))), + "}", sep = ""), + xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["k"]])), + "\\,", ifelse(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["l"]])) < 0, + paste("\\bar{", abs(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["l"]]))), + "}", sep = ""), + xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["l"]])), + "$}", sep = "", collapse = ""), + intensity = as.numeric(gsub(rmchar, "", xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["intensity"]]))), + int.TeX = paste("{", xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["intensity"]]), "}", sep = "") + )) + } + # + attr(angles, "ApplicationName") <- xmlAttrs(pdf)[[1]] + attr(angles, "ApplicationVersion") <- xmlAttrs(pdf)[[2]] + attr(angles, "pdfNumber") <- xmlValue(pdf[["pdf_data"]][["pdf_number"]]) + attr(angles, "chemicalformula") <- gsub("[ ]", "", xmlValue(pdf[["pdf_data"]][["chemical_formula"]])) + attr(angles, "wavelength") <- as.numeric(xmlValue(pdf[["graphs"]][["wave_length"]])) + # Caution: Do not subset. Subsetting causes all attributes to be lost. + return(angles) +} + + + + +################################################## +################### uxd2mtx ###################### +################################################## +uxd2mtx <- function(uxdfile) { + # Function for reading UXD files + # Assumptions: data in two columns + # Args: uxdfile (filename with extension) + # Return value: matrix with two columns + + cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD + cdata <- "[0-9]" #regexpr matching one character of any digit + + # A new file (datafile) containing only data will be created, + # extension ".data" appended to uxdfile + #datafile <- paste(uxdfile,".data",sep="") + + ufile <- file(uxdfile, "r") + f <- readLines(ufile, n=-1) #read _all_ lines from UXD file + close(ufile) + + # This way we identify data rows by looking for numeric characters. + #wh <- regexpr("[0-9]", f) + # This way we identify header rows + # We assume that all other rows are data + wh <- regexpr(cchar, f) + + mh <- wh[1:length(wh)] # this gives you the corresponding index vector + # the value of each element corresponds to the position of the regexp match. + # value = 1 means the first character of the row is cchar (row is header) + # value =-1 means no cchar occur on the row (row is data) + + i <- seq(1, length(mh) - 1, 1) + j <- seq(2, length(mh), 1) + + starts <- which(mh[i] == 1 & mh[j] != 1) + 1 + ends <- length(mh) + f <- f[starts:ends] + + zz <- textConnection(f, "r") + ff <- matrix(scan(zz, what = numeric()), ncol=2, byrow=T) + close(zz) + + #zz <- file(datafile, "w") #open connection to datafile + #write.table(ff, file=datafile, row.names=F, sep=",") + #close(zz) + + # Return matrix + ff +} + + + + + +################################################## +#################### uxd2df ###################### +################################################## +uxd2df <- function(uxdfile) { + # Function for reading UXD files # Assumptions: data in two columns + # Args: uxdfile (filename with extension) + # Returns: dataframe with three columns + + cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD + cdata <- "[0-9]" #regexpr matching one character of any digit + + # A new file (datafile) containing only data will be created, + # extension ".data" appended to uxdfile + #datafile <- paste(uxdfile,".data",sep="") + + ufile <- file(uxdfile, "r") + f <- readLines(ufile, n=-1) #read _all_ lines from UXD file + close(ufile) + + # This way we identify data rows by looking for numeric characters. + #wh <- regexpr("[0-9]", f) + # This way we identify header rows + # We assume that all other rows are data + wh <- regexpr(cchar, f) + + mh <- wh[1:length(wh)] # this gives you the corresponding index vector + # the value of each element corresponds to the position of the regexp match. + # value = 1 means the first character of the row is cchar (row is header) + # value =-1 means no cchar occur on the row (row is data) + + i <- seq(1, length(mh) - 1, 1) + j <- seq(2, length(mh), 1) + + starts <- which(mh[i] == 1 & mh[j] != 1) + 1 + ends <- length(mh) + f <- f[starts:ends] + + zz <- textConnection(f, "r") + ff <- data.frame(uxdfile, matrix(scan(zz, + what = numeric()), ncol=2, byrow=T)) + names(ff) <- c("sampleid", "angle", "intensity") + close(zz) + + #zz <- file(datafile, "w") #open connection to datafile + #write.table(ff, file=datafile, row.names=F, sep=",") + #close(zz) + + # Return dataframe + ff +} + + + + +################################################## +################### muxd2df ###################### +################################################## +muxd2df <- function(uxdfile, range.descriptor) { + # Function that reads an UXD file which contains several ranges + # (created in a programmed run, for example) + # Arguments + # :: uxdfile (filename with extension) + # :: range.descriptor (an array with as many elements as + # there are ranges in the uxdfile) + # Returns: dataframe with 3 columns + + cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD + cdata <- "[0-9]" #regexpr matching one character of any digit + # Create filenames for the output # no longer used, return dataframe instead + #datafile <- paste(uxdfile,"-",range.descriptor,".data",sep="") + + # Read the input multirange file + ufile <- file(uxdfile, "r") + f <- readLines(ufile, n=-1) #read _all_ lines from UXD file + close(ufile) + + # This way we identify data rows by looking for numeric characters. + #wh <- regexpr("[0-9]", f) + # This way we identify header rows + # Later we will assume that all other rows are data + wh <- regexpr(cchar, f) + + mh <- wh[1:length(wh)] # this gives you the corresponding index vector + # the value of each element corresponds to the position of the regexp match. + # value = 1 means the first character of the row is cchar (row is header) + # value =-1 means no cchar occur on the row (row is data) + + #length(mh[mh == -1]) #total number of datarows in uxdfile + #mh[mh > 1 | mh < 0] <- 0 #set all header-rows to zero (just to make things easier) + + i <- seq(1, length(mh) - 1, 1) + j <- seq(2, length(mh), 1) + starts <- which(mh[i] == 1 & mh[j] != 1) + 1 #start indices + ends <- which(mh[i] != 1 & mh[j] == 1) #end indices, except the last + ends <- c(ends, length(mh)) #fixed the last index of ends + + ff <- data.frame(NULL) + for (s in 1:length(range.descriptor)) { + zz <- textConnection(f[starts[s]:ends[s]], "r") + ff <- rbind(ff, data.frame(range.descriptor[s], + matrix(scan(zz, what = numeric()), ncol=2, byrow=T))) + close(zz) + } + names(ff) <- c("sampleid", "angle", "intensity") + + # Return dataframe + ff +} + + + + +################################################## +################### muxd2mtx ##################### +################################################## +muxd2mtx <- function(uxdfile, range) { + # Function that reads an UXD file which contains several ranges + # (created in a programmed run, for example) + # Arguments + # :: uxdfile (filename with extension) + # :: range (an integer, the number of ranges in the uxdfile) + # Returns: matrix with 2 columns + + cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD + cdata <- "[0-9]" #regexpr matching one character of any digit + # Create filenames for the output # no longer used, return dataframe instead + #datafile <- paste(uxdfile,"-",range.descriptor,".data",sep="") + + # Read the input multirange file + ufile <- file(uxdfile, "r") + f <- readLines(ufile, n=-1) #read _all_ lines from UXD file + close(ufile) + + # We identify header rows. We will assume that all other rows are data + wh <- regexpr(cchar, f) + # length(wh) equals length(f) + # wh has either 1 or -1 for each element + # value = 1 means the first character of that row is cchar (row is header) + # value =-1 means absence of cchar on that row (row is data) + + # Since wh contains some attributes (given by regexpr function), we strip out everything + # but the index vector and assign it to the new vector mh + mh <- wh[1:length(wh)] # this gives you the corresponding index vector + + #length(mh[mh == -1]) #total number of datarows in uxdfile + #mh[mh > 1 | mh < 0] <- 0 #set all header-rows to zero (just to make things easier) + + # Set counters i and j used in assignment below + i <- seq(1, length(mh) - 1, 1) + j <- seq(2, length(mh), 1) + + starts <- which(mh[i] == 1 & mh[j] != 1) + 1 #start indices + ends <- which(mh[i] != 1 & mh[j] == 1) #end indices, except the last + ends <- c(ends, length(mh)) #fixes the last index of ends + # note that length of starts (or ends) gives the number of ranges + + ff <- matrix(NA, length(f), 2) + for (s in 1:range) { + zz <- textConnection(f[starts[s]:ends[s]], "r") + ff <- rbind(ff, matrix(scan(zz, what = numeric()), ncol=2, byrow=T)) + close(zz) + } + + # Clean up matrix: remove extra rows + ff[apply(ff,1,function(x)any(!is.na(x))), ] + + # Return matrix + ff +} + + + +################################################## +################### muxd2ls ###################### +################################################## +muxd2ls <- function(uxdfile) { + # Function that reads an UXD file which contains several ranges + # (created in a programmed run, for example) + # Arguments + # :: uxdfile (filename with extension) + # Returns: List of matrices, as many as there were ranges + # Requires: ?? + # See extensive comments in muxd2mtx() + + cchar <- "[;_]" #comment characters used in Bruker's UXD + cdata <- "[0-9]" + + ufile <- file(uxdfile, "r") + f <- readLines(ufile, n=-1) #read _all_ lines from UXD file + close(ufile) + + wh <- regexpr(cchar, f) + mh <- wh[1:length(wh)] + + i <- seq(1, length(mh) - 1, 1) + j <- seq(2, length(mh), 1) + starts <- which(mh[i] == 1 & mh[j] != 1) + 1 + ends <- which(mh[i] != 1 & mh[j] == 1) + ends <- c(ends, length(mh)) + + ff <- list() + for (s in 1:length(starts)) { + zz <- textConnection(f[starts[s]:ends[s]], "r") + ms <- matrix(scan(zz, what = numeric()), ncol = 2, byrow = T) + close(zz) + ff[[s]] <- ms + } + # Return list of matrices + return(ff) +} + + + + + + + + +# -------- ##################### -------- # +# -------- #### REPAIR SHOP #### -------- # +# -------- ##################### -------- # + + +################################################## +################ pearson.beta #################### +################################################## +pearson.beta <- function(m, w) { + # Function for calculating integral breadth, \beta, of + # an X-ray reflection using the half-width of FWHM (w) + # and m parameter from a Pearson fit. + ## Args: m - shape parameter from Pearson fit (numeric) + ## w - half the FWHM for the reflection (numeric) + ## **** NOTE: gamma(x) out of range for x > 171 + ## Values: The integral breadth of the reflection (numeric) + # Ref: Birkholz2006, page 96 (Eq. 3.5) + beta <- ((pi * 2^(2 * (1 - m)) * gamma(2 * m - 1)) / + ((2^(1 / m) - 1) * (gamma(m))^2)) * w +} + + + +################################################## +################## strip.ka2 ##################### +################################################## +strip.ka <- function(angle, intensity) { + # Function that strips (removes) the Cu Ka2 contribution + # from an experimental diffractogram (2Th, I(2Th)) + # using the Rachinger algorithm. + ## Args: 2theta vector + ## intensity vector + ## Values: intensity vector + # Ref: Birkholz2006, page 99 (Eq. 3.12), and page 31 + Cu.Kai <- 0.154059 # nanometre + Cu.Kaii <- 0.154441 # nanometre + alpha.int.ratio <- 0.5 + int.stripped <- intensity - alpha.int.ratio*intensity + return(int.stripped) +} + + + + +print.xtable.booktabs <- function(x){ + # Make xtable print booktabs tables + # ARGS: x (matrix) + require(xtable) + print(xtable(x), floating=F, hline.after=NULL, + add.to.row=list(pos=list(-1,0, nrow(x)), + command=c("\\toprule ", "\\midrule ", "\\bottomrule "))) +} + + +split.muxd <- function(uxdfile, range.descriptor) { + # Fix this some other day!! + # Function that reads an UXD file which contains several ranges + # (created in a programmed run, for example) and splits it into + # its ranges and writes to that number of files + # Arguments + # :: uxdfile (filename with extension) + # :: range.descriptor (an array with as many elements as + # there are ranges in the uxdfile) + # Returns: nothing. Writes files to HDD. + + cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD + cdata <- "[0-9]" #regexpr matching one character of any digit + # Create filenames for the output # no longer used, return dataframe instead + datafile <- paste(uxdfile,"-",range.descriptor,".data",sep="") + + # Read the input multirange file + f <- readLines(uxdfile, n=-1) + + + # This way we identify data rows by looking for numeric characters. + #wh <- regexpr("[0-9]", f) + # This way we identify header rows + # Later we will assume that all other rows are data + wh <- regexpr(cchar, f) + + mh <- wh[1:length(wh)] # this gives you the corresponding index vector + # the value of each element corresponds to the position of the regexp match. + # value = 1 means the first character of the row is cchar (row is header) + # value =-1 means no cchar occur on the row (row is data) + + #length(mh[mh == -1]) #total number of datarows in uxdfile + #mh[mh > 1 | mh < 0] <- 0 #set all header-rows to zero (just to make things easier) + + i <- seq(1, length(mh) - 1, 1) + j <- seq(2, length(mh), 1) + starts <- which(mh[i] == 1 & mh[j] != 1) + 1 #start indices + ends <- which(mh[i] != 1 & mh[j] == 1) #end indices, except the last + ends <- c(ends, length(mh)) #fixed the last index of ends + + #ff <- data.frame(NULL) + for (s in 1:length(range.descriptor)) { + matrix(scan(file=textConnection(f[starts[s]:ends[s]]), + what = numeric()), ncol=2, byrow=T) + } + names(ff) <- c("sampleid", "angle", "intensity") + + # Return dataframe + ff +}