# Collection of R functions for analysis of thin-film X-ray diffraction patterns # Maintainer: Taha Ahmed # CONTENTS # >>>> matchpdf # >>>> scherrer # >>>> pdf2df # >>>> uxd2mtx # >>>> uxd2df # >>>> muxd2df # >>>> muxd2mtx # >>>> muxd2ls # - REPAIR SHOP # - print.xtable.booktabs # - split.muxd # - strip.ka2 # - pearson.beta ################################################## ################## matchpdf ###################### ################################################## matchpdf <- function(expcol, pdfrow) { # Function for matching each element of two numeric vectors # to the closest-lying element (numerically) in the other # vector. For example for matching 2th values of PDF to # 2th values of recorded diffractogram. ## Args: two vectors to be matched (any special considerations on vector lengths?) ## Values: A list of 5 numeric vectors. See description inside function. # A word of caution. This function creates a matrix with dimensions pdfrow * expcol. # Usually both of these vectors are rather short. But watch out if large vectors # are submitted --- the looping below could make this function very slow. # diff.thth <- matrix(NA, length(pdfrow), length(expcol)) diff.indx <- matrix(NA, length(pdfrow), length(expcol)) for (pdf.row in 1:length(pdfrow)) { # For every row, calculate differences between that pdf value and all exp values diff.thth[pdf.row, ] <- expcol - pdfrow[pdf.row] } # # Now we go on to find the minimum along each row of diff.thth # and set all other values to some arbitrary value diff.rmin <- diff.thth for (pdf.row in 1:dim(diff.thth)[1]) { for (exp.col in 1:dim(diff.thth)[2]) { if (abs(diff.thth[pdf.row, exp.col]) != min(abs(diff.thth[pdf.row, ]))) { diff.rmin[pdf.row, exp.col] <- Inf } } } # # We likewise find the minimum along each column and set any other, # non-Inf values to Inf diff.cmin <- diff.rmin for (exp.col in 1:dim(diff.rmin)[2]) { for (pdf.row in 1:dim(diff.rmin)[1]) { if (abs(diff.rmin[pdf.row, exp.col]) != min(abs(diff.rmin[, exp.col]))) { diff.cmin[pdf.row, exp.col] <- Inf } } } # # The matrix now contains at most one non-Inf value per row and column. # To make the use of colSums and rowSums possible (next step) all Inf are set zero, # and all matches are set to 1. for (pdf.row in 1:dim(diff.cmin)[1]) { for (exp.col in 1:dim(diff.cmin)[2]) { if ((diff.cmin[pdf.row, exp.col]) != Inf) { diff.indx[pdf.row, exp.col] <- 1 } else { diff.indx[pdf.row, exp.col] <- 0 } } } # # Attach error message to return list if sum(rowSums(diff.indx)) == sum(colSums(diff.indx)) matchpdf.err.msg <-"matchpdf() failed to complete. It seems rowSums != colSums." mtch <- matchpdf.err.msg # # Check that sum(rowSums(diff.indx)) == sum(colSums(diff.indx)) if (sum(rowSums(diff.indx)) == sum(colSums(diff.indx))) { # Reset mtch mtch <- list() mtch <- list(csums = colSums(diff.indx), rsums = rowSums(diff.indx), expthth = expcol[colSums(diff.indx) != 0], pdfthth = pdfrow[rowSums(diff.indx) != 0], deltathth = expcol[colSums(diff.indx) != 0] - pdfrow[rowSums(diff.indx) != 0]) # List of 5 # $ csums : num - consisting of ones and zeroes. Shows you which positions of expcol matched. # $ rsums : num - consisting of ones and zeroes. Shows you which positions of pdfrow matched. # $ expthth : num - consisting of the matching elements of expcol. # $ pdfthth : num - consisting of the matching elements of pdfrow. # $ deltathth : num - element-wise difference of expcol and pdfrow. } # return(mtch) } ################################################## ################## scherrer ###################### ################################################## scherrer <- function(integralbreadth, thth, wavelength = 1.54056, shapeconstant = ((4/3)*(pi/6))^(1/3)) { # Function for calculating crystallite grain size from reflection data # ARGS: integralbreadth - vector with integral breadth of reflections (in degrees) # thth - vector with 2theta values of reflections (in degrees) # wavelength - X-ray wavelength used (default 1.54056 Å, Cu Ka) # shapeconstant - Scherrer constant (default spherical, ~0.9) # VALUE: vector with size parameters ## REQUIRES: as.radians(), source("/home/taha/chepec/chetex/common/R/common.R") D <- (shapeconstant * wavelength) / (as.radians(integralbreadth) * cos(as.radians(thth))) # cos() - angles must be in radians, not degrees! return(D) #units of angstrom } ################################################## ################### pdf2df ####################### ################################################## pdf2df <- function(pdffile) { # Function for extracting information from ICDD PDF XML-files # For example the PDF files produced by the PDF database at Ångström's X-ray lab # NOTE: sometimes intensity values are specified as less than some value. # In those cases, this function simply strips the less-than character. # ARGS: pdffile (complete path and filename to PDF file) # VALUE: dataframe with 9 columns: # thth angles (numeric), # d (numeric), # h index (numeric), # k index (numeric), # l index (numeric), # hkl indices (factor of strings), # hkl.TeX indices formatted for LaTeX (factor of strings), # intensity (numeric), # int.TeX intensity formatted for LaTeX (factor of strings) # attr: This function sets the following attributes: # ApplicationName, # ApplicationVersion, # pdfNumber, # chemicalformula, # wavelength # require(XML) doc <- xmlTreeParse(pdffile) pdf <- xmlRoot(doc) rmchar <- "[^0-9]" # angles <- data.frame(NULL) for (i in 1:length(pdf[["graphs"]][["stick_series"]])) { angles <- rbind(angles, data.frame(# thth = as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["theta"]])), d = as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["da"]])), h = as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["h"]])), k = as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["k"]])), l = as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["l"]])), hkl = paste(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["h"]]), xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["k"]]), xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["l"]]), sep = ""), hkl.TeX = paste("\\mbox{$", ifelse(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["h"]])) < 0, paste("\\bar{", abs(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["h"]]))), "}", sep = ""), xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["h"]])), "\\,", ifelse(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["k"]])) < 0, paste("\\bar{", abs(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["k"]]))), "}", sep = ""), xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["k"]])), "\\,", ifelse(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["l"]])) < 0, paste("\\bar{", abs(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["l"]]))), "}", sep = ""), xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["l"]])), "$}", sep = "", collapse = ""), intensity = as.numeric(gsub(rmchar, "", xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["intensity"]]))), int.TeX = paste("{", xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["intensity"]]), "}", sep = "") )) } # attr(angles, "ApplicationName") <- xmlAttrs(pdf)[[1]] attr(angles, "ApplicationVersion") <- xmlAttrs(pdf)[[2]] attr(angles, "pdfNumber") <- xmlValue(pdf[["pdf_data"]][["pdf_number"]]) attr(angles, "chemicalformula") <- gsub("[ ]", "", xmlValue(pdf[["pdf_data"]][["chemical_formula"]])) attr(angles, "wavelength") <- as.numeric(xmlValue(pdf[["graphs"]][["wave_length"]])) # Caution: Do not subset. Subsetting causes all attributes to be lost. return(angles) } ################################################## ################### uxd2mtx ###################### ################################################## uxd2mtx <- function(uxdfile) { # Function for reading UXD files # Assumptions: data in two columns # Args: uxdfile (filename with extension) # Return value: matrix with two columns cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD cdata <- "[0-9]" #regexpr matching one character of any digit # A new file (datafile) containing only data will be created, # extension ".data" appended to uxdfile #datafile <- paste(uxdfile,".data",sep="") ufile <- file(uxdfile, "r") f <- readLines(ufile, n=-1) #read _all_ lines from UXD file close(ufile) # This way we identify data rows by looking for numeric characters. #wh <- regexpr("[0-9]", f) # This way we identify header rows # We assume that all other rows are data wh <- regexpr(cchar, f) mh <- wh[1:length(wh)] # this gives you the corresponding index vector # the value of each element corresponds to the position of the regexp match. # value = 1 means the first character of the row is cchar (row is header) # value =-1 means no cchar occur on the row (row is data) i <- seq(1, length(mh) - 1, 1) j <- seq(2, length(mh), 1) starts <- which(mh[i] == 1 & mh[j] != 1) + 1 ends <- length(mh) f <- f[starts:ends] zz <- textConnection(f, "r") ff <- matrix(scan(zz, what = numeric()), ncol=2, byrow=T) close(zz) #zz <- file(datafile, "w") #open connection to datafile #write.table(ff, file=datafile, row.names=F, sep=",") #close(zz) # Return matrix ff } ################################################## #################### uxd2df ###################### ################################################## uxd2df <- function(uxdfile) { # Function for reading UXD files # Assumptions: data in two columns # Args: uxdfile (filename with extension) # Returns: dataframe with three columns cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD cdata <- "[0-9]" #regexpr matching one character of any digit # A new file (datafile) containing only data will be created, # extension ".data" appended to uxdfile #datafile <- paste(uxdfile,".data",sep="") ufile <- file(uxdfile, "r") f <- readLines(ufile, n=-1) #read _all_ lines from UXD file close(ufile) # This way we identify data rows by looking for numeric characters. #wh <- regexpr("[0-9]", f) # This way we identify header rows # We assume that all other rows are data wh <- regexpr(cchar, f) mh <- wh[1:length(wh)] # this gives you the corresponding index vector # the value of each element corresponds to the position of the regexp match. # value = 1 means the first character of the row is cchar (row is header) # value =-1 means no cchar occur on the row (row is data) i <- seq(1, length(mh) - 1, 1) j <- seq(2, length(mh), 1) starts <- which(mh[i] == 1 & mh[j] != 1) + 1 ends <- length(mh) f <- f[starts:ends] zz <- textConnection(f, "r") ff <- data.frame(uxdfile, matrix(scan(zz, what = numeric()), ncol=2, byrow=T)) names(ff) <- c("sampleid", "angle", "intensity") close(zz) #zz <- file(datafile, "w") #open connection to datafile #write.table(ff, file=datafile, row.names=F, sep=",") #close(zz) # Return dataframe ff } ################################################## ################### muxd2df ###################### ################################################## muxd2df <- function(uxdfile, range.descriptor) { # Function that reads an UXD file which contains several ranges # (created in a programmed run, for example) # Arguments # :: uxdfile (filename with extension) # :: range.descriptor (an array with as many elements as # there are ranges in the uxdfile) # Returns: dataframe with 3 columns cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD cdata <- "[0-9]" #regexpr matching one character of any digit # Create filenames for the output # no longer used, return dataframe instead #datafile <- paste(uxdfile,"-",range.descriptor,".data",sep="") # Read the input multirange file ufile <- file(uxdfile, "r") f <- readLines(ufile, n=-1) #read _all_ lines from UXD file close(ufile) # This way we identify data rows by looking for numeric characters. #wh <- regexpr("[0-9]", f) # This way we identify header rows # Later we will assume that all other rows are data wh <- regexpr(cchar, f) mh <- wh[1:length(wh)] # this gives you the corresponding index vector # the value of each element corresponds to the position of the regexp match. # value = 1 means the first character of the row is cchar (row is header) # value =-1 means no cchar occur on the row (row is data) #length(mh[mh == -1]) #total number of datarows in uxdfile #mh[mh > 1 | mh < 0] <- 0 #set all header-rows to zero (just to make things easier) i <- seq(1, length(mh) - 1, 1) j <- seq(2, length(mh), 1) starts <- which(mh[i] == 1 & mh[j] != 1) + 1 #start indices ends <- which(mh[i] != 1 & mh[j] == 1) #end indices, except the last ends <- c(ends, length(mh)) #fixed the last index of ends ff <- data.frame(NULL) for (s in 1:length(range.descriptor)) { zz <- textConnection(f[starts[s]:ends[s]], "r") ff <- rbind(ff, data.frame(range.descriptor[s], matrix(scan(zz, what = numeric()), ncol=2, byrow=T))) close(zz) } names(ff) <- c("sampleid", "angle", "intensity") # Return dataframe ff } ################################################## ################### muxd2mtx ##################### ################################################## muxd2mtx <- function(uxdfile, range) { # Function that reads an UXD file which contains several ranges # (created in a programmed run, for example) # Arguments # :: uxdfile (filename with extension) # :: range (an integer, the number of ranges in the uxdfile) # Returns: matrix with 2 columns cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD cdata <- "[0-9]" #regexpr matching one character of any digit # Create filenames for the output # no longer used, return dataframe instead #datafile <- paste(uxdfile,"-",range.descriptor,".data",sep="") # Read the input multirange file ufile <- file(uxdfile, "r") f <- readLines(ufile, n=-1) #read _all_ lines from UXD file close(ufile) # We identify header rows. We will assume that all other rows are data wh <- regexpr(cchar, f) # length(wh) equals length(f) # wh has either 1 or -1 for each element # value = 1 means the first character of that row is cchar (row is header) # value =-1 means absence of cchar on that row (row is data) # Since wh contains some attributes (given by regexpr function), we strip out everything # but the index vector and assign it to the new vector mh mh <- wh[1:length(wh)] # this gives you the corresponding index vector #length(mh[mh == -1]) #total number of datarows in uxdfile #mh[mh > 1 | mh < 0] <- 0 #set all header-rows to zero (just to make things easier) # Set counters i and j used in assignment below i <- seq(1, length(mh) - 1, 1) j <- seq(2, length(mh), 1) starts <- which(mh[i] == 1 & mh[j] != 1) + 1 #start indices ends <- which(mh[i] != 1 & mh[j] == 1) #end indices, except the last ends <- c(ends, length(mh)) #fixes the last index of ends # note that length of starts (or ends) gives the number of ranges ff <- matrix(NA, length(f), 2) for (s in 1:range) { zz <- textConnection(f[starts[s]:ends[s]], "r") ff <- rbind(ff, matrix(scan(zz, what = numeric()), ncol=2, byrow=T)) close(zz) } # Clean up matrix: remove extra rows ff[apply(ff,1,function(x)any(!is.na(x))), ] # Return matrix ff } ################################################## ################### muxd2ls ###################### ################################################## muxd2ls <- function(uxdfile) { # Function that reads an UXD file which contains several ranges # (created in a programmed run, for example) # Arguments # :: uxdfile (filename with extension) # Returns: List of matrices, as many as there were ranges # Requires: ?? # See extensive comments in muxd2mtx() cchar <- "[;_]" #comment characters used in Bruker's UXD cdata <- "[0-9]" ufile <- file(uxdfile, "r") f <- readLines(ufile, n=-1) #read _all_ lines from UXD file close(ufile) wh <- regexpr(cchar, f) mh <- wh[1:length(wh)] i <- seq(1, length(mh) - 1, 1) j <- seq(2, length(mh), 1) starts <- which(mh[i] == 1 & mh[j] != 1) + 1 ends <- which(mh[i] != 1 & mh[j] == 1) ends <- c(ends, length(mh)) ff <- list() for (s in 1:length(starts)) { zz <- textConnection(f[starts[s]:ends[s]], "r") ms <- matrix(scan(zz, what = numeric()), ncol = 2, byrow = T) close(zz) ff[[s]] <- ms } # Return list of matrices return(ff) } # -------- ##################### -------- # # -------- #### REPAIR SHOP #### -------- # # -------- ##################### -------- # ################################################## ################ pearson.beta #################### ################################################## pearson.beta <- function(m, w) { # Function for calculating integral breadth, \beta, of # an X-ray reflection using the half-width of FWHM (w) # and m parameter from a Pearson fit. ## Args: m - shape parameter from Pearson fit (numeric) ## w - half the FWHM for the reflection (numeric) ## **** NOTE: gamma(x) out of range for x > 171 ## Values: The integral breadth of the reflection (numeric) # Ref: Birkholz2006, page 96 (Eq. 3.5) beta <- ((pi * 2^(2 * (1 - m)) * gamma(2 * m - 1)) / ((2^(1 / m) - 1) * (gamma(m))^2)) * w } ################################################## ################## strip.ka2 ##################### ################################################## strip.ka <- function(angle, intensity) { # Function that strips (removes) the Cu Ka2 contribution # from an experimental diffractogram (2Th, I(2Th)) # using the Rachinger algorithm. ## Args: 2theta vector ## intensity vector ## Values: intensity vector # Ref: Birkholz2006, page 99 (Eq. 3.12), and page 31 Cu.Kai <- 0.154059 # nanometre Cu.Kaii <- 0.154441 # nanometre alpha.int.ratio <- 0.5 int.stripped <- intensity - alpha.int.ratio*intensity return(int.stripped) } print.xtable.booktabs <- function(x){ # Make xtable print booktabs tables # ARGS: x (matrix) require(xtable) print(xtable(x), floating=F, hline.after=NULL, add.to.row=list(pos=list(-1,0, nrow(x)), command=c("\\toprule ", "\\midrule ", "\\bottomrule "))) } split.muxd <- function(uxdfile, range.descriptor) { # Fix this some other day!! # Function that reads an UXD file which contains several ranges # (created in a programmed run, for example) and splits it into # its ranges and writes to that number of files # Arguments # :: uxdfile (filename with extension) # :: range.descriptor (an array with as many elements as # there are ranges in the uxdfile) # Returns: nothing. Writes files to HDD. cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD cdata <- "[0-9]" #regexpr matching one character of any digit # Create filenames for the output # no longer used, return dataframe instead datafile <- paste(uxdfile,"-",range.descriptor,".data",sep="") # Read the input multirange file f <- readLines(uxdfile, n=-1) # This way we identify data rows by looking for numeric characters. #wh <- regexpr("[0-9]", f) # This way we identify header rows # Later we will assume that all other rows are data wh <- regexpr(cchar, f) mh <- wh[1:length(wh)] # this gives you the corresponding index vector # the value of each element corresponds to the position of the regexp match. # value = 1 means the first character of the row is cchar (row is header) # value =-1 means no cchar occur on the row (row is data) #length(mh[mh == -1]) #total number of datarows in uxdfile #mh[mh > 1 | mh < 0] <- 0 #set all header-rows to zero (just to make things easier) i <- seq(1, length(mh) - 1, 1) j <- seq(2, length(mh), 1) starts <- which(mh[i] == 1 & mh[j] != 1) + 1 #start indices ends <- which(mh[i] != 1 & mh[j] == 1) #end indices, except the last ends <- c(ends, length(mh)) #fixed the last index of ends #ff <- data.frame(NULL) for (s in 1:length(range.descriptor)) { matrix(scan(file=textConnection(f[starts[s]:ends[s]]), what = numeric()), ncol=2, byrow=T) } names(ff) <- c("sampleid", "angle", "intensity") # Return dataframe ff }