Contains my own R functions. First commit.

master
Taha Ahmed 14 years ago
commit 996a79787c

@ -0,0 +1,7 @@
as.radians <- function(degrees) {
radians <- degrees * (pi / 180)
}
as.degrees <- function(radians) {
radians <- radians * (180 / pi)
}

@ -0,0 +1,568 @@
# Collection of R functions for analysis of thin-film X-ray diffraction patterns
# Maintainer: Taha Ahmed
# CONTENTS
# >>>> matchpdf
# >>>> scherrer
# >>>> pdf2df
# >>>> uxd2mtx
# >>>> uxd2df
# >>>> muxd2df
# >>>> muxd2mtx
# >>>> muxd2ls
# - REPAIR SHOP
# - print.xtable.booktabs
# - split.muxd
# - strip.ka2
# - pearson.beta
##################################################
################## matchpdf ######################
##################################################
matchpdf <- function(expcol, pdfrow) {
# Function for matching each element of two numeric vectors
# to the closest-lying element (numerically) in the other
# vector. For example for matching 2th values of PDF to
# 2th values of recorded diffractogram.
## Args: two vectors to be matched (any special considerations on vector lengths?)
## Values: A list of 5 numeric vectors. See description inside function.
# A word of caution. This function creates a matrix with dimensions pdfrow * expcol.
# Usually both of these vectors are rather short. But watch out if large vectors
# are submitted --- the looping below could make this function very slow.
#
diff.thth <- matrix(NA, length(pdfrow), length(expcol))
diff.indx <- matrix(NA, length(pdfrow), length(expcol))
for (pdf.row in 1:length(pdfrow)) {
# For every row, calculate differences between that pdf value and all exp values
diff.thth[pdf.row, ] <- expcol - pdfrow[pdf.row]
}
#
# Now we go on to find the minimum along each row of diff.thth
# and set all other values to some arbitrary value
diff.rmin <- diff.thth
for (pdf.row in 1:dim(diff.thth)[1]) {
for (exp.col in 1:dim(diff.thth)[2]) {
if (abs(diff.thth[pdf.row, exp.col]) != min(abs(diff.thth[pdf.row, ]))) {
diff.rmin[pdf.row, exp.col] <- Inf
}
}
}
#
# We likewise find the minimum along each column and set any other,
# non-Inf values to Inf
diff.cmin <- diff.rmin
for (exp.col in 1:dim(diff.rmin)[2]) {
for (pdf.row in 1:dim(diff.rmin)[1]) {
if (abs(diff.rmin[pdf.row, exp.col]) != min(abs(diff.rmin[, exp.col]))) {
diff.cmin[pdf.row, exp.col] <- Inf
}
}
}
#
# The matrix now contains at most one non-Inf value per row and column.
# To make the use of colSums and rowSums possible (next step) all Inf are set zero,
# and all matches are set to 1.
for (pdf.row in 1:dim(diff.cmin)[1]) {
for (exp.col in 1:dim(diff.cmin)[2]) {
if ((diff.cmin[pdf.row, exp.col]) != Inf) {
diff.indx[pdf.row, exp.col] <- 1
} else {
diff.indx[pdf.row, exp.col] <- 0
}
}
}
#
# Attach error message to return list if sum(rowSums(diff.indx)) == sum(colSums(diff.indx))
matchpdf.err.msg <-"matchpdf() failed to complete. It seems rowSums != colSums."
mtch <- matchpdf.err.msg
#
# Check that sum(rowSums(diff.indx)) == sum(colSums(diff.indx))
if (sum(rowSums(diff.indx)) == sum(colSums(diff.indx))) {
# Reset mtch
mtch <- list()
mtch <- list(csums = colSums(diff.indx), rsums = rowSums(diff.indx), expthth = expcol[colSums(diff.indx) != 0], pdfthth = pdfrow[rowSums(diff.indx) != 0], deltathth = expcol[colSums(diff.indx) != 0] - pdfrow[rowSums(diff.indx) != 0])
# List of 5
# $ csums : num - consisting of ones and zeroes. Shows you which positions of expcol matched.
# $ rsums : num - consisting of ones and zeroes. Shows you which positions of pdfrow matched.
# $ expthth : num - consisting of the matching elements of expcol.
# $ pdfthth : num - consisting of the matching elements of pdfrow.
# $ deltathth : num - element-wise difference of expcol and pdfrow.
}
#
return(mtch)
}
##################################################
################## scherrer ######################
##################################################
scherrer <- function(integralbreadth, thth, wavelength = 1.54056, shapeconstant = ((4/3)*(pi/6))^(1/3)) {
# Function for calculating crystallite grain size from reflection data
# ARGS: integralbreadth - vector with integral breadth of reflections (in degrees)
# thth - vector with 2theta values of reflections (in degrees)
# wavelength - X-ray wavelength used (default 1.54056 Å, Cu Ka)
# shapeconstant - Scherrer constant (default spherical, ~0.9)
# VALUE: vector with size parameters
## REQUIRES: as.radians(), source("/home/taha/chepec/chetex/common/R/common.R")
D <- (shapeconstant * wavelength) / (as.radians(integralbreadth) * cos(as.radians(thth)))
# cos() - angles must be in radians, not degrees!
return(D) #units of angstrom
}
##################################################
################### pdf2df #######################
##################################################
pdf2df <- function(pdffile) {
# Function for extracting information from ICDD PDF XML-files
# For example the PDF files produced by the PDF database at Ångström's X-ray lab
# NOTE: sometimes intensity values are specified as less than some value.
# In those cases, this function simply strips the less-than character.
# ARGS: pdffile (complete path and filename to PDF file)
# VALUE: dataframe with 9 columns:
# thth angles (numeric),
# d (numeric),
# h index (numeric),
# k index (numeric),
# l index (numeric),
# hkl indices (factor of strings),
# hkl.TeX indices formatted for LaTeX (factor of strings),
# intensity (numeric),
# int.TeX intensity formatted for LaTeX (factor of strings)
# attr: This function sets the following attributes:
# ApplicationName,
# ApplicationVersion,
# pdfNumber,
# chemicalformula,
# wavelength
#
require(XML)
doc <- xmlTreeParse(pdffile)
pdf <- xmlRoot(doc)
rmchar <- "[^0-9]"
#
angles <- data.frame(NULL)
for (i in 1:length(pdf[["graphs"]][["stick_series"]])) {
angles <- rbind(angles, data.frame(#
thth = as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["theta"]])),
d = as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["da"]])),
h = as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["h"]])),
k = as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["k"]])),
l = as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["l"]])),
hkl = paste(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["h"]]),
xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["k"]]),
xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["l"]]), sep = ""),
hkl.TeX = paste("\\mbox{$", ifelse(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["h"]])) < 0,
paste("\\bar{", abs(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["h"]]))),
"}", sep = ""),
xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["h"]])),
"\\,", ifelse(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["k"]])) < 0,
paste("\\bar{", abs(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["k"]]))),
"}", sep = ""),
xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["k"]])),
"\\,", ifelse(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["l"]])) < 0,
paste("\\bar{", abs(as.numeric(xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["l"]]))),
"}", sep = ""),
xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["l"]])),
"$}", sep = "", collapse = ""),
intensity = as.numeric(gsub(rmchar, "", xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["intensity"]]))),
int.TeX = paste("{", xmlValue(pdf[["graphs"]][["stick_series"]][[i]][["intensity"]]), "}", sep = "")
))
}
#
attr(angles, "ApplicationName") <- xmlAttrs(pdf)[[1]]
attr(angles, "ApplicationVersion") <- xmlAttrs(pdf)[[2]]
attr(angles, "pdfNumber") <- xmlValue(pdf[["pdf_data"]][["pdf_number"]])
attr(angles, "chemicalformula") <- gsub("[ ]", "", xmlValue(pdf[["pdf_data"]][["chemical_formula"]]))
attr(angles, "wavelength") <- as.numeric(xmlValue(pdf[["graphs"]][["wave_length"]]))
# Caution: Do not subset. Subsetting causes all attributes to be lost.
return(angles)
}
##################################################
################### uxd2mtx ######################
##################################################
uxd2mtx <- function(uxdfile) {
# Function for reading UXD files
# Assumptions: data in two columns
# Args: uxdfile (filename with extension)
# Return value: matrix with two columns
cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD
cdata <- "[0-9]" #regexpr matching one character of any digit
# A new file (datafile) containing only data will be created,
# extension ".data" appended to uxdfile
#datafile <- paste(uxdfile,".data",sep="")
ufile <- file(uxdfile, "r")
f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
close(ufile)
# This way we identify data rows by looking for numeric characters.
#wh <- regexpr("[0-9]", f)
# This way we identify header rows
# We assume that all other rows are data
wh <- regexpr(cchar, f)
mh <- wh[1:length(wh)] # this gives you the corresponding index vector
# the value of each element corresponds to the position of the regexp match.
# value = 1 means the first character of the row is cchar (row is header)
# value =-1 means no cchar occur on the row (row is data)
i <- seq(1, length(mh) - 1, 1)
j <- seq(2, length(mh), 1)
starts <- which(mh[i] == 1 & mh[j] != 1) + 1
ends <- length(mh)
f <- f[starts:ends]
zz <- textConnection(f, "r")
ff <- matrix(scan(zz, what = numeric()), ncol=2, byrow=T)
close(zz)
#zz <- file(datafile, "w") #open connection to datafile
#write.table(ff, file=datafile, row.names=F, sep=",")
#close(zz)
# Return matrix
ff
}
##################################################
#################### uxd2df ######################
##################################################
uxd2df <- function(uxdfile) {
# Function for reading UXD files # Assumptions: data in two columns
# Args: uxdfile (filename with extension)
# Returns: dataframe with three columns
cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD
cdata <- "[0-9]" #regexpr matching one character of any digit
# A new file (datafile) containing only data will be created,
# extension ".data" appended to uxdfile
#datafile <- paste(uxdfile,".data",sep="")
ufile <- file(uxdfile, "r")
f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
close(ufile)
# This way we identify data rows by looking for numeric characters.
#wh <- regexpr("[0-9]", f)
# This way we identify header rows
# We assume that all other rows are data
wh <- regexpr(cchar, f)
mh <- wh[1:length(wh)] # this gives you the corresponding index vector
# the value of each element corresponds to the position of the regexp match.
# value = 1 means the first character of the row is cchar (row is header)
# value =-1 means no cchar occur on the row (row is data)
i <- seq(1, length(mh) - 1, 1)
j <- seq(2, length(mh), 1)
starts <- which(mh[i] == 1 & mh[j] != 1) + 1
ends <- length(mh)
f <- f[starts:ends]
zz <- textConnection(f, "r")
ff <- data.frame(uxdfile, matrix(scan(zz,
what = numeric()), ncol=2, byrow=T))
names(ff) <- c("sampleid", "angle", "intensity")
close(zz)
#zz <- file(datafile, "w") #open connection to datafile
#write.table(ff, file=datafile, row.names=F, sep=",")
#close(zz)
# Return dataframe
ff
}
##################################################
################### muxd2df ######################
##################################################
muxd2df <- function(uxdfile, range.descriptor) {
# Function that reads an UXD file which contains several ranges
# (created in a programmed run, for example)
# Arguments
# :: uxdfile (filename with extension)
# :: range.descriptor (an array with as many elements as
# there are ranges in the uxdfile)
# Returns: dataframe with 3 columns
cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD
cdata <- "[0-9]" #regexpr matching one character of any digit
# Create filenames for the output # no longer used, return dataframe instead
#datafile <- paste(uxdfile,"-",range.descriptor,".data",sep="")
# Read the input multirange file
ufile <- file(uxdfile, "r")
f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
close(ufile)
# This way we identify data rows by looking for numeric characters.
#wh <- regexpr("[0-9]", f)
# This way we identify header rows
# Later we will assume that all other rows are data
wh <- regexpr(cchar, f)
mh <- wh[1:length(wh)] # this gives you the corresponding index vector
# the value of each element corresponds to the position of the regexp match.
# value = 1 means the first character of the row is cchar (row is header)
# value =-1 means no cchar occur on the row (row is data)
#length(mh[mh == -1]) #total number of datarows in uxdfile
#mh[mh > 1 | mh < 0] <- 0 #set all header-rows to zero (just to make things easier)
i <- seq(1, length(mh) - 1, 1)
j <- seq(2, length(mh), 1)
starts <- which(mh[i] == 1 & mh[j] != 1) + 1 #start indices
ends <- which(mh[i] != 1 & mh[j] == 1) #end indices, except the last
ends <- c(ends, length(mh)) #fixed the last index of ends
ff <- data.frame(NULL)
for (s in 1:length(range.descriptor)) {
zz <- textConnection(f[starts[s]:ends[s]], "r")
ff <- rbind(ff, data.frame(range.descriptor[s],
matrix(scan(zz, what = numeric()), ncol=2, byrow=T)))
close(zz)
}
names(ff) <- c("sampleid", "angle", "intensity")
# Return dataframe
ff
}
##################################################
################### muxd2mtx #####################
##################################################
muxd2mtx <- function(uxdfile, range) {
# Function that reads an UXD file which contains several ranges
# (created in a programmed run, for example)
# Arguments
# :: uxdfile (filename with extension)
# :: range (an integer, the number of ranges in the uxdfile)
# Returns: matrix with 2 columns
cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD
cdata <- "[0-9]" #regexpr matching one character of any digit
# Create filenames for the output # no longer used, return dataframe instead
#datafile <- paste(uxdfile,"-",range.descriptor,".data",sep="")
# Read the input multirange file
ufile <- file(uxdfile, "r")
f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
close(ufile)
# We identify header rows. We will assume that all other rows are data
wh <- regexpr(cchar, f)
# length(wh) equals length(f)
# wh has either 1 or -1 for each element
# value = 1 means the first character of that row is cchar (row is header)
# value =-1 means absence of cchar on that row (row is data)
# Since wh contains some attributes (given by regexpr function), we strip out everything
# but the index vector and assign it to the new vector mh
mh <- wh[1:length(wh)] # this gives you the corresponding index vector
#length(mh[mh == -1]) #total number of datarows in uxdfile
#mh[mh > 1 | mh < 0] <- 0 #set all header-rows to zero (just to make things easier)
# Set counters i and j used in assignment below
i <- seq(1, length(mh) - 1, 1)
j <- seq(2, length(mh), 1)
starts <- which(mh[i] == 1 & mh[j] != 1) + 1 #start indices
ends <- which(mh[i] != 1 & mh[j] == 1) #end indices, except the last
ends <- c(ends, length(mh)) #fixes the last index of ends
# note that length of starts (or ends) gives the number of ranges
ff <- matrix(NA, length(f), 2)
for (s in 1:range) {
zz <- textConnection(f[starts[s]:ends[s]], "r")
ff <- rbind(ff, matrix(scan(zz, what = numeric()), ncol=2, byrow=T))
close(zz)
}
# Clean up matrix: remove extra rows
ff[apply(ff,1,function(x)any(!is.na(x))), ]
# Return matrix
ff
}
##################################################
################### muxd2ls ######################
##################################################
muxd2ls <- function(uxdfile) {
# Function that reads an UXD file which contains several ranges
# (created in a programmed run, for example)
# Arguments
# :: uxdfile (filename with extension)
# Returns: List of matrices, as many as there were ranges
# Requires: ??
# See extensive comments in muxd2mtx()
cchar <- "[;_]" #comment characters used in Bruker's UXD
cdata <- "[0-9]"
ufile <- file(uxdfile, "r")
f <- readLines(ufile, n=-1) #read _all_ lines from UXD file
close(ufile)
wh <- regexpr(cchar, f)
mh <- wh[1:length(wh)]
i <- seq(1, length(mh) - 1, 1)
j <- seq(2, length(mh), 1)
starts <- which(mh[i] == 1 & mh[j] != 1) + 1
ends <- which(mh[i] != 1 & mh[j] == 1)
ends <- c(ends, length(mh))
ff <- list()
for (s in 1:length(starts)) {
zz <- textConnection(f[starts[s]:ends[s]], "r")
ms <- matrix(scan(zz, what = numeric()), ncol = 2, byrow = T)
close(zz)
ff[[s]] <- ms
}
# Return list of matrices
return(ff)
}
# -------- ##################### -------- #
# -------- #### REPAIR SHOP #### -------- #
# -------- ##################### -------- #
##################################################
################ pearson.beta ####################
##################################################
pearson.beta <- function(m, w) {
# Function for calculating integral breadth, \beta, of
# an X-ray reflection using the half-width of FWHM (w)
# and m parameter from a Pearson fit.
## Args: m - shape parameter from Pearson fit (numeric)
## w - half the FWHM for the reflection (numeric)
## **** NOTE: gamma(x) out of range for x > 171
## Values: The integral breadth of the reflection (numeric)
# Ref: Birkholz2006, page 96 (Eq. 3.5)
beta <- ((pi * 2^(2 * (1 - m)) * gamma(2 * m - 1)) /
((2^(1 / m) - 1) * (gamma(m))^2)) * w
}
##################################################
################## strip.ka2 #####################
##################################################
strip.ka <- function(angle, intensity) {
# Function that strips (removes) the Cu Ka2 contribution
# from an experimental diffractogram (2Th, I(2Th))
# using the Rachinger algorithm.
## Args: 2theta vector
## intensity vector
## Values: intensity vector
# Ref: Birkholz2006, page 99 (Eq. 3.12), and page 31
Cu.Kai <- 0.154059 # nanometre
Cu.Kaii <- 0.154441 # nanometre
alpha.int.ratio <- 0.5
int.stripped <- intensity - alpha.int.ratio*intensity
return(int.stripped)
}
print.xtable.booktabs <- function(x){
# Make xtable print booktabs tables
# ARGS: x (matrix)
require(xtable)
print(xtable(x), floating=F, hline.after=NULL,
add.to.row=list(pos=list(-1,0, nrow(x)),
command=c("\\toprule ", "\\midrule ", "\\bottomrule ")))
}
split.muxd <- function(uxdfile, range.descriptor) {
# Fix this some other day!!
# Function that reads an UXD file which contains several ranges
# (created in a programmed run, for example) and splits it into
# its ranges and writes to that number of files
# Arguments
# :: uxdfile (filename with extension)
# :: range.descriptor (an array with as many elements as
# there are ranges in the uxdfile)
# Returns: nothing. Writes files to HDD.
cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD
cdata <- "[0-9]" #regexpr matching one character of any digit
# Create filenames for the output # no longer used, return dataframe instead
datafile <- paste(uxdfile,"-",range.descriptor,".data",sep="")
# Read the input multirange file
f <- readLines(uxdfile, n=-1)
# This way we identify data rows by looking for numeric characters.
#wh <- regexpr("[0-9]", f)
# This way we identify header rows
# Later we will assume that all other rows are data
wh <- regexpr(cchar, f)
mh <- wh[1:length(wh)] # this gives you the corresponding index vector
# the value of each element corresponds to the position of the regexp match.
# value = 1 means the first character of the row is cchar (row is header)
# value =-1 means no cchar occur on the row (row is data)
#length(mh[mh == -1]) #total number of datarows in uxdfile
#mh[mh > 1 | mh < 0] <- 0 #set all header-rows to zero (just to make things easier)
i <- seq(1, length(mh) - 1, 1)
j <- seq(2, length(mh), 1)
starts <- which(mh[i] == 1 & mh[j] != 1) + 1 #start indices
ends <- which(mh[i] != 1 & mh[j] == 1) #end indices, except the last
ends <- c(ends, length(mh)) #fixed the last index of ends
#ff <- data.frame(NULL)
for (s in 1:length(range.descriptor)) {
matrix(scan(file=textConnection(f[starts[s]:ends[s]]),
what = numeric()), ncol=2, byrow=T)
}
names(ff) <- c("sampleid", "angle", "intensity")
# Return dataframe
ff
}
Loading…
Cancel
Save