R-common/XRD-TF/uxd2df.R

##################################################
#################### uxd2df ######################
##################################################
uxd2df <- function(uxdfile) {
   # Function for reading Bruker UXD files into a dataframe.
   # Assumptions: data in two whitespace-separated numeric columns; header
   #    (comment) rows have ';' or '_' as their very first character.
   # Args: uxdfile (filename with extension)
   # Returns: dataframe with three columns: sampleid, angle, intensity.
   #    sampleid is the file name without directory and extension, or
   #    uxdfile itself when no such name can be extracted.
   # Raises: an error if the file is empty or if it has header rows that
   #    are never followed by a data row.

   cchar <- "[;_]" # regexp matching the comment characters used in Bruker's UXD

   # Passing the path lets readLines() open and close the file itself,
   # so no connection is leaked if reading fails.
   f <- readLines(uxdfile, n = -1) # read _all_ lines from UXD file
   if (length(f) == 0) {
      stop("no lines could be read from ", uxdfile, call. = FALSE)
   }

   # A match position of 1 means the first character of the row is a comment
   # character (row is header). All other rows are assumed to be data.
   is_header <- regexpr(cchar, f) == 1
   n <- length(is_header)

   # Data begins on the first non-header row that directly follows a header row.
   # Negative indexing (instead of seq()) stays valid for single-line files.
   first_data <- which(is_header[-n] & !is_header[-1]) + 1
   if (length(first_data) > 0) {
      # Only the first header-to-data transition marks the start of the data
      start <- first_data[1]
   } else if (!any(is_header)) {
      # No header at all: every row is treated as data
      start <- 1
   } else {
      stop("no data rows found after the header in ", uxdfile, call. = FALSE)
   }
   f <- f[start:n]

   rgxp.sampleid <- "[^/]*(?=\\.\\w*)" ## THIS REQUIRES perl=TRUE
   # Regular expression that extracts the filename out of a full path.
   # Matches and extracts everything from the last forward slash (assuming Unix slashes)
   # up until a dot followed by an arbitrary number of alphanumeric characters.
   sampleidmtch <- regexpr(rgxp.sampleid, uxdfile, perl = TRUE)
   mtch_start <- as.integer(sampleidmtch)
   mtch_len <- attr(sampleidmtch, "match.length")
   if (mtch_start < 0 || mtch_len < 1) {
      # -1 means no match (e.g. no extension); a zero-length match carries no name.
      # In both cases we use the argument as passed to this function as sampleid.
      sampleid <- uxdfile
   } else {
      sampleid <- substr(uxdfile, mtch_start, mtch_start + mtch_len - 1)
   }

   # scan(text = ...) manages its own text connection; the values are read
   # row by row, hence byrow = TRUE when folding them into two columns.
   ff <- data.frame(sampleid,
      matrix(scan(text = f, what = numeric()), ncol = 2, byrow = TRUE))
   names(ff) <- c("sampleid", "angle", "intensity")

   # Return dataframe
   ff
}
Split all files into their separate functions, put each former file in a directory. The idea is to better expose each function, since some functions tended to drown in the larger files. Note that dependencies are not resolved yet. Many functions are probably broken right now, especially those that point to common.R. 14 years ago			`##################################################`
			`#################### uxd2df ######################`
			`##################################################`
			`uxd2df <- function(uxdfile) {`
			`# Function for reading UXD files # Assumptions: data in two columns`
			`# Args: uxdfile (filename with extension)`
			`# Returns: dataframe with three columns`

			`cchar <- "[;_]" #regexpr matching the comment characters used in Bruker's UXD`
			`cdata <- "[0-9]" #regexpr matching one character of any digit`

			`# A new file (datafile) containing only data will be created,`
			`# extension ".data" appended to uxdfile`
			`#datafile <- paste(uxdfile,".data",sep="")`

			`ufile <- file(uxdfile, "r")`
			`f <- readLines(ufile, n=-1) #read _all_ lines from UXD file`
			`close(ufile)`

			`# This way we identify data rows by looking for numeric characters.`
			`#wh <- regexpr("[0-9]", f)`
			`# This way we identify header rows`
			`# We assume that all other rows are data`
			`wh <- regexpr(cchar, f)`

			`mh <- wh[1:length(wh)] # this gives you the corresponding index vector`
			`# the value of each element corresponds to the position of the regexp match.`
			`# value = 1 means the first character of the row is cchar (row is header)`
			`# value =-1 means no cchar occur on the row (row is data)`

			`i <- seq(1, length(mh) - 1, 1)`
			`j <- seq(2, length(mh), 1)`

			`starts <- which(mh[i] == 1 & mh[j] != 1) + 1`
			`ends <- length(mh)`
			`f <- f[starts:ends]`

			`rgxp.sampleid <- "[^/]*(?=\\.\\w*)" ## THIS REQUIRES perl=TRUE`
			`# Regular expression that extracts the filename out of a full path.`
			`# Matches and extracts everything from the last forward slash (assuming Unix slashes)`
			`# up until a dot followed by an arbitrary number of alphanumeric characters.`
			`sampleidmtch <- regexpr(rgxp.sampleid, uxdfile, perl=TRUE)`
			`# Check that there was a match`
			`if (sampleidmtch < 0) {`
			`# -1 means no match`
			`sampleid <- uxdfile`
			`# If match was unsuccessful we use the argument as passed to this function as sampleid`
			`}`
			`sampleid <- substr(uxdfile, sampleidmtch, (sampleidmtch + attr(sampleidmtch, "match.length") - 1))`

			`zz <- textConnection(f, "r")`
			`ff <- data.frame(sampleid, matrix(scan(zz,`
			`what = numeric()), ncol=2, byrow=T))`
			`names(ff) <- c("sampleid", "angle", "intensity")`
			`close(zz)`

			`#zz <- file(datafile, "w") #open connection to datafile`
			`#write.table(ff, file=datafile, row.names=F, sep=",")`
			`#close(zz)`

			`# Return dataframe`
			`ff`
			`}`