From 5ee85d580eb344641afbc491a04afffda1eece71 Mon Sep 17 00:00:00 2001 From: Taha Ahmed Date: Sat, 10 Dec 2011 17:10:58 +0100 Subject: [PATCH] Added function to read Perkin-Elmer spectra files. This function reads *.SP files (ASCII), and currently fetches only data (no meta-data). --- PELambda/SP2df.R | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 PELambda/SP2df.R diff --git a/PELambda/SP2df.R b/PELambda/SP2df.R new file mode 100644 index 0000000..9ecde24 --- /dev/null +++ b/PELambda/SP2df.R @@ -0,0 +1,49 @@ +################################################## +#################### SP2df ####################### +################################################## +SP2df <- function(datafile) { + ## Description: + ## For now just extracting the bare minimum (the data itself) from SP (ASCII) spectra files. + ## Usage: + ## SP2df(datafile) + ## Arguments: + ## datafile: text string with full path to TXT file + ## containing single or multiple data ranges + ## Value: + ## Dataframe with the following columns: + ## $ sampleid : chr + ## $ wavelength : num + ## $ intensity : num + # + range.data.start.rexp <- "\\#DATA" + #range.data.end.rexp <- ">+End[\\s\\w]*<+" + + # Read the input file + dfile <- file(datafile, "r") + # Note that readLines apparently completely skips empty lines. + # That causes line numbers to not match between source and f vector. + f <- readLines(dfile, n=-1) # read _all_ lines from data file + close(dfile) + + # Create a sampleid for the current job (use the folder name) + sampleid <- basename(dirname(datafile)) + + # Look for data start marker line + range.data.start.row <- grep(range.data.start.rexp, f, perl = TRUE) + 1 + # Data ends one line before EOF + range.data.end.row <- length(f) - 1 + + # Extract data (as-is) + data.raw <- f[range.data.start.row:range.data.end.row] + + + # Collect data into dataframe + zz <- textConnection(data.raw, "r") + data <- data.frame(stringsAsFactors = FALSE, + sampleid, + matrix(scan(zz, what = numeric()), ncol = 2, byrow = T)) + close(zz) + names(data) <- c("sampleid", "wavelength", "intensity") + + return(data) +}