#!/usr/bin/env python
"""Split a ".sly" file into separate target files, optionally filtered by sed.

The first two characters of the input file are the field separator.  The
header names the target files; each following data line contributes one
field to each target.  If a "sly.cfg" file is present in the working
directory, the sed scripts it lists are applied to every target's text.

The code is written to run unchanged on Python 2.7 and Python 3.
"""

import os
import string
import subprocess
import sys


def usage():
    """Print the usage message and exit."""
    print('''
%s filename
    Splits filename into separate files with target names.
    Also dumps edited version.
    (Important: Dump edited version not yet implemented.)
    The first two characters in your file are your field
    separator. See "slyhelp.txt" for more information.
    To use sed edits and filters, put a copy of "sly.cfg" in
    your working directory and put the sed scripts in any
    convenient directory.  See example "sly.cfg" and "slyhelp.txt".
    \\includes may be used in your xxx.ly file, or "lyinclude.py"
    may be used to put the files into a final xxx.ly version.
    (c)2003 David Raleigh Arnold, under GNU.
''' % (sys.argv[0]))
    sys.exit()


def inputfile():
    """Open the file named by the single command-line argument.

    "<arg>.sly" is tried first, then "<arg>" itself.  Returns the file
    object opened for reading.  Prints usage and exits when the argument
    count is wrong; prints a message and exits with status 1 when neither
    file exists.
    """
    if len(sys.argv) != 2:
        print('''
    One argument, a file name with ".sly" extension, is required.''')
        usage()
    arg = sys.argv[1]
    for candidate in (arg + '.sly', arg):
        if os.path.isfile(candidate):
            return open(candidate)
    print("File " + arg + " or " + arg + ".sly does not exist.")
    # The original fell through to "return infile" with infile unbound.
    sys.exit(1)


def stripcoms(filename):
    """Return the text of *filename* without "#" comments or blank lines.

    Everything from the first "#" on a line is dropped, surrounding
    whitespace is stripped, and empty lines are discarded.  Every kept line
    ends with a newline; the result is "" when nothing is kept.
    """
    kept = []
    with open(filename) as handle:
        for line in handle:
            text = line.split('#')[0].strip()
            if text:
                kept.append(text + '\n')
    return ''.join(kept)


def _loadscripts(sedpath, names):
    """Concatenate the comment-stripped sed scripts listed in *names*."""
    chunks = []
    for name in names.split('\n'):
        if name in ('', '\n'):
            continue
        if sedpath != "":
            name = os.path.join(sedpath, name)
        chunks.append(stripcoms(name))
    return ''.join(chunks)


def getfilters():
    """Read "sly.cfg" and return (wipcmds, fincmds) strings of sed commands.

    When there is no "sly.cfg" in the working directory a notice is printed
    and ('', '') is returned.  Otherwise the file must contain the marker
    lines "Path:", "WIP:" and "Finish:" in that order; the lines after
    "WIP:" and "Finish:" name sed script files, looked up relative to the
    line after "Path:" when one is given.  Only the existence of sly.cfg is
    checked up front; any failure while reading it or the scripts prints an
    explanation and exits.
    """
    try:
        if not os.path.isfile("sly.cfg"):
            print('''
    No "sly.cfg" file is present, so sly will display the
    file output instead of a new version of your sly file.''')
            return ('', '')
        cfg = stripcoms("sly.cfg")
        # Peel the sections off from the end; [:-1] drops the trailing
        # newline that stripcoms() puts on every line.
        head, finfiles = cfg.split('Finish:\n')[:2]
        finfiles = finfiles[:-1]
        head, wipfiles = head.split('WIP:\n')[:2]
        wipfiles = wipfiles[:-1]
        sedpath = head.split('Path:\n')[1][:-1]
        return (_loadscripts(sedpath, wipfiles),
                _loadscripts(sedpath, finfiles))
    except Exception:
        # The marker is "Finish:" -- the old message said "Final:", which
        # the parser above never accepted.
        print('''
    While the sly.cfg file does not have to be present, if it
    is present, it must have these three lines:
    Path:
    WIP:
    Finish:
    Of course, to get a good result you can't apply your sed
    files any old way, and they must exist. See slyhelp.txt.
    ''')
        print("python errors:")
        print("%s %s" % sys.exc_info()[:2])
        sys.exit()


def getheader(infile):
    """Parse the header lines of an open sly file.

    The first two characters of the file are the field separator.  If the
    first field of the first line contains "=", the text after it is the
    first search spec and the header continues until a line starts with
    that spec again; otherwise the search spec is 'none' and the header is
    the first line only.

    *infile* is read completely and then rewound with seek(0), so the
    caller can read the data lines from the same object.

    Returns (cnt, fsep, numfields, hdrdata, ss, oldlines, minlenfields):
    the number of header lines, the separator, and per header line the
    field count, the stripped fields (search spec first), the search spec,
    the raw line, and the raw width of each field that contained a space
    (0 otherwise).  Raises ValueError when the file is empty.
    """
    lines = infile.readlines()
    infile.seek(0)  # leave the stream as we found it for getarray()
    if not lines:
        raise ValueError("sly file is empty: no header line found")

    fsep = lines[0][:2]
    cnt = 0
    hdrdata = []
    numfields = []
    minlenfields = []
    ss = []
    oldlines = []
    srchspec = 'none'
    for line in lines:
        oldline = line
        if cnt == 0:
            line = line[2:]  # drop the separator that opens the file
        # rstrip, not [:-1]: a last line without a newline keeps its
        # final character.
        rawwords = line.rstrip('\n').split(fsep)
        minlens = [len(word) if ' ' in word else 0 for word in rawwords]
        headwords = [word.strip() for word in rawwords]
        if cnt == 0:
            tryfirst = [part.strip() for part in headwords[0].split('=')]
            if len(tryfirst) < 2:
                srchspec = 'none'
                headwords.insert(0, srchspec)
                # Keep minlens index-aligned with headwords; without this
                # the widths were shifted by one in the no-spec case.
                minlens.insert(0, 0)
            else:
                # Anything after the "=" is the search spec.
                srchspec = ''.join(tryfirst[1:])
                headwords[0] = srchspec
        else:
            if srchspec != "none":
                srchspec = headwords[0]
            if srchspec == ss[0]:
                break  # first spec seen again: the data starts here
        hdrdata.append(headwords)
        numfields.append(len(headwords))
        minlenfields.append(minlens)
        ss.append(srchspec)
        oldlines.append(oldline)
        cnt = cnt + 1
        if ss[0] == "":
            break
    return (cnt, fsep, numfields, hdrdata, ss, oldlines, minlenfields)


def getarray(infile, fsep, ss, skip=None):
    """Read the data lines of *infile* into a list of stripped field lists.

    The first *skip* lines (the header) are ignored, as are whitespace-only
    lines.  Every returned row starts with a search spec: 'none' when
    ss[0] is "none", otherwise the row's own first field if it is in *ss*,
    otherwise the spec of the previous row.

    *skip* defaults to the module-level ``cnt`` set by main(), which is
    what the original three-argument form relied on.

    Returns (array, arraycnt) where arraycnt is len(array).  Raises
    ValueError when the first data row has no search spec to inherit.
    """
    if skip is None:
        skip = cnt  # module global published by main()
    array = []
    spec = None
    for line in infile.readlines()[skip:]:
        text = line.strip()
        if not text:
            continue  # gracefully rid of white lines
        row = text.split(fsep)
        if ss[0] == "none":
            row.insert(0, 'none')
        if row[0].strip() not in ss:
            if spec is None:
                raise ValueError(
                    "first data line does not begin with a search spec: %r"
                    % (line,))
            row.insert(0, spec)  # line has no spec: inherit the previous
        spec = row[0].strip()
        array.append([word.strip() for word in row])
    return array, len(array)


def checkfile(array, ss, cnt, fieldcounts=None):
    """Validate the data rows; print every bad row and exit if any is bad.

    For each row whose search spec is ss[j], two things are reported: a
    second field whose first word is itself a search spec (misplaced
    separator), and a field count different from fieldcounts[j].  The
    printed line number is the row's 1-based position plus *cnt*.

    *fieldcounts* defaults to the module-level ``numfields`` set by
    main(), which is what the original three-argument form relied on.
    Returns None when all rows are fine; calls sys.exit() otherwise.
    """
    if fieldcounts is None:
        fieldcounts = numfields  # module global published by main()
    bail = False
    for i, row in enumerate(array):
        lineno = i + 1 + cnt
        rowspec = row[0].strip()
        for j, spec in enumerate(ss):
            if spec != rowspec:
                continue
            # Guard len(row): a one-field row used to raise IndexError
            # here instead of being reported as a field-count error.
            if len(row) > 1 and row[1].strip().split(' ', 1)[0] in ss:
                print("%s %s" % (lineno, row))
                print('''Separator misplaced at beginning of line above.
    A spurious error on the same line is possible.
    Sly can't find this error if it immediately follows the header.''')
                bail = True
            if len(row) != fieldcounts[j]:
                print("%s %s" % (lineno, row))
                print("Error in line above: number of fields is %s "
                      "but should be %s" % (len(row), fieldcounts[j]))
                bail = True
    if bail:
        print('''
    N.B.: Sly checks your notes data only, not your header.
    The line number in your file appears on the left.
    Whitespace lines are not counted.
    Your data is kept formatted as a Python list, which should
    make it easier to identify problems.
    "none" appears if there are no search specs.
    If there are search specs, the first must begin the first line
    of your notes data or the the header does not end properly.
    ''')
        sys.exit()


def _sed(commands, text):
    """Run ``sed -e commands`` over *text* and return what sed prints.

    The script is passed as a single argv entry (no shell), so quotes in
    the sed script need no escaping, and communicate() feeds stdin while
    draining stdout so large inputs cannot deadlock on a full pipe.
    """
    proc = subprocess.Popen(['sed', '-e', commands],
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    return proc.communicate(text)[0]


def main():
    """Split the sly file named on the command line into its target files.

    For every header field after the search spec, the matching column of
    the data rows is joined with newlines, run through sed twice (the WIP
    commands, then the Finish commands) and written to the file named by
    that header field.  Sed is run per target so that its searches stop at
    the end of each file.  The old header is echoed to stdout, and when
    there are no sed commands at all the column text is echoed too.
    """
    # Published as module globals so getarray()/checkfile() keep working
    # when called without their optional arguments.
    global cnt, numfields
    try:
        infile = inputfile()
        try:
            (cnt, fsep, numfields, hdrdata, ss, oldlines,
             minlenfields) = getheader(infile)
            wipcmds, fincmds = getfilters()
            array, arraycnt = getarray(infile, fsep, ss, cnt)
        finally:
            infile.close()
        checkfile(array, ss, cnt, numfields)

        for oldline in oldlines:  # put old header to stdout
            print(oldline.rstrip('\n'))

        unfiltered = wipcmds == "" and fincmds == ""
        for headrow in hdrdata:
            searchspec = headrow[0]
            for hfx in range(1, len(headrow)):
                content = [arow[hfx] for arow in array
                           if arow[0] == searchspec]
                if unfiltered:  # in this case, all done!
                    for word in content:
                        sys.stdout.write(word + '\n')
                product = _sed(wipcmds, '\n'.join(content))
                # TODO: rebuilding the edited sly file from the WIP output
                # (padded to the widths in minlenfields) is not implemented;
                # the old padding pass computed a value and discarded it.
                product = _sed(fincmds, product)
                with open(headrow[hfx], 'w') as outfile:
                    outfile.write(product + '\n')
    except Exception:
        # "except Exception" lets the SystemExit raised by usage() and
        # checkfile() end the program instead of being reported here.
        print("python errors:")
        print("%s %s" % sys.exc_info()[:2])


if __name__ == "__main__":
    main()