Mon, 15 Jul 2013 13:17:56 +0200
Add parsing robustness against unknown data section types, particularly the proposed XSECTION.
ChangeLog | file | annotate | diff | comparison | revisions |
pyslha.py | file | annotate | diff | comparison | revisions |
1.1 --- a/ChangeLog Fri May 31 19:30:14 2013 +0200
1.2 +++ b/ChangeLog Mon Jul 15 13:17:56 2013 +0200
1.3 @@ -1,3 +1,8 @@
1.4 +2013-07-15 Andy Buckley <andy.buckley@cern.ch>
1.5 +
1.6 +	* Add parsing robustness against unknown data section types,
1.7 +	  particularly the proposed XSECTION.
1.8 +
1.9  2013-05-31 Andy Buckley <andy.buckley@cern.ch>
1.10  
1.11  	* Version 2.1.2
2.1 --- a/pyslha.py Fri May 31 19:30:14 2013 +0200
2.2 +++ b/pyslha.py Mon Jul 15 13:17:56 2013 +0200
2.3 @@ -365,37 +365,55 @@
2.4              continue
2.5          if "#" in line:
2.6              line = line[:line.index("#")]
2.7 +        ## Ignore empty lines (after comment removal and whitespace trimming)
2.8 +        if not line.strip():
2.9 +            continue
2.10  
2.11 -        ## Handle BLOCK/DECAY start lines
2.12 -        if line.upper().startswith("BLOCK"):
2.13 -            #print line
2.14 -            match = re.match(r"BLOCK\s+(\w+)(\s+Q\s*=\s*.+)?", line.upper())
2.15 -            if not match:
2.16 -                continue
2.17 -            blockname = match.group(1)
2.18 -            qstr = match.group(2)
2.19 -            if qstr is not None:
2.20 -                qstr = qstr[qstr.find("=")+1:].strip()
2.21 -            currentblock = blockname
2.22 -            currentdecay = None
2.23 -            blocks[blockname] = Block(blockname, q=qstr)
2.24 -        elif line.upper().startswith("DECAY"):
2.25 -            match = re.match(r"DECAY\s+(-?\d+)\s+([\d\.E+-]+|NAN).*", line.upper())
2.26 -            if not match:
2.27 -                continue
2.28 -            pdgid = int(match.group(1))
2.29 -            width = float(match.group(2)) if match.group(2) != "NAN" else None
2.30 -            currentblock = "DECAY"
2.31 -            currentdecay = pdgid
2.32 -            decays[pdgid] = Particle(pdgid, width)
2.33 +        ## Section header lines start with a non-whitespace character, data lines have a whitespace indent
2.34 +        # TODO: Are tabs also allowed for indents? Check the SLHA standard.
2.35 +        if not line.startswith(" "):
2.36 +            # TODO: Should we now strip the line to remove any trailing whitespace?
2.37 +
2.38 +            ## Handle BLOCK start lines
2.39 +            if line.upper().startswith("BLOCK"):
2.40 +                #print line
2.41 +                match = re.match(r"BLOCK\s+(\w+)(\s+Q\s*=\s*.+)?", line.upper())
2.42 +                if not match:
2.43 +                    continue
2.44 +                blockname = match.group(1)
2.45 +                qstr = match.group(2)
2.46 +                if qstr is not None:
2.47 +                    qstr = qstr[qstr.find("=")+1:].strip()
2.48 +                currentblock = blockname
2.49 +                currentdecay = None
2.50 +                blocks[blockname] = Block(blockname, q=qstr)
2.51 +            ## Handle DECAY start lines
2.52 +            elif line.upper().startswith("DECAY"):
2.53 +                match = re.match(r"DECAY\s+(-?\d+)\s+([\d\.E+-]+|NAN).*", line.upper())
2.54 +                if not match:
2.55 +                    continue
2.56 +                pdgid = int(match.group(1))
2.57 +                width = float(match.group(2)) if match.group(2) != "NAN" else None
2.58 +                currentblock = "DECAY"
2.59 +                currentdecay = pdgid
2.60 +                decays[pdgid] = Particle(pdgid, width)
2.61 +            ## Handle unknown section type start lines (and continue ignoring until a non-header line is found)
2.62 +            elif type(_autotype(line.split()[0])) is str:
2.63 +                import sys
2.64 +                sys.stderr.write("Ignoring unknown section type: %s\n" % line.split()[0])
2.65 +                currentblock = None
2.66 +                currentdecay = None
2.67 +
2.68 +        ## This non-empty line starts with an indent, hence must be an in-section data line
2.69          else:
2.70 -            ## In-block line
2.71 +            # TODO: Should we now strip the line to remove the indent (and any trailing whitespace)?
2.72              if currentblock is not None:
2.73                  items = line.split()
2.74                  if len(items) < 1:
2.75                      continue
2.76                  if currentblock != "DECAY":
2.77                      blocks[currentblock].add_entry(items)
2.78 +                # TODO: Add handling of XSECTION if/when standardised
2.79                  else:
2.80                      br = float(items[0]) if items[0].upper() != "NAN" else None
2.81                      nda = int(items[1])