Add parsing robustness against unknown data section types, particularly the proposed XSECTION.

Mon, 15 Jul 2013 13:17:56 +0200

author
Andy Buckley <andy@insectnation.org>
date
Mon, 15 Jul 2013 13:17:56 +0200
changeset 238
3e381d4604fc
parent 237
0d77779294a7
child 239
42f74a1e96cf

Add parsing robustness against unknown data section types, particularly the proposed XSECTION.

ChangeLog file | annotate | diff | comparison | revisions
pyslha.py file | annotate | diff | comparison | revisions
     1.1 --- a/ChangeLog	Fri May 31 19:30:14 2013 +0200
     1.2 +++ b/ChangeLog	Mon Jul 15 13:17:56 2013 +0200
     1.3 @@ -1,3 +1,8 @@
     1.4 +2013-07-15  Andy Buckley  <andy.buckley@cern.ch>
     1.5 +
     1.6 +	* Add parsing robustness against unknown data section types,
     1.7 +	particularly the proposed XSECTION.
     1.8 +
     1.9  2013-05-31  Andy Buckley  <andy.buckley@cern.ch>
    1.10  
    1.11  	* Version 2.1.2
     2.1 --- a/pyslha.py	Fri May 31 19:30:14 2013 +0200
     2.2 +++ b/pyslha.py	Mon Jul 15 13:17:56 2013 +0200
     2.3 @@ -365,37 +365,55 @@
     2.4              continue
     2.5          if "#" in line:
     2.6              line = line[:line.index("#")]
     2.7 +        ## Ignore empty lines (after comment removal and whitespace trimming)
     2.8 +        if not line.strip():
     2.9 +            continue
    2.10  
    2.11 -        ## Handle BLOCK/DECAY start lines
    2.12 -        if line.upper().startswith("BLOCK"):
    2.13 -            #print line
    2.14 -            match = re.match(r"BLOCK\s+(\w+)(\s+Q\s*=\s*.+)?", line.upper())
    2.15 -            if not match:
    2.16 -                continue
    2.17 -            blockname = match.group(1)
    2.18 -            qstr = match.group(2)
    2.19 -            if qstr is not None:
    2.20 -                qstr = qstr[qstr.find("=")+1:].strip()
    2.21 -            currentblock = blockname
    2.22 -            currentdecay = None
    2.23 -            blocks[blockname] = Block(blockname, q=qstr)
    2.24 -        elif line.upper().startswith("DECAY"):
    2.25 -            match = re.match(r"DECAY\s+(-?\d+)\s+([\d\.E+-]+|NAN).*", line.upper())
    2.26 -            if not match:
    2.27 -                continue
    2.28 -            pdgid = int(match.group(1))
    2.29 -            width = float(match.group(2)) if match.group(2) != "NAN" else None
    2.30 -            currentblock = "DECAY"
    2.31 -            currentdecay = pdgid
    2.32 -            decays[pdgid] = Particle(pdgid, width)
    2.33 +        ## Section header lines start with a non-whitespace character, data lines have a whitespace indent
    2.34 +        # TODO: Are tabs also allowed for indents? Check the SLHA standard.
    2.35 +        if not line.startswith(" "):
    2.36 +            # TODO: Should we now strip the line to remove any trailing whitespace?
    2.37 +
    2.38 +            ## Handle BLOCK start lines
    2.39 +            if line.upper().startswith("BLOCK"):
    2.40 +                #print line
    2.41 +                match = re.match(r"BLOCK\s+(\w+)(\s+Q\s*=\s*.+)?", line.upper())
    2.42 +                if not match:
    2.43 +                    continue
    2.44 +                blockname = match.group(1)
    2.45 +                qstr = match.group(2)
    2.46 +                if qstr is not None:
    2.47 +                    qstr = qstr[qstr.find("=")+1:].strip()
    2.48 +                currentblock = blockname
    2.49 +                currentdecay = None
    2.50 +                blocks[blockname] = Block(blockname, q=qstr)
    2.51 +            ## Handle DECAY start lines
    2.52 +            elif line.upper().startswith("DECAY"):
    2.53 +                match = re.match(r"DECAY\s+(-?\d+)\s+([\d\.E+-]+|NAN).*", line.upper())
    2.54 +                if not match:
    2.55 +                    continue
    2.56 +                pdgid = int(match.group(1))
    2.57 +                width = float(match.group(2)) if match.group(2) != "NAN" else None
    2.58 +                currentblock = "DECAY"
    2.59 +                currentdecay = pdgid
    2.60 +                decays[pdgid] = Particle(pdgid, width)
    2.61 +            ## Handle unknown section type start lines (their data lines are then ignored until the next recognised BLOCK/DECAY header)
    2.62 +            elif type(_autotype(line.split()[0])) is str:
    2.63 +                import sys
    2.64 +                sys.stderr.write("Ignoring unknown section type: %s\n" % line.split()[0])
    2.65 +                currentblock = None
    2.66 +                currentdecay = None
    2.67 +
    2.68 +        ## This non-empty line starts with an indent, hence must be an in-section data line
    2.69          else:
    2.70 -            ## In-block line
    2.71 +            # TODO: Should we now strip the line to remove the indent (and any trailing whitespace)?
    2.72              if currentblock is not None:
    2.73                  items = line.split()
    2.74                  if len(items) < 1:
    2.75                      continue
    2.76                  if currentblock != "DECAY":
    2.77                      blocks[currentblock].add_entry(items)
    2.78 +                # TODO: Add handling of XSECTION if/when standardised
    2.79                  else:
    2.80                      br = float(items[0]) if items[0].upper() != "NAN" else None
    2.81                      nda = int(items[1])

mercurial