Please bear with the long question.
Numba encounters an "LLVM IR parsing error" in my code, seemingly due to the default typing of np.complex128.shape, but I could not find any documentation saying that np.complex128.shape, numba.complex128.shape or prange have default types.
Minimal working reproduction:
import numpy as np
from numba import jit, njit, prange
from numba import complex128, int32  # import jit value types

# Invert an (n,n) submatrix of a (m>n,n) rectangular matrix by taking the first
# n rows. "Taking the first n rows" is motivated by the RHS being rank n.
#
# -- Input --
# (m,n) matrix A
#
# -- Return --
# (m,m) matrix A_inv
@njit(complex128[:,:](complex128[:,:]))
def inv_square_jit(in_matrix):
    if in_matrix.ndim != 2:
        raise ValueError("Input should be 2d array")
    n_row = in_matrix.shape[0]
    n_col = in_matrix.shape[1]
    if n_row <= n_col:
        raise ValueError("Input should have more rows than cols")
    # Remove specified column (slightly faster than delete)
    # and remove extra rows
    sqinv = np.linalg.inv(in_matrix[:n_col, :])
    padded = np.zeros((n_row, n_row), dtype=np.complex128)
    padded[:len(sqinv), :len(sqinv)] = sqinv
    return padded
# Solve degenerate underdetermined equation system
# -- Input --
# (m,n+1), (m,n), rank-n 2d np arrays A, B
# n-dim np array-like vb
# or
# (m,n+1), rank n+1 A,
# m-dim np array-like v_rhs
#
# vb can be any array-like item, and is not necessarily 1d.
# Implemented with ChiPhiFunc in mind.
#
# -- Return --
# n+1 np array-like va
#
# -- Note --
# For recursion relations with ChiPhiFunc's, A and B should come from
# convolution matrices. That still needs implementation.
@njit(complex128[:](complex128[:,:], complex128[:]))
def solve_degenerate_jit(A, v_rhs):
    n_dim = A.shape[1]
    if A.shape[0] != v_rhs.shape[0]:
        raise ValueError("solve_underdetermined: A, v_rhs must have the same number of rows")
    A_inv = np.ascontiguousarray(inv_square_jit(A))
    # This vector is actually m-dim, with m-n blank elems at the end.
    va = (A_inv @ np.ascontiguousarray(v_rhs))[:n_dim]
    return va
# @njit(complex128[:](complex128[:,:], complex128[:,:], complex128[:]))
# def solve_degenerate_jit(A, B, vb):
#     B_cont = np.ascontiguousarray(B)
#     vb_cont = np.ascontiguousarray(vb)
#     return solve_degenerate_jit(A, B_cont @ vb_cont)
# Generate the convolution operator from a, acting on an n_dim vector.
@njit(complex128[:,:](complex128[:], int32))
def conv_matrix(vec, n_dim):
    out_transposed = np.zeros((n_dim, len(vec) + n_dim - 1), dtype=np.complex128)
    for i in prange(n_dim):
        out_transposed[i, i:i+len(vec)] = vec
    return out_transposed.T
# For solving a*va = v_rhs, where va, vb have the same number of dimensions.
# In the context below, "#dim" represents number of chi mode components.
#
# -- Input --
# v_source_A: 2d matrix, content of ChiPhiFuncGrid, #dim = a
# v_rhs: 2d matrix, content of ChiPhiFuncGrid, #dim = m
# rank_rhs: int, rank of v_rhs (and correct answer)
# -- Output --
# va: 2d matrix, content of ChiPhiFuncGrid. Has #dim = rank_rhs
@njit(complex128[:,:](complex128[:,:], complex128[:,:], int32), parallel=True)
def batch_degen_jit(v_source_A, v_rhs, rank_rhs):
    # if type(v_source_A) is not ChiPhiFuncGrid or type(v_source_B) is not ChiPhiFuncGrid:
    #     raise TypeError('batch_underdetermined_deconv: input should be ChiPhiFuncGrid.')
    A_slices = np.ascontiguousarray(v_source_A.T)  # now axis 0 is the phi grid
    v_rhs_slices = np.ascontiguousarray(v_rhs.T)   # now axis 0 is the phi grid
    # axis 0 is phi grid, axis 1 is chi mode
    va_transposed = np.zeros((len(A_slices), rank_rhs), dtype=np.complex128)
    if len(A_slices) != len(v_rhs_slices):
        raise ValueError('batch_underdetermined_deconv: A, v_rhs must have the same number of phi grids.')
    if len(v_source_A) + rank_rhs - 1 != len(v_rhs):
        raise ValueError('batch_underdetermined_deconv: #dim_A + rank_rhs - 1 = #dim_v_rhs must hold.')
    for i in prange(len(A_slices)):
        A_conv_matrix_i = conv_matrix(A_slices[i], rank_rhs)
        # ********** Removing this line somehow makes it compile **********
        va_transposed[i, :] = solve_degenerate_jit(A_conv_matrix_i, v_rhs_slices[i])
        # ********** Removing this line somehow makes it compile **********
    return va_transposed.T
The code compiles fine with parallel=False for the last function. With parallel=True, however, the error occurs at for i in prange(len(A_slices)): in batch_degen_jit, seemingly because solve_degenerate_jit(complex128[:,:], complex128[:], int32) accepts int32 but prange(len(A_slices)) produces int64. Replacing all int32 with int64 solves the problem. Removing the *-marked line also makes it compile.
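For reference, here is a minimal sketch of that fix applied to conv_matrix alone; the only change from the code above is int32 -> int64 in the signature, matching the 64-bit index type that prange and len produce on a 64-bit build:

import numpy as np
from numba import njit, prange, complex128, int64

# signature now takes int64, matching the index type produced by prange/len
@njit(complex128[:,:](complex128[:], int64))
def conv_matrix(vec, n_dim):
    out_transposed = np.zeros((n_dim, len(vec) + n_dim - 1), dtype=np.complex128)
    for i in prange(n_dim):
        out_transposed[i, i:i+len(vec)] = vec
    return out_transposed.T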
Error:
LoweringError: Failed in nopython mode pipeline (step: nopython mode backend)
Failed in nopython mode pipeline (step: nopython mode backend)
LLVM IR parsing error
<string>:1278:34: error: '%.777' defined with type 'i64' but expected 'i32'
%".778" = icmp eq i32 %".776", %".777"
^
File "<ipython-input-24-fa65c2d527fa>", line 104:
def batch_degen_jit(v_source_A, v_rhs, rank_rhs):
<source elided>
raise ValueError('batch_underdetermined_deconv: #dim_A + rank_rhs - 1 = #dim_v_rhs must hold.')
for i in prange(len(A_slices)):
^
During: lowering "id=17[LoopNest(index_variable = parfor_index.1805, range = (0, $10call_method.4_size0.1767, 1))]{120: <ir.Block at <ipython-input-24-fa65c2d527fa> (104)>}Var(parfor_index.1805, <ipython-input-24-fa65c2d527fa>:104)" at <ipython-input-24-fa65c2d527fa> (104)
Why is this the case?
Thank you!
(P.S. here's a test case for the methods:
convolver = np.random.rand(10,3)
correct_answer = np.random.rand(10,5)
rhs = np.zeros((10,7))
for i in range(10):
rhs[i] = np.convolve(convolver[i], correct_answer[i])
print(batch_degen_jit(np.complex128(convolver).T, np.complex128(rhs).T, 5))
)
When I run Consumer.py for a headers exchange in RabbitMQ using Python, I get the error below.
I have included the consumer and publisher programs after the traceback.
Traceback (most recent call last):
File "headersConsumer.py", line 32, in <module>
main()
File "headersConsumer.py", line 14, in main
channel.exchange_declare(exchange = 'headers_logs',exchange_type='headers',durable=True)
File "C:\Python38\lib\site-packages\pika\adapters\blocking_connection.py", line 2387, in
exchange_declare
self._flush_output(declare_ok_result.is_ready)
File "C:\Python38\lib\site-packages\pika\adapters\blocking_connection.py", line 1339, in
_flush_output
raise self._closing_reason # pylint: disable=E0702
pika.exceptions.ChannelClosedByBroker: (406, "PRECONDITION_FAILED
- inequivalent arg 'type' for exchange 'headers_logs' in vhost '/': received 'headers' but
current is 'fanout'")
I have written the consumer code like this:
#!/usr/bin/env python
import pika, sys, os

def main():
    connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
    channel = connection.channel()
    channel.exchange_declare(exchange='headers_logs', exchange_type='headers', durable=True)
    channel.queue_declare(queue="HeadersQueue1", durable=True)
    channel.queue_bind(exchange='headers_logs', queue="HeadersQueue1", routing_key='',
                       arguments={'x-match': 'any', 'key1': 'one', 'key2': 'two'})

    def callback(ch, method, properties, body):
        print(" [x] %r" % body.decode())

    print(' [*] Waiting for logs. To exit press CTRL+C')
    channel.basic_consume(
        queue="HeadersQueue1", on_message_callback=callback, auto_ack=True)
    channel.start_consuming()

if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        print('Interrupted')
        try:
            sys.exit(0)
        except SystemExit:
            os._exit(0)
I have written the publisher program like this:
import pika
import sys

connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
channel = connection.channel()
channel.exchange_declare(exchange='headers_logs', exchange_type='headers')
message = ' '.join(sys.argv[1:]) or "Hello World!"
channel.basic_publish(exchange='headers_logs', routing_key="", body=message,
                      properties=pika.BasicProperties(
                          delivery_mode=2,  # make message persistent
                          headers={'key1': 'one', 'key2': 'three'}
                      ))
print(" [x] Sent %r" % message)
connection.close()
I do not understand this error. Can anyone please explain it?
PRECONDITION_FAILED means that you declared an exchange with one set of parameters and are now trying to redeclare it, under the same name, with different parameters.
In your case:
headers_logs' in vhost '/': received 'headers' but
current is 'fanout'")
So you are trying to change the exchange type from fanout to headers.
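One way out, assuming the old fanout exchange is disposable, is to delete it and declare it again with the new type before starting the consumer (a sketch using pika's exchange_delete):

import pika

connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
channel = connection.channel()
# drop the old fanout exchange, then recreate it as a headers exchange
channel.exchange_delete(exchange='headers_logs')
channel.exchange_declare(exchange='headers_logs', exchange_type='headers', durable=True)
connection.close()

Alternatively, declare your headers exchange under a new name and leave the existing fanout exchange untouched.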
See here for more detail (this is for queues, but exchanges work in the same way).
Before a queue can be used it has to be declared. Declaring a queue
will cause it to be created if it does not already exist. The
declaration will have no effect if the queue does already exist and
its attributes are the same as those in the declaration. When the
existing queue attributes are not the same as those in the declaration
a channel-level exception with code 406 (PRECONDITION_FAILED) will be
raised.
Is there a switch to enable thousands digit grouping (e.g. 100_000) by default in IEx? It would be really helpful if there is.
Otherwise, how can we specify it in IO.puts?
According to Inspect.Opts, there is no native option to enable digit grouping as you described.
However, the following should work to override the behavior of inspect for Integer and Float when using IEx, if you place it in your local ~/.iex.exs file:
defmodule PrettyNumericInspect do
  def group(value, :binary, true),
    do: value |> group_by(8)
  def group(value, :decimal, true),
    do: value |> group_by(3)
  def group(value, :hex, true),
    do: value |> group_by(2)
  def group(value, :octal, true),
    do: value |> group_by(4)
  def group(value, _, _),
    do: value

  defp group_by(value, n) when byte_size(value) > n do
    size = byte_size(value)
    case size |> rem(n) do
      0 ->
        (for << << g :: binary-size(n) >> <- value >>,
             into: [],
             do: g)
        |> Enum.join("_")
      r ->
        {head, tail} = value |> String.split_at(r)
        [head, group_by(tail, n)] |> Enum.join("_")
    end
  end
  defp group_by(value, _),
    do: value
end

defimpl Inspect, for: Float do
  def inspect(thing, %Inspect.Opts{pretty: pretty}) do
    [head, tail] = IO.iodata_to_binary(:io_lib_format.fwrite_g(thing))
                   |> String.split(".", parts: 2)
    [PrettyNumericInspect.group(head, :decimal, pretty), tail]
    |> Enum.join(".")
  end
end

defimpl Inspect, for: Integer do
  def inspect(thing, %Inspect.Opts{base: base, pretty: pretty}) do
    Integer.to_string(thing, base_to_value(base))
    |> PrettyNumericInspect.group(base, pretty)
    |> prepend_prefix(base)
  end

  defp base_to_value(base) do
    case base do
      :binary  -> 2
      :decimal -> 10
      :octal   -> 8
      :hex     -> 16
    end
  end

  defp prepend_prefix(value, :decimal), do: value
  defp prepend_prefix(value, base) do
    prefix = case base do
      :binary -> "0b"
      :octal  -> "0o"
      :hex    -> "0x"
    end
    prefix <> value
  end
end
The Inspect.Opts option :pretty must be set to true for the digit grouping to be displayed. According to the documentation for IEx.configure/1, pretty inspect should be enabled by default.
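If it is not enabled in your session for some reason, you can turn it on explicitly (a one-liner that could also go into ~/.iex.exs; IEx.configure/1 is the documented way to change inspect options):

IEx.configure(inspect: [pretty: true])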
When launching iex, you will see 2 warnings about redefining Inspect.Float and Inspect.Integer, but it should continue to work like normal afterwards:
iex> 100_000
100_000
iex> 100_000.1
100_000.1
It also supports groupings for the different :base options (:binary, :decimal, :octal, and :hex):
iex> inspect 0b11111111_11111111, base: :binary, pretty: true
"0b11111111_11111111"
iex> inspect 999_999, base: :decimal, pretty: true
"999_999"
iex> inspect 0o7777_7777, base: :octal, pretty: true
"0o7777_7777"
iex> inspect 0xFF_FF, base: :hex, pretty: true
"0xFF_FF"
I have something called a Node. Both Definition and Theorem are types of Node, but only Definitions should be allowed to have a plural attribute:
class Definition(Node):
    def __init__(self, dic):
        self.type = "definition"
        super(Definition, self).__init__(dic)
        self.plural = move_attribute(dic, {'plural', 'pl'}, strict=False)

    @property
    def plural(self):
        return self._plural

    @plural.setter
    def plural(self, new_plural):
        if new_plural is None:
            self._plural = None
        else:
            clean_plural = check_type_and_clean(new_plural, str)
            assert dunderscore_count(clean_plural) >= 2
            self._plural = clean_plural

class Theorem(Node):
    def __init__(self, dic):
        self.type = "theorem"
        super().__init__(dic)
        self.proofs = move_attribute(dic, {'proofs', 'proof'}, strict=False)
        # theorems CANNOT have plurals:
        # if 'plural' in self:
        #     raise KeyError('Theorems cannot have plurals.')
As you can see, Definitions have a plural.setter, but theorems do not. However, the code
theorem = Theorem(some input)
theorem.plural = "some plural"
runs just fine and raises no errors. But I want it to raise an error. As you can see, I tried to check for plurals manually at the bottom of my code shown, but this would only be a patch. I would like to block the setting of ANY attribute that is not expressly defined. What is the best practice for this sort of thing?
I am looking for an answer that satisfies the "chicken" requirement:
I do not think this solves my issue. In both of your solutions, I can
append the code t.chicken = 'hi'; print(t.chicken), and it prints hi
without error. I do not want users to be able to make up new
attributes like chicken.
The short answer is "Yes, you can."
The follow-up question is "Why?" One of the strengths of Python is the remarkable dynamism, and by restricting that ability you are actually making your class less useful (but see edit at bottom).
However, there are good reasons to be restrictive, and if you do choose to go down that route you will need to modify your __setattr__ method:
def __setattr__(self, name, value):
    if name not in ('my', 'attribute', 'names',):
        raise AttributeError('attribute %s not allowed' % name)
    else:
        super().__setattr__(name, value)
There is no need to mess with __getattr__ nor __getattribute__ since they will not return an attribute that doesn't exist.
Here is your code, slightly modified -- I added the __setattr__ method to Node, and added an _allowed_attributes to Definition and Theorem.
class Node:
    def __setattr__(self, name, value):
        if name not in self._allowed_attributes:
            raise AttributeError('attribute %s does not and cannot exist' % name)
        super().__setattr__(name, value)

class Definition(Node):
    _allowed_attributes = '_plural', 'type'
    def __init__(self, dic):
        self.type = "definition"
        super().__init__(dic)
        self.plural = move_attribute(dic, {'plural', 'pl'}, strict=False)

    @property
    def plural(self):
        return self._plural

    @plural.setter
    def plural(self, new_plural):
        if new_plural is None:
            self._plural = None
        else:
            clean_plural = check_type_and_clean(new_plural, str)
            assert dunderscore_count(clean_plural) >= 2
            self._plural = clean_plural

class Theorem(Node):
    _allowed_attributes = 'type', 'proofs'
    def __init__(self, dic):
        self.type = "theorem"
        super().__init__(dic)
        self.proofs = move_attribute(dic, {'proofs', 'proof'}, strict=False)
In use it looks like this:
>>> theorem = Theorem(...)
>>> theorem.plural = 3
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<stdin>", line 6, in __setattr__
AttributeError: attribute plural does not and cannot exist
edit
Having thought about this some more, I think a good compromise for what you want, and to actually answer the part of your question about restricting allowed changes to setters only, would be to:
use a metaclass to inspect the class at creation time and dynamically build the _allowed_attributes tuple
modify the __setattr__ of Node to always allow modification/creation of attributes with at least one leading _
This gives you some protection against both misspellings and creation of attributes you don't want, while still allowing programmers to work around or enhance the classes for their own needs.
Okay, the new meta class looks like:
class NodeMeta(type):
def __new__(metacls, cls, bases, classdict):
node_cls = super().__new__(metacls, cls, bases, classdict)
allowed_attributes = []
for base in (node_cls, ) + bases:
for name, obj in base.__dict__.items():
if isinstance(obj, property) and hasattr(obj, '__fset__'):
allowed_attributes.append(name)
node_cls._allowed_attributes = tuple(allowed_attributes)
return node_cls
The Node class has two adjustments: include the NodeMeta metaclass and adjust __setattr__ to only block non-underscore leading attributes:
class Node(metaclass=NodeMeta):
    def __init__(self, dic):
        self._dic = dic

    def __setattr__(self, name, value):
        if not name[0] == '_' and name not in self._allowed_attributes:
            raise AttributeError('attribute %s does not and cannot exist' % name)
        super().__setattr__(name, value)
Finally, the Node subclasses Theorem and Definition have the type attribute moved into the class namespace so there is no issue with setting them -- and as a side note, type is a bad name as it is also a built-in function -- maybe node_type instead?
class Definition(Node):
    type = "definition"
    ...

class Theorem(Node):
    type = "theorem"
    ...
As a final note: even this method is not immune to somebody actually adding or changing attributes, as object.__setattr__(theorem_instance, 'an_attr', 99) can still be used -- or (even simpler) the _allowed_attributes can be modified; however, if somebody is going to all that work they hopefully know what they are doing... and if not, they own all the pieces. ;)
You can check for the attribute every time you access it.
class Theorem(Node):
    ...
    def __getattribute__(self, name):
        # let internal/dunder lookups through, otherwise enforce the whitelist
        if not name.startswith('_') and name not in ["allowed", "attribute", "names"]:
            raise AttributeError("attribute " + name + " not allowed")
        # delegate to the default lookup to avoid infinite recursion via self.__dict__
        return super().__getattribute__(name)

    def __setattr__(self, name, value):
        if name not in ["allowed", "attribute", "names"]:
            raise AttributeError("attribute " + name + " not allowed")
        super().__setattr__(name, value)
You can build the allowed method list dynamically as a side effect of a decorator:
allowed_attrs = []

def allowed(f):
    allowed_attrs.append(f.__name__)
    return f
You would also need to add non-method attributes manually.
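For illustration, here is a sketch of how that decorator could be combined with the __setattr__ approach from the earlier answer; the class layout and the leading-underscore exemption are assumptions for the example, not part of the original suggestion:

allowed_attrs = []

def allowed(f):
    # record the decorated method's name as a permitted attribute
    allowed_attrs.append(f.__name__)
    return f

class Node:
    def __setattr__(self, name, value):
        # private names stay settable so property setters can store their state
        if not name.startswith('_') and name not in allowed_attrs:
            raise AttributeError('attribute %s not allowed' % name)
        super().__setattr__(name, value)

class Definition(Node):
    @property
    def plural(self):
        return self._plural

    @plural.setter
    @allowed  # applied to the raw function, so f.__name__ is 'plural'
    def plural(self, new_plural):
        self._plural = new_plural

d = Definition()
d.plural = "axes"  # fine: registered by the decorator
d.chicken = "hi"   # raises AttributeError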
If you really want to prevent all other dynamic attributes, I assume there's a well-defined time window in which you want to allow adding them.
Below I allow it until object initialisation is finished (you can control this with the allow_dynamic_attribute variable).
class A:
    def __init__(self):
        self.allow_dynamic_attribute = True
        self.abc = "hello"
        self._plural = None  # need to give default value
        # A.__setattr__ = types.MethodType(__setattr__, A)
        self.allow_dynamic_attribute = False

    def __setattr__(self, name, value):
        if hasattr(self, 'allow_dynamic_attribute'):
            if not self.allow_dynamic_attribute:
                if not hasattr(self, name):
                    raise Exception
        super().__setattr__(name, value)

    @property
    def plural(self):
        return self._plural

    @plural.setter
    def plural(self, new_plural):
        self._plural = new_plural

a = A()
print(a.abc)      # fine
a.plural = "yes"  # fine
print(a.plural)   # fine
a.dkk = "bed"     # raise exception
Or it can be more compact this way. (I couldn't originally figure out how MethodType + super could get along; the reason is that zero-argument super() only works for functions defined inside a class body, where the compiler creates the __class__ cell it relies on. So the externally defined __setattr__ below calls object.__setattr__ directly and is assigned to the class as a plain function.)
def __setattr__(self, name, value):
    if not hasattr(self, name):
        raise Exception
    else:
        # zero-argument super() fails here: this function is defined outside a
        # class body, so it has no __class__ cell; call object.__setattr__ directly
        object.__setattr__(self, name, value)

class A:
    def __init__(self):
        self.foo = "hello"
        # after this point, there's no more setattr for you
        A.__setattr__ = __setattr__  # assigning the plain function binds correctly

a = A()
print(a.foo)  # fine
a.bar = "bed" # raise exception
Yes, you can create private members that cannot be modified from outside the class. The variable name should start with two underscores:
class Test(object):
    def __init__(self, t):
        self.__t = t

    def __str__(self):
        return str(self.__t)

t = Test(2)
print(t)  # prints 2
t.__t = 3
print(t)  # prints 2
That said, trying to set such a variable as we do in t.__t = 3 will not raise an exception.
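The reason is name mangling: inside the class body, self.__t is rewritten to self._Test__t, so t.__t = 3 from the outside just creates a new, unrelated attribute. The mangled name itself remains writable, as this quick demonstration with the Test class above shows:

t = Test(2)
t.__t = 3        # silently creates a new attribute named '__t'
print(t)         # still prints 2
t._Test__t = 3   # the mangled name bypasses the intended privacy
print(t)         # prints 3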
A different approach which you can take to achieve the wanted behavior is using functions. This approach will require "accessing attributes" using functional notation, but if that doesn't bother you, you can get exactly what you want. The following demo "hardcodes" the values, but obviously you can have Theorem() accept an argument and use it to set values to the attributes dynamically.
Demo:
# -*- coding: utf-8 -*-

def Theorem():
    def f(attrib):
        def proofs():
            return ''
        def plural():
            return '◊◊◊◊◊◊◊◊'
        if attrib == 'proofs':
            return proofs()
        elif attrib == 'plural':
            return plural()
        else:
            raise ValueError("Attribute [{}] doesn't exist".format(attrib))
    return f

t = Theorem()
print(t('proofs'))
print(t('plural'))
print(t('wait_for_error'))
OUTPUT
◊◊◊◊◊◊◊◊
Traceback (most recent call last):
File "/Users/alfasi/Desktop/1.py", line 40, in <module>
print(t('wait_for_error'))
File "/Users/alfasi/Desktop/1.py", line 32, in f
raise ValueError("Attribute [{}] doesn't exist".format(attrib))
ValueError: Attribute [wait_for_error] doesn't exist
Problem
I am trying to implement an error tolerant parser using Python Lex-Yacc (PLY), but I have trouble using error recovery rules at the end of my input string.
How can I recover from an unexpected end of input?
Example
This example grammar produces strings of the form A END A END A END A END ...
Statement : Expressions
Expressions : Expression Expressions
|
Expression : A END
I want to perform error recovery if the END token was omitted, so strings like A A A END or A A A will be recognized by the parser.
My approach
I added an error recovery rule, which allows me to accept input like A A A END
Expression : A END
| A error
This allows me to accept the following input:
A A A END
But if the last END token is omitted (A A A), I still get a syntax error and cannot recover.
Sample PLY code
from __future__ import print_function

# Tokens
tokens = ('A', 'END')
t_A = r'A'
t_END = r'END'
t_ignore = " "

def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)

# Build the lexer
import ply.lex as lex
lex.lex()

# Rules
def p_statement_expr(p):
    '''statement : expressions'''
    print("parsed:", p[1])

def p_expressions(p):
    '''expressions : expression expressions'''
    p[0] = [p[1]] + p[2]

def p_expressions_empty(p):
    '''expressions : '''
    p[0] = list()

def p_expression_pharse(p):
    '''expression : A END
                  | A error'''
    p[0] = 'A'

def p_error(p):
    if p:
        print("Syntax error at '%s'" % p.value)
    else:
        print("Syntax error at EOI")

import ply.yacc as yacc
yacc.yacc()

while 1:
    try:
        s = raw_input('query > ')  # use input() on Python 3
    except EOFError:
        break
    yacc.parse(s)
I add it as a new answer (and do know it is too late for the bounty :-( ) because it is a very different approach. If we used flex, it would be much easier, since it has the notion of the <<EOF>> token that matches only at end of file. After thinking about that, I realized that it was very simple to add that functionality to PLY without any change to the original module, by using a proxy around the lexer. And Python allows easy implementation of proxies thanks to the __getattr__ special method.
I just add:
a new token EOF that will be sent at end of file
a proxy around the token method of the lexer that, at end of file, returns the special EOF token on the first pass and then the normal None
the EOF token at the end of the statement rule
And I still reverse the rule to expressions : expressions expression instead of expressions : expression expressions to allow immediate reduction.
The code becomes:
from __future__ import print_function

# Tokens
tokens = ('A', 'END', 'EOF')
t_A = r'A'
t_END = r'END'
t_ignore = " "

def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)

# Build the lexer
import ply.lex as lex
orig_lexer = lex.lex()

class ProxyLexer(object):
    def __init__(self, lexer, eoftoken):
        self.end = False
        self.lexer = lexer
        self.eof = eoftoken

    def token(self):
        tok = self.lexer.token()
        if tok is None:
            if self.end:
                self.end = False
            else:
                self.end = True
                tok = lex.LexToken()
                tok.type = self.eof
                tok.value = None
                tok.lexpos = self.lexer.lexpos
                tok.lineno = self.lexer.lineno
        # print('custom', tok)
        return tok

    def __getattr__(self, name):
        return getattr(self.lexer, name)

lexer = ProxyLexer(orig_lexer, 'EOF')

# Rules
def p_statement_expr(p):
    '''statement : expressions EOF'''
    print("parsed:", p[1])

def p_expressions(p):
    '''expressions : expressions expression'''
    p[0] = p[1] + [p[2]]

def p_expressions_empty(p):
    '''expressions : '''
    p[0] = list()

def p_expression_pharse(p):
    '''expression : A END
                  | A error'''
    p[0] = 'A'

def p_error(p):
    if p:
        print("Syntax error at '%s'" % p.value)
    else:
        print("Syntax error at EOI")

import ply.yacc as yacc
parser = yacc.yacc()

while 1:
    try:
        s = raw_input('query > ')  # use input() on Python 3
    except EOFError:
        break
    parser.parse(s, lexer=lexer)
That way:
the original grammar is unchanged
the error recovery method remains stupidly simple and has no dependence on the rest of the grammar
it can be easily extended to complex parsers
As you want to accept all elements, you can explicitly declare a rule for an A not followed by an END, and use the fact that yacc and PLY deal gracefully with ambiguous rules.
You can simply have a normal rule:
Expression : A END
and, below it, a lower-priority rule (as it comes later) that will issue a warning:
Expression : A
That way, all A's will be accepted, there won't be any syntax error, and the warning will be issued for any A not followed by an END, including one at the end of the flow. In order to more easily find the offending A, I have added the position of the symbol in the flow to the warning.
Edit:
The script is modified to correctly deal with other syntax errors (such as AENDENDAEND), and also to immediately reduce expressions by replacing expressions : expression expressions with expressions : expressions expression.
Here is the modified script (tested in Python 3.4, simply replacing raw_input with input):
from __future__ import print_function

# Tokens
tokens = ('A', 'END')
t_A = r'A'
t_END = r'END'
t_ignore = " "

def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)

# Build the lexer
import ply.lex as lex
lex.lex()

# Rules
def p_statement_expr(p):
    '''statement : expressions'''
    print("parsed:", p[1])

def p_expressions(p):
    '''expressions : expressions expression'''
    p[0] = p[1] + [p[2]]

def p_expressions_err(p):
    '''expressions : expressions error'''
    p[0] = p[1]

def p_expressions_empty(p):
    '''expressions : '''
    p[0] = list()

def p_expression_pharse(p):
    '''expression : A END'''
    p[0] = 'A'

# add a separate rule BELOW the previous one to display a warning
def p_expression_pharse_warn(p):
    '''expression : A'''
    print("Warning at absolute position %d (line %d)" % (p.lexpos(1), p.lineno(1)))
    p[0] = 'A'

def p_error(p):
    if p:
        print("Syntax error at '%s'" % p.value)
    else:
        print("Syntax error at EOI")

import ply.yacc as yacc
yacc.yacc()

while 1:
    try:
        s = raw_input('query > ')  # use input() on Python 3
    except EOFError:
        break
    yacc.parse(s)
Edit: the following is an incorrect attempt to avoid an additional rule; it is more complex and less efficient than the above version. Please see my conclusion below.
Edit per comment:
I understand your point that you do not want to multiply grammar rules. It is possible to be fault tolerant, except for the last token. If your last token is in error, it will not be followed by anything and will never be caught in the rule expression : A error.
But here is a fault-tolerant parser that keeps everything except the last token in case of an error on that one:
from __future__ import print_function

# Tokens
tokens = ('A', 'END')
t_A = r'A'
t_END = r'END'
t_ignore = " "

def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)

# Build the lexer
import ply.lex as lex
lex.lex()

# Rules
def p_statement_expr(p):
    '''statement : expressions'''
    # print("parsed:", p[1])

def p_expressions(p):
    '''expressions : expressions expression'''
    p[0] = p[1] + [p[2]]
    result.append(p[2])

def p_expressions_empty(p):
    '''expressions : '''
    p[0] = list()

def p_expression_pharse(p):
    '''expression : A END
                  | A error'''
    p[0] = 'A'

def p_error(p):
    if p:
        print("Syntax error at '%s' (%d)" % (p.value, p.lexpos))
    else:
        print("Syntax error at EOI")

import ply.yacc as yacc
yacc.yacc()

while 1:
    try:
        s = input('query > ')  # use raw_input() on Python 2
    except EOFError:
        break
    result = []
    yacc.parse(s)
    print('Result', result)
The principle is to accumulate with expressions : expressions expression instead of expressions : expression expressions, and to collect everything in a global variable.
With an input of A END A A END A A A END it gives
Result ['A', 'A', 'A', 'A', 'A', 'A']
and with A END A A END A A A (final END omitted), it gives
Result ['A', 'A', 'A', 'A', 'A']
(all tokens but the last)
With a true flex - bison solution, it would be possible to make use of the special <<EOF>> token that matches at end of input, so as to always have another token after the last one. Unfortunately, it is not implemented in PLY, and the only real solution is to introduce a rule that accepts a lone A token. For a real parser, it also guarantees that you are actually processing the correct token: I used
def p_expression_pharse(p):
    '''expression : A END'''
    p[0] = 1 + p.lexpos(1)

# add a separate rule BELOW the previous one to display a warning
def p_expression_pharse_warn(p):
    '''expression : A'''
    print("Warning at absolute position %d (line %d)" % (p.lexpos(1), p.lineno(1)))
    p[0] = -1 - p.lexpos(1)
to uniquely identify tokens in the result, and I get correct positions.
And ... the error processing is very simple ...
Discussion TL;DR:
I admit I missed the point of last-token error recovery. That is because in all parsers I've seen in real use cases, error recovery consisted of rejecting the part that was syntactically incorrect (and thus not directly usable) and re-synchronizing the parser on the next correct group of tokens. In everything I have seen, if a partial sentence can be used, it must not be processed by the error recovery mechanism but by a grammar rule, in which it is easy to describe the appropriate action.
If you just want to keep the offending input for later processing, I think it is not a problem of an action depending on the syntax, and I would simply note the position of the offending token, or at most note the position of the last correctly analysed token (the end of a complete element) and the beginning of the first error-recovery token, and say that what is between is incorrect.
But that would be much different from what is asked here ...
This works for all examples I could imagine
from __future__ import print_function

# Tokens
tokens = ('A', 'END')
t_A = r'A'
t_END = r'END'
t_ignore = " "

def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)

# Build the lexer
import ply.lex as lex
lex.lex()

# Rules
def p_statement_expr(p):
    '''statement : expressions'''
    print("parsed:", p[1])

def p_expressions(p):
    '''expressions : expression expressions'''
    p[0] = p[1] + p[2]

def p_expressions_empty(p):
    '''expressions : '''
    p[0] = list()

def p_expression_pharse(p):
    '''expression : A END'''
    p[0] = ['A']

def p_expression_error(p):
    '''expression : A error'''
    p[0] = ['A']
    if p[2] is not None:
        p[0] += p[2]

def p_error(p):
    if p is None:
        print("Syntax error at EOI")
        e = yacc.YaccSymbol()
        e.type = 'error'
        e.value = None
        yacc.errok()
        return e
    elif p.type == 'error':
        yacc.errok()
        return
    elif hasattr(p, 'value'):
        print("Syntax error at '%s'" % p.value)
        e = yacc.YaccSymbol()
        e.type = 'error'
        e.value = p.value
        yacc.errok()
        return e

import ply.yacc as yacc
yacc.yacc()

while 1:
    try:
        s = raw_input('query > ')  # use input() on Python 3
    except EOFError:
        break
    yacc.parse(s)