How do I configure spring-kafka to ignore messages in the wrong format? - error-handling

We have an issue with one of our Kafka topics which is consumed by the DefaultKafkaConsumerFactory & ConcurrentMessageListenerContainer combination described here with a JsonDeserializer used by the Factory. Unfortunately someone got a little enthusiastic and published some invalid messages onto the topic. It appears that spring-kafka silently fails to process past the first of these messages. Is it possible to have spring-kafka log an error and continue? Looking at the error messages which are logged it seems that perhaps the Apache kafka-clients library should deal with the case that when iterating a batch of messages one or more of them may fail to parse?
The below code is an example test case illustrating this issue:
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.serialization.Serializer;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;
import org.junit.ClassRule;
import org.junit.Test;
import org.springframework.kafka.core.DefaultKafkaConsumerFactory;
import org.springframework.kafka.core.DefaultKafkaProducerFactory;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.kafka.listener.KafkaMessageListenerContainer;
import org.springframework.kafka.listener.MessageListener;
import org.springframework.kafka.listener.config.ContainerProperties;
import org.springframework.kafka.support.SendResult;
import org.springframework.kafka.support.serializer.JsonDeserializer;
import org.springframework.kafka.support.serializer.JsonSerializer;
import org.springframework.kafka.test.rule.KafkaEmbedded;
import org.springframework.kafka.test.utils.ContainerTestUtils;
import org.springframework.util.concurrent.ListenableFuture;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
import static org.springframework.kafka.test.hamcrest.KafkaMatchers.hasKey;
import static org.springframework.kafka.test.hamcrest.KafkaMatchers.hasValue;
/**
* #author jfreedman
*/
public class TestSpringKafka {
private static final String TOPIC1 = "spring.kafka.1.t";
#ClassRule
public static KafkaEmbedded embeddedKafka = new KafkaEmbedded(1, true, 1, TOPIC1);
#Test
public void submitMessageThenGarbageThenAnotherMessage() throws Exception {
final BlockingQueue<ConsumerRecord<String, JsonObject>> records = createListener(TOPIC1);
final KafkaTemplate<String, JsonObject> objectTemplate = createPublisher("json", new JsonSerializer<JsonObject>());
sendAndVerifyMessage(records, objectTemplate, "foo", new JsonObject("foo"), 0L);
// push some garbage text to Kafka which cannot be marshalled, this should not interrupt processing
final KafkaTemplate<String, String> garbageTemplate = createPublisher("garbage", new StringSerializer());
final SendResult<String, String> garbageResult = garbageTemplate.send(TOPIC1, "bar","bar").get(5, TimeUnit.SECONDS);
assertEquals(1L, garbageResult.getRecordMetadata().offset());
sendAndVerifyMessage(records, objectTemplate, "baz", new JsonObject("baz"), 2L);
}
private <T> KafkaTemplate<String, T> createPublisher(final String label, final Serializer<T> serializer) {
final Map<String, Object> producerProps = new HashMap<>();
producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, embeddedKafka.getBrokersAsString());
producerProps.put(ProducerConfig.CLIENT_ID_CONFIG, "TestPublisher-" + label);
producerProps.put(ProducerConfig.ACKS_CONFIG, "all");
producerProps.put(ProducerConfig.RETRIES_CONFIG, 2);
producerProps.put(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION, 1);
producerProps.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, 5000);
producerProps.put(ProducerConfig.MAX_BLOCK_MS_CONFIG, 5000);
producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, serializer.getClass());
final DefaultKafkaProducerFactory<String, T> pf = new DefaultKafkaProducerFactory<>(producerProps);
pf.setValueSerializer(serializer);
return new KafkaTemplate<>(pf);
}
private BlockingQueue<ConsumerRecord<String, JsonObject>> createListener(final String topic) throws Exception {
final Map<String, Object> consumerProps = new HashMap<>();
consumerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, embeddedKafka.getBrokersAsString());
consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, "TestConsumer");
consumerProps.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true);
consumerProps.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "100");
consumerProps.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, 15000);
consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, JsonDeserializer.class);
final DefaultKafkaConsumerFactory<String, JsonObject> cf = new DefaultKafkaConsumerFactory<>(consumerProps);
cf.setValueDeserializer(new JsonDeserializer<>(JsonObject.class));
final KafkaMessageListenerContainer<String, JsonObject> container = new KafkaMessageListenerContainer<>(cf, new ContainerProperties(topic));
final BlockingQueue<ConsumerRecord<String, JsonObject>> records = new LinkedBlockingQueue<>();
container.setupMessageListener((MessageListener<String, JsonObject>) records::add);
container.setBeanName("TestListener");
container.start();
ContainerTestUtils.waitForAssignment(container, embeddedKafka.getPartitionsPerTopic());
return records;
}
private void sendAndVerifyMessage(final BlockingQueue<ConsumerRecord<String, JsonObject>> records,
final KafkaTemplate<String, JsonObject> template,
final String key, final JsonObject value,
final long expectedOffset) throws InterruptedException, ExecutionException, TimeoutException {
final ListenableFuture<SendResult<String, JsonObject>> future = template.send(TOPIC1, key, value);
final ConsumerRecord<String, JsonObject> record = records.poll(5, TimeUnit.SECONDS);
assertThat(record, hasKey(key));
assertThat(record, hasValue(value));
assertEquals(expectedOffset, future.get(5, TimeUnit.SECONDS).getRecordMetadata().offset());
}
public static final class JsonObject {
private String value;
public JsonObject() {}
JsonObject(final String value) {
this.value = value;
}
public String getValue() {
return value;
}
public void setValue(final String value) {
this.value = value;
}
#Override
public boolean equals(final Object o) {
if (this == o) { return true; }
if (o == null || getClass() != o.getClass()) { return false; }
final JsonObject that = (JsonObject) o;
return Objects.equals(value, that.value);
}
#Override
public int hashCode() {
return Objects.hash(value);
}
#Override
public String toString() {
return "JsonObject{" +
"value='" + value + '\'' +
'}';
}
}
}

I have a solution but I don't know if it's the best one, I extended JsonDeserializer as follows which results in a null value being consumed by spring-kafka and requires the necessary downstream changes to handle that case.
class SafeJsonDeserializer[A >: Null](targetType: Class[A], objectMapper: ObjectMapper) extends JsonDeserializer[A](targetType, objectMapper) with Logging {
override def deserialize(topic: String, data: Array[Byte]): A = try {
super.deserialize(topic, data)
} catch {
case e: Exception =>
logger.error("Failed to deserialize data [%s] from topic [%s]".format(new String(data), topic), e)
null
}
}

Starting from the spring-kafka-2.x.x, we now have the comfort of declaring beans in the config file for the interface KafkaListenerErrorHandler with a implementation something as
#Bean
public ConsumerAwareListenerErrorHandler listen3ErrorHandler() {
return (m, e, c) -> {
this.listen3Exception = e;
MessageHeaders headers = m.getHeaders();
c.seek(new org.apache.kafka.common.TopicPartition(
headers.get(KafkaHeaders.RECEIVED_TOPIC, String.class),
headers.get(KafkaHeaders.RECEIVED_PARTITION_ID, Integer.class)),
headers.get(KafkaHeaders.OFFSET, Long.class));
return null;
};
}
more resources can be found at https://docs.spring.io/spring-kafka/reference/htmlsingle/#annotation-error-handling There is also another link with the similar issue: Spring Kafka error handling - v1.1.x and How to handle SerializationException after deserialization

Use ErrorHandlingDeserializer2. This is a delegating key/value deserializer that catches exceptions, returning them in the headers as serialized java objects.
Under consumer configuration, add/update the below lines:
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.springframework.kafka.support.serializer.ErrorHandlingDeserializer2
configProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
classOf[ErrorHandlingDeserializer2[JsonDeserializer]].getName)
configProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[ErrorHandlingDeserializer2[StringDeserializer]].getName)
configProps.put(ErrorHandlingDeserializer2.KEY_DESERIALIZER_CLASS, classOf[StringDeserializer].getName)
configProps.put(ErrorHandlingDeserializer2.VALUE_DESERIALIZER_CLASS, classOf[JsonDeserializer].getName)

Related

Controlling job execution based on exceptions in simple chunk job in Spring Batch

I'm having simple chunk CSV processing job.
I would like to change execution flow when there is particular type of error during processing (eg. invalid line structure)
In order to prevent throwing errors I need to provide custom exceptionHandler that will swallow parsing exception:
#Bean
fun processCsvStep(
stepBuilderFactory: StepBuilderFactory,
reader: ItemReader<InputRow>,
processor: ItemProcessor<InputRow, OutputObject>,
writer: ItemWriter<OutputObject>
) = stepBuilderFactory.get(PROCESS_CSV_STEP)
.chunk<InputRow, OutputObject>(
CHUNKS_NUMBER
)
.reader(reader)
.processor(processor)
.writer(writer)
.exceptionHandler { context: RepeatContext, throwable: Throwable ->
context.setTerminateOnly()
logger.error { "Exception during parsing: ${throwable.message}" }
}
.build()!!
Then in my Job I can rely only on rollback count:
#Bean
fun createCsvJob(jobs: JobBuilderFactory, processCsvStep: Step, moveCsvStep: Step, moveFailedCsvStep: Step) = jobs.get(PROCESS_CSV_JOB)
.start(processCsvStep)
.next { jobExecution: JobExecution, stepExecution: StepExecution ->
return#next when (stepExecution.rollbackCount) {
0 -> FlowExecutionStatus.COMPLETED
else -> FlowExecutionStatus.FAILED
}
}
.on(FlowExecutionStatus.FAILED.name)
.to(moveFailedCsvStep)
.on(FlowExecutionStatus.COMPLETED.name)
.to(moveCsvStep)
.end()
.build()!!
Is there any way to pass information from exception handler to JobExecutionDecider? I would like to make execution decision based on type of exception that happened during parsing. Is this possible?
I would like to make execution decision based on type of exception that happened during parsing. Is this possible?
You can get access to the exception that happened during the step from the decider through stepExecution#getFailureExceptions. Here is an example:
import java.util.Arrays;
import java.util.List;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.job.flow.FlowExecutionStatus;
import org.springframework.batch.core.job.flow.JobExecutionDecider;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.ItemWriter;
import org.springframework.batch.item.support.ListItemReader;
import org.springframework.batch.repeat.RepeatStatus;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.AnnotationConfigApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
#Configuration
#EnableBatchProcessing
public class MyJob {
#Autowired
private JobBuilderFactory jobs;
#Autowired
private StepBuilderFactory steps;
#Bean
public ItemReader<Integer> itemReader() {
return new ListItemReader<>(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
}
#Bean
public ItemWriter<Integer> itemWriter() {
return items -> {
for (Integer item : items) {
if (items.contains(3)) {
throw new IllegalArgumentException("no 3!");
}
System.out.println("item = " + item);
}
};
}
#Bean
public Step step1() {
return steps.get("step1")
.<Integer, Integer>chunk(5)
.reader(itemReader())
.writer(itemWriter())
.build();
}
#Bean
public Step step2() {
return steps.get("step2")
.tasklet((contribution, chunkContext) -> {
System.out.println("step2");
return RepeatStatus.FINISHED;
})
.build();
}
#Bean
public Step step3() {
return steps.get("step3")
.tasklet((contribution, chunkContext) -> {
System.out.println("step3");
return RepeatStatus.FINISHED;
})
.build();
}
#Bean
public JobExecutionDecider decider() {
return (jobExecution, stepExecution) -> {
int rollbackCount = stepExecution.getRollbackCount();
List<Throwable> failureExceptions = stepExecution.getFailureExceptions();
System.out.println("rollbackCount = " + rollbackCount);
System.out.println("failureExceptions = " + failureExceptions);
// make the decision based on rollbackCount and/or failureExceptions and return status accordingly
return FlowExecutionStatus.COMPLETED;
};
}
#Bean
public Job job() {
return jobs.get("job")
.start(step1())
.on("*").to(decider())
.from(decider()).on("COMPLETED").to(step2())
.from(decider()).on("FAILED").to(step3())
.build()
.build();
}
public static void main(String[] args) throws Exception {
ApplicationContext context = new AnnotationConfigApplicationContext(MyJob.class);
JobLauncher jobLauncher = context.getBean(JobLauncher.class);
Job job = context.getBean(Job.class);
jobLauncher.run(job, new JobParameters());
}
}
In this example, if an exception occurs during step1, the decider can get it from the step execution and make the decision accordingly (go to step2 or step3).
So I'm not sure you really need an exception handler and a way to pass information to the decider. The same idea applies of you want to make the decision based on the rollbackCount, commitCount, readCount, or any other metric.
Hope this helps.

AbstractStringBuilder.ensureCapacityInternal get NullPointerException in storm bolt

online system, the storm Bolt get NullPointerException,though I think I check it before line 61; It gets NullPointerException once in a while;
import ***.KeyUtils;
import ***.redis.PipelineHelper;
import ***.redis.PipelinedCacheClusterClient;
import **.redis.R2mClusterClient;
import org.apache.commons.lang3.StringUtils;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import java.util.Map;
/**
* RedisBolt batch operate
*/
public class RedisBolt implements IRichBolt {
static final long serialVersionUID = 737015318988609460L;
private static ApplicationContext applicationContext;
private static long logEmitNumber = 0;
private static StringBuffer totalCmds = new StringBuffer();
private Logger logger = LoggerFactory.getLogger(getClass());
private OutputCollector _collector;
private R2mClusterClient r2mClusterClient;
#Override
public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
_collector = outputCollector;
if (applicationContext == null) {
applicationContext = new ClassPathXmlApplicationContext("spring/spring-config-redisbolt.xml");
}
if (r2mClusterClient == null) {
r2mClusterClient = (R2mClusterClient) applicationContext.getBean("r2mClusterClient");
}
}
#Override
public void execute(Tuple tuple) {
String log = tuple.getString(0);
String lastCommands = tuple.getString(1);
try {
//log count
if (StringUtils.isNotEmpty(log)) {
logEmitNumber++;
}
if (StringUtils.isNotEmpty(lastCommands)) {
if(totalCmds==null){
totalCmds = new StringBuffer();
}
totalCmds.append(lastCommands);//line 61
}
//日志数量控制
int numberLimit = 1;
String flow_log_limit = r2mClusterClient.get(KeyUtils.KEY_PIPELINE_LIMIT);
if (StringUtils.isNotEmpty(flow_log_limit)) {
try {
numberLimit = Integer.parseInt(flow_log_limit);
} catch (Exception e) {
numberLimit = 1;
logger.error("error", e);
}
}
if (logEmitNumber >= numberLimit) {
StringBuffer _totalCmds = new StringBuffer(totalCmds);
try {
//pipeline submit
PipelinedCacheClusterClient pip = r2mClusterClient.pipelined();
String[] commandArray = _totalCmds.toString().split(KeyUtils.REDIS_CMD_SPILT);
PipelineHelper.cmd(pip, commandArray);
pip.sync();
pip.close();
totalCmds = new StringBuffer();
} catch (Exception e) {
logger.error("error", e);
}
logEmitNumber = 0;
}
} catch (Exception e) {
logger.error(new StringBuffer("====RedisBolt error for log=[ ").append(log).append("] \n commands=[").append(lastCommands).append("]").toString(), e);
_collector.reportError(e);
_collector.fail(tuple);
}
_collector.ack(tuple);
}
#Override
public void cleanup() {
}
#Override
public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
}
#Override
public Map<String, Object> getComponentConfiguration() {
return null;
}
}
exception info:
java.lang.NullPointerException at java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:113) at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:415) at java.lang.StringBuffer.append(StringBuffer.java:237) at com.jd.jr.dataeye.storm.bolt.RedisBolt.execute(RedisBolt.java:61) at org.apache.storm.daemon.executor$fn__5044$tuple_action_fn__5046.invoke(executor.clj:727) at org.apache.storm.daemon.executor$mk_task_receiver$fn__4965.invoke(executor.clj:459) at org.apache.storm.disruptor$clojure_handler$reify__4480.onEvent(disruptor.clj:40) at org.apache.storm.utils.DisruptorQueue.consumeBatchToCursor(DisruptorQueue.java:472) at org.apache.storm.utils.DisruptorQueue.consumeBatchWhenAvailable(DisruptorQueue.java:451) at org.apache.storm.disruptor$consume_batch_when_available.invoke(disruptor.clj:73) at org.apache.storm.daemon.executor$fn__5044$fn__5057$fn__5110.invoke(executor.clj:846) at org.apache.storm.util$async_loop$fn__557.invoke(util.clj:484) at clojure.lang.AFn.run(AFn.java:22) at java.lang.Thread.run(Thread.java:745)
can anyone give me some advice to find the reason.
That is really odd thing to happen. Please read the code for two classes.
https://github.com/openjdk-mirror/jdk7u-jdk/blob/master/src/share/classes/java/lang/AbstractStringBuilder.java
https://github.com/openjdk-mirror/jdk7u-jdk/blob/master/src/share/classes/java/lang/StringBuffer.java
AbstractStringBuilder has constructor with no args which doesn't allocate the field 'value', which makes accessing the 'value' field being NPE. Any constructors in StringBuffer use that constructor. So maybe some odd thing happens in serialization/deserialization and unfortunately 'value' field in AbstractStringBuilder is being null.
Maybe initializing totalCmds in prepare() would be better, and also you need to consider synchronization (thread-safety) between bolts. prepare() can be called per bolt instance so fields are thread-safe, but class fields are not thread-safe.
I think I find the problem maybe;
the key point is
"StringBuffer _totalCmds = new StringBuffer(totalCmds);" and " totalCmds.append(lastCommands);//line 61"
when new a object, It takes serval steps:
(1) allocate memory and return reference
(2) initialize
if append after (1) and before (2) then the StringBuffer.java extends AbstractStringBuilder.java
/**
* The value is used for character storage.
*/
char[] value;
value is not initialized;so this will get null:
#Override
public synchronized void ensureCapacity(int minimumCapacity) {
if (minimumCapacity > value.length) {
expandCapacity(minimumCapacity);
}
}
this blot has a another question, some data maybe lost under a multithreaded environment

How to serialize a Predicate<T> from Nashorn engine in java 8

How can i serialize a predicate obtained from java ScriptEngine nashorn? or how can i cast jdk.nashorn.javaadapters.java.util.function.Predicate to Serializable?
Here is the case:
I have this class
import java.io.Serializable;
import java.util.function.Predicate;
public class Filter implements Serializable {
private Predicate<Object> filter;
public Predicate<Object> getFilter() {
return filter;
}
public void setFilter(Predicate<Object> filter) {
this.filter = filter;
}
public boolean evaluate(int value) {
return filter.test(value);
}
}
and
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.function.Predicate;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;
public class TestFilterSer {
public static void main(String[] args) throws ScriptException {
Filter f = new Filter();
//This works
//f.setFilter(getCastedPred());
// But I want this to work
f.setFilter(getScriptEnginePred());
System.out.println(f.evaluate(6));
try (ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(new File("pred.ser")))) {
oos.writeObject(f);
} catch (IOException e) {
e.printStackTrace();
}
f= null;
try (ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File("pred.ser")))) {
f= (Filter)ois.readObject();
} catch (IOException | ClassNotFoundException e) {
e.printStackTrace();
}
System.out.println(f.evaluate(7));
}
public static Predicate<Object> getCastedPred() {
Predicate<Object> isEven = (Predicate<Object> & Serializable)(i) -> (Integer)i%2 == 0;
return isEven;
}
public static Predicate<Object> getScriptEnginePred() throws ScriptException {
ScriptEngine engine = new ScriptEngineManager().getEngineByName("nashorn");
Predicate<Object> p = (Predicate<Object> & Serializable)engine.eval(
String.format("new java.util.function.Predicate(%s)", "function(i) i%2==0")
);
return p;
}
}
Requirement: To be able to serialize the Predicate obtained from Nashorn engine.
Observation: When I get Predicate from method getCastedPred(). It works because it is java.util.function.Predicate. it does Serialize after casting to Serializable. But when I get the Predicate from the Nashorn engine, Internally it returns me the jdk.nashorn.javaadapters.java.util.function.Predicate this one doesn't Serialize and casting to Serializable doesn't work.
Any idea how can i serialize this type of Predicate?
The problem is your API uses Predicate, not AggregateFilter. So the target type for the lambda in
setAggregatePredicate(x -> true)
will be Predicate, not AggregateFilter -- and the compiler won't know to make it serializable. If you change your API to use the more specific functional interface, serializable lambdas will be generated.

Spring restController: how to error when unknown #RequestParam is in url

I'm using spring 4.2 to create some restfull webservices.
But we realized that when a user mistypes one of the not-mandatory #RequestParam, we do not get an error that the param he passed is unknown.
like we have #RequestParam(required=false, value="valueA") String value A and in the call he uses '?valuueA=AA' -> we want an error.
But I do not seem to find a way to do this, the value is just ignored and the user is unaware of this.
One possible solution would be to create an implementation of HandlerInterceptor which will verify that all request parameters passed to the handler method are declared in its #RequestParam annotated parameters.
However you should consider the disadvantages of such solution. There might be situations where you want to allow certain parameters to be passed in and not be declared as request params. For instance if you have request like GET /foo?page=1&offset=0 and have handler with following signature:
#RequestMapping
public List<Foo> listFoos(PagingParams page);
and PagingParams is a class containing page and offset properties, it will normally be mapped from the request parameters. Implementation of a solution you want would interfere with this Spring MVC'c functionality.
That being said, here is a sample implementation I had in mind:
public class UndeclaredParamsHandlerInterceptor extends HandlerInterceptorAdapter {
#Override
public boolean preHandle(HttpServletRequest request, HttpServletResponse response,
Object handler) throws Exception {
if (handler instanceof HandlerMethod) {
HandlerMethod handlerMethod = (HandlerMethod) handler;
checkParams(request, getDeclaredRequestParams(handlerMethod));
}
return true;
}
private void checkParams(HttpServletRequest request, Set<String> allowedParams) {
request.getParameterMap().entrySet().forEach(entry -> {
String param = entry.getKey();
if (!allowedParams.contains(param)) {
throw new UndeclaredRequestParamException(param, allowedParams);
}
});
}
private Set<String> getDeclaredRequestParams(HandlerMethod handlerMethod) {
Set<String> declaredRequestParams = new HashSet<>();
MethodParameter[] methodParameters = handlerMethod.getMethodParameters();
ParameterNameDiscoverer parameterNameDiscoverer = new DefaultParameterNameDiscoverer();
for (MethodParameter methodParameter : methodParameters) {
if (methodParameter.hasParameterAnnotation(RequestParam.class)) {
RequestParam requestParam = methodParameter.getParameterAnnotation(RequestParam.class);
if (StringUtils.hasText(requestParam.value())) {
declaredRequestParams.add(requestParam.value());
} else {
methodParameter.initParameterNameDiscovery(parameterNameDiscoverer);
declaredRequestParams.add(methodParameter.getParameterName());
}
}
}
return declaredRequestParams;
}
}
Basically this will do what I described above. You can then add exception handler for the exception it throws and translate it to HTTP 400 response. I've put more of an complete sample on Github, which includes a way to selectively enable this behavior for individual handler methods via annotation.
I translated Bohuslav Burghardt's solution for Spring WebFlux applications.
I dropped the #DisallowUndeclaredRequestParams annotation class from GitHub because I didn't need it -- it just applies the filter to all HandlerMethods. But someone else could update this answer and put it back.
package com.example.springundeclaredparamerror;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.core.DefaultParameterNameDiscoverer;
import org.springframework.core.MethodParameter;
import org.springframework.core.ParameterNameDiscoverer;
import org.springframework.http.HttpStatus;
import org.springframework.http.server.reactive.ServerHttpRequest;
import org.springframework.stereotype.Component;
import org.springframework.util.StringUtils;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.method.HandlerMethod;
import org.springframework.web.reactive.result.method.annotation.RequestMappingHandlerMapping;
import org.springframework.web.server.ServerWebExchange;
import org.springframework.web.server.WebFilter;
import org.springframework.web.server.WebFilterChain;
import reactor.core.publisher.Mono;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;
/**
* Handler interceptor used for ensuring that no request params other than those explicitly
* declared via {#link RequestParam} parameters of the handler method are passed in.
*/
// Implementation translated into WebFlux WebFilter from:
// https://github.com/bohuslav-burghardt/spring-sandbox/tree/master/handler-interceptors/src/main/java/handler_interceptors
#Component
public class DisallowUndeclaredParamsFilter implements WebFilter {
private static final Logger LOGGER = LoggerFactory.getLogger(DisallowUndeclaredParamsFilter.class);
#Autowired
#Qualifier("requestMappingHandlerMapping")
RequestMappingHandlerMapping mapping;
#Autowired
ObjectMapper mapper;
#Override
public Mono<Void> filter(ServerWebExchange serverWebExchange, WebFilterChain webFilterChain) {
Object o = mapping.getHandler(serverWebExchange).toFuture().getNow(null);
Optional<String> undeclaredParam = Optional.empty();
if (o != null && o instanceof HandlerMethod) {
var handlerMethod = (HandlerMethod) o;
undeclaredParam = checkParams(serverWebExchange.getRequest(),
getDeclaredRequestParams(handlerMethod));
}
return undeclaredParam.map((param) -> RespondWithError(serverWebExchange, param))
.orElseGet(() -> webFilterChain.filter(serverWebExchange));
}
/** Responds to the request with an error message for the given undeclared parameter. */
private Mono<Void> RespondWithError(ServerWebExchange serverWebExchange, String undeclaredParam) {
final HttpStatus status = HttpStatus.BAD_REQUEST;
serverWebExchange.getResponse().setStatusCode(status);
serverWebExchange.getResponse().getHeaders().add(
"Content-Type", "application/json");
UndeclaredParamErrorResponse response = new UndeclaredParamErrorResponse();
response.message = "Parameter not expected: " + undeclaredParam;
response.statusCode = status.value();
String error = null;
try {
error = mapper.writeValueAsString(response);
} catch (JsonProcessingException e) {
error = "Parameter not expected; error generating JSON response";
LOGGER.warn("Error generating JSON response for undeclared argument", e);
}
return serverWebExchange.getResponse().writeAndFlushWith(
Mono.just(Mono.just(serverWebExchange.getResponse().bufferFactory().wrap(
error.getBytes(StandardCharsets.UTF_8)))));
}
/** Structure for generating error JSON. */
static class UndeclaredParamErrorResponse {
public String message;
public int statusCode;
}
/**
* Check that all of the request params of the specified request are contained within the specified set of allowed
* parameters.
*
* #param request Request whose params to check.
* #param allowedParams Set of allowed request parameters.
* #return Name of a param in the request that is not allowed, or empty if all params in the request are allowed.
*/
private Optional<String> checkParams(ServerHttpRequest request, Set<String> allowedParams) {
return request.getQueryParams().keySet().stream().filter(param ->
!allowedParams.contains(param)
).findFirst();
}
/**
* Extract all request parameters declared via {#link RequestParam} for the specified handler method.
*
* #param handlerMethod Handler method to extract declared params for.
* #return Set of declared request parameters.
*/
private Set<String> getDeclaredRequestParams(HandlerMethod handlerMethod) {
Set<String> declaredRequestParams = new HashSet<>();
MethodParameter[] methodParameters = handlerMethod.getMethodParameters();
ParameterNameDiscoverer parameterNameDiscoverer = new DefaultParameterNameDiscoverer();
for (MethodParameter methodParameter : methodParameters) {
if (methodParameter.hasParameterAnnotation(RequestParam.class)) {
RequestParam requestParam = methodParameter.getParameterAnnotation(RequestParam.class);
if (StringUtils.hasText(requestParam.value())) {
declaredRequestParams.add(requestParam.value());
} else {
methodParameter.initParameterNameDiscovery(parameterNameDiscoverer);
declaredRequestParams.add(methodParameter.getParameterName());
}
}
}
return declaredRequestParams;
}
}
Here's the unit test I wrote for it. I recommend checking it into your codebase as well.
package com.example.springundeclaredparamerror;
import com.github.tomakehurst.wiremock.junit.WireMockRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.reactive.WebFluxTest;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.web.reactive.server.WebTestClient;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import reactor.core.publisher.Mono;
import static com.github.tomakehurst.wiremock.core.WireMockConfiguration.wireMockConfig;
#RunWith(SpringRunner.class)
#WebFluxTest(controllers = {DisallowUndeclaredParamFilterTest.TestController.class})
public class DisallowUndeclaredParamFilterTest {
private static final String TEST_ENDPOINT = "/disallowUndeclaredParamFilterTest";
#Rule
public final WireMockRule wireMockRule = new WireMockRule(wireMockConfig().dynamicPort());
#Autowired
private WebTestClient webClient;
#Configuration
#Import({TestController.class, DisallowUndeclaredParamsFilter.class})
static class TestConfig {
}
#RestController
static class TestController {
#GetMapping(TEST_ENDPOINT)
public Mono<String> retrieveEntity(#RequestParam(name = "a", required = false) final String a) {
return Mono.just("ok");
}
}
#Test
public void testAllowsNoArgs() {
webClient.get().uri(TEST_ENDPOINT).exchange().expectBody(String.class).isEqualTo("ok");
}
#Test
public void testAllowsDeclaredArg() {
webClient.get().uri(TEST_ENDPOINT + "?a=1").exchange().expectBody(String.class).isEqualTo("ok");
}
#Test
public void testDisallowsUndeclaredArg() {
webClient.get().uri(TEST_ENDPOINT + "?b=1").exchange().expectStatus().is4xxClientError();
}
}

Neo4j error caused by Lucene (Too many open files)

I've just started evaluating Neo4j to see how well its fits our use case.
I'm using the embedded Java API to insert edges and nodes into a graph.
After creating around 5000 nodes I get the following error (using Neo4j 2.1.6 and 2.1.7 on OS X Yosemite)
org.neo4j.graphdb.TransactionFailureException: Unable to commit transaction
Caused by: javax.transaction.xa.XAException
Caused by: org.neo4j.kernel.impl.nioneo.store.UnderlyingStorageException: java.io.FileNotFoundException: /Users/mihir.k/IdeaProjects/Turant/target/neo4j-hello-db/schema/label/lucene/_8zr.frq (Too many open files)
Caused by: java.io.FileNotFoundException: /Users/mihir.k/IdeaProjects/Turant/target/neo4j-hello-db/schema/label/lucene/_8zr.frq (Too many open files)
I've looked at numerous similar StackOverFlow questions and other related threads online. They all suggest increasing the max open files limit.
I've tried doing that.
These are my settings:
kern.maxfiles: 65536
kern.maxfilesperproc: 65536
However this hasn't fixed the error.
While the Neo4j code runs I tried using the lsof|wc -l command. The code always breaks when around 10000 files are open.
The following is the main class that deals with Neo4j:
import java.io.File;
import java.io.Serializable;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.neo4j.cypher.internal.compiler.v1_9.commands.True;
import org.neo4j.cypher.internal.compiler.v2_0.ast.False;
import org.neo4j.graphdb.*;
import org.neo4j.graphdb.factory.GraphDatabaseFactory;
import org.neo4j.graphdb.schema.Schema;
import org.neo4j.graphdb.schema.IndexDefinition;
import org.neo4j.graphdb.index.UniqueFactory;
import org.neo4j.graphdb.index.Index;
import org.neo4j.graphdb.index.IndexHits;
public class Neo4jDB implements Serializable {
private static final String DB_PATH = "target/neo4j-hello-db-spark";
IndexDefinition indexDefinition;
private static GraphDatabaseFactory dbFactory;
public static GraphDatabaseService db;
public void main(String[] args) {
System.out.println("Life is a disease, sexually transmitted and irrevocably fatal. Stop coding and read some Neil Gaiman.");
}
public void startDbInstance() {
db =new GraphDatabaseFactory().newEmbeddedDatabase(DB_PATH);
}
public Node createOrGetNode ( LabelsUser360 label , String key, String nodeName ,Map<String,Object> propertyMap)
{
System.out.println("Creating/Getting node");
try ( Transaction tx = db.beginTx() ) {
Node node;
if (db.findNodesByLabelAndProperty(label, key, nodeName).iterator().hasNext()) {
node = db.findNodesByLabelAndProperty(label, key, nodeName).iterator().next();
} else {
node = db.createNode(label);
node.setProperty(key, nodeName);
}
for (Map.Entry<String, Object> entry : propertyMap.entrySet()) {
node.setProperty(entry.getKey(), entry.getValue());
}
tx.success();
return node;
}
}
public void createUniquenessConstraint(LabelsUser360 label , String property)
{
try ( Transaction tx = db.beginTx() )
{
db.schema()
.constraintFor(label)
.assertPropertyIsUnique(property)
.create();
tx.success();
}
}
public void createOrUpdateRelationship(RelationshipsUser360 relationshipType ,Node startNode, Node endNode, Map<String,Object> propertyMap)
{
try ( Transaction tx = db.beginTx() ) {
if (startNode.hasRelationship(relationshipType, Direction.OUTGOING)) {
Relationship relationship = startNode.getSingleRelationship(relationshipType, Direction.OUTGOING);
for (Map.Entry<String, Object> entry : propertyMap.entrySet()) {
relationship.setProperty(entry.getKey(), entry.getValue());
}
} else {
Relationship relationship = startNode.createRelationshipTo(endNode, relationshipType);
for (Map.Entry<String, Object> entry : propertyMap.entrySet()) {
relationship.setProperty(entry.getKey(), entry.getValue());
}
}
tx.success();
}
}
public void registerShutdownHook( final GraphDatabaseService graphDb )
{
Runtime.getRuntime().addShutdownHook( new Thread()
{
#Override
public void run()
{
db.shutdown();
}
} );
}
}
There is another Neo4jAdapter class that is used to implement domain specific logic. It uses the Neo4jDB class to do add/update nodes/properties/relationships
import org.apache.lucene.index.IndexWriter;
import org.codehaus.jackson.map.ObjectMapper;
import org.json.*;
import org.neo4j.graphdb.*;
import org.neo4j.graphdb.factory.GraphDatabaseFactory;
import org.neo4j.graphdb.schema.IndexDefinition;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
public class Neo4jAdapter implements Serializable {
static Neo4jDB n4j = new Neo4jDB();
public static GraphDatabaseService db = Neo4jDB.db ;
public void begin() {
n4j.startDbInstance();
}
public static void main(String[] args) {}
public String graphPut(String jsonString) {
System.out.println("graphput called");
HashMap<String, Object> map = jsonToMap(jsonString); //Json deserializer
Node startNode = n4j.createOrGetNode(...);
Node endNode = n4j.createOrGetNode(...);
propertyMap = new HashMap<String, Object>();
propertyMap.put(....);
try (Transaction tx = Neo4jDB.db.beginTx()) {
Relationship relationship = startNode.getSingleRelationship(...);
if (relationship != null) {
Integer currentCount = (Integer) relationship.getProperty("count");
Integer updatedCount = currentCount + 1;
propertyMap.put("count", updatedCount);
} else {
Integer updatedCount = 1;
propertyMap.put("count", updatedCount);
}
tx.success();
}
n4j.createOrUpdateRelationship(RelationshipsUser360.BLAH, startNode, endNode, propertyMap);
}
}
}
return "Are you sponge worthy??";
}
}
Finally, there is a Sprak App that calls the "graphput" method of the Neo4jAdapter class. The relevant code snippet is (the following is scala+spark code) :
val graphdb : Neo4jAdapter = new Neo4jAdapter()
graphdb.begin()
linesEnriched.foreach(a=>graphdb.graphPutMap(a))
where 'a' is a json string and linesEnriched is a Spark RDD (basically a set of strings)