I am trying to enrich the data to create an XML file.
The first query does a Group By to obtain the transaction header.
The second query gets all records (details) that match the header from the same file, to enrich the message.
The problem is that the query that enriches the data takes about a second to run, and I will need to run this process for 184,764 headers; at one second per header this job will take far too long. Is there a way to accomplish the same thing without having to query the database for the details of each header? Can all the records be loaded first so the details are obtained from memory instead? Here's the code:
<db:generic-config name="Generic_Database_Configuration" url="${db.url}"
driverClassName="${driver.class.name}" doc:name="Generic Database
Configuration"/>
<data-mapper:config name="List_Map__To_List_Map_"
transformationGraphPath="list_map__to_list_map_.grf"
doc:name="List_Map__To_List_Map_"/>
<data-mapper:config name="List_Map__To_XML_1"
transformationGraphPath="list_map__to_xml_1.grf"
doc:name="List_Map__To_XML_1"/>
<batch:job name="OrceTransactionImportBatch">
<batch:input>
<db:select config-ref="Generic_Database_Configuration"
doc:name="Database">
<db:parameterized-query><![CDATA[SELECT TRANDATED, STORED, REG#D
AS REG_D, TRAN#D AS TRAN_D, VIP#D AS VIP_D, VIP#D AS VIPNO, SUM(RETAIL*QTY)
AS TOTAL,
CONCAT(SUBSTRING(TRANDATED,1,4),
CONCAT('-',CONCAT(SUBSTRING(TRANDATED,5,2),
CONCAT('-',CONCAT(SUBSTRING(TRANDATED,7,2),'T00:00:00'))))) AS
BusinessDayDate
FROM ORCTEXDTLP
WHERE DGROUPID IN (SELECT HGROUPID FROM ORCTEXHDRP WHERE HPRCFLAG = 'P')
GROUP BY STORED, TRANDATED, REG#D, TRAN#D, VIP#D
FETCH FIRST 60 ROWS ONLY]]></db:parameterized-query>
</db:select>
<logger message="before mapper..." level="INFO" doc:name="before
mapper..."/>
</batch:input>
<batch:process-records>
<batch:step name="Batch_Step">
<data-mapper:transform config-ref="List_Map__To_List_Map_"
doc:name="List<Map> To List<Map>"/>
<logger message="before enricher..." level="INFO"
doc:name="before enricher..."/>
</batch:step>
<batch:step name="Batch_Step1">
<logger message="BEFORE FOR EACH..." level="INFO"
doc:name="Logger"/>
<enricher target="#[variable:LineItem]" doc:name="Message
Enricher">
<db:select config-ref="Generic_Database_Configuration"
doc:name="Database">
<db:parameterized-query><![CDATA[SELECT TRANCODED,
CONCAT(SUBSTRING(TRANDATED,1,4),
CONCAT('-',CONCAT(SUBSTRING(TRANDATED,5,2),
CONCAT('-',CONCAT(SUBSTRING(TRANDATED,7,2),'T00:00:00'))))) AS
BusinessDayDate, STORED AS RetailStoreID, TRAN#D AS TransactionNumber, REG#D
AS WorkstationID, RETAIL AS TransactionGrandAmount, VIP#D AS AlternateID,
DISCOUNT, VOUCHER#D AS VOUCHER_D, TRIM(SKU#) AS ItemID, A03K2 AS
UnitCostPrice, RETAIL AS RegularSalesUnitPrice, (RETAIL*QTY) AS
ExtendedAmount, QTY AS Quantity, ROW_NUMBER() OVER () rownumber,
(RETAIL*QTY) AS ActualRetail,
VOUCHERCD AS VoucherCode, VOUCHER#D AS VoucherNumber
FROM FBF02P
LEFT OUTER JOIN KSK2P ON SKUK2 = SKU#
WHERE TRANDATED = #[payload[0]['TRANDATED']] AND STORED = #[payload[0]['STORED']]
AND REG#D = #[payload[0]['REG_D']] AND TRAN#D = #[payload[0]['TRAN_D']]]]></db:parameterized-query>
</db:select>
</enricher>
<expression-component doc:name="Expression"><![CDATA[#[payload[0].LineItem=flowVars.LineItem]]]></expression-component>
<logger message="#[payload[0]['TRAN_D']]" level="INFO"
doc:name="Logger"/>
</batch:step>
<batch:step name="Batch_Step2">
<batch:commit streaming="true" doc:name="Batch Commit">
<data-mapper:transform config-ref="List_Map__To_XML_1"
doc:name="List<Map> To XML"/>
<file:outbound-endpoint path="${output.path}"
outputPattern="TranImport#[server.dateTime.format('yyyyMMdd_HHmmss')].xml"
responseTimeout="10000" doc:name="File"/>
</batch:commit>
</batch:step>
</batch:process-records>
<batch:on-complete>
<logger message="DONE..." level="INFO" doc:name="Logger"/>
</batch:on-complete>
</batch:job>
<flow name="OrceTransactionImportFlow">
<poll doc:name="Poll">
<fixed-frequency-scheduler frequency="1" timeUnit="DAYS"/>
<db:update config-ref="Generic_Database_Configuration"
doc:name="Database">
<db:parameterized-query><![CDATA[UPDATE ORCTEXHDRP
SET HPRCFLAG = 'P'
WHERE HPRCFLAG = '' OR HPRCFLAG = 'P']]></db:parameterized-query>
</db:update>
</poll>
<choice doc:name="Choice">
<when expression="#[payload == 0]">
<logger message="Zero payload..." level="INFO"
doc:name="Logger"/>
</when>
<otherwise>
<batch:execute name="OrceTransactionImportBatch"
doc:name="OrceTransactionImportBatch"/>
</otherwise>
</choice>
</flow>
Inside your database connector configuration you should set up a connection pooling profile, so each enrichment query reuses a pooled connection instead of opening a new one.
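A minimal sketch of adding one to the existing generic config (the pool sizes are assumptions to tune for your environment; verify the attribute names against your version of the Mule 3 Database connector schema):
<db:generic-config name="Generic_Database_Configuration" url="${db.url}"
    driverClassName="${driver.class.name}" doc:name="Generic Database Configuration">
    <!-- Keep a pool of open connections so each detail query reuses one -->
    <db:pooling-profile maxPoolSize="10" minPoolSize="2" acquireIncrement="1"
        preparedStatementCacheSize="10"/>
</db:generic-config>
This removes the per-query connection setup cost, although each of the 184,764 detail queries will still make a round trip to the database.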
I am using Groovy inside a Poll to check for the existence of a file at a given location.
My flow works fine when the file is there, but if I delete that file the flow is not triggered. Below is my code:
<flow name="monitor-dst-file-flow">
<poll doc:name="Poll">
<schedulers:cron-scheduler expression="0 0 23 ? * TUE-SAT"/>
<scripting:transformer doc:name="Groovy">
<scripting:script engine="Groovy"><![CDATA[def endpointBuilder = muleContext.endpointFactory.getEndpointBuilder(
"sftp://${user}:${pwForGroovy}#${host}:${port}${inputpath}/?connector=SFTP")
endpointBuilder.addMessageProcessor(new org.mule.routing.MessageFilter(new org.mule.transport.file.filters.FilenameWildcardFilter('test.txt')))
def inboundEndpoint = endpointBuilder.buildInboundEndpoint()
inboundEndpoint.request(30000L)]]></scripting:script>
</scripting:transformer>
</poll>
<choice doc:name="Choice">
<when expression="#[message.inboundProperties.originalFilename =="test.txt"]">
<logger level="INFO" doc:name="Logger" message="File Exists..."/>
</when>
<otherwise>
<logger message="FILE EXISTS" level="ERROR" doc:name="Logger"/>
<flow-ref name="email-notification-sub-flow" doc:name="Flow Reference"/>
</otherwise>
</choice>
</flow>
Here, if there is no test.txt file, I never reach the Choice component.
The log says "Polling of monitor-dst-file-flow returned null, the flow will not be invoked."
I have not been able to find a way to make the flow run in that case; I need to handle the condition where the given file is not there.
You need to return something besides null from the poller's target for the flow to be invoked. I'd recommend doing this in a sub-flow:
<flow name="monitor-dst-file-flow">
<poll doc:name="Poll">
<schedulers:cron-scheduler expression="0 0 23 ? * TUE-SAT"/>
<flow-ref name="pollerProcessor" doc:name="pollerProcessor"/>
</poll>
<choice doc:name="Choice">
<when expression="#[payload == 'file not found']">
<logger level="INFO" doc:name="Logger" message="File Exists..."/>
</when>
<otherwise>
<logger message="FILE EXISTS" level="ERROR" doc:name="Logger"/>
<flow-ref name="email-notification-sub-flow" doc:name="Flow Reference"/>
</otherwise>
</choice>
</flow>
<sub-flow name="pollerProcessor">
<scripting:transformer doc:name="Groovy">
<scripting:script engine="Groovy"><![CDATA[def endpointBuilder = muleContext.endpointFactory.getEndpointBuilder(
"sftp://${user}:${pwForGroovy}#${host}:${port}${inputpath}/?connector=SFTP")
endpointBuilder.addMessageProcessor(new org.mule.routing.MessageFilter(new org.mule.transport.file.filters.FilenameWildcardFilter('test.txt')))
def inboundEndpoint = endpointBuilder.buildInboundEndpoint()
inboundEndpoint.request(30000L)]]>
</scripting:script>
</scripting:transformer>
<set-payload value="#[payload == null ? 'file not found' : payload]" doc:name="Set Payload"/>
</sub-flow>
We are trying to extract approximately 40 GB of data from a database and want to generate multiple CSV files. We used the Mule DB connector in streaming mode, which returns a ResultSetIterator.
Q1) How can we convert this ResultSetIterator to an ArrayList, or any other readable format we can use to generate the files?
Q2) We tried using the For Each component to split the data into chunks. It works for a limited set of data, but for huge data it throws a SerializationException.
In the snippet below we are making chunks of data using For Each and handing them to a batch process to produce multiple files:
<batch:job name="testBatchWithDBOutside">
<batch:input>
<logger message="#[payload]" level="INFO" doc:name="Logger"/>
</batch:input>
<batch:process-records>
<batch:step name="Batch_Step">
<batch:commit size="10" doc:name="Batch Commit">
<object-to-string-transformer doc:name="Object to String"/>
<logger message="#[payload]" level="INFO" doc:name="Logger"/>
<file:outbound-endpoint path="C:\output" outputPattern="#[message.id].txt" responseTimeout="10000" doc:name="File"/>
</batch:commit>
</batch:step>
</batch:process-records>
</batch:job>
<flow name="testBatchWithDBOutsideFlow" processingStrategy="synchronous">
<file:inbound-endpoint path="C:\input" responseTimeout="10000" doc:name="File"/>
<db:select config-ref="MySQL_Configuration" streaming="true" fetchSize="10" doc:name="Database">
<db:parameterized-query><![CDATA[select * from classicmodels]]></db:parameterized-query>
</db:select>
<foreach batchSize="5" doc:name="For Each">
<batch:execute name="testBatchWithDBOutside" doc:name="testBatchWithDBOutside"/>
</foreach>
</flow>
Q1. You don't want to convert the Iterator to a List, as that would defeat the purpose of streaming from the DB connector by loading all the records into memory. Mule handles Iterators and Lists in the same way anyway.
Q2. The batch module implies a for-each operation. The output of batch:input needs to be a List or an Iterator. You should be able to simplify this to:
<batch:job name="testBatch">
<batch:input>
<db:select config-ref="MySQL_Configuration" streaming="true" fetchSize="10" doc:name="Database">
<db:parameterized-query><![CDATA[select * from classicmodels]]></db:parameterized-query>
</db:select>
</batch:input>
<batch:process-records>
<batch:step name="Batch_Step">
<object-to-string-transformer doc:name="Object to String"/>
<file:outbound-endpoint path="C:\output" outputPattern="#[message.id].txt" responseTimeout="10000" doc:name="File"/>
</batch:step>
</batch:process-records>
</batch:job>
You will also need to replace the object-to-string-transformer with a component that converts a database record (the payload at this point will be a map where the key is the column name, and the value is the record value) into a csv line.
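For example, a minimal sketch of such a component (assuming the map's natural column order is acceptable and values need no quoting or escaping), using a Groovy transformer inside the batch step:
<scripting:transformer doc:name="Map to CSV line">
    <scripting:script engine="Groovy"><![CDATA[
// payload here is a single record: a Map of column name -> value
return payload.values().join(',') + System.lineSeparator()
]]></scripting:script>
</scripting:transformer>
Note that with the file endpoint left as above, each record still goes to its own file; to accumulate lines into a single CSV you would also need a fixed outputPattern and a file connector configured to append (outputAppend="true").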
You can find a decent example in the Mule blog here: https://blogs.mulesoft.com/dev/anypoint-platform-dev/batch-module-reloaded/
Another option would be to remove the batch processor and use DataWeave to generate csv output and stream it to the file. This might be helpful: https://docs.mulesoft.com/mule-user-guide/v/3.7/dataweave-streaming
DataWeave will call next() on the ResultSetIterator as it processes each record, and that iterator will handle selecting chunks of records from the underlying database, so there is no queueing between steps and no loading of the full dataset into memory.
<flow name="batchtestFlow">
<http:listener config-ref="HTTP_Listener_Configuration" path="/batch" allowedMethods="GET" doc:name="HTTP"/>
<db:select config-ref="Generic_Database_Configuration" streaming="true" doc:name="Database">
<db:parameterized-query><![CDATA[select * from Employees]]></db:parameterized-query>
</db:select>
<dw:transform-message doc:name="Transform Message">
<dw:set-payload><![CDATA[%dw 1.0
%input payload application/java
%output application/csv streaming=true, header=true, quoteValues=true
---
payload map ((e, i) -> {
surname: e.SURNAME,
firstname: e.FIRST_NAME
})]]></dw:set-payload>
</dw:transform-message>
<file:outbound-endpoint path="C:/tmp" outputPattern="testbatchfile.csv" connector-ref="File" responseTimeout="10000" doc:name="File"/>
</flow>
You want to use OutputHandler. Make sure you have streaming turned on, then use a script component, for instance Groovy, and handle each row one at a time like so:
// script.groovy
import org.mule.api.transport.OutputHandler

return { evt, out ->
    payload.each { row ->
        out << row.SOMECOLUMN....
    }
} as OutputHandler
And the component in your XML:
<scripting:transformer returnClass="TODO" doc:name="ScriptComponent">
<scripting:script engine="Groovy" file="script.groovy" />
</scripting:transformer>
That is if you want to return some output. However, since in your case you want to write to files, you wouldn't use the variable out; you would instead write to your files directly from the script.
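A minimal sketch of that variation (the output path is a hypothetical placeholder and values are joined without any quoting or escaping):
// script.groovy -- write each row to a CSV file as it is pulled from the streaming ResultSetIterator
def csv = new File('C:/output/export.csv')  // hypothetical path
csv.withWriter { writer ->
    payload.each { row ->
        // row is a Map of column name -> value for one record
        writer << row.values().join(',') << '\n'
    }
}
return 'done'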
I found a simple and quick way, shown below.
Here the DB connector is in streaming mode, and For Each splits the records into chunks of the given batch size:
<flow name="testFlow" processingStrategy="synchronous">
<composite-source doc:name="Composite Source">
<quartz:inbound-endpoint jobName="test" cronExpression="0 48 13 1/1 * ? *" repeatInterval="0" connector-ref="Quartz" responseTimeout="10000" doc:name="Quartz">
<quartz:event-generator-job/>
</quartz:inbound-endpoint>
<http:listener config-ref="HTTP_Listener_Configuration" path="/hit" doc:name="HTTP"/>
</composite-source>
<db:select config-ref="MySQL_Configuration" streaming="true" fetchSize="10000" doc:name="Database">
<db:parameterized-query><![CDATA[SELECT * FROM tblName]]></db:parameterized-query>
</db:select>
<foreach batchSize="10000" doc:name="For Each">
<dw:transform-message doc:name="Transform Message">
<dw:set-payload><![CDATA[%dw 1.0
%output application/csv
---
payload map {
field1:$.InterfaceId,
field2:$.Component
}]]></dw:set-payload>
</dw:transform-message>
<file:outbound-endpoint path="F:\output" outputPattern="#[message.id].csv" responseTimeout="10000" doc:name="File"/>
</foreach>
<set-payload value="*** Success ***" doc:name="Set Payload"/>
</flow>
How do I convert type=java.lang.String to type=java.lang.Iterable, given that the batch step (Process Records) expects java.lang.Iterable? Note: the input is an XML file and the Mule flow is a batch job.
When the XML has only one 'Report_Entry' record, an error is received; for multiple 'Report_Entry' entries the flow works fine.
<object-to-string-transformer doc:name="Object to String"/>
<logger message="#[payload]" level="INFO" doc:name="Logger"/>
<set-payload
value="#[xpath3('/*:Report_Data/*:Report_Entry', payload, 'NODESET')]" doc:name="Set Payload"/>
<logger message="XML Record - #[payload]" level="INFO" doc:name="Logger"/>
</batch:input>
<batch:process-records>
<batch:step name="Batch_Step1">
<json:object-to-json-transformer doc:name="Object to JSON"/>
<logger message="XML Record - #[payload]" level="INFO" doc:name="Logger"/>
<amqp:outbound-endpoint exchangeName="${amqp.exchangeName}" queueName="${amqp.queueName}" responseTimeout="10000" encoding="UTF-8" mimeType="application/xml" connector-ref="AMQP_Connector" doc:name="AMQP"/>
</batch:step>
</batch:process-records>
After the set-payload, the logger prints 'org.mule.api.processor.LoggerMessageProcessor: XML Record - net.sf.saxon.dom.DOMNodeList#57d263b4'. Our requirement is to convert the XML record to JSON and write it to AMQP.
That's because of the splitter. If you just want a collection/iterable before the batch job, just use set-payload:
<set-payload
value="#[xpath3('/*:Report_Data/*:Report_Entry', payload, 'NODESET')]" />
<batch:execute name="test" />
This should work regardless of the number of nodes.
I need to poll multiple tables through a database connector. When I try to apply a separate poll per table using a composite source:
<composite-source>
<poll>
<db:select config-ref="databaseConnector"/> <!--select on table 1-->
</poll>
<poll>
<db:select config-ref="databaseConnector"/> <!--select on table 2-->
</poll>
</composite-source>
I get the error "poller already registered on endpoint uri". How can I poll multiple tables for updated data using a database connector?
Use three flows:
<flow name="poll-table-1">
<poll frequency="...">...</poll>
<flow-ref name="table-data-processor" />
</flow>
<flow name="poll-table-2">
<poll frequency="...">...</poll>
<flow-ref name="table-data-processor" />
</flow>
<flow name="table-data-processor">
...
</flow>
You can try the following way:
<composite-source>
<poll frequency="10000" doc:name="Poll">
<processor-chain >
<db:select config-ref="Oracle_Configuration" doc:name="Database">
<db:parameterized-query><![CDATA[select * from Table1]]></db:parameterized-query>
</db:select>
<logger level="INFO" message="Your Payload from Table1:- ....." doc:name="Logger"/>
<db:select config-ref="Oracle_Configuration" doc:name="Database">
<db:parameterized-query><![CDATA[select * from Table2]]></db:parameterized-query>
</db:select>
<logger level="INFO" message="Your Payload from Table2:- ...." doc:name="Logger"/>
</processor-chain>
</poll>
</composite-source>
<logger level="INFO" message="The remaining flow " doc:name="Logger"/>
This is working fine for me :)
I'm using batch in Mule for the first time and am not sure how to handle exceptions for batch records.
Records are failing in the input phase, but I am not able to catch the failure exception either in the input phase logger or in the batch step (Failure flow) logger. Perhaps the MEL #[inputPhaseException] is itself throwing an exception.
<batch:job name="Batch1" max-failed-records="-1">
<batch:threading-profile poolExhaustedAction="WAIT"/>
<batch:input>
<file:inbound-endpoint path="C:\IN" responseTimeout="10000" doc:name="File"/>
<component class="com.General" doc:name="Java"/>
<logger message="InputPhase: #[inputPhaseException]" level="INFO" doc:name="Logger"/>
</batch:input>
<batch:process-records>
<batch:step name="Batch_Step" accept-policy="ALL" ">
<data-mapper:transform config-ref="Pojo_To_CSV" doc:name="Pojo To CSV"/>
<file:outbound-endpoint path="C:\Users\OUT" outputPattern="#[function:dateStamp]_product.csv" responseTimeout="10000" doc:name="File"/>
</batch:step>
<batch:step name="FailureFlow" accept-policy="ONLY_FAILURES">
<logger message="Inside Failure: #[getStepExceptions()], Loading Phase: #[failureExceptionForStep],#[inputPhaseException] " level="ERROR" doc:name="Logger"/>
</batch:step>
</batch:process-records>
<batch:on-complete>
<logger level="INFO" doc:name="Logger" message=" On Complete: #[payload.loadedRecords] Loaded Records #[payload.failedRecords] Failed Records"/>
</batch:on-complete>
</batch:job>
Is there any restriction that some batch MEL can only be used in the input phase and other MEL only in Process Records and On Complete? When I tried keeping most of the get...Exceptions() expressions in the Failure flow, it threw an error.
Please suggest. Thanks in advance.
Yes, you are right: #[inputPhaseException] is causing all the issues.
I have modified your Mule flow; you can try the following:
<batch:job name="Batch1" max-failed-records="-1">
<batch:threading-profile poolExhaustedAction="WAIT"/>
<batch:input>
<file:inbound-endpoint path="C:\IN" responseTimeout="10000" doc:name="File"/>
<component class="com.General" doc:name="Java"/>
</batch:input>
<batch:process-records>
<batch:step name="Batch_Step" accept-policy="ALL" ">
<data-mapper:transform config-ref="Pojo_To_CSV" doc:name="Pojo To CSV"/>
<file:outbound-endpoint path="C:\Users\OUT" outputPattern="#[function:dateStamp]_product.csv" responseTimeout="10000" doc:name="File"/>
</batch:step>
<batch:step name="Batch_Failed">
<logger doc:name="Logger" level="ERROR" message="Record with the following payload has failed. Payload:: #[message.payload], Loading Phase: #[failureExceptionForStep], Inside Failure the exception is :- #[getStepExceptions()]" />
</batch:step>
</batch:process-records>
<batch:on-complete>
<logger message="Number of failed Records: #[payload.failedRecords] " level="INFO" doc:name="Failed Records" />
<logger level="INFO" doc:name="Logger" message=" Number of loadedRecord: #[payload.loadedRecords]"/>
<logger message="Number of sucessfull Records: #[payload.successfulRecords]" level="INFO" doc:name="Sucessfull Records" />
<logger message="ElapsedTime #[payload.getElapsedTimeInMillis()]" level="INFO" doc:name="Elapsed Time" />
</batch:on-complete>
</batch:job>
Yes, you need to use the On Complete phase, as it acts as a finally block that collects the successful and unsuccessful batch results.
https://dzone.com/articles/handle-errors-your-batch-job%E2%80%A6