Mule - CSV Lookup Table - mule

I have a mule flow with DataMapper configuration to map XML to JSON, in that I have a requirement to Lookup a CSV file which contains, for example, two columns and four rows to map to one of the JSON output field.
Below are my sample CSV file contents:
Name,Contact
Ram,111-222
Kumar,222-333
John,333-444
I got below exception while running DataMapper:
Element [CSV:CSV]-Pre-Execution of lookup table [CSV:CSV]failed
Error when parsing record #2 field Name
Caused by: java.lang.RuntimeException: Parsing error: Unexpected record delimiter, probably record has too few fields.
When the CSV file contains only one row there is no exception.
Could any one provide me a way to solve the issue? Thanks in advance.
EDIT: Please find the DataMapper configuration
<mule>
<data-mapper:config name="XML_To_JSON_1" transformationGraphPath="xml_to_json_1.grf" doc:name="XML_To_JSON" />
<flow name="mule-csv-lookupFlow1" doc:name="mule-csv-lookupFlow1">
<http:inbound-endpoint exchange-pattern="request-response" host="localhost" port="9090" doc:name="HTTP" />
<data-mapper:transform config-ref="XML_To_JSON_1" doc:name="XML To JSON" />
</flow>
</mule>
Below is the .grf file
<?xml version="1.0" encoding="UTF-8"?><Graph __version="3.5.0" author="pradeep" created="Wed Dec 03 13:06:48 IST 2014" description="XML To JSON" guiVersion="3.4.4.P" id="1417593604462" licenseCode="Unlicensed" licenseType="Unknown" modified="Wed Dec 03 13:06:48 IST 2014" modifiedBy="pradeep" name="XML_To_JSON" preview-file="${Project_Home}/src/main/resources/employee.xml" revision="1.0" showComponentDetails="false">
<Global>
<Metadata __index="0" __referenceCounter="1" __sourcePath="{}/object/contact" _dataStructure="SINGLE_DIMENSIONAL_COLLECTION" _id="__id" _metadataParentId="4430d2fe-5b06-4e39-b259-492c325fe164" _parent_id="__parent_id" _type="Output" id="f11ab87d-ec49-4034-a277-1e96a57b7925">
<Record fieldDelimiter="," name="contact" recordDelimiter="\n\\|\r\n\\|\r" type="delimited">
<Field __artificialType="_parent_id" __systemManaged="true" name="__parent_id" type="string"/>
<Field __artificialType="_id" __systemManaged="true" name="__id" type="string"/>
<Field __index="0" __sourcePath="{}/object/contact/name" containerType="SINGLE" label="name" name="name" type="string"/>
<Field __index="1" __sourcePath="{}/object/contact/phone" containerType="SINGLE" label="phone" name="phone" type="string"/>
</Record>
</Metadata>
<Metadata _type="Lookup" id="963db4af-a7a1-428c-be6b-39af261e93d4">
<Record fieldDelimiter="," name="CSV" recordDelimiter="\n\\|\r\n\\|\r" type="delimited">
<Field containerType="SINGLE" label="firstname" name="firstname" size="10" type="string"/>
<Field containerType="SINGLE" eofAsDelimiter="true" label="phone" name="phone" size="10" type="string"/>
</Record>
</Metadata>
<Metadata __index="0" __referenceCounter="1" __schemaType="employeeType" __sourcePath="{}/employees/employee" _dataStructure="SINGLE_DIMENSIONAL_COLLECTION" _id="__id" _metadataParentId="80f1c157-e81d-439d-afa8-e475de2dfe36" _parent_id="__parent_id" _type="Input" id="019cda9c-1078-4d00-bdcc-f81a59df5292">
<Record fieldDelimiter="," name="employee" recordDelimiter="\n\\|\r\n\\|\r" type="delimited">
<Field __artificialType="_parent_id" __systemManaged="true" name="__parent_id" type="string"/>
<Field __artificialType="_id" __systemManaged="true" name="__id" type="string"/>
<Field __index="0" __schemaType="employeeType" __sourcePath="{}/employees/employee/#firstname" containerType="SINGLE" label="firstname" name="firstname" type="string"/>
<Field __index="1" __schemaType="employeeType" __sourcePath="{}/employees/employee/#lastname" containerType="SINGLE" label="lastname" name="lastname" type="string"/>
<Field __index="2" __schemaType="employeeType" __sourcePath="{}/employees/employee/#gender" containerType="SINGLE" label="gender" name="gender" type="string"/>
<Field __index="3" __schemaType="employeeType" __sourcePath="{}/employees/employee/#phone" containerType="SINGLE" label="phone" name="phone" type="string"/>
<Field __index="4" __schemaType="employeeType" __sourcePath="{}/employees/employee/#text()" containerType="SINGLE" label="text()" name="text" type="string"/>
</Record>
</Metadata>
<Metadata __filteredMetadata="false" __index="0" __referenceCounter="1" __schemaType="employeesType" __sourcePath="{}/employees" _dataStructure="OBJECT" _id="__id" _type="Input" id="80f1c157-e81d-439d-afa8-e475de2dfe36">
<Record fieldDelimiter="," name="employees" recordDelimiter="\n\\|\r\n\\|\r" type="delimited">
<Field __artificialType="_id" __systemManaged="true" name="__id" type="string"/>
</Record>
</Metadata>
<Metadata __index="0" __referenceCounter="1" __sourcePath="{}/object" _dataStructure="OBJECT" _id="__id" _type="Output" id="4430d2fe-5b06-4e39-b259-492c325fe164">
<Record fieldDelimiter="," name="object" recordDelimiter="\n\\|\r\n\\|\r" type="delimited">
<Field __artificialType="_id" __systemManaged="true" name="__id" type="string"/>
</Record>
</Metadata>
<LookupTable fileURL="D:/1_Ram/DEV/Workspace/mule-workspace/mule-datamapper-lookup/src/main/resources/staff.txt" id="CSV" key="phone" metadata="963db4af-a7a1-428c-be6b-39af261e93d4" name="CSV" type="simpleLookup">
<attr name="lookupType"><![CDATA[CSV]]></attr>
</LookupTable>
<Dictionary>
<Entry id="DictionaryEntry0" input="true" name="inputPayload" output="false" type="object"/>
<Entry id="DictionaryEntry1" input="false" name="outputPayload" output="true" type="object"/>
</Dictionary>
</Global>
<Phase number="0">
<Node enabled="enabled" guiName="Foreach 'employees' -> 'object'" guiX="460" guiY="20" id="FOREACH_EMPLOYEES_OBJECT" transformClass="com.mulesoft.datamapper.transform.MelRecordTransform" type="REFORMAT">
<attr name="melScript"><![CDATA[//MEL
//START -> DO NOT REMOVE
output.__id = input.__id;
//END -> DO NOT REMOVE
]]></attr>
</Node>
<Node enabled="enabled" guiName="Foreach 'employee' -> 'contact'" guiX="460" guiY="120" id="FOREACH_EMPLOYEE_CONTACT" transformClass="com.mulesoft.datamapper.transform.MelRecordTransform" type="REFORMAT">
<attr name="melScript"><![CDATA[//MEL
//START -> DO NOT REMOVE
output.__id = input.__id;
output.__parent_id = input.__parent_id;
//END -> DO NOT REMOVE
output.phone = input.phone + (isnull(lookup("CSV").get([input.firstname])) ? null : lookup("CSV").get([input.firstname]).phone);
output.name = input.lastname + ' ' + input.firstname;]]></attr>
</Node>
<Node cacheInMemory="true" charset="UTF-8" enabled="enabled" fileURL="dict:outputPayload" guiName="JSON WRITER" guiX="900" guiY="20" id="JSON_WRITER0" type="JSON_WRITER">
<attr name="mapping"><![CDATA[<?xml version="1.0" encoding="UTF-8"?>
<object xmlns:clover="http://www.cloveretl.com/ns/xmlmapping" clover:inPort="0">
<clover:collection clover:name="contact">
<item clover:inPort="1" clover:key="__parent_id" clover:parentKey="__id">
<name>$1.name</name>
<phone>$1.phone</phone>
</item>
</clover:collection>
</object>]]></attr>
<attr name="__isJsonSchema"><![CDATA[false]]></attr>
<attr name="_data_format"><![CDATA[JSON]]></attr>
<attr name="jsonSchemaURL"><![CDATA[./src/main/resources/contacts.json]]></attr>
</Node>
<Node charset="UTF-8" dataPolicy="strict" enabled="enabled" fileURL="dict:inputPayload" guiName="XML READER" guiX="20" guiY="20" id="XML_READER0" type="XML_READER">
<attr name="xmlSchemaURL"><![CDATA[./src/main/resources/employees.xsd]]></attr>
<attr name="mapping"><![CDATA[<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<Context outPort="0" sequenceField="__id" xpath="/employees">
<Context generatedKey="__parent_id" outPort="1" parentKey="__id" sequenceField="__id" xpath="employee">
<Mapping cloverField="firstname" trim="true" xpath="#firstname"/>
<Mapping cloverField="lastname" trim="true" xpath="#lastname"/>
<Mapping cloverField="gender" trim="true" xpath="#gender"/>
<Mapping cloverField="phone" trim="true" xpath="#phone"/>
<Mapping cloverField="text" trim="true" xpath="text()"/>
</Context>
</Context>
]]></attr>
<attr name="_data_format"><![CDATA[XML]]></attr>
<attr name="__rootGlobalElementName"><![CDATA[employees]]></attr>
</Node>
<Edge debugMode="true" fromNode="FOREACH_EMPLOYEES_OBJECT:0" guiBendpoints="" id="Edge1" inPort="Port 0 (in)" metadata="4430d2fe-5b06-4e39-b259-492c325fe164" outPort="Port 0 (out)" toNode="JSON_WRITER0:0"/>
<Edge debugMode="true" fromNode="FOREACH_EMPLOYEE_CONTACT:0" guiBendpoints="" id="Edge3" inPort="Port 1 (in)" metadata="f11ab87d-ec49-4034-a277-1e96a57b7925" outPort="Port 0 (out)" toNode="JSON_WRITER0:1"/>
<Edge debugMode="true" fromNode="XML_READER0:0" guiBendpoints="" id="Edge0" inPort="Port 0 (in)" metadata="80f1c157-e81d-439d-afa8-e475de2dfe36" outPort="Port 0 (out)" toNode="FOREACH_EMPLOYEES_OBJECT:0"/>
<Edge debugMode="true" fromNode="XML_READER0:1" guiBendpoints="" id="Edge2" inPort="Port 0 (in)" metadata="019cda9c-1078-4d00-bdcc-f81a59df5292" outPort="Port 1 (out)" toNode="FOREACH_EMPLOYEE_CONTACT:0"/>
</Phase>
</Graph>

I noticed the following issues in your .grf file:
1. CSV Record fields are firstname and phone while in your actual csv file they are Name and Contact (I assume the CSV sample in your original post matches what in staff.txt file).
2. LookupTable key is phone while you lookup based on name in your phone mapping lookup("CSV").get([input.firstname]).phone

Related

SQL BCP Unknown Error while attempting to read format file on Ubuntu

I'm trying to automate some importing jobs to my SQL Server database and have the following XML format file:
<?xml version="1.0"?>
<BCPFORMAT xmlns="http://schemas.microsoft.com/sqlserver/2004/bulkload/format" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<RECORD>
<FIELD ID="1" xsi:type="CharFixed" LENGTH="11" COLLATION="SQL_Latin1_General_CP1_CI_AS" />
<FIELD ID="2" xsi:type="CharFixed" LENGTH="30" COLLATION="SQL_Latin1_General_CP1_CI_AS" />
<FIELD ID="3" xsi:type="CharFixed" LENGTH="2" COLLATION="SQL_Latin1_General_CP1_CI_AS" />
<FIELD ID="4" xsi:type="CharFixed" LENGTH="2" COLLATION="SQL_Latin1_General_CP1_CI_AS" />
<FIELD ID="5" xsi:type="CharFixed" LENGTH="2" COLLATION="SQL_Latin1_General_CP1_CI_AS" />
<FIELD ID="6" xsi:type="CharFixed" LENGTH="1" COLLATION="SQL_Latin1_General_CP1_CI_AS" />
<FIELD ID="7" xsi:type="CharFixed" LENGTH="1" COLLATION="SQL_Latin1_General_CP1_CI_AS" />
<FIELD ID="8" xsi:type="CharFixed" LENGTH="2" COLLATION="SQL_Latin1_General_CP1_CI_AS" />
<FIELD ID="9" xsi:type="CharTerm" TERMINATOR="\n"/>
</RECORD>
<ROW>
<COLUMN SOURCE="1" NAME="cuit" xsi:type="SQLNUMERIC" PRECISION="11" />
<COLUMN SOURCE="2" NAME="denominacion" xsi:type="SQLVARYCHAR" LENGTH="30" />
<COLUMN SOURCE="3" NAME="imp_ganancias" xsi:type="SQLVARYCHAR" LENGTH="2" />
<COLUMN SOURCE="4" NAME="imp_iva" xsi:type="SQLVARYCHAR" LENGTH="2" />
<COLUMN SOURCE="5" NAME="monotributo" xsi:type="SQLVARYCHAR" LENGTH="2" />
<COLUMN SOURCE="6" NAME="integra_sociedad" xsi:type="SQLVARYCHAR" LENGTH="1" />
<COLUMN SOURCE="7" NAME="empleador" xsi:type="SQLVARYCHAR" LENGTH="1" />
<COLUMN SOURCE="8" NAME="actividad_monotributo" xsi:type="SQLVARYCHAR" LENGTH="2" />
</ROW>
</BCPFORMAT>
And it works fine on my local machine running windows 10 if I run the bcp command
bcp myDB.dbo.myTable IN File.txt -f File.xml -U x -P x -S myIp
But the problem is that I need to leave this commands running on a remote machine running on Ubuntu and when I do I get the following error
SQLState = S1000, NativeError = 0
Error = [Microsoft][ODBC Driver 17 for SQL Server]Format File : Unknown error occurred while attempting to read.
And I have no Idea why this is happening, I tried changing permissions on the file and running as root but nothing changes.

index all files inside a folder in solr

I am having troubles indexing a folder in solr
example-data-config.xml:
<dataConfig>
<dataSource type="BinFileDataSource" />
<document>
<entity name="files"
dataSource="null"
rootEntity="false"
processor="FileListEntityProcessor"
baseDir="C:\Temp\" fileName=".*"
recursive="true"
onError="skip">
<field column="fileAbsolutePath" name="id" />
<field column="fileSize" name="size" />
<field column="fileLastModified" name="lastModified" />
<entity
name="documentImport"
processor="TikaEntityProcessor"
url="${files.fileAbsolutePath}"
format="text">
<field column="file" name="fileName"/>
<field column="Author" name="author" meta="true"/>
<field column="text" name="text"/>
</entity>
</entity>
</document>
then I create the schema.xml:
<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
<field name="fileName" type="string" indexed="true" stored="true" />
<field name="author" type="string" indexed="true" stored="true" />
<field name="title" type="string" indexed="true" stored="true" />
<field name="size" type="plong" indexed="true" stored="true" />
<field name="lastModified" type="pdate" indexed="true" stored="true" />
<field name="text" type="text_general" indexed="true" stored="true" multiValued="true"/>
finally I modify the file solrConfig.xml adding the requesthandler and the dataImportHandler and dataImportHandler-extra jars:
<requestHandler name="/dataimport" class="solr.DataImportHandler">
<lst name="defaults">
<str name="config">example-data-config.xml</str>
</lst>
</requestHandler>
I run it and the result is:
Inside that folder there are like 20.000 files in diferent formats (.py,.java,.wsdl, etc)
Any suggestion will be appreciated. Thanks :)
Check your Solr logs . Answer for what is the Root Cause will definitely be there . I also faced same situation once and found through solr logs that my DataImportHandler was throwing exceptions because of encrypted documents present in the folder . Your reasons may be different, but first analyze your solr logs, execute your entity again in DataImport section, and then check the immediate logs for errors by going on the logging section on admin page . If you are getting errors other than I what I mentioned , post them here , so they can be understood and deciphered .

Solr: how to query particuler entity when multiple

I am starting to learn Solr (using version 5.5.0). I am using managed-schema and data-congif.xml files to inex two sql server tables: Company & Contact.
I am able to execute from the UI, the data import, selecting one entity at a time.
This is the message I get for Company:
Indexing completed. Added/Updated: 8,293 documents. Deleted 0 documents. (Duration: 01s)
Requests: 1 (1/s), Fetched: 8,293 (8,293/s), Skipped: 0, Processed: 8,293 (8,293/s) Started: less than a minute ago
This is the message I get for Contact:
Indexing completed. Added/Updated: 81 documents. Deleted 0 documents.
Requests: 1, Fetched: 81, Skipped: 0, Processed: 81
Started: less than a minute ago
When I click the Query section, I want to perform a query to see all the Contact, and/ or Company records, not necessarily combined, but just be able to query them.
I am not sure how to do this, is it possible to get some help to understand how to specify against which entity I want to execute the query?
Here are the 2 files I modified:
data-cofig.xml:
<dataConfig>
<dataSource type="JdbcDataSource"
driver="com.microsoft.sqlserver.jdbc.SQLServerDriver"
url="jdbc:sqlserver://sql.server.com\test;databaseName=test"
user="testusr"
password="testpwd"/>
<document>
<entity name="Company" pk="CompanyID" query="SELECT * FROM tblCompany">
<field column="CompanyID" name="company_companyid"/>
<field column="Name" name="company_name"/>
<field column="Website" name="company_website"/>
<field column="Description" name="company_description"/>
<field column="NumberOfEmployees" name="company_numberofemployees"/>
<field column="AnnualRevenue" name="company_annualrevenue"/>
<field column="YearFounded" name="company_yearfounded"/>
</entity>
<entity name="Contact" pk="ContactID" query="SELECT * FROM tblContact">
<field column="ContactID" name="contact_contactid"/>
<field column="FirstName" name="contact_firstname"/>
<field column="MiddleInitial" name="contact_middleinitial"/>
<field column="LastName" name="contact_lastname"/>
<field column="Email" name="contact_email"/>
<field column="Description" name="contact_description"/>
</entity>
</document>
</dataConfig>
managed-schema:
<!-- Company Begin -->
<field name="company_companyid" type="string" indexed="true"/>
<field name="company_name" type="string" indexed="true"/>
<field name="company_website" type="string" indexed="true"/>
<field name="company_description" type="string" indexed="true"/>
<field name="company_numberofemployees" type="string" indexed="true"/>
<field name="company_annualrevenue" type="string" indexed="true"/>
<field name="company_yearfounded" type="string" indexed="true"/>
<!-- Company End -->
<!-- Contact Begin -->
<field name="contact_contactid" type="string" indexed="true" />
<field name="contact_firstname" type="string" indexed="true"/>
<field name="contact_middleinitial" type="string" indexed="true"/>
<field name="contact_lastname" type="string" indexed="true"/>
<field name="contact_email" type="string" indexed="true"/>
<!-- Contact End -->
UPDATE
I tried using the fl field to select company_companyid, but I did not get any results.
I am including a screen shot:
To get fields as needed from a document, use fl. For example, if you were using SolrJ, you would have something like query.set("fl", "fieldA, fieldB").
In a URL, it looks like this: http://host:port/solr/coreName/select?q=*%3A*&fl=fieldA,fieldB&wt=json&indent=true

DeltaImport not happening by default

I'm having issues with deltaquery where it's doesn't work automatically. Below is the data-config I have
<dataConfig>
<dataSource type="JdbcDataSource"
driver="com.microsoft.sqlserver.jdbc.SQLServerDriver"
url="jdbc:sqlserver://WTL-sql-1.com;databaseName=eng_metrics"
user="metrics"
password="metrics"/>
<document name="content">
<entity name="id"
query="select defect_id,headline,description,modify_date,issue_type,category,product,state FROM defects WHERE state not like 'Duplicate'"
deltaImportQuery="select defect_id,headline,description,modify_date,issue_type,category,product,state FROM defects WHERE defect_id = '${dataimporter.delta.defect_id}' and state not like 'Duplicate'"
deltaQuery="select defect_id FROM defects WHERE modify_date > '${dataimporter.last_index_time}'">
<field column="defect_id" name="defect_id" />
<field column="headline" name="headline" />
<field column="description" name="description" />
<field column="modify_date" name="modify_date" />
<field column="issue_type" name="issue_type" />
<field column="category" name="category" />
<field column="product" name="product" />
<field column="state" name="state" />
</entity>
</document>
</dataConfig>
But what I see that no matter the modify_date changes in the DB, I don't see any update happening unless I try doing a delta import explicitly.
Can someone provide me some thoughts on whether I need to change some config or some query to make that happen automatically?
Actually, DataImportHandler will not do it automatically. You have to trigger it by call delta import 's url.
You may want something like this:
http://wiki.apache.org/solr/DataImportHandler#Scheduling
or you can implement similar one by youself.
But I've this data-config which works fine in some cases
<dataConfig>
<dataSource type="JdbcDataSource"
driver="com.microsoft.sqlserver.jdbc.SQLServerDriver"
url="jdbc:sqlserver://127.0.0.1\SQLEXPRESS;databaseName=sustaining_trends"
user="sa"
password="metrics"/>
<document name="content">
<entity name="id"
query="select id,createtime,lastmodified,modifiedby,title,keywords,general,symptom,diagnosis,resolution FROM trends"
deltaImportQuery="select id,createtime,lastmodified,modifiedby,title,keywords,general,symptom,diagnosis,resolution FROM trends WHERE id = ${dataimporter.delta.id}"
deltaQuery="select id FROM trends WHERE lastmodified > '${dataimporter.last_index_time}' or createtime > '${dataimporter.last_index_time}'">
<field column="id" name="trendid" />
<field column="lastmodified" name="lastmodified" />
<field column="modifiedby" name="modifiedby" />
<field column="title" name="title" />
<field column="keywords" name="keywords" />
<field column="general" name="general" />
<field column="symptom" name="symptom" />
<field column="diagnosis" name="diagnosis" />
<field column="resolution" name="resolution" />
</entity>
</document>
</dataConfig>
Here if the item is modified immediately that gets updated without any interference but if a new data is created that doesn't get updated until either I do a manual delta import or else some entry gets modified.
How does this work automatically incase of modification and not work automatically for creation?

Content type not using custom form

I've created content type with following xml but it ain't using my custom forms specified in the xml below. What's wrong ?
<?xml version="1.0" encoding="utf-8"?>
<Elements xmlns="http://schemas.microsoft.com/sharepoint/">
<Field ID="{854DCDF4-5091-4B1E-AA31-D9DC71A29637}"
Type="Text"
Required="TRUE"
DisplayName="Customer ID"
Name="Customer-ID"
Indexed="TRUE"
EnforceUniqueValues="FALSE" />
<Field ID="{7E898932-91C0-4285-A8A2-B6440BE2FDC9}"
Type="Text"
Required="FALSE"
DisplayName="Customer Name"
Name="Customer-Name" />
<Field ID="{5168AB24-21A8-438A-8112-E82E24E180CE}"
Type="Text"
Required="FALSE"
DisplayName="Country"
Name="Country" />
<!-- Parent ContentType: Item (0x01) -->
<ContentType ID="0x010081e20b9903d945a8beacde43ae09f766"
Name="Customer"
Group="Custom Content Types"
Description="Customer Content Type"
Inherits="TRUE"
Version="0">
<FieldRefs>
<FieldRef ID="{854DCDF4-5091-4B1E-AA31-D9DC71A29637}" />
<FieldRef ID="{7E898932-91C0-4285-A8A2-B6440BE2FDC9}" />
<FieldRef ID="{5168AB24-21A8-438A-8112-E82E24E180CE}" />
</FieldRefs>
<XmlDocuments>
<XmlDocument NamespaceURI="http://schemas.microsoft.com/sharepoint/v3/contenttype/forms/url">
<FormUrls xmlns="http://schemas.microsoft.com/sharepoint/v3/contenttype/forms/url">
<New>_layouts/customer/customer.aspx</New>
<Display>_layouts/customer/customerdisp.aspx</Display>
<Edit>_layouts/customer/customeredit.aspx</Edit>
</FormUrls>
</XmlDocument>
</XmlDocuments>
</ContentType>
</Elements>
Try Inherts="FALSE" as an attribute on the element