Text replace with Image in Apache PDFBOX

Text replace with Image in Apache PDFBOX - pdfbox

Can anyone help me out how to replace text with Image by using Apache PDFBOX ?

import java.io.File;
import java.io.IOException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
public class RubberImagePaste {
public void createPDFFromImage( String inputFile, String imagePath, String outputFile )
throws IOException
{
// the document
PDDocument doc = null;
try
{
doc = PDDocument.load( new File(inputFile) );
PDPage page = doc.getPage(0);
// createFromFile is the easiest way with an image file
// if you already have the image in a BufferedImage,
// call LosslessFactory.createFromImage() instead
PDImageXObject pdImage = PDImageXObject.createFromFile(imagePath, doc);
PDPageContentStream contentStream = new PDPageContentStream(doc, page, true, true);
// contentStream.drawImage(ximage, 20, 20 );
// better method inspired by http://stackoverflow.com/a/22318681/535646
// reduce this value if the image is too large
float scale = 1f;
contentStream.drawImage(pdImage, 20, 20, pdImage.getWidth()*scale, pdImage.getHeight()*scale);
contentStream.close();
doc.save( outputFile );
}
finally
{
if( doc != null )
{
doc.close();
}
}
}
/**
*
*
* #param args The command line arguments.
*
* #throws IOException If there is an error parsing the document.
*/
public static void main( String[] args ) throws IOException
{
String documentFile = "D:/ABC.pdf";
String stampFile = "D:/Sign.png";
String outFile = "D:ABCnew.pdf";
new File("target/test-output").mkdirs();
String[] args1 = new String[] { documentFile, outFile, stampFile };
RubberImagePaste rubberStamp = new RubberImagePaste();
rubberStamp.createPDFFromImage( documentFile, stampFile, outFile);
}
/**
* This will print the usage for this example.
*/
private void usage()
{
System.err.println( "Usage: java "+getClass().getName()+" <input-pdf> <output-pdf> <image-filename>" );
}
}
Special Thanks to Tilman

Related

Apache Lucene 8.4.1 How to get indexed fields and term list?

I'am new to Apache Lucene, I'm using Apache Lucene 8.4.1, I can do Lucene Indexing and Searching but don't know how to read and list index / print index using java.
How to get indexed fields and term list ? .
I was able to get Fileds list by using following function grabbed from Other Stackoverflow article.
public static String[] getFieldNames(IndexReader reader) {
List<String> fieldNames = new ArrayList<String>();
//For a simple reader over only one index, reader.leaves() should only return one LeafReaderContext
for (LeafReaderContext readerCtx : reader.leaves()) {
FieldInfos fields = readerCtx.reader().getFieldInfos();
for (FieldInfo field : fields) {
//Check whether the field is indexed and searchable, perhaps?
fieldNames.add(field.name);
}
}
return fieldNames.toArray(new String[fieldNames.size()]);
}
Thanks

package com.lucene.ram;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;
/**
*
* #author W.P.Roshan
* #email sunone5 at gmail.com
*
* The RAMDirector is deprecated instead you can use
*
* import org.apache.lucene.index.memory.MemoryIndex;
*
*/
public class RAMDirectoryExample {
public RAMDirectoryExample() {
// TODO Auto-generated constructor stub
}
static void writeIndex(RAMDirectory ramDir, Analyzer analyzer) {
try {
// IndexWriter Configuration
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
iwc.setOpenMode(OpenMode.CREATE);
// IndexWriter writes new index files to the directory
IndexWriter writer = new IndexWriter(ramDir, iwc);
// Create some docs with name and content
indexDoc(writer, "document-1", "hello world");
indexDoc(writer, "document-2", "hello happy world");
indexDoc(writer, "document-3", "hello happy world");
indexDoc(writer, "document-4", "hello hello world");
// don't forget to close the writer
writer.close();
} catch (IOException e) {
// Any error goes here
e.printStackTrace();
}
}
static void indexDoc(IndexWriter writer, String name, String content) throws IOException {
Document doc = new Document();
doc.add(new TextField("name", name, Store.YES));
doc.add(new TextField("content", content, Store.YES));
writer.addDocument(doc);
}
static void searchIndex(RAMDirectory ramDir, Analyzer analyzer) {
IndexReader reader = null;
try {
// Create Reader
reader = DirectoryReader.open(ramDir);
// Create index searcher
IndexSearcher searcher = new IndexSearcher(reader);
// Build query
QueryParser qp = new QueryParser("content", analyzer);
Query query = qp.parse("happy");
// Search the index
TopDocs foundDocs = searcher.search(query, 10);
// Total found documents
System.out.println("Total Results :: " + foundDocs.totalHits);
// Let's print found doc names and their content along with score
for (ScoreDoc sd : foundDocs.scoreDocs) {
Document d = searcher.doc(sd.doc);
System.out.println("Document Number : " + sd.doc + " :: Document Name : " + d.get("name")
+ " :: Content : " + d.get("content") + " :: Score : " + sd.score);
}
System.out.println("");
// don't forget to close the reader
reader.close();
} catch (IOException e) {
// Any error goes here
e.printStackTrace();
} catch (ParseException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
static void readIndex_Get_Documents(RAMDirectory ramDir) {
IndexReader reader = null;
try {
// Create Reader
reader = DirectoryReader.open(ramDir);
// Create index searcher
IndexSearcher searcher = new IndexSearcher(reader);
System.out.println("-----------------------Document List-----------------------");
int maxDoc = reader.maxDoc();
for (int i = 0; i < maxDoc; i++) {
Document d = reader.document(i);
/**
* There are three types of method to retrieve indexed document name list
*/
/**
* Method 1 for get document name list
*/
// System.out.println(""+d.getFields().iterator().next().stringValue());
/**
* Method 2 for get document name list
*/
// System.out.println(""+d.iterator().next().stringValue());
/**
* Method 3 for get document name list
*/
String[] vls = d.getValues("name");
for (int j = 0; j < vls.length; j++) {
System.out.println("" + vls[j].toString());
}
}
// don't forget to close the reader
reader.close();
} catch (IOException e) {
// Any error goes here
e.printStackTrace();
}
}
static void readIndex_Get_Terms(RAMDirectory ramDir) {
IndexReader reader = null;
try {
// Create Reader
reader = DirectoryReader.open(ramDir);
// Create index searcher
IndexSearcher searcher = new IndexSearcher(reader);
System.out.println("");
System.out.println("--------------------------Term List------------------------");
int maxDoc = reader.maxDoc();
for (int i = 0; i < maxDoc; i++) {
Document d = reader.document(i);
/**
* There are three types of methods to retrieve indexed term list
*/
/**
* Method 1 for retrieve terms list
*/
// System.out.println(""+d.get("content").toString());
/**
* Method 2 for retrieve terms list
*/
// System.out.println(""+d.getField("content").stringValue());
/**
* Method 3 for retrieve terms list
*/
String[] vl = searcher.doc(i).getValues("content");
for (int k = 0; k < vl.length; k++) {
System.out.println("" + vl[k].toString());
}
}
// don't forget to close the reader
reader.close();
} catch (IOException e) {
// Any error goes here
e.printStackTrace();
}
}
public static void main(String[] args) {
// Create RAMDirectory instance
RAMDirectory ramDir = new RAMDirectory();
// Builds an analyzer with the default stop words
Analyzer analyzer = new StandardAnalyzer();
// Write some docs to RAMDirectory
writeIndex(ramDir, analyzer);
// Search indexed docs in RAMDirectory
searchIndex(ramDir, analyzer);
// read Index get indexed document list
readIndex_Get_Documents(ramDir);
// read Index get indexed terms list
readIndex_Get_Terms(ramDir);
}
}
I found the way to list documents & terms list . complete example will be available on git-hub for anyone reference. https://github.com/sunone5/lucene-ramdirectory-index

How can I add source PDF content to destination PDF using iText 7 without losing the header and footer?

I am using iText 7. I have two PDF files. The source PDF has some content. The destination PDF has header and footer. I have a requirement to add the content from source PDF to destination PDF in the middle of the page without overlapping header and footer of the destination PDF. What should the code be?
Below is my code and attached document is the screenshot of the source PDF file which needs to be embedded in the final.pdf file:
import java.io.File;
import java.io.FileOutputStream;
import java.net.MalformedURLException;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import com.itextpdf.io.font.FontProgram;
import com.itextpdf.io.font.FontProgramFactory;
import com.itextpdf.io.font.PdfEncodings;
import com.itextpdf.io.image.ImageDataFactory;
import com.itextpdf.kernel.events.Event;
import com.itextpdf.kernel.events.IEventHandler;
import com.itextpdf.kernel.events.PdfDocumentEvent;
import com.itextpdf.kernel.font.PdfFont;
import com.itextpdf.kernel.font.PdfFontFactory;
import com.itextpdf.kernel.geom.PageSize;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
import com.itextpdf.kernel.pdf.xobject.PdfFormXObject;
import com.itextpdf.layout.Canvas;
import com.itextpdf.layout.Document;
import com.itextpdf.layout.Style;
import com.itextpdf.layout.borders.Border;
import com.itextpdf.layout.borders.SolidBorder;
import com.itextpdf.layout.element.Cell;
import com.itextpdf.layout.element.Image;
import com.itextpdf.layout.element.Paragraph;
import com.itextpdf.layout.element.Table;
import com.itextpdf.layout.element.Text;
import com.itextpdf.layout.font.FontProvider;
import com.itextpdf.layout.property.HorizontalAlignment;
import com.itextpdf.layout.property.TextAlignment;
import com.itextpdf.layout.property.UnitValue;
import com.itextpdf.layout.property.VerticalAlignment;
public class TestPdf {
public static void main(String[] args) {
String uuid = UUID.randomUUID().toString();
try {
#SuppressWarnings("resource")
PdfWriter writer = new PdfWriter(new FileOutputStream(new File(Paths.get("Output").toAbsolutePath()+"/final.pdf"))).setSmartMode(true);
PdfDocument pdfDoc = new PdfDocument(writer);
pdfDoc.setDefaultPageSize(PageSize.A4.rotate());
String fonts[] = {Paths.get("fonts").toAbsolutePath() + "/TREBUC.TTF", Paths.get("fonts").toAbsolutePath() + "/TREBUCBD.TTF", Paths.get("fonts").toAbsolutePath() + "/TREBUCBI.TTF",Paths.get("fonts").toAbsolutePath() + "/TREBUCIT.TTF"};
FontProvider fontProvider = new FontProvider();
Map<String, PdfFont> pdfFontMap = new HashMap();
for (String font : fonts) {
FontProgram fontProgram = FontProgramFactory.createFont(font);
if(font.endsWith("TREBUC.TTF")) {
pdfFontMap.put("NORMAL", PdfFontFactory.createFont(fontProgram, PdfEncodings.WINANSI, true));
} else if(font.endsWith("TREBUCBD.TTF")) {
pdfFontMap.put("BOLD", PdfFontFactory.createFont(fontProgram, PdfEncodings.WINANSI, true));
} else if(font.endsWith("TREBUCBI.TTF")) {
pdfFontMap.put("BOLD_ITALIC", PdfFontFactory.createFont(fontProgram, PdfEncodings.WINANSI, true));
} else if(font.endsWith("TREBUCIT.TTF")) {
pdfFontMap.put("ITALIC", PdfFontFactory.createFont(fontProgram, PdfEncodings.WINANSI, true));
}
fontProvider.addFont(fontProgram);
}
TestPdf testPdf = new TestPdf();
NormalPageHeader headerHandler = testPdf.new NormalPageHeader(Paths.get("images").toAbsolutePath() + "\\logo.png", pdfFontMap);
pdfDoc.addEventHandler(PdfDocumentEvent.START_PAGE, headerHandler);
PageEndEvent pageEndEvent = testPdf.new PageEndEvent(Paths.get("images").toAbsolutePath() + "\\FooterLineExternal.png" ,pdfFontMap);
pdfDoc.addEventHandler(PdfDocumentEvent.END_PAGE, pageEndEvent);
Document doc = new Document(pdfDoc);
doc.getPageEffectiveArea(PageSize.A4.rotate());
Table imageTable = new Table(1);
imageTable.setBorder(Border.NO_BORDER);
imageTable.setWidth(UnitValue.createPercentValue(100));
Cell cell = new Cell();
Paragraph paragraph = new Paragraph("Title");
paragraph.setVerticalAlignment(VerticalAlignment.TOP);
cell.add(paragraph);
cell.setBorder(Border.NO_BORDER);
cell.setPaddingTop(50);
imageTable.addCell(cell);
doc.add(imageTable);
doc.close();
System.out.println("Converted to PDF Succesfully >>> convertedSvg_"+uuid+".pdf");
}
catch(Exception e){
e.printStackTrace();
System.out.println("Error Occured while converting to PDF = " + e.getMessage());
}
}
class NormalPageHeader implements IEventHandler {
String header;
Map<String, PdfFont> font;
public NormalPageHeader(String header, Map<String, PdfFont> font) {
this.header = header;
this.font = font;
}
#Override
public void handleEvent(Event event) {
//Retrieve document and
PdfDocumentEvent docEvent = (PdfDocumentEvent) event;
PdfDocument pdf = docEvent.getDocument();
PdfPage page = docEvent.getPage();
Rectangle pageSize = page.getPageSize();
PdfCanvas pdfCanvas = new PdfCanvas(
page.getLastContentStream(), page.getResources(), pdf);
Canvas canvas = new Canvas(pdfCanvas, pdf, pageSize);
canvas.setFontSize(10f);
Table table = new Table(3);
table.setBorder(Border.NO_BORDER);
table.setWidth(UnitValue.createPercentValue(100));
Cell leftCell = new Cell();
leftCell.setFont(font.get("NORMAL"));
leftCell.setPaddingTop(15);
leftCell.setPaddingLeft(20);
leftCell.setBorder(Border.NO_BORDER);
leftCell.setBorderBottom(new SolidBorder(0.5f));
leftCell.setWidth(UnitValue.createPercentValue(33.3f));
Text userLabel = new Text("Username: ");
userLabel.setBold();
Paragraph paragraph = new Paragraph(userLabel);
Cell middleCell = new Cell();
middleCell.setFont(font.get("NORMAL"));
middleCell.setPaddingTop(15);
middleCell.setBorder(Border.NO_BORDER);
middleCell.setBorderBottom(new SolidBorder(0.5f));
middleCell.setWidth(UnitValue.createPercentValue(33.3f));
paragraph = new Paragraph("Main Header");
paragraph.setTextAlignment(TextAlignment.CENTER);
paragraph.setBold();
paragraph.setFontSize(12);
middleCell.add(paragraph);
String programString = "Sample header";
paragraph = new Paragraph(programString);
paragraph.setTextAlignment(TextAlignment.CENTER);
paragraph.setBold();
paragraph.setFontSize(10);
middleCell.add(paragraph);
table.addCell(middleCell);
Cell rightCell = new Cell();
rightCell.setFont(font.get("NORMAL"));
rightCell.setPaddingTop(20);
rightCell.setWidth(UnitValue.createPercentValue(33.3f));
rightCell.setHorizontalAlignment(HorizontalAlignment.RIGHT);
rightCell.setBorder(Border.NO_BORDER);
rightCell.setBorderBottom(new SolidBorder(0.5f));
rightCell.setPaddingRight(20);
//Write text at position
Image img;
try {
img = new Image(ImageDataFactory.create(header));
img.setHorizontalAlignment(HorizontalAlignment.RIGHT);
Style style = new Style();
style.setWidth(91);
style.setHeight(25);
img.addStyle(style);
rightCell.add(img);
table.addCell(rightCell);
table.setMarginLeft(15);
table.setMarginRight(15);
canvas.add(table);
}
catch (MalformedURLException e) {
e.printStackTrace();
}
}
}
class PageEndEvent implements IEventHandler {
protected PdfFormXObject placeholder;
protected float side = 20;
protected float x = 300;
protected float y = 10;
protected float space = 4.5f;
private String bar;
protected float descent = 3;
Map<String, PdfFont> font;
public PageEndEvent(String bar, Map<String, PdfFont> font) {
this.bar = bar;
this.font = font;
placeholder =new PdfFormXObject(new Rectangle(0, 0, side, side));
}
#Override
public void handleEvent(Event event) {
Table table = new Table(3);
table.setBorder(Border.NO_BORDER);
table.setWidth(UnitValue.createPercentValue(100));
Cell confCell = new Cell();
confCell.setFont(font.get("NORMAL"));
confCell.setPaddingTop(15);
confCell.setPaddingLeft(20);
confCell.setBorder(Border.NO_BORDER);
confCell.setBorderBottom(new SolidBorder(0.5f));
confCell.setWidth(UnitValue.createPercentValue(100));
PdfDocumentEvent docEvent = (PdfDocumentEvent) event;
PdfDocument pdf = docEvent.getDocument();
PdfPage page = docEvent.getPage();
Rectangle pageSize = page.getPageSize();
PdfCanvas pdfCanvas = new PdfCanvas(
page.getLastContentStream(), page.getResources(), pdf);
Canvas canvas = new Canvas(pdfCanvas, pdf, pageSize);
Image img;
try {
img = new Image(ImageDataFactory.create(bar));
img.setHorizontalAlignment(HorizontalAlignment.LEFT);
Style style = new Style();
style.setWidth(UnitValue.createPercentValue(100));
style.setHeight(50);
img.addStyle(style);
Paragraph p = new Paragraph().add("Test: Confidential");
p.setFont(font.get("NORMAL"));
p.setFontSize(8);
p.setFontColor(com.itextpdf.kernel.colors.ColorConstants.GRAY);
canvas.showTextAligned(p, x, y, TextAlignment.CENTER);
pdfCanvas.addXObject(placeholder, x + space, y - descent);
pdfCanvas.release();
}
catch (MalformedURLException e) {
e.printStackTrace();
}
}
public void writeTotal(PdfDocument pdf) {
Canvas canvas = new Canvas(placeholder, pdf);
canvas.showTextAligned(String.valueOf(pdf.getNumberOfPages()),
0, descent, TextAlignment.LEFT);
}
}
}

First off, some words on the iText architecture behind some constructs you use:
When you use a Document instance to add content to a document that iText shall layout automatically, the assumption is that the area where iText can layout stuff is the whole page minus the page margins.
Thus, if you add further page material via other channels than the Document, e.g. like you do in your NormalPageHeader headerHandler and your PageEndEvent pageEndEvent, it is your responsibility to do so outside the layout area explained above, i.e. in the margin areas. (Unless that additional material is background stuff, like a water sign...)
For this you should set the margins large enough to guarantee that your further material is in the margins. By default the page margins are set to 36pt on each side of the page which usually is enough for a single line header or footer but not really for multi-line ones.
In your code you create a header which requires at least some 52pt plus a bit to prevent the content iText will layout from touching the header line.
Keeping that in mind it is pretty straight forward to insert a given PdfPage sourcePage into your page:
...
NormalPageHeader headerHandler = testPdf.new NormalPageHeader(Paths.get("images").toAbsolutePath() + "\\logo.png", pdfFontMap);
pdfDoc.addEventHandler(PdfDocumentEvent.START_PAGE, headerHandler);
PageEndEvent pageEndEvent = testPdf.new PageEndEvent(Paths.get("images").toAbsolutePath() + "\\FooterLineExternal.png" ,pdfFontMap);
pdfDoc.addEventHandler(PdfDocumentEvent.END_PAGE, pageEndEvent);
Document doc = new Document(pdfDoc);
doc.setTopMargin(55);
PdfFormXObject xobject = sourcePage.copyAsFormXObject(pdfDoc);
Rectangle xobjectBoundaryBox = xobject.getBBox().toRectangle();
xobject.getPdfObject().put(PdfName.Matrix, new PdfArray(new float[] {1, 0, 0, 1, -xobjectBoundaryBox.getLeft(), -xobjectBoundaryBox.getBottom()}));
Image image = new Image(xobject);
image.setAutoScale(true);
doc.add(image);
doc.close();
...
(excerpt from InsertInSpace helper insertIntoNithinTestFile)
If you use the original source page as is, the above code will insert it including all margin space. If you don't want this but instead cut that space of, you can proceed as follows to determine the actual bounding box of the page content, reduce the page to that box, and forward it to the method insertIntoNithinTestFile above, assuming page 1 of PdfDocument pdfDocument shall be processed:
PdfDocumentContentParser contentParser = new PdfDocumentContentParser(pdfDocument);
MarginFinder strategy = contentParser.processContent(1, new MarginFinder());
PdfPage page = pdfDocument.getPage(1);
page.setCropBox(strategy.getBoundingBox());
page.setMediaBox(strategy.getBoundingBox());
insertIntoNithinTestFile(page, "test-InsertIntoNithinTestFile.pdf");
(InsertInSpace test testInsertSimpleTestPdf)
The MarginFinder is a port of the iText5 MarginFinder to iText 7:
public class MarginFinder implements IEventListener {
public Rectangle getBoundingBox() {
return boundingBox != null ? boundingBox.clone() : null;
}
#Override
public void eventOccurred(IEventData data, EventType type) {
if (data instanceof ImageRenderInfo) {
ImageRenderInfo imageData = (ImageRenderInfo) data;
Matrix ctm = imageData.getImageCtm();
for (Vector unitCorner : UNIT_SQUARE_CORNERS) {
Vector corner = unitCorner.cross(ctm);
addToBoundingBox(new Rectangle(corner.get(Vector.I1), corner.get(Vector.I2), 0, 0));
}
} else if (data instanceof TextRenderInfo) {
TextRenderInfo textRenderInfo = (TextRenderInfo) data;
addToBoundingBox(textRenderInfo.getAscentLine().getBoundingRectangle());
addToBoundingBox(textRenderInfo.getDescentLine().getBoundingRectangle());
} else if (data instanceof PathRenderInfo) {
PathRenderInfo renderInfo = (PathRenderInfo) data;
if (renderInfo.getOperation() != PathRenderInfo.NO_OP)
{
Matrix ctm = renderInfo.getCtm();
Path path = renderInfo.getPath();
for (Subpath subpath : path.getSubpaths())
{
for (Point point2d : subpath.getPiecewiseLinearApproximation())
{
Vector vector = new Vector((float)point2d.getX(), (float)point2d.getY(), 1);
vector = vector.cross(ctm);
addToBoundingBox(new Rectangle(vector.get(Vector.I1), vector.get(Vector.I2), 0, 0));
}
}
}
} else if (data != null) {
logger.fine(String.format("Ignored %s event, class %s.", type, data.getClass().getSimpleName()));
} else {
logger.fine(String.format("Ignored %s event with null data.", type));
}
}
#Override
public Set<EventType> getSupportedEvents() {
return null;
}
void addToBoundingBox(Rectangle rectangle) {
if (boundingBox == null)
boundingBox = rectangle.clone();
else
boundingBox = Rectangle.getCommonRectangle(boundingBox, rectangle);
}
Rectangle boundingBox = null;
Logger logger = Logger.getLogger(MarginFinder.class.getName());
static List<Vector> UNIT_SQUARE_CORNERS = Arrays.asList(new Vector(0,0,1), new Vector(1,0,1), new Vector(1,1,1), new Vector(0,1,1));
}
(MarginFinder.java)

How to compile and run java source code in memory [duplicate]

This question already has answers here:
Compile code fully in memory with javax.tools.JavaCompiler [duplicate]
(7 answers)
Closed 6 years ago.
I want to treat a String as a Java file then compile and run it. In other words, use Java as a script language.
To get better performance, we should avoid writing .class files to disk.

This answer is from one of my blogs, Compile and Run Java Source Code in Memory.
Here are the three source code files.
MemoryJavaCompiler.java
package me.soulmachine.compiler;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Writer;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import javax.tools.*;
/**
* Simple interface to Java compiler using JSR 199 Compiler API.
*/
public class MemoryJavaCompiler {
private javax.tools.JavaCompiler tool;
private StandardJavaFileManager stdManager;
public MemoryJavaCompiler() {
tool = ToolProvider.getSystemJavaCompiler();
if (tool == null) {
throw new RuntimeException("Could not get Java compiler. Please, ensure that JDK is used instead of JRE.");
}
stdManager = tool.getStandardFileManager(null, null, null);
}
/**
* Compile a single static method.
*/
public Method compileStaticMethod(final String methodName, final String className,
final String source)
throws ClassNotFoundException {
final Map<String, byte[]> classBytes = compile(className + ".java", source);
final MemoryClassLoader classLoader = new MemoryClassLoader(classBytes);
final Class clazz = classLoader.loadClass(className);
final Method[] methods = clazz.getDeclaredMethods();
for (final Method method : methods) {
if (method.getName().equals(methodName)) {
if (!method.isAccessible()) method.setAccessible(true);
return method;
}
}
throw new NoSuchMethodError(methodName);
}
public Map<String, byte[]> compile(String fileName, String source) {
return compile(fileName, source, new PrintWriter(System.err), null, null);
}
/**
* compile given String source and return bytecodes as a Map.
*
* #param fileName source fileName to be used for error messages etc.
* #param source Java source as String
* #param err error writer where diagnostic messages are written
* #param sourcePath location of additional .java source files
* #param classPath location of additional .class files
*/
private Map<String, byte[]> compile(String fileName, String source,
Writer err, String sourcePath, String classPath) {
// to collect errors, warnings etc.
DiagnosticCollector<JavaFileObject> diagnostics =
new DiagnosticCollector<JavaFileObject>();
// create a new memory JavaFileManager
MemoryJavaFileManager fileManager = new MemoryJavaFileManager(stdManager);
// prepare the compilation unit
List<JavaFileObject> compUnits = new ArrayList<JavaFileObject>(1);
compUnits.add(fileManager.makeStringSource(fileName, source));
return compile(compUnits, fileManager, err, sourcePath, classPath);
}
private Map<String, byte[]> compile(final List<JavaFileObject> compUnits,
final MemoryJavaFileManager fileManager,
Writer err, String sourcePath, String classPath) {
// to collect errors, warnings etc.
DiagnosticCollector<JavaFileObject> diagnostics =
new DiagnosticCollector<JavaFileObject>();
// javac options
List<String> options = new ArrayList<String>();
options.add("-Xlint:all");
// options.add("-g:none");
options.add("-deprecation");
if (sourcePath != null) {
options.add("-sourcepath");
options.add(sourcePath);
}
if (classPath != null) {
options.add("-classpath");
options.add(classPath);
}
// create a compilation task
javax.tools.JavaCompiler.CompilationTask task =
tool.getTask(err, fileManager, diagnostics,
options, null, compUnits);
if (task.call() == false) {
PrintWriter perr = new PrintWriter(err);
for (Diagnostic diagnostic : diagnostics.getDiagnostics()) {
perr.println(diagnostic);
}
perr.flush();
return null;
}
Map<String, byte[]> classBytes = fileManager.getClassBytes();
try {
fileManager.close();
} catch (IOException exp) {
}
return classBytes;
}
}
MemoryJavaFileManager.java
package me.soulmachine.compiler;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.nio.CharBuffer;
import java.util.HashMap;
import java.util.Map;
import javax.tools.FileObject;
import javax.tools.ForwardingJavaFileManager;
import javax.tools.JavaFileManager;
import javax.tools.JavaFileObject;
import javax.tools.JavaFileObject.Kind;
import javax.tools.SimpleJavaFileObject;
/**
* JavaFileManager that keeps compiled .class bytes in memory.
*/
#SuppressWarnings("unchecked")
final class MemoryJavaFileManager extends ForwardingJavaFileManager {
/** Java source file extension. */
private final static String EXT = ".java";
private Map<String, byte[]> classBytes;
public MemoryJavaFileManager(JavaFileManager fileManager) {
super(fileManager);
classBytes = new HashMap<>();
}
public Map<String, byte[]> getClassBytes() {
return classBytes;
}
public void close() throws IOException {
classBytes = null;
}
public void flush() throws IOException {
}
/**
* A file object used to represent Java source coming from a string.
*/
private static class StringInputBuffer extends SimpleJavaFileObject {
final String code;
StringInputBuffer(String fileName, String code) {
super(toURI(fileName), Kind.SOURCE);
this.code = code;
}
public CharBuffer getCharContent(boolean ignoreEncodingErrors) {
return CharBuffer.wrap(code);
}
}
/**
* A file object that stores Java bytecode into the classBytes map.
*/
private class ClassOutputBuffer extends SimpleJavaFileObject {
private String name;
ClassOutputBuffer(String name) {
super(toURI(name), Kind.CLASS);
this.name = name;
}
public OutputStream openOutputStream() {
return new FilterOutputStream(new ByteArrayOutputStream()) {
public void close() throws IOException {
out.close();
ByteArrayOutputStream bos = (ByteArrayOutputStream)out;
classBytes.put(name, bos.toByteArray());
}
};
}
}
public JavaFileObject getJavaFileForOutput(JavaFileManager.Location location,
String className,
Kind kind,
FileObject sibling) throws IOException {
if (kind == Kind.CLASS) {
return new ClassOutputBuffer(className);
} else {
return super.getJavaFileForOutput(location, className, kind, sibling);
}
}
static JavaFileObject makeStringSource(String fileName, String code) {
return new StringInputBuffer(fileName, code);
}
static URI toURI(String name) {
File file = new File(name);
if (file.exists()) {
return file.toURI();
} else {
try {
final StringBuilder newUri = new StringBuilder();
newUri.append("mfm:///");
newUri.append(name.replace('.', '/'));
if(name.endsWith(EXT)) newUri.replace(newUri.length() - EXT.length(), newUri.length(), EXT);
return URI.create(newUri.toString());
} catch (Exception exp) {
return URI.create("mfm:///com/sun/script/java/java_source");
}
}
}
}
MemoryClassLoader.java
package me.soulmachine.compiler;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
/**
* ClassLoader that loads .class bytes from memory.
*/
final class MemoryClassLoader extends URLClassLoader {
private Map<String, byte[]> classBytes;
public MemoryClassLoader(Map<String, byte[]> classBytes,
String classPath, ClassLoader parent) {
super(toURLs(classPath), parent);
this.classBytes = classBytes;
}
public MemoryClassLoader(Map<String, byte[]> classBytes, String classPath) {
this(classBytes, classPath, ClassLoader.getSystemClassLoader());
}
public MemoryClassLoader(Map<String, byte[]> classBytes) {
this(classBytes, null, ClassLoader.getSystemClassLoader());
}
public Class load(String className) throws ClassNotFoundException {
return loadClass(className);
}
public Iterable<Class> loadAll() throws ClassNotFoundException {
List<Class> classes = new ArrayList<Class>(classBytes.size());
for (String name : classBytes.keySet()) {
classes.add(loadClass(name));
}
return classes;
}
protected Class findClass(String className) throws ClassNotFoundException {
byte[] buf = classBytes.get(className);
if (buf != null) {
// clear the bytes in map -- we don't need it anymore
classBytes.put(className, null);
return defineClass(className, buf, 0, buf.length);
} else {
return super.findClass(className);
}
}
private static URL[] toURLs(String classPath) {
if (classPath == null) {
return new URL[0];
}
List<URL> list = new ArrayList<URL>();
StringTokenizer st = new StringTokenizer(classPath, File.pathSeparator);
while (st.hasMoreTokens()) {
String token = st.nextToken();
File file = new File(token);
if (file.exists()) {
try {
list.add(file.toURI().toURL());
} catch (MalformedURLException mue) {}
} else {
try {
list.add(new URL(token));
} catch (MalformedURLException mue) {}
}
}
URL[] res = new URL[list.size()];
list.toArray(res);
return res;
}
}
Explanations:
In order to represent a Java source file in memory instead of disk, I defined a StringInputBuffer class in the MemoryJavaFileManager.java.
To save the compiled .class files in memory, I implemented a class MemoryJavaFileManager. The main idea is to override the function getJavaFileForOutput() to store bytecodes into a map.
To load the bytecodes in memory, I have to implement a customized classloader MemoryClassLoader, which reads bytecodes in the map and turn them into classes.
Here is a unite test.
package me.soulmachine.compiler;
import org.junit.Test;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import static org.junit.Assert.assertEquals;
public class MemoryJavaCompilerTest {
private final static MemoryJavaCompiler compiler = new MemoryJavaCompiler();
#Test public void compileStaticMethodTest()
throws ClassNotFoundException, InvocationTargetException, IllegalAccessException {
final String source = "public final class Solution {\n"
+ "public static String greeting(String name) {\n"
+ "\treturn \"Hello \" + name;\n" + "}\n}\n";
final Method greeting = compiler.compileStaticMethod("greeting", "Solution", source);
final Object result = greeting.invoke(null, "soulmachine");
assertEquals("Hello soulmachine", result.toString());
}
}
Reference
JavaCompiler.java from Cloudera Morphlines
How to create an object from a string in Java (how to eval a string)?
InMemoryJavaCompiler
Java-Runtime-Compiler
动态的Java - 无废话JavaCompilerAPI中文指南

pdfbox sign pdf PKCS12 invalid signature bouncycastle

Already tested with various versions of PDFBox keeps giving the same error, generates a signature invalid in pdf reader adobe and does not show the certificate , I am using Java Jdk jdk1.8.0_40 , PDFBox -app- 2.0.0-20150529.000527-1365.jar the code is as follows (mycert22.p12 is sha256withrsa 2048bit key):
(pdf shows : signature invalid , there are mistakes in formatting or the information contained in this signature to didentidade the subscriber was not verified )
and not to see the certificate details what 'll be happening?
JAVA:
package ca_c4_sign_v2;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.security.KeyStore;
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.security.PrivateKey;
import java.security.UnrecoverableKeyException;
import java.security.cert.CertStore;
import java.security.cert.Certificate;
import java.security.cert.CertificateException;
import java.security.cert.CollectionCertStoreParameters;
import java.security.cert.X509Certificate;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Enumeration;
import java.util.List;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.exceptions.SignatureException;
import org.apache.pdfbox.io.RandomAccessFile;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureOptions;
import org.bouncycastle.cms.CMSException;
import org.bouncycastle.cms.CMSProcessable;
import org.bouncycastle.cms.CMSSignedData;
import org.bouncycastle.cms.CMSSignedDataGenerator;
import org.bouncycastle.cms.CMSSignedGenerator;
import org.bouncycastle.jce.provider.BouncyCastleProvider;
public class CA_c4_sign_v2 implements SignatureInterface{
private static BouncyCastleProvider provider = new BouncyCastleProvider();
private PrivateKey privKey;
private Certificate[] cert;
private SignatureOptions options;
/**
* Initialize the signature creator with a keystore (pkcs12) and pin that
* should be used for the signature.
*
* #param keystore
* is a pkcs12 keystore.
* #param pin
* is the pin for the keystore / private key
*/
public CA_c4_sign_v2(KeyStore keystore, char[] pin)
{
try
{
/*
* grabs the first alias from the keystore and get the private key. An
* alternative method or constructor could be used for setting a specific
* alias that should be used.
*/
Enumeration<String> aliases = keystore.aliases();
String alias = null;
if (aliases.hasMoreElements())
{
alias = aliases.nextElement();
}
else
{
throw new RuntimeException("Could not find alias");
}
privKey = (PrivateKey) keystore.getKey(alias, pin);
cert = keystore.getCertificateChain(alias);
System.out.println("alias: " + alias + "\n" + "cert: " + cert + "\n");
}
catch (KeyStoreException e)
{
e.printStackTrace();
}
catch (UnrecoverableKeyException e)
{
System.err.println("Could not extract private key.");
e.printStackTrace();
}
catch (NoSuchAlgorithmException e)
{
System.err.println("Unknown algorithm.");
e.printStackTrace();
}
}
/**
* Signs the given pdf file.
*
* #param document is the pdf document
* #return the signed pdf document
* #throws IOException
* #throws COSVisitorException
* #throws SignatureException
*/
public File signPDF(File document) throws IOException, COSVisitorException,
SignatureException
{
byte[] buffer = new byte[8 * 1024];
if (document == null || !document.exists())
{
new RuntimeException("Documento nao existe!");
}
// creating output document and prepare the IO streams.
String name = document.getName();
String substring = name.substring(0, name.lastIndexOf("."));
File outputDocument = new File(document.getParent(), substring+"_signed.pdf");
FileInputStream fis = new FileInputStream(document);
FileOutputStream fos = new FileOutputStream(outputDocument);
int c;
while ((c = fis.read(buffer)) != -1)
{
fos.write(buffer, 0, c);
}
fis.close();
fis = new FileInputStream(outputDocument);
File scratchFile = File.createTempFile("pdfbox_scratch", ".bin");
RandomAccessFile randomAccessFile = new RandomAccessFile(scratchFile, "rw");
try
{
// load document
PDDocument doc = PDDocument.load(document, randomAccessFile);
// create signature dictionary
PDSignature signature = new PDSignature();
signature.setFilter(PDSignature.FILTER_ADOBE_PPKLITE); // default filter
// subfilter for basic and PAdES Part 2 signatures
signature.setSubFilter(PDSignature.SUBFILTER_ADBE_X509_RSA_SHA1);
signature.setName("name");
signature.setLocation("loc");
signature.setReason("reason");
// the signing date, needed for valid signature
signature.setSignDate(Calendar.getInstance());
// register signature dictionary and sign interface
if (options==null)
{
doc.addSignature(signature, this);
}
else
{
doc.addSignature(signature, this, options);
}
// write incremental (only for signing purpose)
doc.saveIncremental(fis, fos);
}
finally
{
if (randomAccessFile!= null)
{
randomAccessFile.close();
}
if (scratchFile != null && scratchFile.exists() && !scratchFile.delete())
{
scratchFile.deleteOnExit();
}
}
return outputDocument;
}
/**
* <p>
* SignatureInterface implementation.
* </p>
*
* <p>
* This method will be called from inside of the pdfbox and create the pkcs7
* signature. The given InputStream contains the bytes that are providen by
* the byte range.
* </p>
*
* <p>
* This method is for internal use only.
* </p>
*
* <p>
* Here the user should use his favorite cryptographic library and implement a
* pkcs7 signature creation.
* </p>
*/
public byte[] sign(InputStream content) throws SignatureException,
IOException
{
CMSProcessableInputStream input = new CMSProcessableInputStream(content);
CMSSignedDataGenerator gen = new CMSSignedDataGenerator();
// CertificateChain
List<Certificate> certList = Arrays.asList(cert);
CertStore certStore = null;
try
{
certStore = CertStore.getInstance("Collection",
new CollectionCertStoreParameters(certList), provider);
gen.addSigner(privKey, (X509Certificate) certList.get(0),
CMSSignedGenerator.DIGEST_SHA256);
gen.addCertificatesAndCRLs(certStore);
CMSSignedData signedData = gen.generate(input, false, provider);
return signedData.getEncoded();
}
catch (Exception e)
{
// should be handled
System.err.println("Erro ao criar a assinatura.");
e.printStackTrace();
}
throw new RuntimeException("Problem while preparing signature");
}
public static void main(String[] args) throws KeyStoreException,
NoSuchAlgorithmException, CertificateException, FileNotFoundException,
IOException, COSVisitorException, SignatureException
{
File ksFile = new File("mycert22.p12");
KeyStore keystore = KeyStore.getInstance("PKCS12", provider);
char[] pin = "mycert22".toCharArray();
keystore.load(new FileInputStream(ksFile), pin);
File document = new File("C4_p.pdf");
CA_c4_sign_v2 signing = new CA_c4_sign_v2(keystore, pin.clone());
signing.signPDF(document);
}
}
/**
* Wrap a InputStream into a CMSProcessable object for bouncy castle. It's an
* alternative to the CMSProcessableByteArray.
*
* #author Thomas Chojecki
*
*/
class CMSProcessableInputStream implements CMSProcessable
{
InputStream in;
public CMSProcessableInputStream(InputStream is)
{
in = is;
}
public Object getContent()
{
return in;
}
public void write(OutputStream out) throws IOException, CMSException
{
// read the content only one time
byte[] buffer = new byte[8 * 1024];
int read;
while ((read = in.read(buffer)) != -1)
{
out.write(buffer, 0, read);
}
in.close();
}
}

Lucene ,highlighting and NullPointerException

I am trying to highlight some results . I index the body (the text) of my documents in the field "contents" and when I try to highilight using highlighter.getBestFragment(...) I get a NullPointerException .
But when,for exemple I try to highlight the fileName it works properly.
I know since I use only one field with the fileReader or (ParsingReader) my text is tokenized which is different from a file name .
Here's my code ,please help me .
package xxxxxx;
import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.tika.parser.ParsingReader;
public class Indexer {
static long start = 0;
public static void main(String[] args) throws Exception {
System.out.println("l'index se trouve Ã  " + args[0]);
System.out.println("le dossier ou s'effectue l'indexation est :" + args[1]);
if (args.length != 2) {
throw new IllegalArgumentException("Usage: java " + Indexer.class.getName()
+ " <index dir> <data dir>");
}
String indexDir = args[0];
String dataDir = args[1];
start = System.currentTimeMillis();
Indexer indexer = new Indexer(indexDir);
int numIndexed;
try {
numIndexed = indexer.index(dataDir, new TextFilesFilter());
} finally {
indexer.close();
}
long end = System.currentTimeMillis();
System.out.println("Indexing " + numIndexed + " files took "
+ (end - start) + " milliseconds");
}
private IndexWriter writer;
public Indexer(String indexDir) throws IOException, InterruptedException {
Directory dir = FSDirectory.open(new File(indexDir));
writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30), true,
IndexWriter.MaxFieldLength.UNLIMITED);
writer.setUseCompoundFile(true);
}
public void close() throws IOException {
writer.optimize();
writer.close();
}
public int index(String dataDir, FileFilter filter) throws Exception {
File[] files = new File(dataDir).listFiles();
for (File f : files) {
if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead() && (filter == null || filter.accept(f))) {
if (!(f.getCanonicalPath().endsWith("~"))) {
indexFile(f);
}
} else {
index(f.toString(), filter);
}
}
return writer.numDocs();
}
private static class TextFilesFilter implements FileFilter {
public boolean accept(File path) {
return true;
}
}
protected Document getDocument(File f) throws Exception {
// FileReader frf = new FileReader(f);
Document doc = new Document();
Reader reader = new ParsingReader(f);
doc.add(new Field("contents", reader, Field.TermVector.WITH_POSITIONS_OFFSETS));
doc.add(new Field("filename", f.getName(), Field.Store.YES, Field.Index.ANALYZED ));
doc.add(new Field("fullpath", f.getCanonicalPath(),Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
return doc;
}
private void indexFile(File f) throws Exception {
System.out.println("Indexing " + f.getCanonicalPath());
Document doc = getDocument(f);
writer.addDocument(doc);
System.out.println(System.currentTimeMillis() - start);
}
}
-------------------------------------------------------------------
package xxxxxxxxxxxxxxxxxxxx;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
public class Searcher {
public static void main(String[] args) throws IllegalArgumentException,
IOException, ParseException, InvalidTokenOffsetsException {
System.out.println("endroit ou se situe l'index " + args[0]);
System.out.println(args[1]);
if (args.length != 2) {
throw new IllegalArgumentException("Usage: java "
+ Searcher.class.getName()
+ " <index dir> <query>");
}
String indexDir = args[0];
String q = args[1];
search(indexDir, q);
}
public static void search(String indexDir, String q) throws IOException, ParseException, InvalidTokenOffsetsException {
Directory dir = FSDirectory.open(new File(indexDir));
IndexSearcher indexSearcher = new IndexSearcher(dir);
QueryParser parserC = new QueryParser(Version.LUCENE_30, "contents", new StandardAnalyzer(Version.LUCENE_30));
// QueryParser parserN = new QueryParser(Version.LUCENE_30, "filename", new StandardAnalyzer(Version.LUCENE_30));
QueryParser parserP = new QueryParser(Version.LUCENE_30, "fullpath", new StandardAnalyzer(Version.LUCENE_30));
parserC.setDefaultOperator(QueryParser.Operator.OR);
// parserN.setDefaultOperator(QueryParser.Operator.OR);
parserC.setPhraseSlop(10);
// parserN.setPhraseSlop(10);
DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(6);
Query query = new MultiFieldQueryParser(Version.LUCENE_30, new String[]{"contents", "filename"},
new CustomAnalyzer()).parse(q);
Query queryC = parserC.parse(q);
//Query queryN = parserN.parse(q);
dmq.add(queryC);
//dmq.add(queryN);
// dmq.add(query) ;
QueryScorer scorer = new QueryScorer(dmq, "contents");
Highlighter highlighter = new Highlighter(scorer);
highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
System.out.println(query.toString());
long start = System.currentTimeMillis();
TopDocs hits = indexSearcher.search(dmq, 15);
System.out.println(hits.totalHits);
long end = System.currentTimeMillis();
System.err.println("Found " + hits.totalHits
+ " document(s) (in " + (end - start)
+ " milliseconds) that matched query '"
+ q + "':");
for (ScoreDoc scoreDoc : hits.scoreDocs) {
Document doc = indexSearcher.doc(scoreDoc.doc);
System.out.print(scoreDoc.score);
System.out.println(doc.get("fullpath"));
String contents = doc.get("contents"); // I am pretty sure the mistake is here , contents is always Null
//But what can I do to make this thing work ?
TokenStream stream =
TokenSources.getAnyTokenStream(indexSearcher.getIndexReader(),
scoreDoc.doc,
"contents",
doc,
new StandardAnalyzer(Version.LUCENE_30));
String fragment =
highlighter.getBestFragment(stream, contents);
System.out.println(fragment);
}
indexSearcher.close();
}
}
----------------------------------------------------------------------

You need it to be stored if you want to use that highlighter. "filename" is stored but "contents" isn't, which is why you see them behaving differently:
doc.add(new Field("contents", reader, Field.TermVector.WITH_POSITIONS_OFFSETS));
doc.add(new Field("filename", f.getName(), Field.Store.YES, Field.Index.ANALYZED ));

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Text replace with Image in Apache PDFBOX - pdfbox

Can anyone help me out how to replace text with Image by using Apache PDFBOX ?

Related

Apache Lucene 8.4.1 How to get indexed fields and term list?

How can I add source PDF content to destination PDF using iText 7 without losing the header and footer?

How to compile and run java source code in memory [duplicate]

pdfbox sign pdf PKCS12 invalid signature bouncycastle

Lucene ,highlighting and NullPointerException

Categories

Resources