How to remove one indirectly referenced image from a PDF and keep all others? - pdf

I would like to parse a PDF and find the logo via known attributes and when I find a match, remove that image and then copy everything else.
I am using the code below to replace an image with a blank white image to remove a logo from PDFs that are to be printed on letterhead. It replaces the image with a white image of the same size. Is there a way to modify this to actually remove the image (and thus save some space, etc.?).
/// <summary>
/// Console entry point: runs <c>ManipulatePdf</c> on C:\in.pdf, writing C:\out.pdf,
/// then waits for a key press so the console output stays visible.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
private static void Main(string[] args)
{
    // Verbatim string literals take the '@' prefix; '#' does not compile.
    ManipulatePdf(@"C:\in.pdf", @"C:\out.pdf");
    Console.WriteLine("Finished - press a key");
    Console.ReadKey();
}
/// <summary>
/// Copies the PDF at <paramref name="src"/> to <paramref name="dest"/>, overwriting every
/// XObject stream on page 1 whose length exceeds 100000 with a 1x1 white image.
/// </summary>
/// <param name="src">Path of the PDF to read.</param>
/// <param name="dest">Path of the PDF to create (an existing file is replaced).</param>
public static void ManipulatePdf(String src, String dest)
{
    // Streams longer than this are treated as the logo to blank out.
    const int LogoSizeThreshold = 100000;

    Console.WriteLine("Start");
    PdfReader reader = new PdfReader(src);
    try
    {
        // Only the XObject resources of page 1 are inspected.
        PdfDictionary page = reader.GetPageN(1);
        PdfDictionary resources = page.GetAsDict(PdfName.RESOURCES);
        PdfDictionary xobjects = resources == null ? null : resources.GetAsDict(PdfName.XOBJECT);

        if (xobjects != null)
        {
            foreach (PdfName pdfName in xobjects.Keys)
            {
                // Entries that do not resolve to a reader-backed stream are skipped
                // instead of failing on a null reference.
                PRStream stream = xobjects.GetAsStream(pdfName) as PRStream;
                if (stream == null || stream.Length <= LogoSizeThreshold)
                {
                    continue;
                }

                PdfImage image = new PdfImage(MakeBlankImg(), "", null);
                Console.WriteLine("Calling replace stream");
                ReplaceStream(stream, image);
            }
        }

        // The using block releases the output file even if the stamper throws.
        using (FileStream output = new FileStream(dest, FileMode.Create))
        {
            PdfStamper stamper = new PdfStamper(reader, output);
            stamper.Close();
        }
    }
    finally
    {
        reader.Close();
    }
}
/// <summary>
/// Renders a 1x1 white bitmap, encodes it as JPEG in memory and wraps the bytes
/// in an iTextSharp image.
/// </summary>
/// <returns>The iTextSharp image created from the JPEG bytes.</returns>
public static iTextSharp.text.Image MakeBlankImg()
{
    Console.WriteLine("Making small blank image");

    byte[] jpegBytes;
    using (MemoryStream buffer = new MemoryStream())
    using (Bitmap pixel = new Bitmap(1, 1))
    {
        using (Graphics canvas = Graphics.FromImage(pixel))
        {
            canvas.Clear(Color.White);
            canvas.Flush();
        }

        pixel.Save(buffer, ImageFormat.Jpeg);
        jpegBytes = buffer.ToArray();
    }

    Console.WriteLine("Image array is " + jpegBytes.Length + " bytes.");
    return iTextSharp.text.Image.GetInstance(jpegBytes);
}
/// <summary>
/// Replaces the content and dictionary entries of <paramref name="orig"/> with those of
/// <paramref name="stream"/>, logging each copied key to the console.
/// </summary>
/// <param name="orig">Stream whose entries are cleared and overwritten.</param>
/// <param name="stream">Stream supplying the new content bytes and dictionary entries.</param>
public static void ReplaceStream(PRStream orig, PdfStream stream)
{
    orig.Clear();

    // Serialize the replacement content, then hand the raw bytes to the original stream.
    byte[] payload;
    using (MemoryStream buffer = new MemoryStream())
    {
        stream.WriteContent(buffer);
        payload = buffer.ToArray();
    }
    orig.SetData(payload, false);

    Console.WriteLine("Iterating keys");
    foreach (KeyValuePair<PdfName, PdfObject> entry in stream)
    {
        PdfName key = entry.Key;
        Console.WriteLine("Key: " + key.ToString());
        orig.Put(key, stream.Get(key));
    }
}
}

Related

Fill XFA without breaking usage rights

I have an XFA form that I can successfully fill in by extracting the XML modifying and writing back. Works great if you have the full Adobe Acrobat, but fails with Adobe Reader. I have seen various questions on the same thing with answers but they were some time ago so updating an XFA that is readable by Adobe Reader may no longer be doable?
I use this code below and I've utilised the StampingProperties of append as in the iText example but still failing. I'm using iText 7.1.15.
// Open the source PDF in append mode; the result is written to a new file.
PdfDocument pdf = new(
    new PdfReader(FileToProcess),
    new PdfWriter(NewPDF),
    new StampingProperties().UseAppendMode());
XfaForm xfa = PdfAcroForm.GetAcroForm(pdf, true).GetXfaForm();

// Start from the datasets node and narrow to its first child element named "data", if any.
XElement node = xfa.GetDatasetsNode();
foreach (XNode child in node.Nodes())
{
    if (child is XElement { Name: { LocalName: "data" } } dataElement)
    {
        node = dataElement;
        break;
    }
}

// Dump the selected node as indented XML to XMLOutput.
XmlWriterSettings writerSettings = new() { Indent = true };
using (XmlWriter xmlWriter = XmlWriter.Create(XMLOutput, writerSettings))
{
    node.WriteTo(xmlWriter);
    xmlWriter.Flush();
    xmlWriter.Close();
}
// We now have to strip an extra xfa line if updating.
if (update)
{
    string TempXML = CSTrackerHelper.MakePath($"{AppContext.BaseDirectory}Temp", $"{Guid.NewGuid()}.XML");

    // using blocks close both files even when reading or writing throws.
    using (StreamReader fsin = new(XMLOutput))
    using (StreamWriter fsout = new(TempXML))
    {
        // The loop runs one line behind: each pass writes the line read on the
        // previous pass, then reads the next one. As a result:
        //  - the third line of the input is skipped (cnt == 3 when it is pending),
        //  - empty lines are skipped,
        //  - the last line read is never written.
        // NOTE(review): dropping the final line looks paired with dropping line 3
        // (opening/closing tag of a wrapper element?) - confirm this is intended.
        string linedata = string.Empty;
        int cnt = 0;
        while (!fsin.EndOfStream)
        {
            if (cnt != 3 && linedata != string.Empty)
            {
                fsout.WriteLine(linedata);
            }
            linedata = fsin.ReadLine();
            cnt++;
        }
    }

    XMLOutput = TempXML;
}
xlogger.Info("Populating pdf fields");
// Load the extracted XML and write every configured field value into it.
XmlDocument xmldoc = new();
xmldoc.Load(XMLOutput);
XmlNamespaceManager xmlnsManager = new(xmldoc.NameTable);
// Verbatim string literals take the '@' prefix; '#' does not compile.
xmlnsManager.AddNamespace("xfa", @"http://www.xfa.org/schema/xfa-data/1.0/");

foreach (KeyValuePair<string, DocumentFieldData> v in DocumentData.FieldData)
{
    if (string.IsNullOrEmpty(v.Value.Field))
    {
        continue;
    }

    // A field may list several XPath targets separated by ';'. Split already
    // returns a single-element array when there is no separator.
    foreach (string FValue in v.Value.Field.Split(';'))
    {
        int braceIndex = FValue.LastIndexOf('{');
        if (braceIndex < 0)
        {
            // Plain XPath: update the first matching node, if any.
            XmlNodeList matches = xmldoc.SelectNodes(FValue, xmlnsManager);
            if (matches.Count >= 1)
            {
                matches[0].InnerText = v.Value.Data;
            }
            continue;
        }

        // "xpath{n}" form: the part before the last '{' is the XPath and n selects
        // one of its matches. The index is read from the same '{' the XPath is cut at.
        XmlNodeList candidates = xmldoc.SelectNodes(FValue.Substring(0, braceIndex), xmlnsManager);
        // NOTE(review): an indexed target is applied only when the XPath matches more
        // than one node; "{0}" with exactly one match is ignored - confirm intended.
        if (candidates.Count > 1)
        {
            int NodeIndex = int.Parse(FValue.Substring(braceIndex + 1).Replace("}", ""));
            candidates[NodeIndex].InnerText = v.Value.Data;
        }
    }
}
xmldoc.Save(XMLOutput);
// Now that the XML is updated, apply it to the pdf.
// The using block releases the XML file handle once the form has been filled.
using (FileStream xmlInput = new FileStream(XMLOutput, FileMode.Open, FileAccess.Read))
{
    xfa.FillXfaForm(xmlInput);
}
xfa.Write(pdf);
pdf.Close();
FYI, I've also tried setting a field directly, with the same results.
// Variant: set one XFA field by its full name instead of round-tripping the data XML.
var pdfDoc = new PdfDocument(
    new PdfReader(source),
    new PdfWriter(dest),
    new StampingProperties().UseAppendMode());
var pdfForm = PdfAcroForm.GetAcroForm(pdfDoc, true);
var xform = pdfForm.GetXfaForm();

xform.SetXfaFieldValue("VRM[0].CoverPage[0].Wrap2[0].Table[0].CSID[0]", "Test");
xform.Write(pdfForm);
pdfDoc.Close();
If anyone has any ideas it would be appreciated.
Cheers
I ran into a very similar issue. I was attempting to auto-fill an XFA form that was password protected without breaking the certificate or usage rights (it allowed filling). iText 7 seems to have made this impossible for legal/practical reasons; however, it is still very much possible with iText 5. I wrote the following working code using iTextSharp (the C# version of iText 5):
using iTextSharp.text;
using iTextSharp.text.pdf;
// Empty (encrypted) form, destination for the filled copy, and the XML data set to apply.
var emptyFormPath = "/Users/home/Desktop/c#pdfParser/encrypted_empty.pdf";
var filledFormPath = "/Users/home/Desktop/c#pdfParser/xfa_encrypted_filled.pdf";
var xmlDataPath = "/Users/home/Desktop/c#pdfParser/sample_data.xml";

FillByItextSharp5(emptyFormPath, filledFormPath, xmlDataPath);
/// <summary>
/// Fills the XFA form in <paramref name="pathToRead"/> with the XML in <paramref name="data"/>
/// and writes the result to <paramref name="pathToSave"/> using an append-mode stamper.
/// </summary>
/// <param name="pathToRead">Path of the PDF form to fill.</param>
/// <param name="pathToSave">Path of the filled PDF to create (an existing file is replaced).</param>
/// <param name="data">Path of the XML file holding the form data.</param>
static void FillByItextSharp5(string pathToRead, string pathToSave, string data)
{
    // Both inputs are only read, so open them read-only; the default ReadWrite
    // access fails on read-only files.
    using (FileStream pdf = new FileStream(pathToRead, FileMode.Open, FileAccess.Read))
    using (FileStream xml = new FileStream(data, FileMode.Open, FileAccess.Read))
    using (FileStream filledPdf = new FileStream(pathToSave, FileMode.Create))
    {
        // Static flag on PdfReader; it stays set for readers created afterwards.
        PdfReader.unethicalreading = true;
        PdfReader pdfReader = new PdfReader(pdf);
        try
        {
            // The final 'true' selects append mode.
            PdfStamper stamper = new PdfStamper(pdfReader, filledPdf, '\0', true);
            stamper.AcroFields.Xfa.FillXfaForm(xml, true);
            stamper.Close();
        }
        finally
        {
            // Release the reader even when filling or stamping throws.
            pdfReader.Close();
        }
    }
}
PdfStamper stamper = new PdfStamper(pdfReader, filledPdf, '\0', true)
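You have to use this line: the final `true` argument opens the stamper in append mode, so the changes are added to the original file as an incremental update instead of rewriting it.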

Extract images of signatures contained in a pdf file with iText7

I am wondering how we can use ITEXT7 to extract image info associated to digital signatures. I know there have been similar questions asked in the past, but they were mostly around ITEXT5, which is quite different from the ITEXT7 after all the updates and modifications to the software.
You can extract the image from a signature appearance using low-level API.
Complete Java code:
/**
 * Extracts the image "Im1" from the normal appearance of a signature field and writes it to
 * the output folder as {@code file_comp_<objNumber>.<ext>}. When the image has a soft mask,
 * the mask is also written as {@code file_mask_<objNumber>.<ext>} and applied to the
 * extracted image as an alpha channel before saving.
 *
 * <p>The appearance is expected to contain a form XObject named "FRM" whose resources hold
 * the image "Im1"; NOTE(review): appearances laid out differently are presumably not handled
 * by this lookup chain.
 *
 * @param document  the PDF to read; it is closed before this method returns
 * @param fieldName the name of the signature form field
 * @throws IOException if an image file cannot be written
 */
private void saveImageFromSignature(PdfDocument document, String fieldName) throws IOException {
    PdfAcroForm acroForm = PdfAcroForm.getAcroForm(document, false);
    // Use the fieldName parameter; the original referenced an undefined variable "name".
    PdfDictionary xObject = acroForm.getField(fieldName)
            .getWidgets()
            .get(0)
            .getNormalAppearanceObject()
            .getAsDictionary(PdfName.Resources)
            .getAsDictionary(PdfName.XObject)
            .getAsStream(new PdfName("FRM"))
            .getAsDictionary(PdfName.Resources)
            .getAsDictionary(PdfName.XObject);
    PdfStream stream = xObject.getAsStream(new PdfName("Im1"));
    PdfImageXObject image = new PdfImageXObject(stream);

    // Both output files share the image's extension and indirect object number.
    String extension = image.identifyImageFileExtension();
    int objNumber = image.getPdfObject().getIndirectReference().getObjNumber();

    BufferedImage result = createImageFromBytes(image.getImageBytes());

    // pdf allows using masked image in the signature appearance
    PdfStream maskStream = stream.getAsStream(PdfName.SMask);
    if (maskStream != null) {
        PdfImageXObject maskImage = new PdfImageXObject(maskStream);
        BufferedImage maskBimage = createImageFromBytes(maskImage.getImageBytes());
        // The folder is concatenated outside String.format so a '%' in the path
        // is not interpreted as a format specifier.
        String fileMask = getOutputFolder() + String.format("/file_mask_%d.%s", objNumber, extension);
        ImageIO.write(maskBimage, extension, new File(fileMask));

        // the mask defines an alpha channel
        Image transpImg = transformToTransperency(maskBimage);
        result = applyTransperency(result, transpImg);
    }

    String filenameComp = getOutputFolder() + String.format("/file_comp_%d.%s", objNumber, extension);
    ImageIO.write(result, extension, new File(filenameComp));
    document.close();
}
private Image transformToTransperency(BufferedImage bi) {
ImageFilter filter = new RGBImageFilter() {
#Override
public int filterRGB(int x, int y, int rgb) {
return (rgb << 8) & 0xFF000000;
}
};
ImageProducer ip = new FilteredImageSource(bi.getSource(), filter);
return Toolkit.getDefaultToolkit().createImage(ip);
}
/**
 * Returns an ARGB copy of {@code bi} whose pixels are kept only in proportion to the alpha
 * of {@code mask}: the source is painted first, then the mask is composited with DST_IN.
 *
 * @param bi   the image to make (partly) transparent
 * @param mask the image whose alpha channel decides what stays visible
 * @return a new TYPE_INT_ARGB image with the same size as {@code bi}
 */
private BufferedImage applyTransperency(BufferedImage bi, Image mask) {
    final int width = bi.getWidth();
    final int height = bi.getHeight();
    BufferedImage masked = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);

    Graphics2D canvas = masked.createGraphics();
    canvas.drawImage(bi, 0, 0, null);
    // DST_IN keeps what is already drawn, scaled by the alpha of what is drawn next.
    canvas.setComposite(AlphaComposite.getInstance(AlphaComposite.DST_IN, 1.0F));
    canvas.drawImage(mask, 0, 0, null);
    canvas.dispose();

    return masked;
}
Upd: This works for a very limited number of cases. Thanks to @mkl.
First of all, thank you for the proposals which personally guided me.
After several tries, here is the code that worked for me:
/**
 * Extracts the image drawn by the normal appearance of a form field and writes it next to
 * the input file as {@code <inputFilename>_<fieldName>_<millis>.<ext>}, where the extension
 * is the one the image itself reports.
 *
 * <p>Any exception raised while processing is printed to standard error and not rethrown.
 *
 * @param inputFilename path of the PDF to read
 * @param fieldName     name of the (signature) form field
 * @throws IOException declared for callers; failures inside the method body are caught
 */
public void extract(String inputFilename, String fieldName) throws IOException {
    try (PdfDocument document = new PdfDocument(new PdfReader(inputFilename))) {
        PdfAcroForm acroForm = PdfAcroForm.getAcroForm(document, false);
        final PdfFormField signatorySignature1 = acroForm.getField(fieldName);
        final PdfDictionary appearanceDic = signatorySignature1.getPdfObject().getAsDictionary(PdfName.AP);
        final PdfStream normalAppearance = appearanceDic.getAsStream(PdfName.N);
        final PdfDictionary ressourceDic = normalAppearance.getAsDictionary(PdfName.Resources);
        PdfResources resources = new PdfResources(ressourceDic);

        final ImageRenderInfo imageRenderInfo = extractImageRenderInfo(normalAppearance.getBytes(), resources);
        if (imageRenderInfo == null) {
            // The appearance stream drew no image; report it instead of failing on null.
            System.err.println("No image found in the appearance of field " + fieldName);
            return;
        }

        // Name the file after the actual image type rather than assuming PNG.
        final var image = imageRenderInfo.getImage();
        Files.write(
                Path.of(inputFilename + "_" + fieldName + "_" + System.currentTimeMillis()
                        + "." + image.identifyImageFileExtension()),
                image.getImageBytes());
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Runs the given content stream through a canvas processor and returns the image render
 * info captured by {@code MyLocationExtractionStrategy}.
 *
 * @param contentBytes the content stream bytes to process
 * @param pdfResource  the resources the content stream refers to
 * @return the captured image render info, or {@code null} if none was captured
 */
public ImageRenderInfo extractImageRenderInfo(byte[] contentBytes, PdfResources pdfResource) {
    final MyLocationExtractionStrategy imageCollector = new MyLocationExtractionStrategy();
    new PdfCanvasProcessor(imageCollector, new HashMap<>()).processContent(contentBytes, pdfResource);
    return imageCollector.getImageRenderInfo();
}
class MyLocationExtractionStrategy implements ILocationExtractionStrategy {
private ImageRenderInfo imageRenderInfo;
#Override public Collection<IPdfTextLocation> getResultantLocations() {
return null;
}
#Override public void eventOccurred(IEventData iEventData, EventType eventType) {
if (eventType.equals(EventType.RENDER_IMAGE)) {
imageRenderInfo = (ImageRenderInfo)iEventData;
}
}
#Override public Set<EventType> getSupportedEvents() {
return null;
}
public ImageRenderInfo getImageRenderInfo() {
return this.imageRenderInfo;
}
}

CopyToAsync() doesn't fill the memory stream

I'm using a Xamarin.Forms app and need to save a file (in this situation a PDF file). This is my scenario: I'm using the media plugin to take images with the camera, and from those images I generate a PDF file with a PdfDocument object:
// Build a PDF with one page per captured image.
PdfDocument document = new PdfDocument();
foreach (Stream imageStream in Images)
{
    PdfPage page = document.Pages.Add();
    PdfBitmap image = new PdfBitmap(imageStream);
    page.Graphics.DrawImage(image, new PointF(40, 100));
}

MemoryStream stream = new MemoryStream();
document.Save(stream);
// Writing leaves the stream positioned at its end; rewind it so that a consumer
// copying from the current position (e.g. CopyToAsync) sees the PDF bytes.
stream.Position = 0;
document.Close(true);

// NOTE(review): blocking on .Result can deadlock on a UI thread; prefer awaiting
// SaveFile when the calling method can be made async.
String localPath =
    Task.Run(() => DependencyService.Get<ISave>().SaveFile(stream, "test.pdf")).Result;
Everything up to here works fine: it generates a PDF document with pages, and the stream is filled with bytes. The problem is in this SaveFile:
[assembly: Dependency(typeof(Save))]
namespace PdfSave.Droid.Shared
{
    /// <summary>
    /// Saves streams as files under the "TestFolder" directory of the personal folder.
    /// </summary>
    public class Save : ISave
    {
        private readonly string _rootDir = Path.Combine(System.Environment.GetFolderPath(System.Environment.SpecialFolder.Personal), "TestFolder");

        /// <summary>
        /// Writes <paramref name="pdfStream"/> to a file named <paramref name="fileName"/>
        /// in the root directory, replacing any existing file.
        /// </summary>
        /// <param name="pdfStream">Stream holding the file content.</param>
        /// <param name="fileName">Name of the file to create inside the root directory.</param>
        /// <returns>The full path of the written file.</returns>
        public async Task<string> SaveFile(Stream pdfStream, string fileName)
        {
            if (pdfStream == null)
            {
                throw new System.ArgumentNullException(nameof(pdfStream));
            }

            // CreateDirectory does nothing when the directory already exists.
            Directory.CreateDirectory(_rootDir);
            var filePath = Path.Combine(_rootDir, fileName);

            // A stream that was just written to sits at its end, so copying from it
            // yields zero bytes. Rewind in that case; a stream positioned elsewhere
            // is left alone and copied from its current position as before.
            if (pdfStream.CanSeek && pdfStream.Length > 0 && pdfStream.Position == pdfStream.Length)
            {
                pdfStream.Position = 0;
            }

            // Copy straight into the file instead of buffering everything a second time.
            using (var fileStream = new FileStream(filePath, FileMode.Create, FileAccess.Write))
            {
                await pdfStream.CopyToAsync(fileStream);
            }

            return filePath;
        }
    }
}
the problem is in this line
await pdfStream.CopyToAsync(memoryStream);
The memory stream is empty! Does anyone know what the problem might be?

How to hide layers when merging multiple pdf documents

I'm using iText 5 to fill existing pdf forms with content and then merge them into a single pdf. I also want to turn on/off layers, but after merging all layers are visible.
This code shows the problem without using existing pdf forms. I would like to hide layer two, but it does not seem to work.
/// <summary>
/// Creates one layered page, merges two copies of it and writes the result to c:\test1.pdf.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    byte[] pdfPage = CreatePage();
    byte[] result = Merge(new byte[][] { pdfPage, pdfPage });
    // Verbatim string literals take the '@' prefix; '#' does not compile.
    File.WriteAllBytes(@"c:\test1.pdf", result);
}
/// <summary>
/// Builds a one-page PDF in memory with two layers, each holding one line of text;
/// "Layer 1" is switched on and "Layer 2" is switched off.
/// </summary>
/// <returns>The bytes of the generated PDF.</returns>
private static byte[] CreatePage()
{
    var pdf = new Document();
    var output = new MemoryStream();
    var pdfWriter = PdfWriter.GetInstance(pdf, output);
    pdf.Open();

    var visibleLayer = new PdfLayer("Layer 1", pdfWriter);
    var hiddenLayer = new PdfLayer("Layer 2", pdfWriter);
    PdfContentByte canvas = pdfWriter.DirectContent;

    canvas.BeginLayer(visibleLayer);
    ColumnText.ShowTextAligned(canvas, Element.ALIGN_LEFT, new Phrase("Layertext 1"), 100, 700, 0);
    canvas.EndLayer();

    canvas.BeginLayer(hiddenLayer);
    ColumnText.ShowTextAligned(canvas, Element.ALIGN_LEFT, new Phrase("Layertext 2"), 100, 600, 0);
    canvas.EndLayer();

    // Layer 1 stays visible, layer 2 is turned off.
    visibleLayer.On = true;
    hiddenLayer.On = false;

    pdf.Close();
    return output.ToArray();
}
/// <summary>
/// Concatenates the first page of every PDF in <paramref name="pages"/> into one document.
/// </summary>
/// <param name="pages">PDF documents, each given as a byte array.</param>
/// <returns>The bytes of the merged PDF.</returns>
private static byte[] Merge(byte[][] pages)
{
    var merged = new Document();
    var buffer = new MemoryStream();
    using (buffer)
    {
        var copier = new PdfCopy(merged, buffer);
        merged.Open();

        for (int i = 0; i < pages.Length; i++)
        {
            var source = new PdfReader(new MemoryStream(pages[i]));
            copier.AddPage(copier.GetImportedPage(source, 1));
            source.Close();
        }

        merged.Close();
    }

    // ToArray is still valid after the MemoryStream has been disposed.
    return buffer.ToArray();
}

how to append one pdf to other pdf file using itextsharp

How can I append pages from one PDF file to another without creating a new PDF, using iTextSharp? I have metadata attached to one PDF, so I want to add only the pages of the other PDF, so that the first PDF's metadata remains as it is.
Regards
Himvj
Assuming you have 2 pdf files: file1.pdf and file2.pdf that you want to concatenate and save the resulting pdf to file1.pdf (by replacing its contents) you could try the following:
// Concatenate file1.pdf and file2.pdf in memory, then overwrite file1.pdf with the result.
using (var output = new MemoryStream())
{
    var document = new Document();
    var writer = new PdfCopy(document, output);
    document.Open();
    foreach (var file in new[] { "file1.pdf", "file2.pdf" })
    {
        var reader = new PdfReader(file);
        try
        {
            int n = reader.NumberOfPages;
            for (int p = 1; p <= n; p++)
            {
                writer.AddPage(writer.GetImportedPage(reader, p));
            }
            // Let the copy drop its references to this reader before it is closed.
            writer.FreeReader(reader);
        }
        finally
        {
            // Close every reader so file1.pdf is no longer held when it is overwritten below.
            reader.Close();
        }
    }
    document.Close();
    File.WriteAllBytes("file1.pdf", output.ToArray());
}
You can try this; it adds the whole document with metadata:
/// <summary>
/// Merges the PDFs listed in <paramref name="sourceFiles"/>, in order, into
/// <paramref name="destinationFile"/>. Bookmarks of the source files are not merged.
/// </summary>
/// <param name="destinationFile">Path of the merged PDF to create.</param>
/// <param name="sourceFiles">Paths of the PDFs to merge.</param>
/// <remarks>
/// Any exception is written to standard error and the method then waits for a line of
/// console input; nothing is rethrown.
/// </remarks>
public static void MergeFiles(string destinationFile, string[] sourceFiles)
{
    try
    {
        // 1: Collect the merged document in memory.
        using (MemoryStream ms = new MemoryStream())
        {
            // 2: Create the copier and configure the destination document.
            PdfCopyFields copy = new PdfCopyFields(ms);
            copy.Writer.ViewerPreferences = PdfWriter.PageModeUseOutlines;

            // 3: Import every source document. Readers are kept open until the copier
            //    has been closed, in case it still reads from them at that point.
            PdfReader[] readers = new PdfReader[sourceFiles.Length];
            try
            {
                for (int f = 0; f < sourceFiles.Length; f++)
                {
                    readers[f] = new PdfReader(sourceFiles[f]);
                    copy.AddDocument(readers[f]);
                }

                // 4: Close the copier to finish the merged document.
                copy.Close();
            }
            finally
            {
                foreach (PdfReader reader in readers)
                {
                    if (reader != null)
                    {
                        reader.Close();
                    }
                }
            }

            // 5: Save the merged bytes to the destination file.
            MemoryStreamToFile(ms, destinationFile);
        }
    }
    catch (System.Exception e)
    {
        System.Console.Error.WriteLine(e.Message);
        System.Console.Error.WriteLine(e.StackTrace);
        // Kept from the original: pause so the error can be read in a console window.
        System.Console.ReadLine();
    }
}
/// <summary>
/// Writes the entire content of <paramref name="MS"/> to the file <paramref name="FileName"/>,
/// replacing any existing file.
/// </summary>
/// <param name="MS">Memory stream whose bytes are written; its position is ignored.</param>
/// <param name="FileName">Path of the file to create.</param>
public static void MemoryStreamToFile(MemoryStream MS, string FileName)
{
    using (FileStream fs = new FileStream(FileName, FileMode.Create))
    {
        // ToArray (rather than WriteTo) is used on purpose: it still works after the
        // MemoryStream has been closed.
        byte[] data = MS.ToArray();
        fs.Write(data, 0, data.Length);
    }
}