How to replace / remove text from a PDF file

How would I go about replacing / removing text from a PDF file?
I have a PDF file that I obtained somewhere, and I want to be able to replace some text within it.
Or, I have a PDF file in which I want to obscure (redact) some of the text so that it's no longer visible [and so that it looks cool, like the CIA files].
Or, I have a PDF that contains global JavaScript that I want to stop from interrupting my use of the PDF.

This is possible in a limited fashion with the use of iText / iTextSharp.
It will only work with Tj/TJ opcodes (i.e. standard text, not text embedded in images, or drawn with shapes).
You need to override the default PdfContentStreamProcessor to act on the page content streams, as presented by Mkl here: Removing Watermark from PDF iTextSharp. Inherit from this class, and in your new class look for the Tj/TJ opcodes; the operand(s) will generally be the text element(s) (for a TJ this may not be straightforward text, and may require further parsing of all the operands).
A pretty basic example of some of this flexibility with iTextSharp is available in this GitHub repository: https://github.com/bevanweiss/PdfEditor (code excerpts are also included below).
NOTE: This utilises the AGPL version of iTextSharp (and is hence also AGPL), so if you will be distributing executables derived from this code or allowing others to interact with those executables in any way then you must also provide your modified source code. There is also no warranty, implied or expressed, related to this code. Use at your own peril.
PdfContentStreamEditor
using System.Collections.Generic;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
namespace PDFCleaner
{
public class PdfContentStreamEditor : PdfContentStreamProcessor
{
/**
* This method edits the immediate contents of a page, i.e. its content stream.
* It explicitly does not descend into form xobjects, patterns, or annotations.
*/
public void EditPage(PdfStamper pdfStamper, int pageNum)
{
var pdfReader = pdfStamper.Reader;
var page = pdfReader.GetPageN(pageNum);
var pageContentInput = ContentByteUtils.GetContentBytesForPage(pdfReader, pageNum);
page.Remove(PdfName.CONTENTS);
EditContent(pageContentInput, page.GetAsDict(PdfName.RESOURCES), pdfStamper.GetUnderContent(pageNum));
}
/**
* This method processes the content bytes and outputs to the given canvas.
* It explicitly does not descend into form xobjects, patterns, or annotations.
*/
public virtual void EditContent(byte[] contentBytes, PdfDictionary resources, PdfContentByte canvas)
{
this.Canvas = canvas;
ProcessContent(contentBytes, resources);
this.Canvas = null;
}
/**
* This method writes content stream operations to the target canvas. The default
* implementation writes them as they come, so it essentially generates identical
* copies of the original instructions the {@link ContentOperatorWrapper} instances
* forward to it.
*
* Override this method to achieve some fancy editing effect.
*/
protected virtual void Write(PdfContentStreamProcessor processor, PdfLiteral operatorLit, List<PdfObject> operands)
{
var index = 0;
foreach (var pdfObject in operands)
{
pdfObject.ToPdf(null, Canvas.InternalBuffer);
Canvas.InternalBuffer.Append(operands.Count > ++index ? (byte) ' ' : (byte) '\n');
}
}
//
// constructor giving the parent a dummy listener to talk to
//
public PdfContentStreamEditor() : base(new DummyRenderListener())
{
}
//
// constructor giving the parent a dummy listener to talk to
//
public PdfContentStreamEditor(IRenderListener renderListener) : base(renderListener)
{
}
//
// Overrides of PdfContentStreamProcessor methods
//
public override IContentOperator RegisterContentOperator(string operatorString, IContentOperator newOperator)
{
var wrapper = new ContentOperatorWrapper();
wrapper.SetOriginalOperator(newOperator);
var formerOperator = base.RegisterContentOperator(operatorString, wrapper);
return (formerOperator is ContentOperatorWrapper operatorWrapper ? operatorWrapper.GetOriginalOperator() : formerOperator);
}
public override void ProcessContent(byte[] contentBytes, PdfDictionary resources)
{
this.Resources = resources;
base.ProcessContent(contentBytes, resources);
this.Resources = null;
}
//
// members holding the output canvas and the resources
//
protected PdfContentByte Canvas = null;
protected PdfDictionary Resources = null;
//
// A content operator class to wrap all content operators to forward the invocation to the editor
//
class ContentOperatorWrapper : IContentOperator
{
public IContentOperator GetOriginalOperator()
{
return _originalOperator;
}
public void SetOriginalOperator(IContentOperator op)
{
this._originalOperator = op;
}
public void Invoke(PdfContentStreamProcessor processor, PdfLiteral oper, List<PdfObject> operands)
{
if (_originalOperator != null && !"Do".Equals(oper.ToString()))
{
_originalOperator.Invoke(processor, oper, operands);
}
((PdfContentStreamEditor)processor).Write(processor, oper, operands);
}
private IContentOperator _originalOperator = null;
}
//
// A dummy render listener to give to the underlying content stream processor to feed events to
//
class DummyRenderListener : IRenderListener
{
public void BeginTextBlock() { }
public void RenderText(TextRenderInfo renderInfo) { }
public void EndTextBlock() { }
public void RenderImage(ImageRenderInfo renderInfo) { }
}
}
}
TextReplaceStreamEditor
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
namespace PDFCleaner
{
public class TextReplaceStreamEditor : PdfContentStreamEditor
{
public TextReplaceStreamEditor(string MatchPattern, string ReplacePattern)
{
_matchPattern = MatchPattern;
_replacePattern = ReplacePattern;
}
private string _matchPattern;
private string _replacePattern;
protected override void Write(PdfContentStreamProcessor processor, PdfLiteral oper, List<PdfObject> operands)
{
var operatorString = oper.ToString();
if ("Tj".Equals(operatorString) || "TJ".Equals(operatorString))
{
for(var i = 0; i < operands.Count; i++)
{
if(!operands[i].IsString())
continue;
var text = operands[i].ToString();
if(Regex.IsMatch(text, _matchPattern))
{
operands[i] = new PdfString(Regex.Replace(text, _matchPattern, _replacePattern));
}
}
}
base.Write(processor, oper, operands);
}
}
}
TextRedactStreamEditor
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using iTextSharp.text;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
namespace PDFCleaner
{
public class TextRedactStreamEditor : PdfContentStreamEditor
{
public TextRedactStreamEditor(string MatchPattern) : base(new RedactRenderListener(MatchPattern))
{
_matchPattern = MatchPattern;
}
private string _matchPattern;
protected override void Write(PdfContentStreamProcessor processor, PdfLiteral oper, List<PdfObject> operands)
{
base.Write(processor, oper, operands);
}
public override void EditContent(byte[] contentBytes, PdfDictionary resources, PdfContentByte canvas)
{
((RedactRenderListener)base.RenderListener).SetCanvas(canvas);
base.EditContent(contentBytes, resources, canvas);
}
}
//
// A pretty simple render listener; all we care about is text.
// We listen out for text blocks, look for our text, and then put a
// black box over it... text 'redacted'
//
class RedactRenderListener : IRenderListener
{
private PdfContentByte _canvas;
private string _matchPattern;
public RedactRenderListener(string MatchPattern)
{
_matchPattern = MatchPattern;
}
public RedactRenderListener(PdfContentByte Canvas, string MatchPattern)
{
_canvas = Canvas;
_matchPattern = MatchPattern;
}
public void SetCanvas(PdfContentByte Canvas)
{
_canvas = Canvas;
}
public void BeginTextBlock() { }
public void RenderText(TextRenderInfo renderInfo)
{
var text = renderInfo.GetText();
var match = Regex.Match(text, _matchPattern);
if(match.Success)
{
var p1 = renderInfo.GetCharacterRenderInfos()[match.Index].GetAscentLine().GetStartPoint();
var p2 = renderInfo.GetCharacterRenderInfos()[match.Index+match.Length].GetAscentLine().GetEndPoint();
var p3 = renderInfo.GetCharacterRenderInfos()[match.Index+match.Length].GetDescentLine().GetEndPoint();
var p4 = renderInfo.GetCharacterRenderInfos()[match.Index].GetDescentLine().GetStartPoint();
_canvas.SaveState();
_canvas.SetColorStroke(BaseColor.BLACK);
_canvas.SetColorFill(BaseColor.BLACK);
_canvas.MoveTo(p1[Vector.I1], p1[Vector.I2]);
_canvas.LineTo(p2[Vector.I1], p2[Vector.I2]);
_canvas.LineTo(p3[Vector.I1], p3[Vector.I2]);
_canvas.LineTo(p4[Vector.I1], p4[Vector.I2]);
_canvas.ClosePathFillStroke();
_canvas.RestoreState();
}
}
public void EndTextBlock() { }
public void RenderImage(ImageRenderInfo renderInfo) { }
}
}
Using them with iTextSharp
var reader = new PdfReader("SRC FILE PATH GOES HERE");
var dstFile = File.Open("DST FILE PATH GOES HERE", FileMode.Create);
var pdfStamper = new PdfStamper(reader, dstFile, reader.PdfVersion, false);
// We don't need to auto-rotate, as the PdfContentStreamEditor will already deal with pre-rotated space..
// if we enable this we will inadvertently rotate the content.
pdfStamper.RotateContents = false;
// This is for the Text Replace
var replaceTextProcessor = new TextReplaceStreamEditor(
"TEXT TO REPLACE HERE",
"TEXT TO SUBSTITUTE IN HERE");
for(int i=1; i <= reader.NumberOfPages; i++)
replaceTextProcessor.EditPage(pdfStamper, i);
// This is for the Text Redact
var redactTextProcessor = new TextRedactStreamEditor(
"TEXT TO REDACT HERE");
for(int i=1; i <= reader.NumberOfPages; i++)
redactTextProcessor.EditPage(pdfStamper, i);
// Since our redacting just puts a box over the top, we should secure the document a bit, just to prevent people copying/pasting the text behind the box.
// We also prevent text-to-speech processing of the file, otherwise the 'hidden' text would be read aloud.
pdfStamper.Writer.SetEncryption(null,
Encoding.UTF8.GetBytes("ownerPassword"),
PdfWriter.AllowDegradedPrinting | PdfWriter.AllowPrinting,
PdfWriter.ENCRYPTION_AES_256);
// hey, lets get rid of Javascript too, because it's annoying
pdfStamper.Javascript = "";
// and then finally we close our files (saving it in the process)
pdfStamper.Close();
reader.Close();

You can use GroupDocs.Redaction (available for .NET) to replace or remove text from PDF documents. You can perform exact-phrase, case-sensitive, and regular-expression redaction (removal) of text. The following code snippet replaces the word "candy" with "[redacted]" in the loaded PDF document.
C#:
using (Document doc = Redactor.Load("D:\\candy.pdf"))
{
doc.RedactWith(new ExactPhraseRedaction("candy", new ReplacementOptions("[redacted]")));
// Save the document to "*_Redacted.*" file.
doc.Save(new SaveOptions() { AddSuffix = true, RasterizeToPDF = false });
}
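The library also supports regex-based redaction. Below is a minimal sketch following the same Load/RedactWith pattern as above; the RegexRedaction class name and the example pattern are assumptions based on my understanding of the GroupDocs.Redaction API, so verify them against the version you are using:
C#:
using (Document doc = Redactor.Load("D:\\orders.pdf"))
{
    // Assumed API: RegexRedaction is expected to work like ExactPhraseRedaction above,
    // replacing every match of the pattern with the given replacement text.
    doc.RedactWith(new RegexRedaction(@"\d{4}(\s?\d{4}){3}", new ReplacementOptions("[redacted]")));
    // Save the document to "*_Redacted.*" file.
    doc.Save(new SaveOptions() { AddSuffix = true, RasterizeToPDF = false });
}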
Disclosure: I work as Developer Evangelist at GroupDocs.

Related

What am I doing wrong in my algorithm for comparing samples?

I want to compare and recognize two sound streams. I created my own algorithm, but it's not working the way I would like. For example, when I compare a few letters "A,B,C" with "D,E,F", or the words "facebook" with "music", the algorithm reports them as a match, even though they are not the same words. Is my algorithm just that imprecise, or is it caused by the quality of the sound recorded with my laptop's microphone?
My concept for the comparison algorithm:
I take, for example, 100 samples from one stream (it can be from the middle of the track) and then, in a loop, check every window of the second stream in the same way: first samples 0-99, then 1-100, 2-101, and so on.
My program has a few tracks to compare against one input track, so my algorithm can pick the best solution (the most similar window) from every track. Unfortunately, it is getting wrong results.
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.ComponentModel;
using System.IO;
using System.Runtime.CompilerServices;
using System.Windows;
using Controller.Annotations;
using NAudio.Wave;
namespace Controller.Models
{
class DecompositionOfSound
{
private int _numberOfSimilarSamples;
private String _stream;
public string Stream
{
get { return _stream; }
set { _stream = value; }
}
public int IloscPodobnychProbek
{
get { return _numberOfSimilarSamples; }
set { _numberOfSimilarSamples = value; }
}
public DecompositionOfSound(string stream)
{
_stream = stream;
SaveSamples(stream);
}
private void SaveSamples(string stream)
{
var wave = new WaveChannel32(new WaveFileReader(stream));
Samples = new byte[wave.Length];
wave.Read(Samples, 0, (int) wave.Length);
}
private byte[] _samples;
public byte[] Samples
{
get { return _samples; }
set { _samples = value; }
}
}
class Sample: INotifyPropertyChanged
{
#region Fields
private IList<DecompositionOfSound> _listoOfSoundSamples = new ObservableCollection<DecompositionOfSound>();
private string[] _filePaths;
#endregion
#region Property
public string[] FilePaths
{
get { return _filePaths; }
set { _filePaths = value; }
}
public IList<DecompositionOfSound> ListaSciezekDzwiekowych
{
get { return _listoOfSoundSamples; }
set { _listoOfSoundSamples = value; }
}
#endregion
#region Methods
public Sample()
{
LoadSamples(); // must be updated for every new recording!!!
}
public void DisplayMatchingOfSamples()
{
foreach (var decompositionOfSound in ListaSciezekDzwiekowych)
{
MessageBox.Show(decompositionOfSound.IloscPodobnychProbek.ToString());
}
}
public DecompositionOfSound BestMatchingOfSamples()
{
int max=0;
DecompositionOfSound referenceToObject = null;
foreach (var numberOfMatching in _listoOfSoundSamples)
{
if (numberOfMatching.IloscPodobnychProbek > max)
{
max = numberOfMatching.IloscPodobnychProbek;
referenceToObject = numberOfMatching;
}
}
return referenceToObject;
}
public void LoadSamples()
{
int i = 0;
_filePaths = Directory.GetFiles(@"Samples","*.wav");
while (i < _filePaths.Length)
{
ListaSciezekDzwiekowych.Add(new DecompositionOfSound(_filePaths[i]));
i++;
}
}
public void CheckMatchingOfWord(byte[] inputSound,double eps)
{
foreach (var probka in _listoOfSoundSamples)
{
CompareBufforsOfSamples(inputSound, probka, eps);
}
}
public void CheckMatchingOfWord(String inputSound,int iloscProbek, double eps)
{
var wave = new WaveChannel32(new WaveFileReader(inputSound));
var samples = new byte[wave.Length];
wave.Read(samples, 0, (int)wave.Length);
var licznik = 0;
var samplesTmp = new byte[iloscProbek];
while (licznik < iloscProbek)
{
samplesTmp[licznik] = samples[licznik + (wave.Length >> 1)];
licznik++;
}
foreach (var probka in _listoOfSoundSamples)
{
CompareBufforsOfSamples(samplesTmp, probka, eps);
}
}
private void CompareBufforsOfSamples(byte[] inputSound, DecompositionOfSound samples, double eps)
{
int max = 0;
for (int i = 0; i < (samples.Samples.Length - inputSound.Length); i++)
{
int counter = 0;
for (int j = 0; j < inputSound.Length; j++)
{
if (inputSound[j] * eps <= samples.Samples[i + j] &&
(inputSound[j] + inputSound[j] *(1 - eps)) >= samples.Samples[i + j])
{
counter++;
}
}
if (counter > max) max = counter;
}
samples.IloscPodobnychProbek = max;
}
#endregion
#region INotifyPropertyChange
public event PropertyChangedEventHandler PropertyChanged;
[NotifyPropertyChangedInvocator]
protected virtual void OnPropertyChanged([CallerMemberName] string propertyName = null)
{
PropertyChangedEventHandler handler = PropertyChanged;
if (handler != null) handler(this, new PropertyChangedEventArgs(propertyName));
}
#endregion
}
When comparing all of the sound samples, the algorithm finds the soundtrack with the highest number of matching samples, but it isn't the correct recording. Does my comparison of the two recordings make sense, and how can I fix it to get the expected result? Would you help me find a solution to this problem? Sorry for my English.
Kind regards
You simply cannot do sample-level comparisons on recordings to determine your match. Even given two recordings on the same computer of the same word spoken by the same person - i.e. every detail exactly the same - the recorded samples will differ. Digital audio is like that. It can sound the same, but the actual recorded samples will not match.
Speech To Text is not simple, and neither is voice recognition (i.e. checking the identity of a person from their voice).
Instead of samples you need to examine the frequency profiles of the recordings. Various sounds in natural speech have distinctive frequency distributions. Sibilants - the s sound - have a wide distribution of higher frequencies for instance, so are easy to spot - which is why they used to use sibilant detection for the old yes/no response detection on phone systems.
You can get the frequency profile of a waveform by using a Fast Fourier Transform on a block of samples. Run through the audio stream and do a series of FFTs to get a 2D map of the frequencies of the waveform, then look for interesting things like sibilants (lots of high frequency, very little low frequency).
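For illustration, here is a minimal C# sketch (not from the original answer) of computing a magnitude spectrum for one block of samples with a naive DFT; a real implementation would use a proper FFT routine (for example from Math.NET Numerics or NAudio) plus windowing, since the naive form is O(N²):
using System;
static class SpectrumSketch
{
    // samples: one block of mono audio, e.g. 1024 floats in the range -1..1
    public static double[] MagnitudeSpectrum(float[] samples)
    {
        int n = samples.Length;
        var magnitudes = new double[n / 2]; // only the first half is meaningful for real input
        for (int k = 0; k < n / 2; k++)
        {
            double re = 0, im = 0;
            for (int t = 0; t < n; t++)
            {
                double angle = 2 * Math.PI * k * t / n;
                re += samples[t] * Math.Cos(angle);
                im -= samples[t] * Math.Sin(angle);
            }
            magnitudes[k] = Math.Sqrt(re * re + im * im);
        }
        return magnitudes;
    }
}
Sliding this over the stream block by block gives you a rough spectrogram; comparing those frequency profiles (for example, lots of energy in the high bins indicates a sibilant) is far more robust than comparing raw sample bytes.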
Of course you could just use one of the web-based Speech to Text APIs.

Text extraction from table cells

I have a pdf. The pdf contains a table. The table contains many cells (>100). I know the exact position (x,y) and dimension (w,h) of every cell of the table.
I need to extract text from the cells using iTextSharp. Using PdfReaderContentParser + FilteredTextRenderListener (with code like this: http://itextpdf.com/examples/iia.php?id=279 ) I can extract text, but I need to run the whole procedure for each cell. My PDF has many cells, so the program takes too long to run. Is there a way to extract text from a list of "rectangles"? I need to know the text of each rectangle. I'm looking for something like PDFTextStripperByArea from PdfBox (you can define as many regions as you need and then get the text using .getTextForRegion("region-name")).
This option is not immediately included in the iTextSharp distribution but it is easy to realize. In the following I use the iText (Java) class, interface, and method names because I am more at home with Java. They should easily be translatable into iTextSharp (C#) names.
If you use the LocationTextExtractionStrategy, you can use its a posteriori TextChunkFilter mechanism instead of the a priori FilteredRenderListener mechanism used in the sample you linked to. This mechanism was introduced in version 5.3.3.
For this you first parse the whole page content using the LocationTextExtractionStrategy without any FilteredRenderListener filtering applied. This makes the strategy object collect TextChunk objects for all PDF text objects on the page containing the associated base line segment.
Then you call the strategy's getResultantText overload with a TextChunkFilter argument (instead of the regular no-argument overload):
public String getResultantText(TextChunkFilter chunkFilter)
You call it with a different TextChunkFilter instance for each table cell. You have to implement this filter interface which is not too difficult as it only defines one method:
public static interface TextChunkFilter
{
/**
* @param textChunk the chunk to check
* @return true if the chunk should be allowed
*/
public boolean accept(TextChunk textChunk);
}
So the accept method of the filter for a given cell must test whether the text chunk in question is inside your cell.
(Instead of separate instances for each cell you can of course also create one instance whose parameters, i.e. cell coordinates, can be changed between getResultantText calls.)
PS: As mentioned by the OP, this TextChunkFilter has not yet been ported to iTextSharp. It should not be hard to do so, though, only one small interface and one method to add to the strategy.
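To give an idea of the port, here is a rough C# sketch; the ITextChunkFilter interface, the per-cell filter, and the TextChunk member access are assumptions about what such a port could look like, not part of the shipped iTextSharp API:
using iTextSharp.text.pdf.parser;
// Hypothetical sketch of the filter interface to add to LocationTextExtractionStrategy in C#.
public interface ITextChunkFilter
{
    bool Accept(LocationTextExtractionStrategy.TextChunk chunk);
}
// A filter that accepts only chunks whose baseline start point lies inside one table cell.
public class CellChunkFilter : ITextChunkFilter
{
    private readonly iTextSharp.text.Rectangle _cell;
    public CellChunkFilter(iTextSharp.text.Rectangle cell)
    {
        _cell = cell;
    }
    public bool Accept(LocationTextExtractionStrategy.TextChunk chunk)
    {
        var start = chunk.StartLocation; // assumed accessor for the chunk's baseline start point
        return start[Vector.I1] >= _cell.Left && start[Vector.I1] <= _cell.Right
            && start[Vector.I2] >= _cell.Bottom && start[Vector.I2] <= _cell.Top;
    }
}
A GetResultantText(ITextChunkFilter) overload would then simply skip chunks for which Accept returns false when assembling the page text.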
PPS: In a comment sschuberth asked
Do you then still call PdfTextExtractor.getTextFromPage() when using getResultantText(), or does it somehow replace that call? If so, how do you then specify the page to extract from?
Actually PdfTextExtractor.getTextFromPage() internally already uses the no-argument getResultantText() overload:
public static String getTextFromPage(PdfReader reader, int pageNumber, TextExtractionStrategy strategy, Map<String, ContentOperator> additionalContentOperators) throws IOException
{
PdfReaderContentParser parser = new PdfReaderContentParser(reader);
return parser.processContent(pageNumber, strategy, additionalContentOperators).getResultantText();
}
To make use of a TextChunkFilter you could simply build a similar convenience method, e.g.
public static String getTextFromPage(PdfReader reader, int pageNumber, LocationTextExtractionStrategy strategy, Map<String, ContentOperator> additionalContentOperators, TextChunkFilter chunkFilter) throws IOException
{
PdfReaderContentParser parser = new PdfReaderContentParser(reader);
return parser.processContent(pageNumber, strategy, additionalContentOperators).getResultantText(chunkFilter);
}
In the context at hand, though, in which we want to parse the page content only once and apply multiple filters, one for each cell, we might generalize this to:
public static List<String> getTextFromPage(PdfReader reader, int pageNumber, LocationTextExtractionStrategy strategy, Map<String, ContentOperator> additionalContentOperators, Iterable<TextChunkFilter> chunkFilters) throws IOException
{
PdfReaderContentParser parser = new PdfReaderContentParser(reader);
parser.processContent(pageNumber, strategy, additionalContentOperators);
List<String> result = new ArrayList<>();
for (TextChunkFilter chunkFilter : chunkFilters)
{
result.add(strategy.getResultantText(chunkFilter));
}
return result;
}
(You can make this look fancier by using Java 8 collection streaming instead of the old-fashioned for loop.)
Here's my take on how to extract text from a table-like structure in a PDF using iTextSharp. It returns a collection of rows, and each row contains a collection of interpreted columns. This may work for you on the premise that there is a gap between one column and the next which is greater than the average width of a single character. I also added an option to check for wrapped text within a virtual column. Your mileage may vary.
using (PdfReader pdfReader = new PdfReader(stream))
{
for (int page = 1; page <= pdfReader.NumberOfPages; page++)
{
TableExtractionStrategy tableExtractionStrategy = new TableExtractionStrategy();
string pageText = PdfTextExtractor.GetTextFromPage(pdfReader, page, tableExtractionStrategy);
var table = tableExtractionStrategy.GetTable();
}
}
public class TableExtractionStrategy : LocationTextExtractionStrategy
{
public float NextCharacterThreshold { get; set; } = 1;
public int NextLineLookAheadDepth { get; set; } = 500;
public bool AccomodateWordWrapping { get; set; } = true;
private List<TableTextChunk> Chunks { get; set; } = new List<TableTextChunk>();
public override void RenderText(TextRenderInfo renderInfo)
{
base.RenderText(renderInfo);
string text = renderInfo.GetText();
Vector bottomLeft = renderInfo.GetDescentLine().GetStartPoint();
Vector topRight = renderInfo.GetAscentLine().GetEndPoint();
Rectangle rectangle = new Rectangle(bottomLeft[Vector.I1], bottomLeft[Vector.I2], topRight[Vector.I1], topRight[Vector.I2]);
Chunks.Add(new TableTextChunk(rectangle, text));
}
public List<List<string>> GetTable()
{
List<List<string>> lines = new List<List<string>>();
List<string> currentLine = new List<string>();
float? previousBottom = null;
float? previousRight = null;
StringBuilder currentString = new StringBuilder();
// iterate through all chunks and evaluate
for (int i = 0; i < Chunks.Count; i++)
{
TableTextChunk chunk = Chunks[i];
// determine if we are processing the same row based on defined space between subsequent chunks
if (previousBottom.HasValue && previousBottom == chunk.Rectangle.Bottom)
{
if (chunk.Rectangle.Left - previousRight > 1)
{
currentLine.Add(currentString.ToString());
currentString.Clear();
}
currentString.Append(chunk.Text);
previousRight = chunk.Rectangle.Right;
}
else
{
// if we are processing a new line let's check to see if this could be word wrapping behavior
bool isNewLine = true;
if (AccomodateWordWrapping)
{
int readAheadDepth = Math.Min(i + NextLineLookAheadDepth, Chunks.Count);
if (previousBottom.HasValue)
for (int j = i; j < readAheadDepth; j++)
{
if (previousBottom == Chunks[j].Rectangle.Bottom)
{
isNewLine = false;
break;
}
}
}
// if the text was not word wrapped let's treat this as a new table row
if (isNewLine)
{
if (currentString.Length > 0)
currentLine.Add(currentString.ToString());
currentString.Clear();
previousBottom = chunk.Rectangle.Bottom;
previousRight = chunk.Rectangle.Right;
currentString.Append(chunk.Text);
if (currentLine.Count > 0)
lines.Add(currentLine);
currentLine = new List<string>();
}
else
{
if (chunk.Rectangle.Left - previousRight > 1)
{
currentLine.Add(currentString.ToString());
currentString.Clear();
}
currentString.Append(chunk.Text);
previousRight = chunk.Rectangle.Right;
}
}
}
return lines;
}
private struct TableTextChunk
{
public Rectangle Rectangle;
public string Text;
public TableTextChunk(Rectangle rect, string text)
{
Rectangle = rect;
Text = text;
}
public override string ToString()
{
return Text + " (" + Rectangle.Left + ", " + Rectangle.Bottom + ")";
}
}
}

Dataexporter pdf header position

I'm using the PrimeFaces dataExporter to create a PDF of a DataTable. In my DataTable the column headers are centered, but in the PDF version the same columns are aligned to the left. How can I make the PDF columns centered like the DataTable?
I used the solution of customizing the PDFExporter, and it works very well, thank you for your attention. Below is how I've done it:
My custom class:
public class CustomPDFExporter extends PDFExporter {
#Override
protected void addColumnFacets(DataTable table, PdfPTable pdfTable, ColumnType columnType) {
for(UIColumn col : table.getColumns()) {
if(!col.isRendered()) {
continue;
}
if(col instanceof DynamicColumn) {
((DynamicColumn) col).applyModel();
}
if(col.isExportable()) {
addHeaderValue(pdfTable, col.getFacet(columnType.facet()), FontFactory.getFont(FontFactory.TIMES, "iso-8859-1", Font.DEFAULTSIZE, Font.BOLD));
}
}
}
protected void addHeaderValue(PdfPTable pdfTable, UIComponent component, Font font) {
String value = component == null ? "" : exportValue(FacesContext.getCurrentInstance(), component);
PdfPCell cell = new PdfPCell(new Paragraph(value, font));
cell.setHorizontalAlignment(Element.ALIGN_CENTER);
pdfTable.addCell(cell);
}
}
bean:
public void exportPDF(DataTable table, String filename) throws IOException {
FacesContext context = FacesContext.getCurrentInstance();
Exporter exporter = new CustomPDFExporter();
exporter.export(context, table, filename, false, false, "iso-8859-1", null, null);
context.responseComplete();
}
In my page I added:
<h:commandLink action="#{boxBean.exportPDF(boxTable, 'relatorio_caixas')}" >
<p:graphicImage value="/resources/img/pdf.png"/>
</h:commandLink>
Well your answer is already on stackoverflow: changing style on generating pdf with Primefaces dataExporter
Also take a look here: http://www.primefaces.org/showcase/ui/exporterProcessor.jsf for how to use the exporter processor of PrimeFaces.
In short, you need to create your own processor to create a custom PDF.

How can I use MEF to manage interdependent modules?

I found this question difficult to express (particularly in title form), so please bear with me.
I have an application that I am continually modifying to do different things. It seems like MEF might be a good way to manage the different pieces of functionality. Broadly speaking, there are three sections of the application that form a pipeline of sorts:
Acquisition
Transformation
Expression
In its simplest form, I can express each of these stages as an interface (IAcquisition etc.). The problems start when I want to use acquisition components that provide richer data than standard. I want to design modules that use this richer data, but I can't rely on it being there.
I could, of course, add all of the data to the interface specification. I could deal with poorer data sources by throwing an exception or returning a null value. This seems a long way from ideal.
I'd prefer to do the MEF binding in three stages, such that modules are offered to the user only if they are compatible with those selected previously.
So my question: Can I specify metadata which restricts the set of available imports?
An example:
Acquision1 offers BasicData only
Acquision2 offers BasicData and AdvancedData
Transformation1 requires BasicData
Transformation2 requires BasicData and AdvancedData
Acquisition module is selected first.
If Acquisition1 is selected, don't offer Transformation 2, otherwise offer both.
Is this possible? If so, how?
Your question suggests a structure like this:
public class BasicData
{
public string Basic { get; set; } // example data
}
public class AdvancedData : BasicData
{
public string Advanced { get; set; } // example data
}
Now you have your acquisition, transformation and expression components. You want to be able to deal with different kinds of data, so they're generic:
public interface IAcquisition<out TDataKind>
{
TDataKind Acquire();
}
public interface ITransformation<TDataKind>
{
TDataKind Transform(TDataKind data);
}
public interface IExpression<in TDataKind>
{
void Express(TDataKind data);
}
And now you want to build a pipeline out of them that looks like this:
IExpression.Express(ITransformation.Transform(IAcquisition.Acquire));
So let's start building a pipeline builder:
using System;
using System.Collections.Generic;
using System.ComponentModel.Composition;
using System.ComponentModel.Composition.Hosting;
using System.ComponentModel.Composition.Primitives;
using System.Linq;
using System.Linq.Expressions;
// namespace ...
public static class PipelineBuidler
{
private static readonly string AcquisitionIdentity =
AttributedModelServices.GetTypeIdentity(typeof(IAcquisition<>));
private static readonly string TransformationIdentity =
AttributedModelServices.GetTypeIdentity(typeof(ITransformation<>));
private static readonly string ExpressionIdentity =
AttributedModelServices.GetTypeIdentity(typeof(IExpression<>));
public static Action BuildPipeline(ComposablePartCatalog catalog,
Func<IEnumerable<string>, int> acquisitionSelector,
Func<IEnumerable<string>, int> transformationSelector,
Func<IEnumerable<string>, int> expressionSelector)
{
var container = new CompositionContainer(catalog);
The class holds MEF type identities for your three contract interfaces. We'll need those later to identify the correct exports. Our BuildPipeline method returns an Action. That is going to be the pipeline, so we can just do pipeline(). It takes a ComposablePartCatalog and three Funcs (to select an export). That way, we can keep all the dirty work inside this class. Then we start by creating a CompositionContainer.
Now we have to build ImportDefinitions, first for the acquisition component:
var aImportDef = new ImportDefinition(def => (def.ContractName == AcquisitionIdentity), null, ImportCardinality.ZeroOrMore, true, false);
This ImportDefinition simply matches all exports of the IAcquisition<> interface. Now we can give it to the container:
var aExports = container.GetExports(aImportDef).ToArray();
aExports now holds all IAcquisition<> exports in the catalog. So let's run the selector on this:
var selectedAExport = aExports[acquisitionSelector(aExports.Select(export => export.Metadata["Name"] as string))];
And there we have our acquisition component:
var acquisition = selectedAExport.Value;
var acquisitionDataKind = (Type)selectedAExport.Metadata["DataKind"];
Now we're going to do the same for the transformation and the expression components, but with one slight difference: The ImportDefinition is going to ensure that each component can handle the output of the previous component.
var tImportDef = new ImportDefinition(def => (def.ContractName == TransformationIdentity) && ((Type)def.Metadata["DataKind"]).IsAssignableFrom(acquisitionDataKind),
null, ImportCardinality.ZeroOrMore, true, false);
var tExports = container.GetExports(tImportDef).ToArray();
var selectedTExport = tExports[transformationSelector(tExports.Select(export => export.Metadata["Name"] as string))];
var transformation = selectedTExport.Value;
var transformationDataKind = (Type)selectedTExport.Metadata["DataKind"];
var eImportDef = new ImportDefinition(def => (def.ContractName == ExpressionIdentity) && ((Type)def.Metadata["DataKind"]).IsAssignableFrom(transformationDataKind),
null, ImportCardinality.ZeroOrMore, true, false);
var eExports = container.GetExports(eImportDef).ToArray();
var selectedEExport = eExports[expressionSelector(eExports.Select(export => export.Metadata["Name"] as string))];
var expression = selectedEExport.Value;
var expressionDataKind = (Type)selectedEExport.Metadata["DataKind"];
And now we can wire it all up in an expression tree:
var acquired = Expression.Call(Expression.Constant(acquisition), typeof(IAcquisition<>).MakeGenericType(acquisitionDataKind).GetMethod("Acquire"));
var transformed = Expression.Call(Expression.Constant(transformation), typeof(ITransformation<>).MakeGenericType(transformationDataKind).GetMethod("Transform"), acquired);
var expressed = Expression.Call(Expression.Constant(expression), typeof(IExpression<>).MakeGenericType(expressionDataKind).GetMethod("Express"), transformed);
return Expression.Lambda<Action>(expressed).Compile();
}
}
And that's it! A simple example application would look like this:
[Export(typeof(IAcquisition<>))]
[ExportMetadata("DataKind", typeof(BasicData))]
[ExportMetadata("Name", "Basic acquisition")]
public class Acquisition1 : IAcquisition<BasicData>
{
public BasicData Acquire()
{
return new BasicData { Basic = "Acquisition1" };
}
}
[Export(typeof(IAcquisition<>))]
[ExportMetadata("DataKind", typeof(AdvancedData))]
[ExportMetadata("Name", "Advanced acquisition")]
public class Acquisition2 : IAcquisition<AdvancedData>
{
public AdvancedData Acquire()
{
return new AdvancedData { Advanced = "Acquisition2A", Basic = "Acquisition2B" };
}
}
[Export(typeof(ITransformation<>))]
[ExportMetadata("DataKind", typeof(BasicData))]
[ExportMetadata("Name", "Basic transformation")]
public class Transformation1 : ITransformation<BasicData>
{
public BasicData Transform(BasicData data)
{
data.Basic += " - Transformed1";
return data;
}
}
[Export(typeof(ITransformation<>))]
[ExportMetadata("DataKind", typeof(AdvancedData))]
[ExportMetadata("Name", "Advanced transformation")]
public class Transformation2 : ITransformation<AdvancedData>
{
public AdvancedData Transform(AdvancedData data)
{
data.Basic += " - Transformed2";
data.Advanced += " - Transformed2";
return data;
}
}
[Export(typeof(IExpression<>))]
[ExportMetadata("DataKind", typeof(BasicData))]
[ExportMetadata("Name", "Basic expression")]
public class Expression1 : IExpression<BasicData>
{
public void Express(BasicData data)
{
Console.WriteLine("Expression1: {0}", data.Basic);
}
}
[Export(typeof(IExpression<>))]
[ExportMetadata("DataKind", typeof(AdvancedData))]
[ExportMetadata("Name", "Advanced expression")]
public class Expression2 : IExpression<AdvancedData>
{
public void Express(AdvancedData data)
{
Console.WriteLine("Expression2: ({0}) - ({1})", data.Basic, data.Advanced);
}
}
class Program
{
static void Main(string[] args)
{
var pipeline = PipelineBuidler.BuildPipeline(new AssemblyCatalog(typeof(Program).Assembly), StringSelector, StringSelector, StringSelector);
pipeline();
}
static int StringSelector(IEnumerable<string> strings)
{
int i = 0;
foreach (var item in strings)
Console.WriteLine("[{0}] {1}", i++, item);
return int.Parse(Console.ReadLine());
}
}

Why are my screenshots only black?

Could someone tell me why my screenshots are only black? I am still learning and couldn't find a clue as to why they are only black.
This is my Utility class
static class XNAUtilities
{
private static RenderTarget2D ssTexture;
private static KeyboardState currentState, previousState;
private static int counter;
public static void TakeScreenShot(GraphicsDevice device, Keys theKey)
{
// Take Screenshot
currentState = Keyboard.GetState();
if (currentState.IsKeyDown(theKey) && previousState.IsKeyUp(theKey))
{
//device.SetRenderTarget(null);
PresentationParameters pparameters = device.PresentationParameters;
ssTexture = new RenderTarget2D(device, pparameters.BackBufferWidth, pparameters.BackBufferHeight, false, SurfaceFormat.Color, DepthFormat.None); //??
FileStream fileStream = new System.IO.FileStream(@"Screenshot" + "_" + counter + ".png", System.IO.FileMode.CreateNew);
ssTexture.SaveAsPng(fileStream, pparameters.BackBufferWidth, pparameters.BackBufferHeight);
counter++;
}
previousState = currentState;
}
}
This is my Update and Draw from Game1.cs
protected override void Update(GameTime gameTime)
{
if (GamePad.GetState(PlayerIndex.One).Buttons.Back == ButtonState.Pressed)
this.Exit();
myModelRotation += MathHelper.ToRadians(1f);
// Take a Screenshot
XNAUtilities.TakeScreenShot(GraphicsDevice, Keys.F8);
base.Update(gameTime);
}
protected override void Draw(GameTime gameTime)
{
GraphicsDevice.Clear(Color.CornflowerBlue);
Matrix[] transforms = new Matrix[myModel.Bones.Count];
myModel.CopyAbsoluteBoneTransformsTo(transforms);
foreach (ModelMesh mesh in myModel.Meshes)
{
foreach (BasicEffect effects in mesh.Effects)
{
effects.EnableDefaultLighting();
effects.World = transforms[mesh.ParentBone.Index]
* Matrix.CreateRotationY(myModelRotation)
* Matrix.CreateTranslation(myModelPosition);
effects.View = Matrix.CreateLookAt(new Vector3(200, 100, 400), Vector3.Zero, Vector3.Up);
effects.Projection = Matrix.CreatePerspectiveFieldOfView(MathHelper.ToRadians(45f),
GraphicsDevice.Viewport.AspectRatio, 1, 5000);
}
mesh.Draw();
}
smileySprite.DrawSprites(GraphicsDevice, spriteBatch, new Vector2(10,10), Color.White);
base.Draw(gameTime);
}
You're not actually rendering to your render target. So you're saving the blank target.
You need to wrap your scene drawing like so:
GraphicsDevice.SetRenderTarget(ssTexture);
// Render your scene here
GraphicsDevice.SetRenderTarget(null);
// Now you can save your render target as a texture
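Putting the pieces together, a minimal sketch of the corrected flow might look like the following. DrawScene, the takeScreenshot flag, and the counter field are hypothetical placeholders for your existing drawing code and key handling; note the render target also gets a depth buffer so the 3D model renders correctly:
protected override void Draw(GameTime gameTime)
{
    if (takeScreenshot) // hypothetical flag set in Update when F8 is pressed
    {
        PresentationParameters pp = GraphicsDevice.PresentationParameters;
        RenderTarget2D target = new RenderTarget2D(GraphicsDevice,
            pp.BackBufferWidth, pp.BackBufferHeight, false,
            SurfaceFormat.Color, DepthFormat.Depth24);

        GraphicsDevice.SetRenderTarget(target);   // draw the scene into the render target...
        DrawScene(gameTime);                      // hypothetical: your existing model/sprite drawing code
        GraphicsDevice.SetRenderTarget(null);     // ...then switch back to the back buffer

        using (FileStream fs = File.Create("Screenshot_" + counter++ + ".png"))
            target.SaveAsPng(fs, pp.BackBufferWidth, pp.BackBufferHeight);

        takeScreenshot = false;
    }

    DrawScene(gameTime); // draw the normal frame to the screen
    base.Draw(gameTime);
}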