How do I replace text on a Google doc with an image from a cell from Google Sheets. Replace text returns "cellimage" - pdf

I'm currently using a script that uses text from cells in google sheets to populate specific fields in a google doc which is then saved as a pdf.
I want to replace one of the fields in the google doc with an image from a cell in sheets (an auto-generated QR code), but when I use the function body.replaceText the replaced field in the output PDF contains the string 'cellimage' rather than the image.
Below is the script used:
function createBulkPDFs(){
const docFile = DriveApp.getFileById("1y6hduq3CzpM5Nr8WuhlvQNZmcKViHXNWG1zvR9KgCTk");
const tempFolder = DriveApp.getFolderById("1SLj2sldcNixIe_q8RIm_LIAON1L9rImH");
const pdfFolder = DriveApp.getFolderById("1TrhUjonuXodeZWMQMJV7ZjtN4jp8WSRH");
const currentSheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName("Customs");
const data = currentSheet.getRange(2,1,currentSheet.getLastRow()-1,20).getValues();
let errors = [];
data.forEach(row => {
try{
createPDF(row[0],row[1],row[2],row[3],row[4],row[5],row[6],row[7],row[8],row[9],row[10],row[11],row[12],row[13],row[14],row[15],row[16],row[17],row[18],row[19],row [0],docFile,tempFolder,pdfFolder); //row numbers are to name columns //then this //row 0 is the NAMEof the PDF
errors.push([""]);
} catch(err) {
errors.push(["Failed"]);
}
}); //close forEach
currentSheet.getRange(2,21,currentSheet.getLastRow()-1,1).setValues(errors);
}
function createPDF(number,model,fors,length,width,thickness,litres,notes,blank,orders,dates,finSetup,lamination,artwork,finSystem,leash,finish,phoneNumber,shipping,qrcode,pdfName,docFile,tempFolder,pdfFolder) {
const tempFile = docFile.makeCopy(tempFolder);
const tempDocFile = DocumentApp.openById(tempFile.getId());
const body = tempDocFile.getBody();
body.replaceText("{number}",number); //name in doc on left, named column in here on right //need to do this next!!!!
body.replaceText("{model}",model);
body.replaceText("{for}",fors);
body.replaceText("{length}",length);
body.replaceText("{width}",width);
body.replaceText("{thickness}",thickness);
body.replaceText("{litres}",litres);
body.replaceText("{notes}",notes);
body.replaceText("{blank}",blank);
body.replaceText("{orderTakenBy}",orders);
body.replaceText("{dateTaken}",dates);
body.replaceText("{finSetup}",finSetup);
body.replaceText("{lamination}",lamination);
body.replaceText("{artwork}",artwork);
body.replaceText("{finSystem}",finSystem);
body.replaceText("{leash}",leash);
body.replaceText("{finish}",finish);
body.replaceText("{phoneNumber}",phoneNumber);
body.replaceText("{shippingCo}",shipping);
body.replaceText("{qrcode}",qrcode);
tempDocFile.saveAndClose();
const pdfContentBlob = tempFile.getAs(MimeType.PDF);
pdfFolder.createFile(pdfContentBlob).setName(pdfName);
tempFolder.removeFile(tempFile)
Ideally the qrcode text would be replaced with an image (that is big enough to scan - maybe 150 x 150)
Can someone please help me with next steps. I am a n00b to google scripts.

Related

I need to Set Value the get URL in a cell

I am now need to convert the Google Sheet page to PDF, email to user and save the PDF format straightway to Google Drive.
And i need the Google Drive link after save it to Google Drive.
The steps from convert the Google Sheet to PDF, and i've done but I've stuck at getting the URL to be paste on the specific cells.
i know to get the URL using this code Logger.log(fileUrl)
But how to paste on cell the command ?
var changedFlag = false;
var TEMPLATESHEET='Boom-Report';
function emailSpreadsheetAsPDF() {
//Utilities.sleep(300000); //to pause for 60 seconds . Make sure photo completely upload to google sheet
DocumentApp.getActiveDocument();
DriveApp.getFiles();
// This is the link to my spreadsheet with the Form responses and the Invoice Template sheets
// Add the link to your spreadsheet here
// or you can just replace the text in the link between "d/" and "/edit"
// In my case is the text: 17I8-QDce0Nug7amrZeYTB3IYbGCGxvUj-XMt8uUUyvI
const ss = SpreadsheetApp.openByUrl("https://docs.google.com/spreadsheets/d/1NVJOdFLBAgNFqSHhnHJYybjUlSqhv4hKI_HXJyhJ88E/edit");
// We are going to get the email address from the cell "B7" from the "Invoice" sheet
// Change the reference of the cell or the name of the sheet if it is different
const value = ss.getSheetByName("Source Email-Boom").getRange("X3").getValue();
const email = value.toString();
// Subject of the email message
const subject = ss.getSheetByName("Source Email-Boom").getRange("B3").getValue();
// Email Text. You can add HTML code here - see ctrlq.org/html-mail
const body = "Boom Lifts Inspection Report - Sent via Auto Generate PDI Report from Glideapps";
// Again, the URL to your spreadsheet but now with "/export" at the end
// Change it to the link of your spreadsheet, but leave the "/export"
const url = 'https://docs.google.com/spreadsheets/d/1NVJOdFLBAgNFqSHhnHJYybjUlSqhv4hKI_HXJyhJ88E/export?';
const exportOptions =
'exportFormat=pdf&format=pdf' + // export as pdf
'&size=A4' + // paper size letter / You can use A4 or legal
'&portrait=true' + // orientation portal, use false for landscape
'&fitw=true' + // fit to page width false, to get the actual size
'&sheetnames=false&printtitle=false' + // hide optional headers and footers
'&pagenumbers=false&gridlines=false' + // hide page numbers and gridlines
'&fzr=false' + // do not repeat row headers (frozen rows) on each page
'&gid=1832955909'; // the sheet's Id. Change it to your sheet ID.
// You can find the sheet ID in the link bar.
// Select the sheet that you want to print and check the link,
// the gid number of the sheet is on the end of your link.
var params = {method:"GET",headers:{"authorization":"Bearer "+ ScriptApp.getOAuthToken()}};
// Generate the PDF file
var response = UrlFetchApp.fetch(url+exportOptions, params).getBlob();
// Send the PDF file as an attachement
GmailApp.sendEmail("biha#equip-inc.com", subject, body, {
htmlBody: body,
attachments: [{
fileName: ss.getSheetByName("Source Email-Boom").getRange("B3").getValue().toString() +".pdf",
content: response.getBytes(),
mimeType: "application/pdf"
}]
});
// Save the PDF to Drive. (in the folder) The name of the PDF is going to be the name of the Company (cell B5)
const nameFile = ss.getSheetByName("Source Email-Boom").getRange("B3").getValue().toString() +".pdf"
const folderID = "1ZKWq9jWmeEQlxncuTPHssCFXC3Fidmxn";
DriveApp.getFolderById(folderID).createFile(response).setName(nameFile);
// create file URL
var SpreadsheetID = "1NVJOdFLBAgNFqSHhnHJYybjUlSqhv4hKI_HXJyhJ88E";
var ss2 = SpreadsheetApp.openById(SpreadsheetID);
var Sheetname2= "BL-Inspection Report";
var sheet2 = ss2.getSheetByName(Sheetname2);
// Get the last row based on the data range of a single column.
var lastRow2 = sheet2.getLastRow();
var lastColumn2 = sheet2.getLastColumn();
//EXAMPLE: Get the data range based on our selected columns range.
var dataRange2 = sheet2.getRange(1,1, lastRow2, lastColumn2);
var dataValues2 = dataRange2.getValues();
var dataMatch=[];
//***** */
// Loop through array and if condition met, add relevant
// background color.
var p=34 ; //Column No. for Name column AI:AI (Report No)
var filename = encodeURI(nameFile);
var files = DriveApp.getFilesByName(nameFile);
while (files.hasNext()) {
var file = files.next();
if (file) {
var fileUrl = file.getUrl();
};
};
////////////////HELP THIS PART////////////////////////////////
for ( j = 0 ; j < lastRow2 ; j++){
var zz=j;
var yy=dataValues2[j][34];
if(dataValues2[j][34] == subject){
var doclink = Logger.log(fileUrl);
var range = sheet2.getRange(j+1, 128);
range.setValue(doclink);
};
};
}
If cell B3 value in First Source is find in Google Drive, paste the URL in Column DX where the AI is same with First Source.
I believe your goal is as follows.
You want to search the file of filename subject retrieved from the cell "B3" of "Source Email-Boom" sheet from your Google Drive, and when the value of subject is found from the column "AI" of "BL-Inspection Report" sheet, you want to put the URL of the file to the column "AJ".
For my question of For example, you want to put the URL of the just created file?, from Yes of your replying, I understood that you wanted to put the URL of the just created file in this script.
In this case, how about the following modification? I thought that in this case, the file URL of the just created file can be directly retrieved from DriveApp.getFolderById(folderID).createFile(response).setName(nameFile). So, how about the following modification?
From:
DriveApp.getFolderById(folderID).createFile(response).setName(nameFile);
To:
var fileUrl = DriveApp.getFolderById(folderID).createFile(response).setName(nameFile).getUrl();
And also, please modify as follows.
From:
var filename = encodeURI(nameFile);
var files = DriveApp.getFilesByName(nameFile);
while (files.hasNext()) {
var file = files.next();
if (file) {
var fileUrl = file.getUrl();
};
};
////////////////HELP THIS PART////////////////////////////////
for (j = 0; j < lastRow2; j++) {
if (dataValues2[j][34] == subject) {
var doclink = Logger.log(fileUrl);
var range = sheet2.getRange(j + 1, 128);
range.setValue(doclink);
};
};
To:
var range = sheet2.getRange("AI2:AI" + sheet2.getLastRow()).createTextFinder(subject).findNext();
if (range) {
range.offset(0, 1).setValue(fileUrl);
}
In this modification, the cell is searched using TextFinder.
Reference:
createTextFinder(findText)

Why my JSONP Code doesn't save results into PDF file after converting from template in Google App Script

I'm trying to save my results into PDF file after google sheet calculation. Here's my google sheet file: Google sheet(img)
After someone submit my form J Column sum all answers and gets 39. Then I want to convert my template file template(img) to PDF, so I wrote this code:
function afterFormSubmit(e) {
Logger.log(JSON.stringify(e));
var formValues = e.namedValues;
Logger.log(formValues);
const info = e.namedValues;
createPDF(info);
}
function createPDF(info){
const pdfFolder = DriveApp.getFolderById("1YPyX8gv3W-JXR0uovMdr");
const tempFolder = DriveApp.getFolderById("1m9Xcc9hmhJjc83cSgCW26u");
const templateDoc = DriveApp.getFileById("1r4m3-FtzMzlO_SmGMPuGraJhEIBsQA");
const newTempFile = templateDoc.makeCopy(tempFolder);
const openDoc = DocumentApp.openById(newTempFile.getId())
const body = openDoc.getBody();
body.replaceText("{T}", info['Timestamp'][0]);
body.replaceText("{A2}" ,info['answer2'][0]);
body.replaceText("{A3}" ,info['answer3'][0]);
body.replaceText("{A4}" ,info['answer4'][0]);
body.replaceText("{A5}" ,info['answer5'][0]);
body.replaceText("{A6}" ,info['answer6'][0]);
body.replaceText("{A7}" ,info['answer7'][0]);
body.replaceText("{E}", info['Email'][0]);
body.replaceText("{R}" ,info['Results'][0]); //Please fix it: PDF Not showing/Saving Results
openDoc.saveAndClose();
const blobPDF = newTempFile.getAs(MimeType.PDF);
const pdfFile = pdfFolder.createFile(blobPDF).setName("Quiz finish time:"+" "+ info['Timestamp'][0]);
tempFolder.removeFile(newTempFile);
}
But when this code converts my template file to PDF he don't show/save my result {R} PDF(img), but all other body.replaceText(); works well. Maybe someone know how to deal with that (how to save/show Result into PDF file)?
I get response that my google app script code is only for google form responses... So my first code didn't works to get Results... But this one is what I'm looked for:
function afterFormSubmit() {
var ss= SpreadsheetApp.openById("YOUR_SHEET_ID").getSheetByName("SHEET_NAME");
ss.getLastRow();
var info=ss.getRange("A"+ss.getLastRow()+":J"+ss.getLastRow()).getValues()[0];
Logger.log(info);
createPDF(info);
}
function createPDF(info){
const pdfFolder = DriveApp.getFolderById("FOLDER_ID");
const tempFolder = DriveApp.getFolderById("FOLDER_ID");
const templateDoc = DriveApp.getFileById("DOC_ID");
const newTempFile = templateDoc.makeCopy(tempFolder);
const openDoc = DocumentApp.openById(newTempFile.getId())
const body = openDoc.getBody();
var dt=Utilities.formatDate(info[0],"GMT"-5.00,"MM/dd/YYYY HH:mm:ss");
body.replaceText("{T}", dt);
body.replaceText("{A3}" ,info[2]);
body.replaceText("{A4}" ,info[3]);
body.replaceText("{A5}" ,info[4]);
body.replaceText("{A6}" ,info[5]);
body.replaceText("{A7}" ,info[6]);
body.replaceText("{E}", info[7]);
body.replaceText("{A2}" ,info[8]);
body.replaceText("{R}" ,info[9]);
openDoc.saveAndClose();
const blobPDF = newTempFile.getAs(MimeType.PDF);
const pdfFile = pdfFolder.createFile(blobPDF).setName("Quiz finish time:"+" "+ dt);
tempFolder.removeFile(newTempFile);
}
````

pdf is created from google slide, but with the markers populated (via GAS)

The code below basically maps columns from a spreadsheet to a couple of markers I got on a google slide.
It generates copies of the google slide template, updates them with the row's data and I actually need it to be in pdf form to be emailed later.
The pdf files are created in the destination folder, with the right file names, but the markers within them are "empty". Later on, I will have to delete these google slide files, but the challenge here now is to have the pdf files correctly created.
Appreciate your time.
function mailMerge(templateID,ssID, sheetName, mapped, fileNameData, emailCol, rowLen = "auto"){
//Properties Services is Google Script Storage.
//This clears out the storage.
PropertiesService.getScriptProperties().deleteAllProperties();
const ss = SpreadsheetApp.getActiveSpreadsheet();
//const sheet = SpreadsheetApp.openById(ssID);
const sheet = ss.getSheetByName("Lista de Participantes");
//Get number of rows to process
rowLen = (rowLen = "auto") ? getRowLen() : rowLen;
const range = sheet.getRange(7,1,rowLen,sheet.getDataRange().getNumColumns());
const matrix = range.getValues();
const fileNameRows = getFileNameRows()
for(let i = 1; i < rowLen; i++){
if (matrix[i][1] == true && matrix[i][27] != "Sim") {
let row = matrix[i];
//Get the title for the file.
let fileName = buildFileName(row)
//Creates a copy of the template file and names it with the current row's details.
let newDoc = DriveApp.getFileById(templateID).makeCopy(fileName);
//Replaces all the text place markers ({{text}}) with current row information.
updateFileData(row, newDoc.getId());
//Save new File ID and email to Properties service.
PropertiesService.getScriptProperties()
.setProperty(newDoc.getId(),row[emailCol]);
// 5. Export the temporal Google Slides as a PDF file.
newDoc = DriveApp.getFileById(newDoc.getId());
DriveApp.getFolderById("folder ID").createFile(newDoc.getBlob());
}
};
Besides the code above, I go this script file within the same container/Spreadsheet, where I map the columns whose data I want to generate a google Slide for. each column of data I refer to as marker.
/*###################################################################
* Maps the relationship between the Google Sheet header and its location
* for each column along with it's corresponding Google Slide Doc template name.
*
* To update change the sheet, col and doc:
* ***
* {
* sheet: << Your sheet header
* col: << The column on the google sheet with the above header
* doc: << the corresonding name in double braces {{name}} in your Slide template
* }
* ***
*###################################################################
*/
const mappedDocToSheet = [
{
sheet:"Nome",
col:2,
doc:"primeiroNome"
},
{
sheet:"Sobrenome",
col:3,
doc:"sobrenome"
},
{
sheet:"COD. CERTIFICADO",
col:9,
doc:"codigo"
},
{
sheet:"Curso",
col:10,
doc:"curso"
},
];
I believe your goal and situation as follows.
You add the values of Google Slides and create it to PDF data
newDoc is the Google Slides
In order to achieve your goal, please use saveAndClose. For your script, please modify as follows.
Modified script:
Please add the following script to your function of mailMerge as follows.
// 5. Export the temporal Google Slides as a PDF file.
SlidesApp.openById(newDoc.getId()).saveAndClose(); // <--- Added
Reference:
saveAndClose()

Printing to pdf from Google Apps Script HtmlOutput

For years, I have been using Google Cloud Print to print labels in our laboratories on campus (to standardize) using a Google Apps Script custom HtmlService form.
Now that GCP is becoming depreciated, I am in on a search for a solution. I have found a few options but am struggling to get the file to convert to a pdf as would be needed with these other vendors.
Currently, when you submit a text/html blob to the GCP servers in GAS, the backend converts the blob to application/pdf (as evidenced by looking at the job details in the GCP panel on Chrome under 'content type').
That said, because these other cloud print services require pdf printing, I have tried for some time now to have GAS change the file to pdf format before sending to GCP and I always get a strange result. Below, I'll show some of the strategies that I have used and include pictures of one of our simple labels generated with the different functions.
The following is the base code for the ticket and payload that has worked for years with GCP
//BUILD PRINT JOB FOR NARROW TAPES
var ticket = {
version: "1.0",
print: {
color: {
type: "STANDARD_COLOR",
vendor_id: "Color"
},
duplex: {
type: "NO_DUPLEX"
},
copies: {copies: parseFloat(quantity)},
media_size: {
width_microns: 27940,
height_microns:40960
},
page_orientation: {
type: "LANDSCAPE"
},
margins: {
top_microns:0,
bottom_microns:0,
left_microns:0,
right_microns:0
},
page_range: {
interval:
[{start:1,
end:1}]
},
}
};
var payload = {
"printerid" : QL710,
"title" : "Blank Template Label",
"content" : HtmlService.createHtmlOutput(html).getBlob(),
"contentType": 'text/html',
"ticket" : JSON.stringify(ticket)
};
This generates the expected following printout:
When trying to convert to pdf using the following code:
The following is the code used to transform to pdf:
var blob = HtmlService.createTemplate(html).evaluate().getContent();
var newBlob = Utilities.newBlob(html, "text/html", "text.html");
var pdf = newBlob.getAs("application/pdf").setName('tempfile');
var file = DriveApp.getFolderById("FOLDER ID").createFile(pdf);
var payload = {
"printerid" : QL710,
"title" : "Blank Template Label",
"content" : pdf,//HtmlService.createHtmlOutput(html).getBlob(),
"contentType": 'text/html',
"ticket" : JSON.stringify(ticket)
};
an unexpected result occurs:
This comes out the same way for direct coding in the 'content' field with and without .getBlob():
"content" : HtmlService.createHtmlOutput(html).getAs('application/pdf'),
note the createFile line in the code above used to test the pdf. This file is created as expected, of course with the wrong dimensions for label printing (not sure how to convert to pdf with the appropriate margins and page size?): see below
I have now tried to adopt Yuri's ideas; however, the conversion from html to document loses formatting.
var blob = HtmlService.createHtmlOutput(html).getBlob();
var docID = Drive.Files.insert({title: 'temp-label'}, blob, {convert: true}).id
var file = DocumentApp.openById(docID);
file.getBody().setMarginBottom(0).setMarginLeft(0).setMarginRight(0).setMarginTop(0).setPageHeight(79.2).setPageWidth(172.8);
This produces a document looks like this (picture also showing expected output in my hand).
Does anyone have insights into:
How to format the converted pdf to contain appropriate height, width
and margins.
How to convert to pdf in a way that would print correctly.
Here is a minimal code to get a better sense of context https://script.google.com/d/1yP3Jyr_r_FIlt6_aGj_zIf7HnVGEOPBKI0MpjEGHRFAWztGzcWKCJrD0/edit?usp=sharing
I've made the template (80 x 40 mm -- sorry, I don't know your size):
https://docs.google.com/document/d/1vA93FxGXcWLIEZBuQwec0n23cWGddyLoey-h0WR9weY/edit?usp=sharing
And there is the script:
function myFunction() {
// input data
var matName = '<b>testing this to <u>see</u></b> if it <i>actually</i> works <i>e.coli</i>'
var disposeWeek = 'end of semester'
var prepper = 'John Ruppert';
var className = 'Cell and <b>Molecular</b> Biology <u>Fall 2020</u> a few exercises a few exercises a few exercises a few exercises';
var hazards = 'Lots of hazards';
// make a temporary Doc from the template
var copyFile = DriveApp.getFileById('1vA93FxGXcWLIEZBuQwec0n23cWGddyLoey-h0WR9weY').makeCopy();
var doc = DocumentApp.openById(copyFile.getId());
var body = doc.getBody();
// replace placeholders with data
body.replaceText('{matName}', matName);
body.replaceText('{disposeWeek}', disposeWeek);
body.replaceText('{prepper}', prepper);
body.replaceText('{className}', className);
body.replaceText('{hazards}', hazards);
// make Italics, Bold and Underline
handle_tags(['<i>', '</i>'], body);
handle_tags(['<b>', '</b>'], body);
handle_tags(['<u>', '</u>'], body);
// save the temporary Doc
doc.saveAndClose();
// make a PDF
var docblob = doc.getBlob().setName('Label.pdf');
DriveApp.createFile(docblob);
// delete the temporary Doc
copyFile.setTrashed(true);
}
// this function applies formatting to text inside the tags
function handle_tags(tags, body) {
var start_tag = tags[0].toLowerCase();
var end_tag = tags[1].toLowerCase();
var found = body.findText(start_tag);
while (found) {
var elem = found.getElement();
var start = found.getEndOffsetInclusive();
var end = body.findText(end_tag, found).getStartOffset()-1;
switch (start_tag) {
case '<b>': elem.setBold(start, end, true); break;
case '<i>': elem.setItalic(start, end, true); break;
case '<u>': elem.setUnderline(start, end, true); break;
}
found = body.findText(start_tag, found);
}
body.replaceText(start_tag, ''); // remove tags
body.replaceText(end_tag, '');
}
The script just changes the {placeholders} with the data and saves the result as a PDF file (Label.pdf). The PDF looks like this:
There is one thing, I'm not sure if it's possible -- to change a size of the texts dynamically to fit them into the cells, like it's done in your 'autosize.html'. Roughly, you can take a length of the text in the cell and, in case it is bigger than some number, to make the font size a bit smaller. Probably you can use the jquery texfill function from the 'autosize.html' to get an optimal size and apply the size in the document.
I'm not sure if I got you right. Do you need make PDF and save it on Google Drive? You can do in Google Docs.
As example:
Make a new document with your table and text. Something like this
Add this script into your doc:
function myFunction() {
var copyFile = DriveApp.getFileById(ID).makeCopy();
var newFile = DriveApp.createFile(copyFile.getAs('application/pdf'));
newFile.setName('label');
copyFile.setTrashed(true);
}
Every time you run this script it makes the file 'label.pdf' on your Google Drive.
The size of this pdf will be the same as the page size of your Doc. You can make any size of page with add-on: Page Sizer https://webapps.stackexchange.com/questions/129617/how-to-change-the-size-of-paper-in-google-docs-to-custom-size
If you need to change the text in your label before generate pdf or/and you need change the name of generated file, you can do it via script as well.
Here is a variant of the script that changes a font size in one of the cells if the label doesn't fit into one page.
function main() {
// input texts
var text = {};
text.matName = '<b>testing this to <u>see</u></b> if it <i>actually</i> works <i>e.coli</i>';
text.disposeWeek = 'end of semester';
text.prepper = 'John Ruppert';
text.className = 'Cell and <b>Molecular</b> Biology <u>Fall 2020</u> a few exercises a few exercises a few exercises a few exercises';
text.hazards = 'Lots of hazards';
// initial max font size for the 'matName'
var size = 10;
var doc_blob = set_text(text, size);
// if we got more than 1 page, reduce the font size and repeat
while ((size > 4) && (getNumPages(doc_blob) > 1)) {
size = size-0.5;
doc_blob = set_text(text, size);
}
// save pdf
DriveApp.createFile(doc_blob);
}
// this function takes texts and a size and put the texts into fields
function set_text(text, size) {
// make a copy
var copyFile = DriveApp.getFileById('1vA93FxGXcWLIEZBuQwec0n23cWGddyLoey-h0WR9weY').makeCopy();
var doc = DocumentApp.openById(copyFile.getId());
var body = doc.getBody();
// replace placeholders with data
body.replaceText('{matName}', text.matName);
body.replaceText('{disposeWeek}', text.disposeWeek);
body.replaceText('{prepper}', text.prepper);
body.replaceText('{className}', text.className);
body.replaceText('{hazards}', text.hazards);
// set font size for 'matName'
body.findText(text.matName).getElement().asText().setFontSize(size);
// make Italics, Bold and Underline
handle_tags(['<i>', '</i>'], body);
handle_tags(['<b>', '</b>'], body);
handle_tags(['<u>', '</u>'], body);
// save the doc
doc.saveAndClose();
// delete the copy
copyFile.setTrashed(true);
// return blob
return docblob = doc.getBlob().setName('Label.pdf');
}
// this function formats the text beween html tags
function handle_tags(tags, body) {
var start_tag = tags[0].toLowerCase();
var end_tag = tags[1].toLowerCase();
var found = body.findText(start_tag);
while (found) {
var elem = found.getElement();
var start = found.getEndOffsetInclusive();
var end = body.findText(end_tag, found).getStartOffset()-1;
switch (start_tag) {
case '<b>': elem.setBold(start, end, true); break;
case '<i>': elem.setItalic(start, end, true); break;
case '<u>': elem.setUnderline(start, end, true); break;
}
found = body.findText(start_tag, found);
}
body.replaceText(start_tag, '');
body.replaceText(end_tag, '');
}
// this funcion takes saved doc and returns the number of its pages
function getNumPages(doc) {
var blob = doc.getAs('application/pdf');
var data = blob.getDataAsString();
var pages = parseInt(data.match(/ \/N (\d+) /)[1], 10);
Logger.log("pages = " + pages);
return pages;
}
It looks rather awful and hopeless. It turned out that Google Docs has no page number counter. You need to convert your document into a PDF and to count pages of the PDF file. Gross!
Next problem, even if you managed somehow to count the pages, you have no clue which of the cells was overflowed. This script takes just one cell, changes its font size, counts pages, changes the font size again, etc. But it doesn't granted a success, because there can be another cell with long text inside. You can reduce font size of all the texts, but it doesn't look like a great idea as well.

Get pdf-attachments from Gmail as text

I searched around the web & Stack Overflow but didn't find a solution. What I try to do is the following: I get certain attachments via mail that I would like to have as (Plain) text for further processing. My script looks like this:
function MyFunction() {
var threads = GmailApp.search ('label:templabel');
var messages = GmailApp.getMessagesForThreads(threads);
for (i = 0; i < messages.length; ++i)
{
j = messages[i].length;
var messageBody = messages[i][0].getBody();
var messageSubject = messages [i][0].getSubject();
var attach = messages [i][0].getAttachments();
var attachcontent = attach.getContentAsString();
GmailApp.sendEmail("mail", messageSubject, "", {htmlBody: attachcontent});
}
}
Unfortunately this doesn't work. Does anybody here have an idea how I can do this? Is it even possible?
Thank you very much in advance.
Best, Phil
Edit: Updated for DriveApp, as DocsList deprecated.
I suggest breaking this down into two problems. The first is how to get a pdf attachment from an email, the second is how to convert that pdf to text.
As you've found out, getContentAsString() does not magically change a pdf attachment to plain text or html. We need to do something a little more complicated.
First, we'll get the attachment as a Blob, a utility class used by several Services to exchange data.
var blob = attachments[0].getAs(MimeType.PDF);
So with the second problem separated out, and maintaining the assumption that we're interested in only the first attachment of the first message of each thread labeled templabel, here is how myFunction() looks:
/**
* Get messages labeled 'templabel', and send myself the text contents of
* pdf attachments in new emails.
*/
function myFunction() {
var threads = GmailApp.search('label:templabel');
var threadsMessages = GmailApp.getMessagesForThreads(threads);
for (var thread = 0; thread < threadsMessages.length; ++thread) {
var message = threadsMessages[thread][0];
var messageBody = message.getBody();
var messageSubject = message.getSubject();
var attachments = message.getAttachments();
var blob = attachments[0].getAs(MimeType.PDF);
var filetext = pdfToText( blob, {keepTextfile: false} );
GmailApp.sendEmail(Session.getActiveUser().getEmail(), messageSubject, filetext);
}
}
We're relying on a helper function, pdfToText(), to convert our pdf blob into text, which we'll then send to ourselves as a plain text email. This helper function has a variety of options; by setting keepTextfile: false, we've elected to just have it return the text content of the PDF file to us, and leave no residual files in our Drive.
pdfToText()
This utility is available as a gist. Several examples are provided there.
A previous answer indicated that it was possible to use the Drive API's insert method to perform OCR, but it didn't provide code details. With the introduction of Advanced Google Services, the Drive API is easily accessible from Google Apps Script. You do need to switch on and enable the Drive API from the editor, under Resources > Advanced Google Services.
pdfToText() uses the Drive service to generate a Google Doc from the content of the PDF file. Unfortunately, this contains the "pictures" of each page in the document - not much we can do about that. It then uses the regular DocumentService to extract the document body as plain text.
/**
* See gist: https://gist.github.com/mogsdad/e6795e438615d252584f
*
* Convert pdf file (blob) to a text file on Drive, using built-in OCR.
* By default, the text file will be placed in the root folder, with the same
* name as source pdf (but extension 'txt'). Options:
* keepPdf (boolean, default false) Keep a copy of the original PDF file.
* keepGdoc (boolean, default false) Keep a copy of the OCR Google Doc file.
* keepTextfile (boolean, default true) Keep a copy of the text file.
* path (string, default blank) Folder path to store file(s) in.
* ocrLanguage (ISO 639-1 code) Default 'en'.
* textResult (boolean, default false) If true and keepTextfile true, return
* string of text content. If keepTextfile
* is false, text content is returned without
* regard to this option. Otherwise, return
* id of textfile.
*
* #param {blob} pdfFile Blob containing pdf file
* #param {object} options (Optional) Object specifying handling details
*
* #returns {string} id of text file (default) or text content
*/
function pdfToText ( pdfFile, options ) {
// Ensure Advanced Drive Service is enabled
try {
Drive.Files.list();
}
catch (e) {
throw new Error( "To use pdfToText(), first enable 'Drive API' in Resources > Advanced Google Services." );
}
// Set default options
options = options || {};
options.keepTextfile = options.hasOwnProperty("keepTextfile") ? options.keepTextfile : true;
// Prepare resource object for file creation
var parents = [];
if (options.path) {
parents.push( getDriveFolderFromPath (options.path) );
}
var pdfName = pdfFile.getName();
var resource = {
title: pdfName,
mimeType: pdfFile.getContentType(),
parents: parents
};
// Save PDF to Drive, if requested
if (options.keepPdf) {
var file = Drive.Files.insert(resource, pdfFile);
}
// Save PDF as GDOC
resource.title = pdfName.replace(/pdf$/, 'gdoc');
var insertOpts = {
ocr: true,
ocrLanguage: options.ocrLanguage || 'en'
}
var gdocFile = Drive.Files.insert(resource, pdfFile, insertOpts);
// Get text from GDOC
var gdocDoc = DocumentApp.openById(gdocFile.id);
var text = gdocDoc.getBody().getText();
// We're done using the Gdoc. Unless requested to keepGdoc, delete it.
if (!options.keepGdoc) {
Drive.Files.remove(gdocFile.id);
}
// Save text file, if requested
if (options.keepTextfile) {
resource.title = pdfName.replace(/pdf$/, 'txt');
resource.mimeType = MimeType.PLAIN_TEXT;
var textBlob = Utilities.newBlob(text, MimeType.PLAIN_TEXT, resource.title);
var textFile = Drive.Files.insert(resource, textBlob);
}
// Return result of conversion
if (!options.keepTextfile || options.textResult) {
return text;
}
else {
return textFile.id
}
}
The conversion to DriveApp is helped with this utility from Bruce McPherson:
// From: http://ramblings.mcpher.com/Home/excelquirks/gooscript/driveapppathfolder
function getDriveFolderFromPath (path) {
return (path || "/").split("/").reduce ( function(prev,current) {
if (prev && current) {
var fldrs = prev.getFoldersByName(current);
return fldrs.hasNext() ? fldrs.next() : null;
}
else {
return current ? null : prev;
}
},DriveApp.getRootFolder());
}