Extract in-text citation inside word documents - vba

my purpose is to find author and the year (four digit) inside parenthesis in a word document through regex or wildcards and format the color (change from black to red) to select them through select similar text
my sample to change italic and only all text inside parenthesis:
Dim Rng As Range
Dim n As Long
Application.ScreenUpdating = False
n = Selection.End
With Selection.Find
.MatchWildcards = True
.ClearFormatting
.Wrap = wdFindStop
.text = "\(*\)"
Do While .Execute
Set Rng = Selection.Range
If Rng.Start > n Then Exit Do
Rng.MoveStart unit:=wdCharacter, count:=1
Rng.MoveEnd unit:=wdCharacter, count:=-1
Rng.Font.Italic = True
Loop
End With
Application.ScreenUpdating = True
End Sub

The following code may point you in the right direction
Option Explicit
Sub Test()
SetAuthorTextColour WdColorIndex.wdRed
End Sub
Sub SetAuthorTextColour(ByVal ipColour As WdColorIndex)
Dim myText As Word.Range
With ActiveDocument.StoryRanges(wdMainTextStory)
Do
With .Find
.MatchWildcards = True
.Text = "([(])(*)([0123456789]{4,4})(*)([)])"
.ClearFormatting
.Format = True
.Wrap = wdFindStop
.Execute
End With
If .Find.Found Then
Set myText = .Duplicate
myText.MoveStart unit:=wdCharacter, Count:=1
myText.MoveEnd unit:=wdCharacter, Count:=-1
' you may wish to change to using colorindex as
' I think color is deprecated.
myText.Font.ColorIndex = ipColour
End If
Loop While .Find.Found
End With
End Sub

Related

Word VBA copy text formatted text in a certain font to a file and other formatting in other file

From a comparison docx file I need to extract into two word files the text formatted as strikethrough in one docx file and the text formatted as double underline in another docx file to be able to perform the wordcount of newly inserted and deleted text separately.
To do this, I wrote this macro, that actually activates the correct files, but only copies and pastes the formatting resulting from the first search.
Sub WSC_extraction_for_wordcount()
'This macro extracts double underlined text to the file "target_ins"
'This macro extracts strikethrough text to the file "target_del"
Application.ScreenUpdating = False
Selection.HomeKey Unit:=wdStory
Selection.Find.ClearFormatting
'STRIKETHROUGH processing
Do
With Selection.Find.Font
.StrikeThrough = True 'Then
Selection.Find.Execute FindText:="", Forward:=True, Format:=True
Selection.Cut
Windows("target_del.docx").Activate
Selection.PasteAndFormat (wdPasteDefault)
Selection.TypeParagraph
Windows("source.docx").Activate
End With
'DOUBLE UNDERLINE processing
With Selection.Find.Font
.Underline = wdUnderlineDouble = True 'Then
Selection.Find.Execute FindText:="", Forward:=True, Wrap:=wdFindContinue, Format:=True
Selection.Cut
Windows("target_ins.docx").Activate
Selection.PasteAndFormat (wdPasteDefault)
Selection.TypeParagraph
Windows("source.docx").Activate
End With
Loop
End Sub
I would be grateful if someone could help me in transforming the options into something like: if the next sentence you encounter is formatted as strikethrough, copy it to file target_del, if the next sentence you encounter is formatted as double underlined, copy it to the file target_ins.
Thank you in advance!
The code below avoids the use of the Selection object. It also assumes that the documents the text is to be moved to are already open.
Sub WSC_extraction_for_wordcount()
'This macro extracts double underlined text to the file "target_ins"
'This macro extracts strikethrough text to the file "target_del"
Application.ScreenUpdating = False
Dim source As Document: Set source = ActiveDocument
Dim targetDel As Document: Set targetDel = Documents("target_del.docx")
Dim targetIns As Document: Set targetIns = Documents("target_ins.docx")
'STRIKETHROUGH processing
With source.Content
With .Find
.ClearFormatting
.Text = ""
.Replacement.ClearFormatting
.Text = ""
.Forward = True
.Format = True
.Wrap = wdFindStop
.Font.StrikeThrough = True
End With
Do While .Find.Execute
targetDel.Characters.Last.FormattedText = .FormattedText
targetDel.Characters.Last.InsertParagraphAfter
.Delete
Loop
End With
'DOUBLE UNDERLINE processing
With source.Content
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Format = True
.Font.Underline = wdUnderlineDouble
End With
Do While .Find.Execute
targetIns.Characters.Last.FormattedText = .FormattedText
targetIns.Characters.Last.InsertParagraphAfter
.Delete
Loop
End With
End Sub
Without the overhead of creating new documents:
Sub Demo()
Application.ScreenUpdating = False
Dim i As Long, j As Long
With ActiveDocument.Range
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Font.Underline = wdUnderlineDouble
.Text = ""
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindStop
End With
Do While .Find.Execute
i = i + .ComputeStatistics(wdStatisticWords)
If .Information(wdWithInTable) = True Then
If .End = .Cells(1).Range.End - 1 Then
.End = .Cells(1).Range.End
.Collapse wdCollapseEnd
If .Information(wdAtEndOfRowMarker) = True Then
.End = .End + 1
End If
End If
End If
If .End = ActiveDocument.Range.End Then Exit Do
.Collapse wdCollapseEnd
Loop
End With
With ActiveDocument.Range
With .Find
.ClearFormatting
.Font.StrikeThrough = True
.Forward = True
.Wrap = wdFindStop
End With
Do While .Find.Execute
j = j + .ComputeStatistics(wdStatisticWords)
If .Information(wdWithInTable) = True Then
If .End = .Cells(1).Range.End - 1 Then
.End = .Cells(1).Range.End
.Collapse wdCollapseEnd
If .Information(wdAtEndOfRowMarker) = True Then
.End = .End + 1
End If
End If
End If
If .End = ActiveDocument.Range.End Then Exit Do
.Collapse wdCollapseEnd
Loop
End With
Application.ScreenUpdating = True
MsgBox i & " words added." & vbCr & j & " words deleted."
End Sub

Highlighting specific words within a selected range

I am trying to select a range between two words, find a word within the found range and finally color that word.
In the image I want to select range between "Observation" and "Supporting Information" and then search for "Management" words and color them red.
With my code I am able to highlight the first occurrence of the word.
Sub RevisedFindIt4()
' Purpose: highlight the text between (but not including)
' the words "Observation:" and "Supporting Information:" if they both appear.
Dim rng1 As Range
Dim rng2 As Range
Dim rngFound As Range
On Error Resume Next
Application.DisplayAlerts = False
Application.ScreenUpdating = False
Set rng1 = ActiveDocument.Range
If rng1.Find.Execute(FindText:="Observation:") Then
Set rng2 = ActiveDocument.Range(rng1.End, ActiveDocument.Range.End)
If rng2.Find.Execute(FindText:="Supporting Information:") Then
Set rngFound = ActiveDocument.Range(rng1.End, rng2.Start)
If rngFound.Find.Execute(FindText:="Management") Then
rngFound.Select
Selection.Range.HighlightColorIndex = wdRed
End If
End If
End If
Selection.HomeKey wdStory
Application.ScreenUpdating = True
Application.DisplayAlerts = True
End Sub
A modified version of your code using Find to highlight the text.
Sub RevisedFindIt4()
' Purpose: highlight the text between (but not including)
' the words "Observation:" and "Supporting Information:" if they both appear.
Dim rng1 As Range
Dim rng2 As Range
Dim rngFound As Range
Dim highlightIndex As Long
Application.DisplayAlerts = False
Application.ScreenUpdating = False
'capture current highlight color so that it can be reset later
highlightIndex = Options.DefaultHighlightColorIndex
Options.DefaultHighlightColorIndex = wdRed
Set rng1 = ActiveDocument.Range
If rng1.Find.Execute(FindText:="Observation:") Then
Set rng2 = ActiveDocument.Range(rng1.End, ActiveDocument.Range.End)
If rng2.Find.Execute(FindText:="Supporting Information:") Then
Set rngFound = ActiveDocument.Range(rng1.End, rng2.Start)
With rngFound.Find
.Replacement.highlight = True
.Execute Replace:=wdReplaceAll, Forward:=True, FindText:="Management", ReplaceWith:="", Format:=True
End With
End If
End If
Options.DefaultHighlightColorIndex = highlightIndex
Application.ScreenUpdating = True
Application.DisplayAlerts = True
End Sub
The Find method in word can be a bit tricky to manage. What you want to achieve must be done with two searches inside a loop. The first search finds the next 'Observation:', the Second Finds the following 'Supporting Information:'. You then use the end of the first search and the start of the second search to generate the range that needs to be made 'wdRed'
The following code works well on my PC
Option Explicit
Sub RevisedFindIt4()
' Purpose: highlight the text between (but not including)
' the words "Observation:" and "Supporting Information:" if they both appear.
'Application.DisplayAlerts = False
'Application.ScreenUpdating = False
Dim myOuterRange As Word.Range
Set myOuterRange = ActiveDocument.StoryRanges(wdMainTextStory)
With myOuterRange
Do
With .Find
.ClearFormatting
.MatchWildcards = True
.Text = "(Observation)([: ]{1,})(^13)"
.Wrap = wdFindStop
If Not .Execute Then Exit Do
End With
Dim mystart As Long
mystart = .End
.Collapse direction:=wdCollapseEnd
.Move unit:=wdCharacter, Count:=1
myOuterRange.End = ActiveDocument.StoryRanges(wdMainTextStory).End
With .Find
.ClearFormatting
.MatchWildcards = True
.Text = "^13Supporting Information"
.Wrap = wdFindStop
If Not .Execute Then Exit Do
End With
Dim myEnd As Long
myEnd = .Start
ActiveDocument.Range(mystart, myEnd).Font.ColorIndex = wdRed
.Collapse direction:=wdCollapseEnd
.Move unit:=wdCharacter, Count:=1
myOuterRange.End = ActiveDocument.StoryRanges(wdMainTextStory).End
Loop
End With
'Application.ScreenUpdating = True
'Application.DisplayAlerts = True
End Sub
UPDATE
This is the code I first wrote. I blame a biscuit (cookie) shortage for misreading the post the second time and revising my code to the first provided.
Sub RevisedFindIt4()
' Purpose: highlight the text between (but not including)
' the words "Observation:" and "Supporting Information:" if they both appear.
'Application.DisplayAlerts = False
'Application.ScreenUpdating = False
Dim myOuterRange As Word.Range
Set myOuterRange = ActiveDocument.StoryRanges(wdMainTextStory)
With myOuterRange
Do
With .Find
.ClearFormatting
.MatchWildcards = True
.Text = "(Observation:)(*)(Supporting Information:)"
.Wrap = wdFindStop
If Not .Execute Then Exit Do
End With
Dim myInnerRange As Word.Range
Set myInnerRange = .Duplicate
With myInnerRange
With .Find
.Text = "Management"
.Replacement.Font.ColorIndex = wdRed
.Wrap = wdFindStop
.Execute Replace:=wdReplaceAll
End With
End With
.Collapse Direction:=wdCollapseEnd
.Move unit:=wdCharacter, Count:=1
myOuterRange.End = ActiveDocument.StoryRanges(wdMainTextStory).End
Loop
End With
'Application.ScreenUpdating = True
'Application.DisplayAlerts = True
End Sub

Removing hyperlinks in captions from text

I'm trying to remove all captions within a specific range (Page 4 until the end of the document).
I would like to remove only the hyperlink and not the text itself.
For example:
Some text here -> Some text here
after removing the hyperlink caption.
I tried:
Sub removeCaptions()
Dim rgePages As Range
Selection.GoTo What:=wdGoToPage, Which:=wdGoToAbsolute, Count:=4
Set rgePages = Selection.Range
lastPage = ActiveDocument.ActiveWindow.Panes(1).Pages.Count
Selection.GoTo What:=wdGoToPage, Which:=wdGoToAbsolute, Count:=lastPage
rgePages.End = Selection.Bookmarks("\Page").Range.End
With rgePages.Select
If Range.Style = "Caption" Then
Range.Delete
End If
End With
End Sub
I only get the range without removing the captions.
For example:
Sub Demo()
Application.ScreenUpdating = False
With ActiveDocument.Range
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = ""
.Replacement.Text = ""
.Format = True
.Forward = True
.Style = wdStyleCaption
.Wrap = wdFindStop
End With
Do While .Find.Execute
If .Information(wdActiveEndAdjustedPageNumber) > 3 Then .Fields.Unlink
.Collapse wdCollapseEnd
Loop
End With
Application.ScreenUpdating = True
End Sub

VBA Word: Applying Character Style to first two words of specific paragraph style

I would like to apply a character style ("Bold Italics") to the first two words of all paragraphs set in the "3 Species" style in MS Word (and later, I'd also like another macro to do same for all the words after the second tab in a different style). I know how to do all this in InDesign, but I'd like it set up in the original Word documents before they get flowed into InDesign.
I'm new at this and can't figure out how to apply it to only the first two words. I did get it to apply the character style to the whole paragraph or to a specific word in that style. It seems like it ought to be simple, but I've only learned to use find and replace type functions so far, and I imagine I will have to use the Range functions, which I don't understand yet. Thanks for any help!
Sub Add_Character_Style()
'
' Add_Character_Style Macro
'
Selection.Find.ClearFormatting
Selection.Find.Replacement.ClearFormatting
With Selection.Find
.Style = "3 Species"
.Text = ""
.Replacement.Text = ""
.Replacement.Style = "Bold Italics"
.Forward = True
.Wrap = wdFindContinue
.Format = True
.MatchCase = False
.MatchWholeWord = False
.MatchWildcards = False
.MatchSoundsLike = False
.MatchAllWordForms = False
End With
Selection.Find.Execute Replace:=wdReplaceAll
End Sub
For example:
Sub Demo()
Application.ScreenUpdating = False
With ActiveDocument.Range
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "<*>[,. ^s^t]#<*>"
.Style = "3 Species"
.Replacement.Text = ""
.Format = True
.Forward = True
.Wrap = wdFindStop
.MatchWildcards = True
End With
Do While .Find.Execute
.Style = "Bold Italics"
.Start = .Paragraphs(1).Range.End
Loop
End With
Application.ScreenUpdating = True
End Sub
Try this:
Sub Add_Character_Style()
Dim p As Paragraph
Dim doc As Document: Set doc = ActiveDocument
For Each p In doc.Paragraphs
p.Range.Select
Selection.Collapse Direction:=wdCollapseStart
Selection.MoveRight Unit:=wdWord, Count:=2, Extend:=wdExtend
With Selection
If .Style = "3 Species" Then .Style = "Bold Italic"
End With
Next p
End Sub
EDIT:
To avoid use of the Selection object (Timothy Rylatt)
Sub Add_Character_Style()
Dim p As Paragraph
Dim doc As Document: Set doc = ActiveDocument
Dim rng As Range
For Each p In doc.Paragraphs
If p.Range.Style = "3 Species" Then
Set rng = p.Range
With rng
.Collapse Direction:=wdCollapseStart
.MoveEnd Unit:=wdWord, Count:=2
.Style = "Bold Italics"
End With
End If
Next p
End Sub
FURTHER EDIT per macropod:
Sub Add_Character_Style()
Application.ScreenUpdating = False
Dim Para As Paragraph, Rng As Range
For Each Para In ActiveDocument.Paragraphs
With Para
If .Style = "3 Species" Then
If .Range.ComputeStatistics(wdStatisticWords) > 1 Then
Set Rng = .Range.Words.First
With Rng
Do While .ComputeStatistics(wdStatisticWords) < 2
.MoveEnd wdWord, 1
Loop
.Style = "Bold Italic"
End With
End If
End If
End With
Next
Application.ScreenUpdating = True
End Sub

Word VBA: Moving textstring from the end of a paragraph to the beginning of the paragraph

I'm new to VBA. I have several long documents where a citation or a document number appears at the end of a paragraph. Luckily, these citations and document are enclosed in parentheses, which should make it easy to isolate. I need to move the content of those parentheses (including the parentheses themselves) to the front of each paragraph and then add two spaces after the closing parenthesis.
For example:
This is my text in Paragraph 1. (http://nytimes.com)
This is my text in Paragraph 2. (1.b.3B)
Should look like:
(http://nytimes.com) This is my text in Paragraph 1.
(1.b.3B) This is my text in Paragraph 2.
I found the answer in the following link useful, but can't seem to apply it to my case: Get paragraph no where txt is found, and move text to end of paragraph using Word 2010 vba
Many thanks in advance.
Here's what I have up to now, but the script just doesn't seem to run:
Sub Test1()
Dim currDoc As Document
Set currDoc = ActiveDocument
Dim docRng As Range, currRng As Range, strRng As Range
Set docRng = ActiveDocument.Content
Dim currPara As Paragraph
Dim strText As String
Selection.HomeKey Unit:=wdStory ' Start from the beginning of the doc.
For Each currPara In docRng.Paragraphs ' Loop through the paragraphs in the active document.
Set currRng = currDoc.Range(currPara.Range.Start, currPara.Range.End) ' Selects the current paragraph, so that the search is conducted paragraph by paragraph.
With Selection.Find
.ClearFormatting
.Text = "\(*\)"
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchCase = True
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.MatchWildcards = True
End With
If currRng.Find.Execute Then
With Selection
.Select
.Cut
.StartOf Unit:=wdParagraph
.Paste
.InsertAfter " "
End With
End If
Next currPara
End Sub
You were very close to correct solution to move simple text. But, what I realised, it was a problem to move hyperlinks as syntax "\(*\)" didn't recognise hyperlinks. Therefore I put some additional small modifications. That works for me in Word 2010:
Sub Test1_Tested_incl_Hyper()
Dim currDoc As Document
Set currDoc = ActiveDocument
Dim docRng As Range, currRng As Range, strRng As Range
Set docRng = ActiveDocument.Content
Dim currPara As Paragraph
Dim strText As String
Selection.HomeKey Unit:=wdStory ' Start from the beginning of the doc.
For Each currPara In docRng.Paragraphs ' Loop through the paragraphs in the active document.
Set currRng = currDoc.Range(currPara.Range.Start, currPara.Range.End) ' Selects the current paragraph, so that the search is conducted paragraph by paragraph.
currRng.Select
With Selection.Find
.ClearFormatting
.Text = "\("
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchCase = True
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.MatchWildcards = True
.Execute
End With
If Selection.Find.Found Then
With currDoc.Range(Selection.Range.Start, currPara.Range.End - 1)
.Select
.Cut
.StartOf Unit:=wdParagraph
.Paste
.InsertAfter " "
End With
End If
Next currPara
End Sub
EDIT- code for footers
Sub Test1_for_Footers()
Dim currDoc As Document
Set currDoc = ActiveDocument
Dim docRng As Range, currRng As Range, strRng As Range
Set docRng = ActiveDocument.StoryRanges(wdPrimaryFooterStory)
Dim currPara As Paragraph
Dim strText As String
For Each currPara In docRng.Paragraphs
currPara.Range.Select
With Selection.Find
.ClearFormatting
.Text = "\("
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchCase = True
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.MatchWildcards = True
.Execute
End With
If Selection.Find.Found Then
Selection.Extend ")"
With Selection
.Select
.Cut
.StartOf Unit:=wdParagraph
.Paste
.InsertAfter " "
End With
End If
Next currPara
End Sub