Monday 28 September 2015

Powershell script to convert .doc to .docx in Sharepoint library

Problem/Issue

Most of the Word documents migrated from SharePoint 2007 to SharePoint 2010 have the .doc extension. I want to roll out Office Web Apps to our SharePoint instance. The only trouble is that a lot of files are still in .doc format, and when we try to edit or open them in Office Web Apps, it has to convert them to .docx first. If we do it manually, it makes a copy of the file and adds "(converted)" to the file name. We would then have to remove the file with the .doc extension and rename the newly copied file, which is impractical because we have thousands of documents in each library.

Solution:

I have written a PowerShell script/tool that uses Word Automation Services to achieve this functionality, and it worked like a charm!

Below are the steps, followed by the scripts needed to achieve this functionality.

Documentation Steps:

  • Copy the below files to WFE server
1.    ScriptToConvertDoctoDocx.bat
2.    ScriptToConvertDoctoDocx.ps1
3.    updateWordautomationservice.bat
4.    updateWordautomationservice.ps1

  • Execute the batch file “updateWordautomationservice.bat”. This disables sandbox mode for the Word Automation Services application (it sets DisableSandbox to true).
  •  Do an IIS Reset
  • Execute the batch file “ScriptToConvertDoctoDocx.bat”
During its execution it will prompt user to enter site URL, Library Name
For Ex: If we want to convert documents in the Library “MigratedImages” which is located in site https://test.com/departments/test, then we need to enter the values as below
 

It will result as below
 


If the library has no .doc files to convert, the script displays an error saying that the conversion job cannot start because it has no files to convert.

Note: The converted document keeps its original metadata (Author, Editor, Created, Modified) after conversion.
This script automatically deletes the original .doc file after a successful conversion.

Code Files:
1.    ScriptToConvertDoctoDocx.bat


 cd /d %~dp0

powershell -noexit -file    ".\ScriptToConvertDoctoDocx.ps1"

pause


2.    ScriptToConvertDoctoDocx.ps1
 
 Add-PsSnapin Microsoft.SharePoint.PowerShell

# Input parameters for the script
$wasp = Get-SPServiceApplicationProxy | where { $_.TypeName -eq "Word Automation Services Proxy" }
$job = New-Object Microsoft.Office.Word.Server.Conversions.ConversionJob($wasp)
$websiteurl = read-host "Enter site URL"
$web = Get-SPWeb $websiteurl
$job.UserToken = $web.CurrentUser.UserToken
$job.Name = "Convert Documents from DOC to DOCX"
$job.Settings.OutputFormat = [Microsoft.Office.Word.Server.Conversions.SaveFormat]::Document
$job.Settings.OutputSaveBehavior = [Microsoft.Office.Word.Server.Conversions.SaveBehavior]::AlwaysOverwrite

$LibraryName = read-host "Enter Library Name"
# Copies the Author, Editor, Created and Modified values from an original .doc
# file onto its converted .docx sibling.
#
# Parameters:
#   $MetaFile - the original .doc file (an SPFile obtained from $web).
#
# The converted file is expected next to the original, with the same name plus
# a trailing "x". It is resolved directly by URL rather than by scanning the
# whole library by file name, so a same-named .docx in a different folder is
# never touched and no library-wide query is issued per file.
# Relies on the script-level $web variable.
Function CopyMetadata($MetaFile)
{
    $sourceItem = $MetaFile.Item

    # Sibling URL: "<folder>/<name>.doc" + "x" -> "<folder>/<name>.docx"
    $convertedUrl = $MetaFile.ServerRelativeUrl + "x"
    $convertedFile = $web.GetFile($convertedUrl)

    if (($convertedFile -eq $null) -or (-not $convertedFile.Exists))
    {
        write-host -Fore Yellow "Converted file not found, metadata not copied: $convertedUrl"
        return
    }

    $convertedItem = $convertedFile.Item
    $convertedItem["Author"] = $sourceItem["Author"]
    $convertedItem["Editor"] = $sourceItem["Editor"]
    $convertedItem["Created"] = $sourceItem["Created"]
    $convertedItem["Modified"] = $sourceItem["Modified"]
    $convertedItem.Update()
}

# Queues every .doc file in the given folder, and recursively in its
# subfolders, for conversion to .docx in the same location.
#
# Parameters:
#   $folder - URL of the folder to process, as accepted by $web.GetFolder().
#
# Relies on the script-level $web and $job variables.
Function ProcessFolder([string]$folder)
{
    $inputFolder = $web.GetFolder($folder)

    foreach ($file in $inputFolder.Files)
    {
        $inputFile = $file.Name

        # GetExtension returns "" for names without a dot instead of throwing,
        # so a stale extension from the previous file can never be reused.
        $fileextension = [System.IO.Path]::GetExtension($inputFile)

        if ($fileextension -eq ".doc")
        {
            # Swap only the trailing extension. A plain Replace(".doc", ".docx")
            # would also rewrite ".doc" inside the name (e.g. "my.document.doc")
            # and would leave upper-case ".DOC" unchanged, making the output URL
            # identical to the input URL.
            $outputFile = [System.IO.Path]::ChangeExtension($inputFile, ".docx")

            $inputfileurl = $web.Url + "/" + $inputFolder.Url + "/" + $inputFile
            $outputfileurl = $web.Url + "/" + $inputFolder.Url + "/" + $outputFile

            # Register the input/output pair with the conversion job.
            $job.AddFile($inputfileurl, $outputfileurl)
        }
    }

    foreach ($subfolder in $inputFolder.SubFolders)
    {
        # Pass the folder URL explicitly rather than relying on the implicit
        # string conversion of the folder object.
        ProcessFolder $subfolder.Url
    }
}
# Queue every .doc file found in the library (and its subfolders).
ProcessFolder $LibraryName

# Start() fails when the job cannot be started (for example when no files were
# queued). In that case skip polling and reporting instead of running them
# against a job that never started.
$jobStarted = $false
try
{
    $job.Start()
    $jobStarted = $true
}
catch
{
    write-host -Fore Red "Conversion job could not be started (the library may contain no .doc files): $($_.Exception.Message)"
}

if ($jobStarted)
{
    # Trigger the Word Automation Services timer job now
    # (presumably so the queued job is picked up without waiting for its schedule).
    Start-SPTimerJob "Word Automation Services"

    [bool]$done = $false
    write-host "Converting files - Please wait..."
    while (!$done)
    {
        # Poll the job status every 5 seconds.
        Start-Sleep -s 5
        $status = New-Object Microsoft.Office.Word.Server.Conversions.ConversionJobStatus($wasp.Id, $job.JobId, $null)

        # The job is finished once every item has succeeded, failed or been canceled.
        if ($status.Count -eq ($status.Succeeded + $status.Failed + $status.Canceled))
        {
            $done = $true

            # Delete the original .doc files that were successfully converted to .docx.
            # Remove the Delete() call below if you want to keep the original documents.
            $itemType = [Microsoft.Office.Word.Server.Conversions.ItemTypes]::Succeeded
            $items = $status.GetItems($itemType)
            write-host "Below files Successfully converted from doc to docx"
            foreach ($item in $items)
            {
                $file = $web.GetFile($item.InputFile)
                write-host -Fore Green $file.Name

                # Carry the original metadata over before the source file is removed.
                CopyMetadata $file
                $file.Delete()
            }

            # Report the files that could not be converted; they are left in place.
            $itemType = [Microsoft.Office.Word.Server.Conversions.ItemTypes]::Failed
            $items = $status.GetItems($itemType)
            if ($items.Count -gt 0)
            {
                write-host "Below files failed to convert from doc to docx"
                foreach ($item in $items)
                {
                    $file = $web.GetFile($item.InputFile)
                    write-host -Fore Red $file
                }
            }
        }
    }

    write-host "Conversion operation complete - Status report:"
    $status
}

# Release the web opened by the script.
$web.Dispose()


Remove-PsSnapin Microsoft.SharePoint.PowerShell


3.    updateWordautomationservice.bat

:: Switch to the folder this batch file lives in so the relative .ps1 path below resolves.
:: The path is quoted so folders containing characters such as '&' do not break the command.
cd /d "%~dp0"

:: -noexit keeps the PowerShell window open after the script finishes so its output can be read.
powershell -noexit -file    ".\updateWordautomationservice.ps1"

pause

 
4.    updateWordautomationservice.ps1

 Add-PsSnapin Microsoft.SharePoint.PowerShell

$sp = Get-SPServiceApplication | where {$_.TypeName.Equals("Word Automation Services")}
$sp.DisableSandbox = $true
$sp.Update()
 
Remove-PsSnapin Microsoft.SharePoint.PowerShell

2 comments:

  1. Can use this script in Sharepoint 2013 on-premise?

    ReplyDelete
    Replies
    1. You can use it on on-premise SharePoint 2013 as well. But, try on dev site first before executing on prod :)

      Delete