Monday 28 September 2015

Powershell script to convert .xls to xlsx in Sharepoint document library

Problem/Issue

Most of the excel files migrated from SharePoint 2007 to SharePoint 2010 have .xls extension. I want to roll out Office Web Apps to our SharePoint instance. The only trouble is that a lot of files were still in .xls format, and when we try to edit/open them in Office Web Apps it has to convert them to .xlsx first. If we do it manually , it makes a copy of the file and adds (converted) to the filename . We will have to remove the file with .xls extension and rename the newly copied file which was quite difficult, as we have thousands of documents in each library.

Solution:

I have written a powershell script/ tool to acheive this functionality and it worked like a charm!

The below are the steps , followed with the necessary scripts to achieve this functionality.


Execution Steps:
  • Copy the below files to WFE server
1.    Xlsfileconversionscript.bat
2.    Xlsfileconversionscript.ps1

  • Before executing the script we need to install excel application on the server(WFE) where we are executing the script
·       Execute the batch file “Xlsfileconversionscript.bat”.
·       During its execution it will prompt user to enter site URL, Library Name
For Ex: If we want to convert xls files in the Library “MigratedImages” which is located in site https://test.com/departments/test, then we need to enter the values as below


Code Files:
1.    Xlsfileconversionscript.bat


cd /d %~dp0

powershell -noexit -file    ".\
Xlsfileconversionscript.ps1"

pause


2.    Xlsfileconversionscript.ps1
 
 Add-PsSnapin Microsoft.SharePoint.PowerShell

# Input parameters for the script
$xlWorkbookDefault = 51
$xlExcel8 = 56
$xlFixedFormat = $xlWorkbookDefault


#$xlFixedFormat = [Microsoft.Office.Interop.Excel.XlFileFormat]::$xlExcel8


$excel = New-Object -ComObject excel.application
$excel.visible = $false


$websiteurl = read-host "Enter site URL"
$web = Get-SPWeb $websiteurl

$LibraryName = read-host "Enter Library Name"

Function CopyMetadata($MetaFile)
{

$searchfile = $MetaFile.file
$searchfileurl = $searchfile.Url
$searchfilename = $searchfileurl+"x"

$folder = $web.GetFolder($LibraryName)
$library = $folder.DocumentLibrary
$spQuery = New-Object Microsoft.SharePoint.SPQuery
$spQuery.ViewAttributes = "Scope='Recursive'"
$spQuery.Query = "<Where><Contains><FieldRef Name='FileLeafRef' /><Value Type='File'>.xls</Value></Contains></Where>";
$listItems = $library.GetItems($spQuery)
$targetfileItem = $MetaFile
foreach($item in $listItems)
{
$sourcefile = $item.file
$sourcefileurl = $sourcefile.Url

if($sourcefileurl -eq $searchfilename)
{

$item["Author"] = $targetfileItem["Author"]
$item["Editor"] = $targetfileItem["Editor"]
$item["Created"] = $targetfileItem["Created"]
$item["Modified"] = $targetfileItem["Modified"]
$item.Update()
$searchfile.Delete()
}
}

}
Function getallfiles($test)
{
write-host "copying Metadata information and deleting Original File"

$folder = $web.GetFolder($LibraryName)
$library = $folder.DocumentLibrary
$spQuery = New-Object Microsoft.SharePoint.SPQuery
$spQuery.ViewAttributes = "Scope='Recursive'"
$spQuery.Query = "<Where><Contains><FieldRef Name='FileLeafRef' /><Value Type='File'>.xls</Value></Contains></Where>";

$listItems = $library.GetItems($spQuery)

foreach($item in $listItems)
{
$filename = $item.file
$inputFile = $filename.name
$fileextension = $inputFile.substring($inputFile.lastindexof('.'),($inputFile.length - $inputFile.lastindexof('.')))
if($fileextension -eq ".xls")
{
CopyMetadata($item)

}
}

}

Function ProcessFolder([string]$folder)
 {
write-host "conversion job started"
$inputFolder = $web.GetFolder($folder)
$library = $inputFolder.DocumentLibrary
$spQuery = New-Object Microsoft.SharePoint.SPQuery
$spQuery.ViewAttributes = "Scope='Recursive'"
$spQuery.Query = "<Where><Contains><FieldRef Name='FileLeafRef' /><Value Type='File'>.xls</Value></Contains></Where>";

$listItems = $library.GetItems($spQuery)
foreach ($item in $listItems)
{
$fileobj = $item.file
$fileurl = $fileobj.Url
$inputFile = $fileobj.name
$fileextension = $inputFile.substring($inputFile.lastindexof('.'),($inputFile.length - $inputFile.lastindexof('.')))
if($fileextension -eq ".xls")
{

$outputFile = $inputFile.Replace(".xls",".xlsx")
$inputfileurl = $web.Url+"/"+$fileurl
$outputfileurl = $inputfileurl.Replace(".xls",".xlsx")
#Add input and output folders and start conversion
try
{
write-host -Fore Green "converting file :     $inputfileurl"
$workbook = $excel.workbooks.open($inputfileurl)
$workbook.saveas($outputfileurl, $xlFixedFormat)
$workbook.save()  
$workbook.close()
}
catch [system.exception]
{
   #Write-host "Exception occured while converting file : $($_.Exception.Message)"
write-host -Fore Red "File failed to convert : $inputfileurl"
   #do some error handling
}
catch
{
Write-error -Fore Red "Error occured while converting file $inputFile"
}


}

}

}
ProcessFolder($LibraryName)
$excel.displayalerts = $False
$excel.Quit()
$excel = $null
[gc]::collect()
[gc]::WaitForPendingFinalizers()
getallfiles("welcome")
write-host "Conversion operation complete "
$web.Dispose()
Remove-PsSnapin Microsoft.SharePoint.PowerShell


No comments:

Post a Comment