blob: 8dee28c02a5bd83553858fe7937885e5673be801 [file] [log] [blame]
# *************************************************************
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# *************************************************************
import getopt,sys
import uno
from unohelper import Base,systemPathToFileUrl, absolutize
from os import getcwd
from com.sun.star.beans import PropertyValue
from com.sun.star.beans.PropertyState import DIRECT_VALUE
from com.sun.star.uno import Exception as UnoException
from com.sun.star.io import IOException,XInputStream, XOutputStream
class OutputStream(Base, XOutputStream):
    """UNO output stream that forwards every written chunk to stdout.

    main() passes an instance as the "OutputStream" store property, so the
    data produced by the export filter ends up on this process's standard
    output.
    """

    def __init__(self):
        # Becomes 1 once closeOutput() has been called. Nothing in this
        # class reads the flag back.
        self.closed = 0

    def closeOutput(self):
        """Record that the stream was closed; stdout itself is left open."""
        self.closed = 1

    def writeBytes(self, seq):
        """Write the payload of *seq* (its ``value`` attribute) to stdout.

        Under Python 3 the payload is a bytes object, which the text-mode
        ``sys.stdout.write`` rejects with a TypeError, so the data is sent
        to the underlying binary buffer when one is available.
        """
        data = seq.value
        raw = getattr(sys.stdout, "buffer", None)
        if raw is not None and isinstance(data, (bytes, bytearray)):
            # Push out text still pending in the text layer first so the
            # output order matches the order of the write calls.
            sys.stdout.flush()
            raw.write(data)
        else:
            # Python 2 (str is bytes) or a replaced stdout without a
            # binary buffer: keep the original behaviour.
            sys.stdout.write(data)

    def flush(self):
        """Do nothing; no explicit flush is performed on request."""
        pass
def main():
    """Convert every file named on the command line and stream it to stdout.

    Options (parsed with getopt from sys.argv):
      -h / --help                  print the usage text and exit (status 0)
      -c / --connection-string=S   connect via "uno:" + S + ";urp;StarOffice.ComponentContext"
      --html                       export with the "HTML (StarWriter)" filter
                                   instead of "Text (Encoded)"

    Each file is loaded hidden through the remote desktop, stored to
    "private:stream" with an OutputStream instance as the sink, and then
    disposed. Conversion errors are reported on stderr and do not stop the
    remaining files.

    Always leaves through sys.exit(): status 1 if any UNO or option-parsing
    error occurred, otherwise 0. Calling without file arguments prints the
    usage text and exits with status 0.
    """
    retVal = 0
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "hc:", ["help", "connection-string=", "html"])
        url = "uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext"
        filterName = "Text (Encoded)"
        for o, a in opts:
            if o in ("-h", "--help"):
                usage()
                sys.exit()
            if o in ("-c", "--connection-string"):
                url = "uno:" + a + ";urp;StarOffice.ComponentContext"
            if o == "--html":
                filterName = "HTML (StarWriter)"

        # NOTE(review): this goes to stdout, ahead of the extracted content.
        # Kept as-is for compatibility; confirm whether consumers rely on it.
        print(filterName)
        if not args:
            usage()
            sys.exit()

        # Bootstrap a local context only to obtain the URL resolver, then
        # talk to the remote office instance through the resolved context.
        ctxLocal = uno.getComponentContext()
        smgrLocal = ctxLocal.ServiceManager
        resolver = smgrLocal.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", ctxLocal)
        ctx = resolver.resolve(url)
        smgr = ctx.ServiceManager

        desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)

        cwd = systemPathToFileUrl(getcwd())
        outProps = (
            PropertyValue("FilterName", 0, filterName, 0),
            PropertyValue("OutputStream", 0, OutputStream(), 0))
        # The trailing comma makes this a one-element tuple.
        inProps = PropertyValue("Hidden", 0, True, 0),

        for path in args:
            # Reset per file: otherwise a failed load would leave the
            # previous (already disposed) document in 'doc' and it would be
            # disposed a second time below.
            doc = None
            try:
                fileUrl = uno.absolutize(cwd, systemPathToFileUrl(path))
                doc = desktop.loadComponentFromURL(fileUrl, "_blank", 0, inProps)

                if not doc:
                    raise UnoException("Couldn't open stream for unknown reason", None)

                doc.storeToURL("private:stream", outProps)
            except IOException as e:
                sys.stderr.write("Error during conversion: " + e.Message + "\n")
                retVal = 1
            except UnoException as e:
                sys.stderr.write("Error ("+repr(e.__class__)+") during conversion:" + e.Message + "\n")
                retVal = 1
            finally:
                # Release the document whether or not the export succeeded.
                if doc:
                    doc.dispose()

    except UnoException as e:
        sys.stderr.write("Error ("+repr(e.__class__)+") :" + e.Message + "\n")
        retVal = 1
    except getopt.GetoptError as e:
        sys.stderr.write(str(e) + "\n")
        usage()
        retVal = 1

    sys.exit(retVal)
def usage():
    """Write the command-line help text for ooextract.py to stderr."""
    sys.stderr.write(
        "usage: ooextract.py --help |\n"
        # The optional group is now closed and --html appears in the synopsis.
        " [-c <connection-string> | --connection-string=<connection-string>]\n"
        " [--html]\n"
        " file1 file2 ...\n"
        "\n"
        "Extracts plain text from documents and prints it to stdout.\n"
        "Requires an OpenOffice.org instance to be running. The script and the\n"
        "running OpenOffice.org instance must be able to access the file\n"
        "by the same system path.\n"
        "\n"
        "-c <connection-string> | --connection-string=<connection-string>\n"
        " The connection-string part of a uno url to where the\n"
        " script should connect to in order to do the conversion.\n"
        " The string defaults to socket,host=localhost,port=2002\n"
        "--html \n"
        " Instead of the text filter, the writer html filter is used\n"
    )
# Run the conversion only when executed as a script, so that importing this
# module (e.g. for inspection or testing) does not start a conversion and
# call sys.exit().
if __name__ == "__main__":
    main()