| # ************************************************************* |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| # ************************************************************* |
| |
| import getopt,sys |
| import uno |
| from unohelper import Base,systemPathToFileUrl, absolutize |
| from os import getcwd |
| |
| from com.sun.star.beans import PropertyValue |
| from com.sun.star.beans.PropertyState import DIRECT_VALUE |
| from com.sun.star.uno import Exception as UnoException |
| from com.sun.star.io import IOException,XInputStream, XOutputStream |
| |
| class OutputStream( Base, XOutputStream ): |
| def __init__( self ): |
| self.closed = 0 |
| |
| def closeOutput(self): |
| self.closed = 1 |
| |
| def writeBytes( self, seq ): |
| sys.stdout.write( seq.value ) |
| |
| def flush( self ): |
| pass |
| |
| |
| def main(): |
| retVal = 0 |
| doc = None |
| |
| try: |
| opts, args = getopt.getopt(sys.argv[1:], "hc:",["help", "connection-string=" , "html"]) |
| format = None |
| url = "uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext" |
| filterName = "Text (Encoded)" |
| for o, a in opts: |
| if o in ("-h", "--help"): |
| usage() |
| sys.exit() |
| if o in ("-c", "--connection-string" ): |
| url = "uno:" + a + ";urp;StarOffice.ComponentContext" |
| if o == "--html": |
| filterName = "HTML (StarWriter)" |
| |
| print(filterName) |
| if not len( args ): |
| usage() |
| sys.exit() |
| |
| ctxLocal = uno.getComponentContext() |
| smgrLocal = ctxLocal.ServiceManager |
| |
| resolver = smgrLocal.createInstanceWithContext( |
| "com.sun.star.bridge.UnoUrlResolver", ctxLocal ) |
| ctx = resolver.resolve( url ) |
| smgr = ctx.ServiceManager |
| |
| desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx ) |
| |
| cwd = systemPathToFileUrl( getcwd() ) |
| outProps = ( |
| PropertyValue( "FilterName" , 0, filterName , 0 ), |
| PropertyValue( "OutputStream",0, OutputStream(),0)) |
| inProps = PropertyValue( "Hidden" , 0 , True, 0 ), |
| for path in args: |
| try: |
| fileUrl = uno.absolutize( cwd, systemPathToFileUrl(path) ) |
| doc = desktop.loadComponentFromURL( fileUrl , "_blank", 0,inProps) |
| |
| if not doc: |
| raise UnoException( "Couldn't open stream for unknown reason", None ) |
| |
| doc.storeToURL("private:stream",outProps) |
| except IOException as e: |
| sys.stderr.write( "Error during conversion: " + e.Message + "\n" ) |
| retVal = 1 |
| except UnoException as e: |
| sys.stderr.write( "Error ("+repr(e.__class__)+") during conversion:" + e.Message + "\n" ) |
| retVal = 1 |
| if doc: |
| doc.dispose() |
| |
| except UnoException as e: |
| sys.stderr.write( "Error ("+repr(e.__class__)+") :" + e.Message + "\n" ) |
| retVal = 1 |
| except getopt.GetoptError as e: |
| sys.stderr.write( str(e) + "\n" ) |
| usage() |
| retVal = 1 |
| |
| sys.exit(retVal) |
| |
| def usage(): |
| sys.stderr.write( "usage: ooextract.py --help |\n"+ |
| " [-c <connection-string> | --connection-string=<connection-string>\n"+ |
| " file1 file2 ...\n"+ |
| "\n" + |
| "Extracts plain text from documents and prints it to stdout.\n" + |
| "Requires an OpenOffice.org instance to be running. The script and the\n"+ |
| "running OpenOffice.org instance must be able to access the file with\n"+ |
| "by the same system path.\n" |
| "\n"+ |
| "-c <connection-string> | --connection-string=<connection-string>\n" + |
| " The connection-string part of a uno url to where the\n" + |
| " the script should connect to in order to do the conversion.\n" + |
| " The strings defaults to socket,host=localhost,port=2002\n" |
| "--html \n" |
| " Instead of the text filter, the writer html filter is used\n" |
| ) |
| |
| main() |