examples/char-rnn/data.py - singa - Git at Google

 #!/usr/bin/env python

 #/************************************************************
 #*
 #* Licensed to the Apache Software Foundation (ASF) under one
 #* or more contributor license agreements.  See the NOTICE file
 #* distributed with this work for additional information
 #* regarding copyright ownership.  The ASF licenses this file
 #* to you under the Apache License, Version 2.0 (the
 #* "License"); you may not use this file except in compliance
 #* with the License.  You may obtain a copy of the License at
 #*
 #*   http://www.apache.org/licenses/LICENSE-2.0
 #*
 #* Unless required by applicable law or agreed to in writing,
 #* software distributed under the License is distributed on an
 #* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 #* KIND, either express or implied.  See the License for the
 #* specific language governing permissions and limitations
 #* under the License.
 #*
 #*************************************************************/


 # Prerequisite: download linux_input.txt from
 # http://cs.stanford.edu/people/karpathy/char-rnn/ into the working directory.
 # Build the character vocabulary for the char-rnn example.
 #
 # Reads the whole corpus from linux_input.txt (looked up relative to the
 # current working directory), reports its size, and writes every distinct
 # character, concatenated into a single string, to vocab.txt.
 with open('linux_input.txt', 'r') as fin:  # closes the handle when done
   data = fin.read()  # should be simple plain text file
 # Sort the unique characters: set iteration order is arbitrary and can differ
 # between interpreter runs, which would make vocab.txt non-reproducible.
 chars = sorted(set(data))
 data_size, vocab_size = len(data), len(chars)
 # A parenthesised single-argument print is valid both as a Python 2 statement
 # and as the Python 3 function, and emits the same text in either.
 print('data has %d characters, %d unique.' % (data_size, vocab_size))
 with open('vocab.txt', 'w') as fd:
   # No explicit flush needed: leaving the with-block flushes and closes fd.
   fd.write("".join(chars))
	#!/usr/bin/env python

	#/************************************************************
	#*
	#* Licensed to the Apache Software Foundation (ASF) under one
	#* or more contributor license agreements. See the NOTICE file
	#* distributed with this work for additional information
	#* regarding copyright ownership. The ASF licenses this file
	#* to you under the Apache License, Version 2.0 (the
	#* "License"); you may not use this file except in compliance
	#* with the License. You may obtain a copy of the License at
	#*
	#* http://www.apache.org/licenses/LICENSE-2.0
	#*
	#* Unless required by applicable law or agreed to in writing,
	#* software distributed under the License is distributed on an
	#* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	#* KIND, either express or implied. See the License for the
	#* specific language governing permissions and limitations
	#* under the License.
	#*
	#*************************************************************/


	# Prerequisite: download linux_input.txt from
	# http://cs.stanford.edu/people/karpathy/char-rnn/ into the working directory.
	# Build the character vocabulary for the char-rnn example.
	#
	# Reads the whole corpus from linux_input.txt (looked up relative to the
	# current working directory), reports its size, and writes every distinct
	# character, concatenated into a single string, to vocab.txt.
	with open('linux_input.txt', 'r') as fin:  # closes the handle when done
		data = fin.read()  # should be simple plain text file
	# Sort the unique characters: set iteration order is arbitrary and can differ
	# between interpreter runs, which would make vocab.txt non-reproducible.
	chars = sorted(set(data))
	data_size, vocab_size = len(data), len(chars)
	# A parenthesised single-argument print is valid both as a Python 2 statement
	# and as the Python 3 function, and emits the same text in either.
	print('data has %d characters, %d unique.' % (data_size, vocab_size))
	with open('vocab.txt', 'w') as fd:
		# The write must be nested under the with-statement (the original body
		# had lost its indentation); leaving the block flushes and closes fd.
		fd.write("".join(chars))