# sections.py

#########################################################################
#                                                                       #
#                                                                       #
#   copyright 2002 Paul Henry Tremblay                                  #
#                                                                       #
#   This program is distributed in the hope that it will be useful,     #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of      #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU   #
#   General Public License for more details.                            #
#                                                                       #
#   You should have received a copy of the GNU General Public License   #
#   along with this program; if not, write to the Free Software         #
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA            #
#   02111-1307 USA                                                      #
#                                                                       #
#                                                                       #
#########################################################################
import os
import sys
import tempfile

from calibre.ebooks.rtf2xml import copy
class Sections:
    r"""
    =================
    Purpose
    =================
    Write section tags for a tokenized file. (This module won't be of any use
    to you unless you use it as part of the other modules.)

    ---------------
    Logic
    ---------------
    The tags for the first section breaks have already been written.

    RTF stores section breaks with the \sect tag. Each time this tag is
    encountered, add one to the counter.

    When I encounter the \sectd tag, I want to collect all the appropriate
    tokens that describe the section. When I reach a \pard, I know I can stop
    collecting tokens and write the section tags.

    The exception to this method occurs when sections occur in field blocks,
    such as the index. Normally, two section breaks occur within the index and
    other field-blocks. (If fewer or more section breaks occur, this code may
    not work.)

    I want the sections to occur outside of the index. That is, the index
    should be nested inside one section tag. After the index is complete, a
    new section should begin.

    In order to write the sections outside of the field blocks, I have to
    store all of the field block as a string. When I encounter the \sect tag,
    add one to the section counter, but store this number in a list. Likewise,
    store the information describing the section in another list.

    When I reach the end of the field block, choose the first item from the
    numbered list as the section number. Choose the first item in the
    description list as the values and attributes of the section. Enclose the
    field string between the section tags.

    Start a new section outside the field-block strings. Use the second number
    in the list; use the second item in the description list.

    CHANGE (2004-04-26) No longer write sections that occur in field-blocks.
    Instead, ignore all section information in a field-block.
    """

    def __init__(self,
                 in_file,
                 bug_handler,
                 copy=None,
                 run_level=1):
        """
        Required:
            'in_file' -- path of the tokenized file to parse
            'bug_handler' -- exception class raised when an internal
                inconsistency is detected
        Optional:
            'copy' -- whether to make a copy of the result for debugging
            'run_level' -- strictness; internal inconsistencies are only
                raised when this is greater than 3
        Returns:
            nothing
        """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__copy = copy
        self.__run_level = run_level
        # mkstemp creates the file atomically, which avoids the race that the
        # deprecated tempfile.mktemp is subject to. Only the path is needed
        # here, so the descriptor is closed right away.
        handle, self.__write_to = tempfile.mkstemp()
        os.close(handle)

    def __initiate_values(self):
        """
        Initiate all values.
        """
        self.__mark_start = 'mi<mk<sect-start\n'
        self.__mark_end = 'mi<mk<sect-end__\n'
        self.__in_field = 0
        self.__section_values = {}
        self.__list_of_sec_values = []
        self.__field_num = []
        self.__section_num = 0
        self.__state = 'before_body'
        self.__found_first_sec = 0
        self.__text_string = ''
        self.__field_instruction_string = ''
        # Initialised here so that every attribute a state handler may touch
        # exists before the first line is processed.
        self.__sec_in_field_string = ''
        # state name -> handler called with each line while in that state
        self.__state_dict = {
            'before_body': self.__before_body_func,
            'body': self.__body_func,
            'before_first_sec': self.__before_first_sec_func,
            'section': self.__section_func,
            'section_def': self.__section_def_func,
            'sec_in_field': self.__sec_in_field_func,
        }
        # token -> handler, consulted while in the 'body' state
        # (example line: cw<sc<sect-defin<nu<true)
        self.__body_dict = {
            'cw<sc<section___': self.__found_section_func,
            'mi<mk<sec-fd-beg': self.__found_sec_in_field_func,
            'cw<sc<sect-defin': self.__found_section_def_bef_sec_func,
        }
        # token -> (handler, readable attribute name), consulted while in the
        # 'section_def' state
        self.__section_def_dict = {
            'cw<pf<par-def___': (self.__end_sec_def_func, None),
            'mi<mk<body-open_': (self.__end_sec_def_func, None),
            'cw<tb<columns___': (self.__attribute_func, 'columns'),
            'cw<pa<margin-lef': (self.__attribute_func, 'margin-left'),
            'cw<pa<margin-rig': (self.__attribute_func, 'margin-right'),
            'mi<mk<header-ind': (self.__end_sec_def_func, None),
            # premature endings: text (or control words that indicate text)
            # found before the paragraph definition
            'tx<nu<__________': (self.__end_sec_premature_func, None),
            'cw<ci<font-style': (self.__end_sec_premature_func, None),
            'cw<ci<font-size_': (self.__end_sec_premature_func, None),
        }
        # token -> handler, consulted while in the 'sec_in_field' state.
        # Since 2004-04-26 the section and section-definition tokens are no
        # longer handled inside a field block; only its end is detected.
        self.__sec_in_field_dict = {
            'mi<mk<sec-fd-end': self.__end_sec_in_field_func,
        }

    def __found_section_def_func(self, line):
        """
        Required:
            line -- the line to parse
        Returns:
            nothing
        Logic:
            I have found a section definition. Change the state to
            section_def (so subsequent lines will be processed as part of
            the section definition), and clear the section_values dictionary.
        """
        self.__state = 'section_def'
        self.__section_values.clear()

    def __attribute_func(self, line, name):
        """
        Required:
            line -- the line to be parsed
            name -- the changed, readable name (as opposed to the
                abbreviated one)
        Returns:
            nothing
        Logic:
            Add the data to the section values dictionary so it can be
            retrieved later. The key is the readable name; the value is the
            text from offset 20 up to (not including) the last character.
            ex: cw<tb<columns___<nu<2
        """
        self.__section_values[name] = line[20:-1]

    def __found_section_func(self, line):
        """
        Requires:
            line -- the line to parse
        Returns:
            nothing
        Logic:
            I have found the beginning of a section, so change the state
            accordingly, write the line out, and add one to the section
            counter.
        """
        self.__state = 'section'
        self.__write_obj.write(line)
        self.__section_num += 1

    def __found_section_def_bef_sec_func(self, line):
        """
        Requires:
            line -- the line to parse
        Returns:
            nothing
        Logic:
            I have found a section definition in the body that was not
            preceded by a section token. Add one to the section counter,
            start the section definition, and write the line out.
        """
        self.__section_num += 1
        self.__found_section_def_func(line)
        self.__write_obj.write(line)

    def __section_func(self, line):
        """
        Requires:
            line -- the line to parse
        Returns:
            nothing
        Logic:
            After a section token, look for the section definition token.
            The line is always written out.
        """
        if self.__token_info == 'cw<sc<sect-defin':
            self.__found_section_def_func(line)
        self.__write_obj.write(line)

    def __section_def_func(self, line):
        """
        Required:
            line -- line to parse
        Returns:
            nothing
        Logic:
            I have found a section definition. Check if the line is the end
            of the definition (a paragraph definition), or if it contains
            info that should be added to the values dictionary. In every
            case the line itself is kept: it is added to the field string
            when inside a field block and written to the output otherwise.
        """
        action, name = self.__section_def_dict.get(
            self.__token_info, (None, None))
        if action:
            action(line, name)
            if self.__in_field:
                self.__sec_in_field_string += line
                return
        self.__write_obj.write(line)

    def __end_sec_def_func(self, line, name):
        """
        Requires:
            line -- the line to parse
            name -- changed, readable name (unused)
        Returns:
            nothing
        Logic:
            The end of the section definition has been found. Reset the
            state. Call on the write_section method.
        """
        if self.__in_field:
            self.__state = 'sec_in_field'
        else:
            self.__state = 'body'
        self.__write_section(line)

    def __end_sec_premature_func(self, line, name):
        r"""
        Requires:
            line -- the line to parse
            name -- changed, readable name (unused)
        Returns:
            nothing
        Logic:
            Text or control words indicating text have been found before
            \pard. This should indicate older RTF. Reset the state. Write
            the section definition. Insert a paragraph definition. Insert
            {} to mark the end of a paragraph definition.
        """
        if self.__in_field:
            self.__state = 'sec_in_field'
        else:
            self.__state = 'body'
        self.__write_section(line)
        self.__write_obj.write('cw<pf<par-def___<nu<true\n')
        self.__write_obj.write('ob<nu<open-brack<0000\n')
        self.__write_obj.write('cb<nu<clos-brack<0000\n')

    def __write_section(self, line):
        """
        Requires:
            line -- the line being parsed (unused)
        Returns:
            nothing
        Raises:
            the bug handler, when the state is neither 'body' nor
            'sec_in_field' and the run level is greater than 3
        Logic:
            Form a string of attributes and values. If you are not in a
            field block, write this string to the output file. Otherwise,
            call on the handle_sec_def method to handle this string.
        """
        parts = [self.__mark_start]
        if self.__found_first_sec:
            # every section but the first must close its predecessor
            parts.append('mi<tg<close_____<section\n')
        else:
            self.__found_first_sec = 1
        parts.append(
            'mi<tg<open-att__<section<num>%s' % str(self.__section_num))
        parts.append('<num-in-level>%s' % str(self.__section_num))
        parts.append('<type>rtf-native')
        parts.append('<level>0')
        for key, value in self.__section_values.items():
            parts.append('<%s>%s' % (key, value))
        parts.append('\n')
        parts.append(self.__mark_end)
        my_string = ''.join(parts)
        if self.__state == 'body':
            self.__write_obj.write(my_string)
        elif self.__state == 'sec_in_field':
            self.__handle_sec_def(my_string)
        elif self.__run_level > 3:
            msg = 'missed a flag\n'
            # call form of raise: valid on both Python 2 and Python 3
            raise self.__bug_handler(msg)

    def __handle_sec_def(self, my_string):
        """
        Requires:
            my_string -- the string of attributes and values (unused)
        Returns:
            nothing
        Logic:
            Append the dictionary of attributes and values to a list so it
            can be used later when the end of the field-block is reached.
        """
        # Store a snapshot: __section_values is cleared in place when the
        # next section definition starts, which would otherwise empty the
        # entry stored here as well.
        self.__list_of_sec_values.append(dict(self.__section_values))

    def __body_func(self, line):
        """
        Requires:
            line -- the line to parse
        Returns:
            nothing
        Logic:
            Look for the beginning of a section. Otherwise, print the line
            to the output file.
        """
        action = self.__body_dict.get(self.__token_info)
        if action:
            action(line)
        else:
            self.__write_obj.write(line)

    def __before_body_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Logic:
            Look for the beginning of the body. Always print out the line.
        """
        if self.__token_info == 'mi<mk<body-open_':
            self.__state = 'before_first_sec'
        self.__write_obj.write(line)

    def __before_first_sec_func(self, line):
        r"""
        Requires:
            line -- line to parse
        Returns:
            nothing
        Logic:
            Look for the beginning of the first section. This can be \sectd,
            but in older RTF it could be any paragraph definition or text.
            The line is always written out after any section tag.
        """
        token = self.__token_info
        if token == 'cw<sc<sect-defin':
            self.__state = 'section_def'
            self.__section_num += 1
            self.__section_values.clear()
        elif token in ('cw<pf<par-def___', 'tx<nu<__________'):
            self.__state = 'body'
            self.__section_num += 1
            self.__write_obj.write(
                'mi<tg<open-att__<section<num>%s'
                '<num-in-level>%s'
                '<type>rtf-native'
                '<level>0\n'
                % (str(self.__section_num), str(self.__section_num))
            )
            if token == 'tx<nu<__________':
                # Text appeared with no paragraph definition, so supply one.
                # NOTE(review): this literal lacks the '<nu' field used by
                # the other par-def tokens written in this class
                # ('cw<pf<par-def___<nu<true'); kept as-is -- confirm which
                # form downstream modules expect.
                self.__write_obj.write(
                    'cw<pf<par-def___<true\n'
                )
            self.__found_first_sec = 1
        self.__write_obj.write(line)

    def __found_sec_in_field_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Logic:
            I have found the beginning of a field that has a section (or
            really, two) inside of it. Change the state, and start adding
            to one long string.
        """
        self.__state = 'sec_in_field'
        self.__sec_in_field_string = line
        self.__in_field = 1

    def __sec_in_field_func(self, line):
        """
        Requires:
            line -- the line to parse
        Returns:
            nothing
        Logic:
            Check for the end of the field. CHANGED (2004-04-26): every
            other line is simply written out; section and section
            definition info inside the field is ignored.
        """
        action = self.__sec_in_field_dict.get(self.__token_info)
        if action:
            action(line)
        else:
            self.__write_obj.write(line)

    def __end_sec_in_field_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Logic:
            The end of the field block has been found. Change the state
            back to body and write the line out. CHANGED (2004-04-26): the
            field string is no longer wrapped in section tags here.
        """
        self.__state = 'body'
        self.__in_field = 0
        self.__write_obj.write(line)

    def __print_field_sec_attributes(self):
        """
        Requires:
            nothing
        Returns:
            nothing
        Logic:
            Get the number and dictionary of values from the lists. The
            number and dictionary will be the first item of each list.
            Write the close tag. Write the start tag. Write the attributes
            and values in the dictionary. Get rid of the first item in each
            list.
            Not called from within this class since the 2004-04-26 change.
        """
        num = self.__field_num[0]
        self.__field_num = self.__field_num[1:]
        self.__write_obj.write(
            'mi<tg<close_____<section\n'
            'mi<tg<open-att__<section<num>%s' % str(num)
        )
        if self.__list_of_sec_values:
            first_values = self.__list_of_sec_values[0]
            for key, value in first_values.items():
                self.__write_obj.write('<%s>%s\n' % (key, value))
            self.__list_of_sec_values = self.__list_of_sec_values[1:]
        self.__write_obj.write('<level>0')
        self.__write_obj.write('<type>rtf-native')
        self.__write_obj.write('<num-in-level>%s' % str(self.__section_num))
        self.__write_obj.write('\n')

    def __found_section_in_field_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Logic:
            I have found a section in a field block. Add one to the section
            counter, and append this number to a list.
            Not called from within this class since the 2004-04-26 change.
        """
        self.__section_num += 1
        self.__field_num.append(self.__section_num)
        self.__sec_in_field_string += line

    def __found_section_def_in_field_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Logic:
            I have found a section definition in a field block. Change the
            state and clear the values dictionary.
            Not called from within this class since the 2004-04-26 change.
        """
        self.__state = 'section_def'
        self.__section_values.clear()

    def __dispatch_line(self, line):
        """
        Requires:
            line -- the line to parse
        Returns:
            nothing
        Raises:
            the bug handler, when the current state has no handler and the
            run level is greater than 3
        Logic:
            Record the token (the first 16 characters of the line) and send
            the line to the handler registered for the current state.
        """
        self.__token_info = line[:16]
        action = self.__state_dict.get(self.__state)
        if action is None:
            msg = 'no matching state in module sections.py\n'
            sys.stderr.write(msg)
            sys.stderr.write(self.__state + '\n')
            if self.__run_level > 3:
                raise self.__bug_handler(msg)
            # Best effort at lower run levels: keep the line rather than
            # calling a handler that does not exist.
            self.__write_obj.write(line)
            return
        action(line)

    def make_sections(self):
        """
        Requires:
            nothing
        Returns:
            nothing (changes the original file)
        Logic:
            Read one line in at a time. Determine what action to take based
            on the state. If the state is before the body, look for the
            beginning of the body. If the state is body, send the line to
            the body method. When done, hand the temporary result to the
            Copy helper (optionally keeping a debugging copy named
            "sections.data") and remove the temporary file.
        """
        self.__initiate_values()
        # Context managers make sure both files are closed even when a
        # handler raises part-way through.
        with open(self.__file, 'r') as read_obj:
            with open(self.__write_to, 'w') as write_obj:
                self.__write_obj = write_obj
                for line in read_obj:
                    self.__dispatch_line(line)
        copy_obj = copy.Copy(bug_handler=self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "sections.data")
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)