# encoding: utf-8
=begin
* Name: SiSU
* Description: a framework for document structuring, publishing and search
* Author: Ralph Amissah
* Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Ralph Amissah,
All Rights Reserved.
* License: GPL 3 or later:
SiSU, a framework for document structuring, publishing and search
Copyright (C) Ralph Amissah
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
If you have Internet connection, the latest version of the GPL should be
available at these locations:
* SiSU uses:
* Standard SiSU markup syntax,
* Standard SiSU meta-markup syntax, and the
* Standard SiSU object citation numbering and system
* Homepages:
* Download:
* Git
* Ralph Amissah
** Description: system environment, resource control and configuration details
=end
module SiSU_AO_Numbering
class Numbering
# Public readers/writers for the per-object attributes.
# Note: initialize never assigns @osp (it reads as nil until written), and the
# generated :ocn reader is replaced further down by the one-argument
# ocn(data) method defined in this class.
attr_accessor :obj,:osp,:ocn,:lv,:name,:index,:comment
# Keep hold of the document metadata object and the list of document objects
# to be numbered; every per-object attribute starts out as nil.
def initialize(md,data)
  @md=md
  @data=data
  @obj=nil
  @type=nil
  @ocn=nil
  @lv=nil
  @name=nil
  @index=nil
  @comment=nil
end
# Run the whole numbering pipeline over @data.
#
# Order of stages: number_plaintext_para, then (each fed the compacted result
# of the previous stage) auto_number_heading_ie_title, ocn, xml and
# minor_numbering, then name_para_seg_filename, and finally set_heading_top
# when @md reports that no top-level heading has been set.
#
# Returns [objects, tags_map, ocn_html_seg_map].
def numbering_song
  compacting_stages=[:auto_number_heading_ie_title,:ocn,:xml,:minor_numbering]
  objects=compacting_stages.inject(number_plaintext_para(@data)) do |so_far,stage|
    send(stage,so_far.compact) # nils are dropped before every one of these stages
  end
  objects,tags_map,ocn_html_seg_map=name_para_seg_filename(objects)
  objects=set_heading_top(objects) unless @md.set_heading_top
  [objects,tags_map,ocn_html_seg_map]
end
# Prepend a sanitised tag to an object's tag list.
#
# tags:: the existing tag list (or a single value; it is flattened in)
# tag::  candidate tag string; nil, empty and purely numeric tags are ignored
#
# The candidate is reduced to the characters a-z, 0-9, '.', '_' and '-'.
# The empty/numeric test is repeated AFTER sanitising, so that a tag made up
# only of characters that get stripped (e.g. upper-case or non-ASCII text)
# does not end up stored as '' or as a bare number.
#
# Returns the new flat tag list, or tags unchanged when the tag is rejected.
def set_tags(tags,tag)
  return tags if tag.nil? || tag.empty? || tag =~/^\d+$/
  clean=tag.gsub(/[^a-z0-9._-]/,'')
  return tags if clean.empty? || clean =~/\A\d+\z/
  [clean,tags].flatten
end
# Tidy the text of each object before numbering.
#
# * Objects that have dob.ocn_ set and are not of kind :block, :comment or
#   :layout get every "text\n" run joined onto one line ("text ").
# * Any object whose obj is not an Array then has leading whitespace removed
#   from each line and a whitespace character at a line end replaced by "\n".
#
# Objects are modified in place; the flattened list is stored in @tuned_file
# and returned.
def number_plaintext_para(data)
  keep_breaks=[:block,:comment,:layout] #messy, but idea is that tables should retain breaks
  @tuned_file=data.map do |dob|
    if !keep_breaks.include?(dob.of) && dob.ocn_
      dob.obj=dob.obj.gsub(/(.+)\n/,'\1 ')
    end
    unless dob.obj.is_a?(Array)
      left_trimmed=dob.obj.gsub(/^\s+/,'')
      dob.obj=left_trimmed.gsub(/\s$/,"\n")
    end
    dob
  end.flatten
end
# Prefix a sub-heading with its computed title number.
#
# dob::      heading object; obj (text) and possibly name are modified in place
# num::      heading level being numbered, compared against @md.toc_lev_limit
# title_no:: number string to put in front of every line of the heading text
#
# Headings whose text already contains "<digits>." or
# "chapter|article|section|clause <digits>" are returned untouched.
# When the heading has an empty name, title_no is used as its name.
# If @md.toc_lev_limit is set and is lower than num, the text is additionally
# prefixed with '!_ ' (bold line markup).
#
# Fix: previously, for a name matching none of the patterns and a title_no
# without a digit, the heading text was overwritten with nil; it is now left
# as it was.
#
# Returns dob.
def number_sub_heading(dob,num,title_no)
  return dob if dob.obj =~/\d+\.|(?:chapter|article|section|clause)\s+\d+/i #name selection arbitrary, fix
  case dob.name
  when /-/,/^#/,/^[a-z_\.]+/
    dob.obj=dob.obj.gsub(/^/,"#{title_no} ")
  else
    dob.name=title_no if dob.name=~/^$/ #where title contains title number
    dob.obj=dob.obj.gsub(/^/,"#{title_no} ") if title_no =~/\d+/ #main, where title number is to be provided
  end
  limit=@md.toc_lev_limit
  dob.obj=dob.obj.gsub(/^/,'!_ ') if limit && limit < num #bold line, watch
  dob
end
# Turn heading text into something usable as a tag: runs of spaces become '_',
# quote characters are dropped, '/' becomes '-', and the opening/closing
# markers listed in Mx for each font face, plus the Mx bullet marker, are
# removed. Returns the cleaned string; the argument itself is not modified.
def heading_tag_clean(heading_tag)
  cleaned=heading_tag.gsub(/[ ]+/,'_')
  cleaned=cleaned.gsub(/["']/,'')
  cleaned=cleaned.gsub(/[\/]/,'-')
  %w( bold italics underscore cite insert strike superscript subscript hilite ).each do |face|
    opener=Mx[:"fa_#{face}_o"]
    closer=Mx[:"fa_#{face}_c"]
    cleaned=cleaned.gsub(/#{opener}|#{closer}/,'')
  end
  cleaned.gsub(/#{Mx[:gl_bullet]}/,'')
end
# Auto-number headings and derive tags from them.
#
# Each object is first replaced by the result of
# SiSU_AO_DocumentStructureExtract::Structure#structure_markup. Three cases
# are then handled:
# * headings with autonum_ set while @md.make.num_top is non-empty: the level
#   equal to num_top and the two levels below it get running numbers
#   (n, n.m, n.m.k) and matching "h..." tags;
# * headings with autonum_ set in documents whose @md.markup matches
#   num_extract: a leading number in the heading text becomes its name;
# * any other object that has a name: the name is added to its tags.
#
# data:: array of document objects (nils are dropped)
# Returns the flattened list of processed objects, also stored in @tuned_file.
def auto_number_heading_ie_title(data) #also does some segment naming
@tuned_file=[]
if defined? @md.make.num_top \
and @md.make.num_top \
and @md.make.num_top !~/^$/
input||=@md.make.num_top
end
# num_top: first heading level to be numbered, nil when none was requested
num_top=(input ? input.to_i : nil)
# running counters for the three numbered levels
t_no1=t_no2=t_no3=0
if num_top
# no1..no3 are the numbered levels; they stay nil when num_top is nil
no1=num_top; no2=(num_top + 1); no3=(num_top + 2)
end
chapter_number_counter=0
data=data.compact
data.each do |dob| #@md.seg_names << [additions to segment names]
title_no=nil
dob=SiSU_AO_DocumentStructureExtract::Structure.new(@md,dob).structure_markup #must happen earlier, node info etc. require
if dob.is ==:heading \
&& dob.autonum_ \
and defined? @md.make.num_top \
and @md.make.num_top !~/^$/
# lv '1' headings: a '#' placeholder in the text is replaced by the chapter counter
if dob.lv=='1' \
and dob.obj =~/^#\s|\s#(?:\s|$)/
chapter_number_counter +=1
dob.obj=dob.obj.gsub(/^#\s/,"#{chapter_number_counter} ").
gsub(/#([:,]?\s|[.]?$)/,"#{chapter_number_counter}\\1")
end
# NOTE(review): @subnumber is set to 1 and then straight to 0 under the same
# condition, and is not read anywhere in this method.
if dob.ln==no1
@subnumber=1
@subnumber=0 if dob.ln==no1
end
# only real (non-dummy) headings whose text does not carry the ~# / -# marker are numbered
if dob.ln.to_s =~/^[0-6]/ \
and not dob.use_ ==:dummy \
and dob.obj !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/ # <-- fix
if dob.ln==no1
# new top numbered heading: bump its counter, restart the two sub-level counters
t_no1+=1; t_no2=0; t_no3=0
title_no="#{t_no1}"
if @md.seg_names.is_a?(Array) \
and not @md.seg_names.include?(title_no)
if dob.ln==no1
dob.name="#{title_no}" if not dob.name
dob.tags=set_tags(dob.tags,title_no)
# tag derived from the heading text, e.g. "Article " becomes "article_<n>"
tag=dob.obj.gsub(/(Article|Clause|Section|Chapter)\s+/,"\\1_#{title_no}").downcase
tag=heading_tag_clean(tag)
dob.tags=set_tags(dob.tags,tag)
# number goes after a leading Article/Clause/Section word, otherwise in front as "<n>. "
dob.obj=(dob.obj =~/(Article|Clause|Section)\s+/) \
? (dob.obj.gsub(/(Article|Clause|Section)\s+/,"\\1 #{title_no} "))
: (dob.obj.gsub(/^/,"#{title_no}. ")) #fix stop later
end
# NOTE(review): this tests dob.ln != no1 inside a branch only entered when
# dob.ln == no1, so it looks unreachable - confirm before relying on it.
if dob.ln !=no1 \
and dob.obj =~/^[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review
dob.name ="#{title_no}" if not dob.name
dob.tags=set_tags(dob.tags,title_no)
dob.obj=dob.obj.gsub(/^/,"#{title_no}. ")
end
@md.seg_names << title_no
end
# NOTE(review): same situation as above - dob.ln != no1 is tested inside the
# dob.ln == no1 branch.
if dob.ln!=no1 \
and dob.name!~/^[a-z_\.]+$/ \
and dob.obj !~/[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required # not fixed, work on
dob.tags=set_tags(dob.tags,title_no)
dob.obj=dob.obj.gsub(/^/i,"#{title_no}. ")
end
end
if dob.ln==no1 #watch because here you change dob.name
dob.tags=set_tags(dob.tags,"h#{title_no}")
end
if dob.ln==no2 #watch because here you change dob.name
# second numbered level: "<n>.<m>", restarts the third-level counter
t_no2+=1; t_no3=0
title_no="#{t_no1}.#{t_no2}"
dob.tags=set_tags(dob.tags,"h#{title_no}")
dob=number_sub_heading(dob,no2,title_no)
end
if dob.ln==no3 #watch because here you change dob.name
# third numbered level: "<n>.<m>.<k>"
t_no3+=1
title_no="#{t_no1}.#{t_no2}.#{t_no3}"
dob.tags=set_tags(dob.tags,"h#{title_no}")
dob=number_sub_heading(dob,no3,title_no)
end
elsif dob.ln.to_s =~/^[0-6]/ \
and dob.name =~ /^[\w-]+-/ # endnotes, watch2005# endnotes, watch2005
dob.tags=set_tags(dob.tags,dob.name)
# NOTE(review): non-destructive gsub whose result is discarded; dob.name is left unchanged
dob.name.gsub(/^([a-z_\.]+)-$/,'\1')
end
elsif dob.is ==:heading \
and dob.autonum_ \
and @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4
#here lies a bug, as is nil when run from -Dv --update, FIX
# unnamed heading whose text starts with digits/dots: that leading number becomes its name
if (dob.name.nil? or dob.name.empty?) \
and dob.ln.to_s =~/^[0-9]/ \
and dob.obj =~ /^([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d
dob.name=$1
dob.tags=set_tags(dob.tags,dob.name)
end
# NOTE(review): empty conditional, no effect
if @md.toc_lev_limit
end
elsif defined? dob.name \
and dob.name
dob.tags=set_tags(dob.tags,dob.name)
end
dob.tags=dob.tags.uniq if defined? dob.tags
@tuned_file << dob
end
@tuned_file=@tuned_file.flatten
end
# Pass the object list through SiSU_AO_DocumentStructureExtract::OCN#ocn,
# remember the result in @tuned_file and return it.
# This one-argument method replaces the :ocn reader that attr_accessor
# generates for this class.
def ocn(data) #and auto segment numbering increment
  numberer=SiSU_AO_DocumentStructureExtract::OCN.new(@md,data)
  @tuned_file=numberer.ocn
end
# Pass the object list through SiSU_AO_DocumentStructureExtract::XML#dom,
# remember the result in @tuned_file and return it.
def xml(data)
  builder=SiSU_AO_DocumentStructureExtract::XML.new(@md,data)
  @tuned_file=builder.dom
end
# Resolve the "baby numbering" placeholders inside paragraphs.
#
# * A paragraph starting "# " or "#1" becomes "<n>. ..."; "#1" restarts the
#   count at 1. A paragraph consisting only of "#" plus whitespace (optionally
#   followed by "~#") is left alone.
# * A paragraph starting "_# " becomes "<letter>. ..." and gets indent '1'.
#   Letters restart at 'a' after every numbered paragraph.
# * Any heading whose level (ln) starts with a digit resets both counters.
#
# Only objects whose "of" is :heading, :heading_insert, :para or :block are
# looked at. Objects are modified in place.
#
# Fix: letters used to come from a fixed a..z table, so a 27th lettered item
# was labelled with an empty string (". "). Labels are now produced with
# String#succ, which carries on 'z' -> 'aa' -> 'ab' ...
#
# data:: array of document objects
# Returns the flattened list of objects, also stored in @tuned_file.
def minor_numbering(data) #and auto segment numbering increment
  @tuned_file=[]
  number_small=0
  letter_small='a' # label for the next lettered item
  data.each do |dob|
    if [:heading,:heading_insert,:para,:block].include?(dob.of)
      if dob.is ==:heading && dob.ln.to_s=~/^[0-9]/
        #% sub-number system, (baby numbering) reset with any change of major number
        number_small=0
        letter_small='a'
      elsif dob.is ==:para
        if dob.obj =~/^#[ 1]/ && dob.obj !~/^#\s+(?:~#)?$/
          letter_small='a'
          number_small=0 if dob.obj =~ /^#1/
          number_small+=1
          dob.obj=dob.obj.gsub(/^#[ 1]/,"#{number_small}. ")
        end
        if dob.obj =~/^_# /
          dob.obj=dob.obj.gsub(/^_# /,"#{letter_small}. ")
          dob.indent='1'
          letter_small=letter_small.succ
        end
      end
    end
    @tuned_file << dob
  end
  @tuned_file=@tuned_file.flatten
end
# Give every segment heading (level 4) a name, insert page-break layout
# objects in front of headings where @md asks for them, and build two lookup
# structures.
#
# data:: array of document objects
# Returns [objects, tags, ocn_html_seg] where
# * objects is the flattened object list (possibly with break objects added),
# * tags maps each ocn (as a string) to { segname: } and each tag to
#   { ocn:, segname: },
# * ocn_html_seg is an array indexed by ocn holding { seg:, level: }.
# Side effects: appends to @md.seg_names, may set @md.set_heading_seg, and
# prints warnings to stdout.
def name_para_seg_filename(data) #segment naming, remaining
# paragraph name/numbering rules
# manual naming overrides, manual naming may be
# alpha-numeric characters mixed,
# numeric only (a number), if
# all segments have been named,
# the numbers used are over 1000 or
# it is not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented)
# [for now a warning is printed for such documents on use of maintenance or very-verbose flag]
# auto-naming takes the form of giving numbers to segments
# the rules for which are as follows
# if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.)
# otherwise the level 4 segment number from the embedded document structure info is used
# if there is none a sequential number is designated, preceded by an underscore
@tuned_file,@unique_auto_name=[],[]
tags={}
# next number to use for an automatically generated segment name ("_1", "_2", ...)
art_filename_auto=1
@counter=1
if not @md.seg_autoname_safe \
and (@md.opt.act[:verbose_plus][:set]==:on \
|| @md.opt.act[:maintenance][:set]==:on)
puts 'manual segment names, numbers used as names, risk warning (segmented html)'
end
ocn_html_seg=[]
data.each do |dob|
# --- naming of headings at levels 4, 5 and 6 ---
if dob.is==:heading \
&& dob.ln \
and dob.ln.to_s =~/^[456]/
if dob.ln==4 \
and not dob.name \
and not @md.set_heading_seg
@md.set_heading_seg=true
end
if dob.name !~/^\S+/ \
and dob.obj =~/^\s*(?:\S+\s+)?([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name
possible_seg_name=$1
# separators and other non-word characters become '.', a trailing '.' is dropped
possible_seg_name=possible_seg_name.gsub(/(?:[:,-]|\W)/,'.').
gsub(/\.$/,'')
if @md.seg_names.is_a?(Array) \
and not @md.seg_names.include?(possible_seg_name)
dob.name=possible_seg_name
dob.tags=set_tags(dob.tags,dob.name)
@md.seg_names << possible_seg_name
elsif (@md.opt.act[:verbose_plus][:set]==:on \
or @md.opt.act[:maintenance][:set]==:on)
puts 'warn, there may be a conflicting numbering scheme'
end
end
if dob.ln==4 \
and dob.name #extract segment name from embedded document structure info
if @md.seg_names.is_a?(Array) \
and not @md.seg_names.include?(dob.name)
dob.tags=set_tags(dob.tags,dob.name)
@md.seg_names << dob.name
end
end
if dob.ln==4 \
and not dob.name #if still no segment name, provide a numerical one
pf='_' #pg='' #may use e.g. '' or '~' or '_'
segn_auto="#{pf}#{art_filename_auto.to_s}"
if @md.seg_names.is_a?(Array) \
and not @md.seg_names.include?(segn_auto)
dob.name=segn_auto
dob.tags=set_tags(dob.tags,dob.name)
@md.seg_names << segn_auto
else puts 'segment name (numbering) error'
end
art_filename_auto+=1
end
if dob.ln==4 \
and not dob.name #should not occur
puts "e r r o r -\t#{__FILE__}::#{__LINE__}\n#{dob.inspect}"
end
end
# remember the name of the most recent level 4 heading as the current segment
if (dob.is ==:heading \
|| dob.is ==:heading_insert) \
&& dob.ln==4
@seg=dob.name
end
# --- output list: the object itself, or [break object, object] when the
# heading level appears in @md.pagenew / pagebreak / pageline (first match wins) ---
@tuned_file << if dob.is==:heading \
&& (@md.pagenew || @md.pagebreak || @md.pageline)
m=dob.ln.to_s
dob_tmp=[]
if @md.pagenew.inspect =~/#{m}/
dob_tmp << SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_new]) << dob
elsif @md.pagebreak.inspect =~/#{m}/
dob_tmp << SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page]) << dob
elsif @md.pageline.inspect =~/#{m}/
dob_tmp << SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_line]) << dob
end
unless dob_tmp.length > 0; dob
else dob_tmp
end
else dob
end
# --- lookup tables keyed by ocn ---
if defined? dob.ocn \
and dob.ocn
@segname=((dob.is==:heading || dob.is==:heading_insert) && dob.ln==4 && (defined? dob.name)) \
? (dob.name)
: @segname
tags["#{dob.ocn}"]={ segname: @segname }
# NOTE(review): dob.ln is compared with the integer 4 elsewhere in this
# method; if it is an Integer, the =~ test just below cannot match (and
# Object#=~ no longer exists from Ruby 3.2 on) - confirm the intended check.
ocn_html_seg[dob.ocn]=if (dob.is==:heading || dob.is==:heading_insert)
if dob.ln =~/[0-3]/ then { seg: nil, level: dob.ln }
#elsif dob.ln =~/[4-6]/
else { seg: @seg, level: dob.ln }
end
else { seg: @seg, level: nil }
end
end
dob.tags=dob.tags.uniq if defined? dob.tags
if defined? dob.tags \
and dob.tags.length > 0
#@segname=((dob.is=='heading'|| dob.is=='heading_insert') && dob.ln==4 && (defined? dob.name)) \
#? (dob.name) \
#: @segname
dob.tags.each do |y|
tags[y]={ ocn: dob.ocn.to_s, segname: @segname }
end
end
dob
end
# entries for levels 1-3 take the seg of a level 4 entry found among the next
# four slots; there is no break, so the last such entry in that window wins
ocn_html_seg.each_with_index do |ocn,i|
if ocn \
and ocn[:level].to_s=~/[1-3]/
(1..4).each do |x|
if ocn_html_seg[i+x] \
and ocn_html_seg[i+x][:level]==4
ocn[:seg]=ocn_html_seg[i+x][:seg]
end
end
end
end
if @md.seg_names.length > 0
@md.set_heading_seg=true
end
tuned_file=@tuned_file.flatten
[tuned_file,tags,ocn_html_seg]
end
# Manufacture a top-level heading when the document has none.
#
# When @md.set_heading_top is already set, data is returned unchanged
# (previously the method returned nil in that case, so an unguarded caller
# would have lost its data).
#
# Otherwise the first element that is neither a metadata line (Rx[:meta] or
# "@word:" followed by whitespace) nor blank sets the flag and, provided @md
# has a title and a creator, is preceded by the two values '1' and the main
# title (or '[no title provided]'). @lv and @obj are set to those two values.
# NOTE(review): the elements are matched with !~ as if they were strings -
# confirm what callers actually pass.
#
# data:: array of document elements
# Returns the flattened list, also stored in @tuned_file.
def set_heading_top(data) #% make sure no false positives
  return data if @md.set_heading_top
  if (@md.opt.act[:verbose_plus][:set]==:on \
  or @md.opt.act[:maintenance][:set]==:on)
    puts "\tdocument contains no top level heading, (will have to manufacture one)"
  end
  @tuned_file=[]
  data.each do |t_o|
    unless @md.set_heading_top
      if t_o !~/^(?:#{Rx[:meta]}|@\S+:)\s/m \
      and t_o !~/\A\s*\Z/m
        @md.set_heading_top=true
        if defined? @md.title \
        and @md.title \
        and defined? @md.title.full \
        and defined? @md.creator \
        and @md.creator
          @lv='1'
          @obj=@md.title.main ? @md.title.main : '[no title provided]'
          # pushed as a pair; the final flatten turns it into two list elements
          @tuned_file << [@lv,@obj]
        end
      end
    end
    @tuned_file << t_o
  end
  @tuned_file=@tuned_file.flatten
end
# Manufacture a segment (level 4) heading when the document has none.
#
# When @md.set_heading_seg is already set, data is returned unchanged
# (previously the method returned nil in that case).
#
# Otherwise the first object whose ln does not start with 0-3, whose text is
# not blank and which is not a :layout object sets the flag. The three values
# 4, 'seg' and the segment title (main title, or '[segment]') are put into
# the list ahead of it.
# NOTE(review): the same three values are also assigned to that object's own
# ln, name and obj, overwriting its original content. This is kept exactly as
# before, but it looks unintended - confirm.
#
# data:: array of document objects
# Returns the flattened list, also stored in @tuned_file.
def set_heading_seg(data) #% make sure no false positives
  return data if @md.set_heading_seg
  if (@md.opt.act[:verbose_plus][:set]==:on \
  or @md.opt.act[:maintenance][:set]==:on)
    puts "\tdocument contains no segment level, (will have to manufacture one)"
  end
  @tuned_file=[]
  data.each do |dob|
    unless @md.set_heading_seg
      if defined? dob.ln and dob.ln.to_s !~/^[0-3]/m \
      and dob.obj !~/\A\s*\Z/m \
      and dob.is !=:layout
        @md.set_heading_seg=true
        seg_title=@md.title.main ? @md.title.main : '[segment]'
        head=[4,'seg',seg_title]
        dob.ln,dob.name,dob.obj=head
        @tuned_file << head
      end
    end
    @tuned_file << dob
  end
  @tuned_file=@tuned_file.flatten
end
# Manufacture a title header line when the document provides no title.
#
# When @md.set_header_title is already set, data is returned unchanged
# (previously the method returned nil in that case).
#
# Otherwise, ahead of the first element that is neither a "%"/"%%" comment
# line nor blank, a line built from Mx[:meta_o], "title", Mx[:meta_c] and
# @md.heading_seg_first is inserted; @md.title.main is set to
# @md.heading_seg_first and the flag is set.
# NOTE(review): the elements are matched with !~ as if they were strings -
# confirm what callers actually pass.
#
# data:: array of document elements
# Returns the flattened list, also stored in @tuned_file.
def set_header_title(data) #% make sure no false positives
  return data if @md.set_header_title
  if (@md.opt.act[:verbose_plus][:set]==:on \
  or @md.opt.act[:maintenance][:set]==:on)
    puts "\t no document title provided, (will have to manufacture one)"
  end
  @tuned_file=[]
  data.each do |t_o|
    unless @md.set_header_title
      if t_o !~/^%{1,2}\s/m \
      and t_o !~/\A\s*\Z/m
        @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}"
        @md.title.main=@md.heading_seg_first
        @md.set_header_title=true
      end
    end
    @tuned_file << t_o
  end
  @tuned_file=@tuned_file.flatten
end
end
end
__END__