#!/bin/sh
#
# download the AllInOne page of a manual on wiki.debian.org as docbook
# and transform it into proper XML
#
# very loosely based on the moinmoin2pdf script from Petter Reinholdtsen
#
# Author/Copyright:	Holger Levsen
# Licence:		GPL2+
# first edited:		2006-07-06
# last edited:		2020-08-16
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

#set -x

# exit_loud_and_clearly MESSAGE
#   Print MESSAGE on stdout, framed by a horizontal rule and a blank line
#   above and below, then terminate the script with exit status 1.
exit_loud_and_clearly() {
	rule="-----------------------------------------------------"
	for line in "$rule" "" "$1" "" "$rule" ; do
		echo "$line"
	done
	exit 1
}

# Make sure the external tools used below are installed before doing any
# work. `command -v` is the POSIX lookup and is built into the shell, so the
# check does not depend on the separate `which` program or on how the shell
# word-splits its output; only the exit status is used.
if ! command -v xmllint > /dev/null 2>&1 ; then
	exit_loud_and_clearly "Please install libxml2-utils."
fi

if ! command -v GET > /dev/null 2>&1 ; then
	exit_loud_and_clearly "Please install libwww-perl."
fi

# $name selects the manual to process; it is not set in this script and,
# going by the error message, is expected to be exported by the Makefile.
if [ -z "$name" ] ; then
	exit_loud_and_clearly "error: missing \$name variable, not exported from Makefile?"
fi

# URL:     base address of the Debian wiki
# TMPFILE: scratch file in which the FIXME report is assembled further down
# xmlfile: the DocBook file this script produces
URL="https://wiki.debian.org"
TMPFILE="$(mktemp)"
xmlfile="${name}.xml"

# Get the docbook type manual from the wiki and store a copy as source before
# modifying the file.
#
# Everything from here down to the final "> $xmlfile" redirection is ONE
# pipeline: every stage ends in "|", and the blank and comment lines between
# the stages are skipped by the shell while it looks for the next command.
#
# $url and $path1 are not set anywhere in this script; presumably they are
# exported by the manual specific Makefile together with $name (TODO confirm).
#
# NOTE(review): PERL_LWP_SSL_VERIFY_HOSTNAME=0 asks LWP not to verify the TLS
# host name of the server -- confirm this is still needed.
# NOTE(review): "-H User-Agent:" passes a User-Agent header without a value,
# presumably to hide the default LWP agent string -- confirm.
# NOTE(review): no stage's exit status is checked, so a failed download is
# not detected here and the remaining stages simply run on whatever arrived.
PERL_LWP_SSL_VERIFY_HOSTNAME=0 GET -H User-Agent: "${url}AllInOne?action=show&mimetype=text/docbook"|
tee source/AllInOne-$name.xml |

# remove AllInOne page related additions from links
sed "s%$path1/AllInOne/%%g" |

# replace &quot; with \" (like it has been before the docbook export change) to
# avoid a lot of fuzzy strings.
sed "s%&quot;%\"%g" |

# replace tags:
# ("code>" matches the tail of both <code> and </code>, so opening and closing
# tags are renamed by the same rule)
sed "s%code>%computeroutput>%g" |
sed "s%/htdocs/rightsidebar/img/%./images/%g" |

# remove initial and final tags:
perl -pe "s%</?article>%%g" |

# remove tags and enclosed content:
sed "s#<articleinfo>\(.*\)</articleinfo>##g" |

# Comment useless remarks from XML: they just show an ugly drawing in XML
# ("$&" is not a shell expansion, so it reaches perl unchanged, where it stands
# for the whole matched <remark>...</remark> text)
perl -pe "s%<remark>.*?</remark>%<!-- $& -->%g" |

# Make wiki self links actually local
# The shell expands $url, $URL and $path1 before perl sees the expressions.
# \1, \2, ... in the replacement part are taken by perl as $1, $2, ...
# The first three rules handle links ending in a bare "#"; the last one handles
# links with a fragment after the "#" and builds a "Page--fragment" linkend.
perl -pe "s%<ulink url=\"$url/?(\w+)#\">(.*?)</ulink>%<link linkend=\"\1\">\2</link>%g" |
perl -pe "s%<ulink url=\"$URL/?(\w+)#\">(.*?)</ulink>%<link linkend=\"\1\">\2</link>%g" |
perl -pe "s%<ulink url=\"https://wiki.debian.org/$path1/(HowTo/)?(\w+)#\">(.*?)</ulink>%<link linkend=\"\2\">\3</link>%g" |
perl -pe "s%<ulink url=\"https://wiki.debian.org/$path1/(HowTo/)?(\w+)#(.*?)\">(.*?)</ulink>%<link linkend=\"\2--\3\">\4</link>%g" |

# introduce line breaks:
# (these rely on sed turning "\n" in the replacement into a newline)
sed "s%<title>%\n<title>%g" |
sed "s%<\/title>%\n<\/title>%g" |
sed "s%<section%\n\n<section%g" |
sed "s%<\/section>%\n<\/section>%g" |
sed "s%<para>%\n<para>%g" |
sed "s%<para><anchor%<para>\n<anchor%g" |
# next one commented to catch Category links easier
#sed "s%<\/para>%\n<\/para>%g" |
sed "s%FIXME%\nFIXME%g" |
# NOTE(review): unlike the rules above, the next two have no "g" flag, so only
# the first match on each input line gets a line break -- confirm intended.
sed "s%<itemizedlist>%\n<itemizedlist>%" |
sed "s%<listitem>%\n<listitem>%" |

# remove lines from file to get rid of the Category links
# need to terminate piping here because of the next sed
sed "/CategoryPermalink/d" > $xmlfile

# fix some remaining URLs (see manual specific Makefile for $url vs. $URL)
# Both substitutions only look at the current line, so applying them in order
# in a single pass gives the same result as two separate passes.
sed -i \
	-e "s%$url/DebianEdu/Status/%$URL/DebianEdu/Status/%g" \
	-e "s%$url/$path1/%$URL%g" \
	"$xmlfile"

# introduce one more line break now that Category links are gone
sed -i "s%<\/para>%\n<\/para>%g" "$xmlfile"

# terminate file correctly: append the closing </article> after the first ">"
# of the last line. This has to stay a separate pass: the "$" address must see
# the last line as it is after the line breaks above have been written out.
sed -i '$ s#>#>\n</article>#' "$xmlfile"

# next sed block is needed to get internal links working in bullseye+.
# Note: on wiki.d.o pages, link anchors must be set above the title like e.g.
# for the Architecture page, level 1, 2 and 3:
#
# <<Anchor(Architecture)>>
# = Architecture =
#
# <<Anchor(Architecture--Network)>>
# == Network ==
#
# <<Anchor(Architecture--File_system_access_configuration)>>
# === File system access configuration ===
#
# the added anchor tag is inserted at/after the end of the previous section and
# included in <para></para>; also the tags' endings differ.
# we want to get out of this mess, we need <section id="...."> above <title>:
#
# All sed programs below are single-quoted, so nothing in them is expanded by
# the shell. Each one rewrites $xmlfile in place and works on the output of
# the one before it, so their order matters.
#
# modify begin of line
# (on every line containing "anchor": the first "<anchor" becomes "<section")
sed -i '/anchor/s%<anchor%<section%' $xmlfile
# modify end of line
# (on every line containing id=": the first self-closing "/>" becomes ">")
sed -i '/id="/s%\/>%>%' $xmlfile
# delete para lines before and after matched line
# (-n plus the hold space give a one-line delay: each line is only printed
# once its successor has been read. On a line containing id=" the delayed
# previous line is swapped in and overwritten by "n" with the following line,
# which "d" then discards; the matched line itself waits in the hold space and
# is printed on the next cycle. "${x;p;}" flushes the delayed last line.)
sed -ni '/id="/{x;n;d;};1h;1!{x;p;};${x;p;}' $xmlfile
# read next lines and delete them conditionally (for two different cases)
# (the three following lines are joined to the matched one, then the first
# "<section>" and the first "</section>" in the joined text are removed)
sed -i '/id="/{N;N;N;s/<section>//;s/<\/section>//}' $xmlfile
# fix subsection and sub-subsection cases (add closing tag for previous section,
# and also newline to separate the sections)
# (only the line directly following a </para> line is examined)
sed -i '/<\/para>/{n;s%<section id=%</section>\n\n<section id=%}' $xmlfile
# end of internal linking block

######
###### Begin legacy manuals' workaround
######

# use unique section IDs.
#
# number_sections FILE
#   Rewrite FILE in place so that every bare "<section>" tag becomes
#   "<section id='N'>", with N counting up from 0 in document order.
#   The tags are counted as occurrences (grep -o), not as matching lines, so
#   several tags on one line are all numbered, and no scratch file is written
#   to the current directory.
number_sections() {
	ns_total=$(grep -o "<section>" "$1" | wc -l)
	ns_n=0
	while [ "$ns_n" -lt "$ns_total" ] ; do
		# GNU sed: the range 0,/re/ ends at the first line matching re, and s
		# without the g flag replaces only the first tag on that line. Numbered
		# tags no longer match, so each pass consumes exactly one bare tag.
		sed -i "0,/<section>/s/<section>/<section id='$ns_n'>/" "$1"
		ns_n=$((ns_n + 1))
	done
}

# Only the legacy manuals get numeric ids; some of them are renamed to
# meaningful ids further down.
if [ "debian-edu-buster-manual" = "$name" ] || \
		[ "audacity-manual" = "$name" ] || [ "rosegarden-manual" = "$name" ] || \
		[ "debian-edu-itil-manual" = "$name" ] ; then
	number_sections "$xmlfile"
fi

###### Legacy manuals: hard-coded section id renaming
###
### the sed blocks below are obviously suboptimal but only needed for legacy
### documentation where the sections ids won't change.
###
### ids are collected from old docbook export, these are needed for links inside
### the manual, i.e. from one section to another one. ids are not sequential
### because only the needed ones have been filtered out.
###

# buster manual specific issues
#
# All substitutions are handed to a single sed invocation, so the file is
# rewritten once instead of once per rule. sed applies the -e expressions to
# every line in the order given, and each rule only looks at the current line,
# so the outcome is the same as running them one after another.
#
# The first expression must stay first: it prefixes every
# "Installation_over_the_network" with "Installation--", and the replacement
# text of the id='42' rule contains that very string.
#
# ugly: the remaining expressions rename the numeric ids (no g flag: first
# match per line only).
if [ "debian-edu-buster-manual" = "$name" ] ; then
	sed -i \
		-e "s%Installation_over_the_network%Installation--Installation_over_the_network%g" \
		-e "s%id='2'>%id='AboutDebianEdu--Some_history_and_why_two_names'>%" \
		-e "s%id='3'>%id='Architecture'>%" \
		-e "s%id='4'>%id='Architecture--Network'>%" \
		-e "s%id='14'>%id='Architecture--File_system_access_configuration'>%" \
		-e "s%id='15'>%id='Requirements'>%" \
		-e "s%id='16'>%id='Requirements--Hardware_requirements'>%" \
		-e "s%id='18'>%id='Requirements--Requirements_for_network_setup'>%" \
		-e "s%id='20'>%id='Requirements--Internet_router'>%" \
		-e "s%id='21'>%id='Installation'>%" \
		-e "s%id='32'>%id='Installation--Desktop_choice'>%" \
		-e "s%id='33'>%id='Installation--Modular_installation'>%" \
		-e "s%id='42'>%id='Installation--Installation_over_the_network_.28PXE.29_and_booting_diskless_clients'>%" \
		-e "s%id='43'>%id='Installation--Modifying_PXE_installations'>%" \
		-e "s%id='46'>%id='GettingStarted'>%" \
		-e "s%id='58'>%id='GettingStarted--Machine_Management_with_GOsa.2BALI-'>%" \
		-e "s%id='65'>%id='Maintenance'>%" \
		-e "s%id='66'>%id='Maintenance--Updating_the_software'>%" \
		-e "s%id='79'>%id='Upgrades'>%" \
		-e "s%id='88'>%id='HowTo'>%" \
		-e "s%id='89'>%id='Administration'>%" \
		-e "s%id='92'>%id='Administration--Resizing_Partitions'>%" \
		-e "s%id='94'>%id='Administration--Installing_a_graphical_environment_on_the_main-server_to_use_GOsa.2BALI-'>%" \
		-e "s%id='96'>%id='Administration--Kerberized_NFS'>%" \
		-e "s%id='98'>%id='Administration--Standardskriver'>%" \
		-e "s%id='111'>%id='AdvancedAdministration'>%" \
		-e "s%id='112'>%id='AdvancedAdministration--User_Customisations_with_GOsa.2BALI-'>%" \
		-e "s%id='113'>%id='AdvancedAdministration--Create_Users_in_Year_Groups'>%" \
		-e "s%id='123'>%id='Desktop'>%" \
		-e "s%id='124'>%id='Desktop--Set_up_a_multi-language_desktop_environment'>%" \
		-e "s%id='127'>%id='NetworkClients'>%" \
		-e "s%id='130'>%id='NetworkClients--Configuring_the_PXE_menu'>%" \
		-e "s%id='135'>%id='NetworkClients--Use_a_different_LTSP_client_network'>%" \
		-e "s%id='141'>%id='NetworkClients--Desktop_autoloader'>%" \
		-e "s%id='152'>%id='NetworkClients--Connecting_Windows_machines_to_the_network_.2BAC8_Windows_integration'>%" \
		-e "s%id='158'>%id='Samba'>%" \
		-e "s%id='164'>%id='TeachAndLearn'>%" \
		-e "s%id='168'>%id='Users'>%" \
		-e "s%id='172'>%id='Users--Using_email'>%" \
		-e "s%id='180'>%id='Support'>%" \
		-e "s%id='187'>%id='Features'>%" \
		-e "s%id='194'>%id='CopyRight'>%" \
		-e "s%id='195'>%id='CopyRight--Translation_copyright_and_authors'>%" \
		-e "s%id='196'>%id='Translations'>%" \
		-e "s%id='23'>%id='Installation--Download_the_installation_media_for_Debian_Edu_10.2B-edu0_Codename_Buster'>%" \
		-e "s%id='188'>%id='Features--New_features_for_Debian_Edu_10.2B-edu0_Codename_Buster'>%" \
		-e "s%id='201'>%id='AppendixA--Manual_for_Debian_Edu_10.2B-edu0_Codename_Buster'>%" \
		"$xmlfile"
fi

# common issues for debian-edu-itil, audacity and rosegarden manuals
case "$name" in
	audacity-manual|rosegarden-manual|debian-edu-itil-manual)
		# avoid docbook export changes for ITIL, audacity and rosegarden
		# images issues (due to alt tag missing?): cut everything from the wiki
		# host name up to and including the attachment "target=" parameter
		# (first match per line only).
		sed -i "s%https://wiki.debian.org/.*?action=AttachFile&amp;do=get&amp;target=%%"  $xmlfile
		;;
esac

# fixes that apply to exactly one of the legacy manuals
case "$name" in
	audacity-manual)
		# audacity manual specific: rename section ids
		sed -i "s%id='11'>%id='CopyRight'>%" $xmlfile
		sed -i "s%id='13'>%id='Translations'>%" $xmlfile
		;;
	rosegarden-manual)
		# rosegarden manual specific: rename section ids
		sed -i "s%id='44'>%id='SoundCreation--Sample_Tuning_for_Advanced_Users'>%" $xmlfile
		sed -i "s%id='45'>%id='CopyRight'>%" $xmlfile
		sed -i "s%id='47'>%id='Translations'>%" $xmlfile
		;;
	debian-edu-itil-manual)
		# ITIL manual specific: fix links containing /en/ (seems to be there
		# out of historic reasons).
		# no section ids renaming here because most internal links on the
		# wiki seem to be broken anyway
		sed -i "s%/en/%/%" $xmlfile
		;;
esac
######
###### End legacy manuals' workaround
######

# list open FIXMEs (nothing is removed from $xmlfile here)
#
# write_fixme_list XMLFILE REPORT
#   Collect the lines of XMLFILE that contain "FIXME", skipping lines that
#   contain "FIXMEs", "FIXME&gt;" or "status ignore". If any are left, REPORT
#   is overwritten with a rule, the name of XMLFILE and those lines; otherwise
#   REPORT is left untouched.
#   The three parts are written through one redirection so that the rule and
#   the file name are not overwritten by the line list.
write_fixme_list() {
	wfl_lines=$(grep -v FIXMEs "$1" | grep FIXME | grep -v 'FIXME&gt;' | grep -v 'status ignore')
	if [ -z "$wfl_lines" ] ; then
		return 0
	fi
	{
		echo "----------------------------------"
		printf '%s\n' "$1"
		printf '%s\n' "$wfl_lines"
	} > "$2"
}

# $TMPFILE is extended and moved to fixme-status.txt further down.
write_fixme_list "$xmlfile" "$TMPFILE"

# get images and modify $xmlfile
# ../scripts/get_images is a separate helper, looked up relative to the
# current working directory. Both arguments are passed unquoted, so an empty
# $path1 results in one argument fewer rather than an empty second argument.
echo "calling ../scripts/get_images $xmlfile $path1"
../scripts/get_images $xmlfile $path1

# turn links into internal references if appropriate
# this needs to run after ./get_images
#
#  -0\777  read multiple lines
#          (the shell drops the unquoted backslash, so perl is started with
#          -0777 and sees the whole file as a single record; this lets the
#          pattern match the newline in front of </ulink>)
#  -pi     rewrite $xmlfile in place
#
# $path2 is not set in this script; presumably it is exported by the Makefile
# like $path1 (TODO confirm). The text after the last "/" of the URL becomes
# the linkend; \2 and \3 in the replacement are taken by perl as $2 and $3.
perl -0\777 -pi -e "s/<ulink url=\"$path2(.*)\/(.*)\">(.*)\n<\/ulink>/<link linkend=\"\2\">\3<\/link>/g" $xmlfile

# weird hack for subtitle to get a translatable string and a separate space for the date, see Makefile.common.
#
# The address range "1,/</" runs from line 1 to the next line containing "<".
# On each line of that range the first "<" is replaced by the XML declaration,
# the DOCTYPE, the opening <article> and an <articleinfo> block, followed by a
# newline and the original "<". $DEBIAN_EDU_DOC_TITLE is expanded by the shell
# and is not set in this script (see Makefile.common, TODO confirm).
# NOTE(review): if line 1 already contains "<", the range still extends to the
# next such line and the header would be inserted twice -- this presumably
# relies on the file starting with empty lines at this point.
sed -i "1,/</ s#<#<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE article PUBLIC \"-//OASIS//DTD DocBook XML V4.4//EN\" \"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd\"><article lang=\"en\"><articleinfo><title>$DEBIAN_EDU_DOC_TITLE</title><subtitle><para>Publish date: </para><para>\ </para></subtitle></articleinfo>\n<#" $xmlfile

# remove the first empty lines
# (unconditionally deletes lines 1-3; assumes the header inserted above ends
# up below line 3 -- TODO confirm)
sed -i "1,3d" $xmlfile

# motivate
#
# append_fixme_summary XMLFILE REPORT
#   Count the lines of XMLFILE that contain "FIXME" but neither "FIXMEs" nor
#   "status ignore" (adjacent duplicates count once) and, if there are any,
#   append a three line summary to REPORT.
#   NOTE(review): the FIXME listing earlier in this script also skips lines
#   containing "FIXME&gt;"; this count does not -- confirm that is intended.
append_fixme_summary() {
	afs_count=$(grep -v FIXMEs "$1" | grep FIXME | grep -v 'status ignore' | uniq | wc -l)
	# normalise the number in case wc pads it with blanks
	afs_count=$((afs_count))
	if [ "$afs_count" -gt 0 ] ; then
		{
			echo "===================="
			echo "$afs_count FIXMEs left to fix"
			echo "===================="
		} >> "$2"
	fi
}

append_fixme_summary "$xmlfile" "$TMPFILE"
# publish the report under its final name in the current directory
mv "$TMPFILE" fixme-status.txt

######
###### remove some untranslatable strings from $xmlfile
######

# create $stripped_xmlfile which will have some non-translatable strings
# removed and will be used for POT and PO file creation via po4a.
stripped_xmlfile=$name-stripped.xml

# blank_link_only_paras XMLFILE STRIPPED
#   Find the lines of XMLFILE that start with "<para><ulink" and end with ">"
#   (optionally followed by blanks), i.e. paragraphs holding just a link, and
#   replace every line of STRIPPED that is identical to one of them (trailing
#   blanks ignored) with "<para>". The <para> tag and the trailing blanks are
#   kept to prevent the XML from being broken.
#   Lines are compared as plain strings and as whole lines, so characters such
#   as "#", "[" or "*" in a URL need no escaping and a longer paragraph that
#   merely starts with the same link is left alone.
blank_link_only_paras() {
	blp_out=$(mktemp) || return 1
	# "pass=N" operands are awk variable assignments that take effect when awk
	# reaches them in the argument list, i.e. right before the file after them.
	awk '
		pass == 1 {
			if ($0 ~ /^<para><ulink/ && $0 ~ />[ ]*$/) {
				key = $0
				sub(/ +$/, "", key)
				drop[key] = 1
			}
			next
		}
		{
			key = $0
			sub(/ +$/, "", key)
			if (key in drop)
				print "<para>" substr($0, length(key) + 1)
			else
				print
		}
	' pass=1 "$1" pass=2 "$2" > "$blp_out" && cat "$blp_out" > "$2"
	blp_status=$?
	# STRIPPED is refilled with cat instead of being replaced by mv, so it
	# keeps its own permissions; the scratch file is always removed.
	rm -f "$blp_out"
	return "$blp_status"
}

# --remove untranslatable image names
sed -e 's#<imagedata.*</imageobject>#</imageobject>#g' "$xmlfile" > "$stripped_xmlfile"

# --remove paragraphs that just have a <ulink> and no other text
blank_link_only_paras "$xmlfile" "$stripped_xmlfile"

# --remove FIXME: paragraphs
sed -i '/^FIXME:/d' "$stripped_xmlfile"

# --remove GPL; exclude the ITIL manual which doesn't contain it.
# The whole file is collected in the hold space and handled as one string on
# the last line, so the pattern can span several lines. The result is printed
# whether or not the GPL text was found; with the print tied to a successful
# substitution, a manual without that text would end up as an empty file.
if [ "$name" != "debian-edu-itil-manual" ] ; then
	sed -ni '1h; 1!H; ${ g; s#<para>This program is free.*CONDITIONS</emphasis>#<para>#; p }' "$stripped_xmlfile"
fi
