<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;"># $Id: TeXCatalogue.pm 61372 2021-12-21 22:46:16Z karl $
# TeXLive::TeXCatalogue - module for accessing the TeX Catalogue
# Copyright 2007-2021 Norbert Preining
# This file is licensed under the GNU General Public License version 2
# or any later version.
# 
# Loads of code adapted from the catalogue checking script of Robin Fairbairns.

use strict; use warnings;

use XML::Parser;
use XML::XPath;
use XML::XPath::XMLParser;
use Text::Unidecode;

package TeXLive::TeXCatalogue::Entry;

my $svnrev = '$Revision: 61372 $';
my $_modulerevision = ($svnrev =~ m/: ([0-9]+) /) ? $1 : "unknown";
sub module_revision { return $_modulerevision; }

=pod

=head1 NAME

C&lt;TeXLive::TeXCatalogue&gt; - TeX Live access to the TeX Catalogue from CTAN

=head1 SYNOPSIS

  use TeXLive::TeXCatalogue;
  my $texcat = TeXLive::TLTREE-&gt;new();

  $texcat-&gt;initialize();
  $texcat-&gt;beautify();
  $texcat-&gt;name();
  $texcat-&gt;license();
  $texcat-&gt;version();
  $texcat-&gt;caption();
  $texcat-&gt;description();
  $texcat-&gt;ctan();
  $texcat-&gt;texlive();
  $texcat-&gt;miktex();
  $texcat-&gt;docs();
  $texcat-&gt;entry();
  $texcat-&gt;alias();
  $texcat-&gt;also();
  $texcat-&gt;topics();
  $texcat-&gt;contact();
  $texcat-&gt;new(); 
  $texcat-&gt;initialize();
  $texcat-&gt;quest4texlive();
  $texcat-&gt;location();
  $texcat-&gt;entries();

=head1 DESCRIPTION

The L&lt;TeXLive::TeXCatalogue&gt; module provides access to the data stored
in the TeX Catalogue.

DOCUMENTATION MISSING, SORRY!!!

=cut

my $_parser = XML::Parser-&gt;new(
  ErrorContext =&gt; 2,
  ParseParamEnt =&gt; 1,
  NoLWP =&gt; 1
);

sub new {
  my $class = shift;
  my %params = @_;
  my $self = {
    ioref =&gt; $params{'ioref'},
    entry =&gt; defined($params{'entry'}) ? $params{'entry'} : {},
    docs =&gt; defined($params{'docs'}) ? $params{'docs'} : {},
    name =&gt; $params{'name'},
    caption =&gt; $params{'caption'},
    description =&gt; $params{'description'},
    license =&gt; $params{'license'},
    ctan =&gt; $params{'ctan'},
    texlive =&gt; $params{'texlive'},
    miktex =&gt; $params{'miktex'},
    version =&gt; $params{'version'},
    also =&gt; defined($params{'also'}) ? $params{'also'} : [],
    topic =&gt; defined($params{'topic'}) ? $params{'topic'} : [],
    alias =&gt; defined($params{'alias'}) ? $params{'alias'} : [],
    contact =&gt; defined($params{'contact'}) ? $params{'contact'} : {},
  };
  bless $self, $class;
  if (defined($self-&gt;{'ioref'})) {
    $self-&gt;initialize();
  }
  return $self;
}

sub initialize {
  my $self = shift;
  # parse all the files
  my $parser
    = new XML::XPath-&gt;new(ioref =&gt; $self-&gt;{'ioref'}, parser =&gt; $_parser)
      || die "Failed to parse given ioref";
  $self-&gt;{'entry'}{'id'} = $parser-&gt;findvalue('/entry/@id')-&gt;value();
  $self-&gt;{'entry'}{'date'} = $parser-&gt;findvalue('/entry/@datestamp')-&gt;value();
  $self-&gt;{'entry'}{'modder'} = $parser-&gt;findvalue('/entry/@modifier')-&gt;value();
  $self-&gt;{'name'} = $parser-&gt;findvalue("/entry/name")-&gt;value();
  $self-&gt;{'caption'} = beautify($parser-&gt;findvalue("/entry/caption")-&gt;value());
  $self-&gt;{'description'} = beautify($parser-&gt;findvalue("/entry/description")-&gt;value());
  # there can be multiple entries of licenses, collected them all
  # into one string
  my $licset = $parser-&gt;find('/entry/license');
  my @liclist;
  foreach my $node ($licset-&gt;get_nodelist) {
    my $lictype = $parser-&gt;find('./@type',$node);
    push @liclist, "$lictype";
  }
  $self-&gt;{'license'} = join(' ', @liclist);
  # was before
  # $self-&gt;{'license'} = $parser-&gt;findvalue('/entry/license/@type')-&gt;value();
  $self-&gt;{'version'} = Text::Unidecode::unidecode(
                          $parser-&gt;findvalue('/entry/version/@number')-&gt;value());
  $self-&gt;{'ctan'} = $parser-&gt;findvalue('/entry/ctan/@path')-&gt;value();
  if ($parser-&gt;findvalue('/entry/texlive/@location') ne "") {
    $self-&gt;{'texlive'} = $parser-&gt;findvalue('/entry/texlive/@location')-&gt;value();
  }
  if ($parser-&gt;findvalue('/entry/miktex/@location') ne "") {
    $self-&gt;{'miktex'} = $parser-&gt;findvalue('/entry/miktex/@location')-&gt;value();
  }
  # parse all alias entries
  my $alset = $parser-&gt;find('/entry/alias');
  for my $node ($alset-&gt;get_nodelist) {
    my $id = $parser-&gt;find('./@id', $node);
    push @{$self-&gt;{'alias'}}, "$id";
  }
  # parse the documentation entries
  my $docset = $parser-&gt;find('/entry/documentation');
  foreach my $node ($docset-&gt;get_nodelist) {
    my $docfileparse = $parser-&gt;find('./@href',$node);
    # convert to string
    my $docfile = "$docfileparse";
    # see comments at end of beautify()
    my $details
      = Text::Unidecode::unidecode($parser-&gt;find('./@details',$node));
    my $language = $parser-&gt;find('./@language',$node);
    $self-&gt;{'docs'}{$docfile}{'available'} = 1;
    if ($details) { $self-&gt;{'docs'}{$docfile}{'details'} = "$details"; }
    if ($language) { $self-&gt;{'docs'}{$docfile}{'language'} = "$language"; }
  }
  # parse the also entries
  foreach my $node ($parser-&gt;find('/entry/also')-&gt;get_nodelist) {
    my $alsoid = $parser-&gt;find('./@refid',$node);
    push @{$self-&gt;{'also'}}, "$alsoid";
  }
  # parse the contact entries
  foreach my $node ($parser-&gt;find('/entry/contact')-&gt;get_nodelist) {
    my $contacttype = $parser-&gt;findvalue('./@type',$node);
    my $contacthref = $parser-&gt;findvalue('./@href',$node);
    if ($contacttype &amp;&amp; $contacthref) {
      $self-&gt;{'contact'}{$contacttype} = $contacthref;
    }
  }
  # parse the keyval/topic entries
  foreach my $node ($parser-&gt;find('/entry/keyval')-&gt;get_nodelist) {
    my $k = $parser-&gt;findvalue('./@key',$node);
    my $v = $parser-&gt;findvalue('./@value',$node);
    # for now we only support evaluating the 'topic' key
    if ("$k" eq 'topic') {
      push @{$self-&gt;{'topic'}}, "$v";
    }
  }
}

sub beautify {
  my ($txt) = @_;
  # transliterate to ascii: it allows the final tlpdb to be pure ascii,
  # avoiding problems since we don't control the user's terminal encoding
  # Do first in case spaces are output by the transliteration.
  $txt = Text::Unidecode::unidecode($txt);
  #
  $txt =~ s/\n/ /g;  # make one line
  $txt =~ s/^\s+//g; # rm leading whitespace
  $txt =~ s/\s+$//g; # rm trailing whitespace
  $txt =~ s/\s\s+/ /g; # collapse multiple whitespace characters to one
  $txt =~ s/\t/ /g;    # tabs to spaces
  
  # one last bit of horribleness: there is one url in the descriptions
  # which is longer than our multilineformat format (in TLPOBJ). The
  # result is that it is forcibly broken. Apparently there is no way in
  # Perl to override that. This makes it impossible to get identical
  # longdesc results. Turns out that removing the "http://" prefix
  # shortens it enough to fit, so do that. The better solution would be
  # to use Text::Wrap or some other text-filling code, but going for
  # quick and dirty here.
  $txt =~ s,http://grants.nih.gov/,grants.nih.gov/,g;

  return $txt;
}

sub name {
  my $self = shift;
  if (@_) { $self-&gt;{'name'} = shift }
  return $self-&gt;{'name'};
}
sub license {
  my $self = shift;
  if (@_) { $self-&gt;{'license'} = shift }
  return $self-&gt;{'license'};
}
sub version {
  my $self = shift;
  if (@_) { $self-&gt;{'version'} = shift }
  return $self-&gt;{'version'};
}
sub caption {
  my $self = shift;
  if (@_) { $self-&gt;{'caption'} = shift }
  return $self-&gt;{'caption'};
}
sub description {
  my $self = shift;
  if (@_) { $self-&gt;{'description'} = shift }
  return $self-&gt;{'description'};
}
sub ctan {
  my $self = shift;
  if (@_) { $self-&gt;{'ctan'} = shift }
  return $self-&gt;{'ctan'};
}
sub texlive {
  my $self = shift;
  if (@_) { $self-&gt;{'texlive'} = shift }
  return $self-&gt;{'texlive'};
}
sub miktex {
  my $self = shift;
  if (@_) { $self-&gt;{'miktex'} = shift }
  return $self-&gt;{'miktex'};
}
sub docs {
  my $self = shift;
  my %newdocs = @_;
  if (@_) { $self-&gt;{'docs'} = \%newdocs }
  return $self-&gt;{'docs'};
}
sub entry {
  my $self = shift;
  my %newentry = @_;
  if (@_) { $self-&gt;{'entry'} = \%newentry }
  return $self-&gt;{'entry'};
}
sub alias {
  my $self = shift;
  my @newalias = @_;
  if (@_) { $self-&gt;{'alias'} = \@newalias }
  return $self-&gt;{'alias'};
}
sub also {
  my $self = shift;
  my @newalso = @_;
  if (@_) { $self-&gt;{'also'} = \@newalso }
  return $self-&gt;{'also'};
}
sub topics {
  my $self = shift;
  my @newtopics = @_;
  if (@_) { $self-&gt;{'topic'} = \@newtopics }
  return $self-&gt;{'topic'};
}
sub contact {
  my $self = shift;
  my %newcontact = @_;
  if (@_) { $self-&gt;{'contact'} = \%newcontact }
  return $self-&gt;{'contact'};
}


################################################################
#
# TeXLive::TeXCatalogue
#
################################################################
package TeXLive::TeXCatalogue;

sub new { 
  my $class = shift;
  my %params = @_;
  my $self = {
    location =&gt; $params{'location'},
    entries =&gt; defined($params{'entries'}) ? $params{'entries'} : {},
  };
  bless $self, $class;
  if (defined($self-&gt;{'location'})) {
    $self-&gt;initialize();
    $self-&gt;quest4texlive();
  }
  return $self;
}

sub initialize {
  my $self = shift;
  # chdir to the location of the DTD file, otherwise it cannot be found
  # furthermore we have to open the xml file from a file handle otherwise
  # the catalogue.dtd is searched in a/catalogue.dtd etc, see above
  my $cwd = `pwd`;
  chomp($cwd);
  chdir($self-&gt;{'location'} . "/entries")
  || die "chdir($self-&gt;{location}/entries failed: $!";
  # parse all the files
  foreach (glob("?/*.xml")) {
    # for debugging, nice to skip everything but: next unless /pst-node/;
    open(my $io,"&lt;$_") or die "open($_) failed: $!";
    our $tce;
    # the XML parser die's on malformed xml entries, so we catch
    # that and continue, simply skipping the entry
    eval { $tce = TeXLive::TeXCatalogue::Entry-&gt;new( 'ioref' =&gt; $io ); };
    if ($@) {
      warn "TeXCatalogue.pm:$_: cannot parse, skipping: $@\n";
      close($io);
      next;
    }
    close($io);
    $self-&gt;{'entries'}{lc($tce-&gt;{'entry'}{'id'})} = $tce;
  }
  chdir($cwd) || die ("Cannot change back to $cwd: $!");
}

# Copy every catalogue $entry under the name $entry-&gt;{'texlive'}
# if it makes sense.
# 
sub quest4texlive {
  my $self = shift;

  # The catalogue has a partial mapping from catalogue entries to
  # texlive packages: $id --&gt; $texcat-&gt;{$id}{'texlive'}
  my $texcat = $self-&gt;{'entries'};

  # Try to build the inverse mapping:
  my (%inv, %count);
  for my $id (keys %{$texcat}) {
    my $tl = $texcat-&gt;{$id}{'texlive'};
    if (defined($tl)) {
      $tl =~ s/^bin-//;
      $count{$tl}++;
      $inv{$tl} = $id;
    }
  }
  # Go through texlive names
  for my $name (keys %inv) {
    # If this name is free and there is only one corresponding catalogue
    # entry then copy the entry under this name
    if (!exists($texcat-&gt;{$name}) &amp;&amp; $count{$name} == 1) {
      $texcat-&gt;{$name} = $texcat-&gt;{$inv{$name}};
    }
  }
}

sub location {
  my $self = shift;
  if (@_) { $self-&gt;{'location'} = shift }
  return $self-&gt;{'location'};
}

sub entries {
  my $self = shift;
  my %newentries = @_;
  if (@_) { $self-&gt;{'entries'} = \%newentries }
  return $self-&gt;{'entries'};
}

1;
__END__

=head1 SEE ALSO

The other modules in C&lt;Master/tlpkg/TeXLive/&gt; (L&lt;TeXLive::TLConfig&gt; and
the rest), and the scripts in C&lt;Master/tlpkg/bin/&gt; (especially
C&lt;tl-update-tlpdb&gt;), the documentation in C&lt;Master/tlpkg/doc/&gt;, etc.

=head1 AUTHORS AND COPYRIGHT

This script and its documentation were written for the TeX Live
distribution (L&lt;https://tug.org/texlive&gt;) and both are licensed under the
GNU General Public License Version 2 or later.

=cut

### Local Variables:
### perl-indent-level: 2
### tab-width: 2
### indent-tabs-mode: nil
### End:
# vim:set tabstop=2 expandtab: #
</pre></body></html>