<html>
  <head>
    <meta content="text/html; charset=ISO-8859-1"
      http-equiv="Content-Type">
  </head>
  <body bgcolor="#FFFFFF" text="#000000">
    Hi<br>
    <br>
    I had always thought stemming was done by Zebra, and only in
    English!<br>
    <br>
    In fact, the algorithm for the French language is here:<br>
    <a class="moz-txt-link-freetext" href="http://snowball.tartarus.org/algorithms/french/stemmer.html">http://snowball.tartarus.org/algorithms/french/stemmer.html</a><br>
    <br>
    (Lingua::Stem::Snowball is a Perl interface to the C version of the
    Snowball stemmers)<br>
    <br>
    <br>
    Mathieu Saby<br>
    <br>
    <br>
    <br>
    <div class="moz-cite-prefix">Le 27/08/2014 10:22, David Cook a
      écrit :<br>
    </div>
    <blockquote
      cite="mid:010f01cfc1d0$0295e510$07c1af30$@prosentient.com.au"
      type="cite">
      <meta http-equiv="Content-Type" content="text/html;
        charset=ISO-8859-1">
      <meta name="Generator" content="Microsoft Word 15 (filtered
        medium)">
      <style><!--
/* Font Definitions */
@font-face
        {font-family:Wingdings;
        panose-1:5 0 0 0 0 0 0 0 0 0;}
@font-face
        {font-family:"Cambria Math";
        panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
        {font-family:"Trebuchet MS";
        panose-1:2 11 6 3 2 2 2 2 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0cm;
        margin-bottom:.0001pt;
        font-size:12.0pt;
        font-family:"Times New Roman","serif";
        color:black;}
a:link, span.MsoHyperlink
        {mso-style-priority:99;
        color:blue;
        text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
        {mso-style-priority:99;
        color:purple;
        text-decoration:underline;}
p
        {mso-style-priority:99;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        font-size:12.0pt;
        font-family:"Times New Roman","serif";
        color:black;}
p.moz-signature, li.moz-signature, div.moz-signature
        {mso-style-name:moz-signature;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        font-size:12.0pt;
        font-family:"Times New Roman","serif";
        color:white;}
p.siginlibro, li.siginlibro, div.siginlibro
        {mso-style-name:sig_inlibro;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        font-size:11.0pt;
        font-family:"Trebuchet MS","sans-serif";
        color:#888888;}
p.sigcontent, li.sigcontent, div.sigcontent
        {mso-style-name:sig_content;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        background:#F6F6F6;
        border:none;
        padding:0cm;
        font-size:12.0pt;
        font-family:"Times New Roman","serif";
        color:black;}
p.nom, li.nom, div.nom
        {mso-style-name:nom;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        font-size:12.0pt;
        font-family:"Times New Roman","serif";
        color:#005B85;
        font-weight:bold;}
p.inlibro, li.inlibro, div.inlibro
        {mso-style-name:inlibro;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        font-size:12.0pt;
        font-family:"Times New Roman","serif";
        color:#BFD13D;}
p.in, li.in, div.in
        {mso-style-name:in;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        font-size:14.5pt;
        font-family:"Times New Roman","serif";
        color:#BFD13D;}
p.libro, li.libro, div.libro
        {mso-style-name:libro;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        font-size:14.5pt;
        font-family:"Times New Roman","serif";
        color:#005B85;}
p.desc, li.desc, div.desc
        {mso-style-name:desc;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        margin-bottom:0cm;
        margin-left:0cm;
        margin-bottom:.0001pt;
        font-size:12.0pt;
        font-family:"Times New Roman","serif";
        color:black;}
p.small, li.small, div.small
        {mso-style-name:small;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        font-size:9.5pt;
        font-family:"Times New Roman","serif";
        color:black;}
p.tagline, li.tagline, div.tagline
        {mso-style-name:tagline;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        font-size:12.0pt;
        font-family:"Times New Roman","serif";
        color:#00BCE4;}
p.sigfooter, li.sigfooter, div.sigfooter
        {mso-style-name:sig_footer;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        background:#EEEFEA;
        font-size:12.0pt;
        font-family:"Times New Roman","serif";
        color:black;}
span.nom1
        {mso-style-name:nom1;
        color:#005B85;
        font-weight:bold;}
span.tagline1
        {mso-style-name:tagline1;
        color:#00BCE4;}
span.in1
        {mso-style-name:in1;
        color:#BFD13D;}
span.libro1
        {mso-style-name:libro1;
        color:#005B85;}
span.EmailStyle33
        {mso-style-type:personal-reply;
        font-family:"Calibri","sans-serif";
        color:windowtext;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-size:10.0pt;}
@page WordSection1
        {size:612.0pt 792.0pt;
        margin:72.0pt 72.0pt 72.0pt 72.0pt;}
div.WordSection1
        {page:WordSection1;}
/* List Definitions */
@list l0
        {mso-list-id:1335691936;
        mso-list-template-ids:-1684797560;}
@list l0:level1
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:36.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l0:level2
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:72.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:"Courier New";
        mso-bidi-font-family:"Times New Roman";}
@list l0:level3
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:108.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
@list l0:level4
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:144.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
@list l0:level5
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:180.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
@list l0:level6
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:216.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
@list l0:level7
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:252.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
@list l0:level8
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:288.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
@list l0:level9
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:324.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
ol
        {margin-bottom:0cm;}
ul
        {margin-bottom:0cm;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->
      <div class="WordSection1">
        <p class="MsoNormal"><a moz-do-not-send="true"
            name="_MailEndCompose"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext;mso-fareast-language:EN-US">Hi
              Francois:<o:p></o:p></span></a></p>
        <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext;mso-fareast-language:EN-US"><o:p>&nbsp;</o:p></span></p>
        <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext;mso-fareast-language:EN-US">I
            wrote an email earlier on my tablet, but not 100% sure if it
            got sent. In any case, I’m writing again now!<br>
            <br>
            You’ll want to look at C4::Search::_build_stemmed_operand().<o:p></o:p></span></p>
        <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext;mso-fareast-language:EN-US"><o:p>&nbsp;</o:p></span></p>
        <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext;mso-fareast-language:EN-US">Zebra
            doesn’t actually do any stemming itself. If you read through
            the Zebra docs (if you’re masochistic), you’ll notice that
            they say explicitly that Zebra doesn’t do any stemming, but
            that you can do stemming (using a stemmer like Snowball)
            while building a query. That’s exactly what we do in Koha.<o:p></o:p></span></p>
        <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext;mso-fareast-language:EN-US"><o:p>&nbsp;</o:p></span></p>
        <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext;mso-fareast-language:EN-US">The
            Perl module that does the stemming is
            Lingua::Stem::Snowball. <o:p></o:p></span></p>
        <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext;mso-fareast-language:EN-US"><o:p>&nbsp;</o:p></span></p>
        <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext;mso-fareast-language:EN-US">However,
            you might notice that your query’s operands aren’t always
            stemmed properly. I haven’t looked in a while, but I think
            it’s because we don’t build our queries very well at all
            (when not using QueryParser). <o:p></o:p></span></p>
        <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext;mso-fareast-language:EN-US"><o:p>&nbsp;</o:p></span></p>
        <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext;mso-fareast-language:EN-US">If
            you want to understand why you’re getting “skills” and
            “fishxsdfe” in your results, I would suggest running some
            tests ( using “Data::Dumper” and “warn” ) so that you can
            see your query as it is built.<o:p></o:p></span></p>
        <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext;mso-fareast-language:EN-US"><o:p>&nbsp;</o:p></span></p>
        <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext;mso-fareast-language:EN-US">I
            have a lot of work I want to do on C4::Search::buildQuery,
            but just don’t have the time :/.<o:p></o:p></span></p>
        <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext;mso-fareast-language:EN-US"><o:p>&nbsp;</o:p></span></p>
        <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext;mso-fareast-language:EN-US">Unfortunately,
            at the moment, there is no stemming when using the
            QueryParser. However, fortunately, using
            Lingua::Stem::Snowball with QueryParser would be really
            really easy. I think that I’ve written a note on how to do
            that somewhere on Bugzilla or maybe on Trello…<br>
            <br>
            I hope that helps! Feel free to send me an email or shout at
            me on IRC if you want any clarification. I know I probably
            didn’t make it any clearer but hopefully this might help you
            on your path to understanding.<o:p></o:p></span></p>
        <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext;mso-fareast-language:EN-US"><o:p>&nbsp;</o:p></span></p>
        <div>
          <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext">David
              Cook<o:p></o:p></span></p>
          <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext">Systems
              Librarian<o:p></o:p></span></p>
          <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext">Prosentient
              Systems<o:p></o:p></span></p>
          <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext">72/330
              Wattle St, Ultimo, NSW 2007<o:p></o:p></span></p>
        </div>
        <p class="MsoNormal"><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext;mso-fareast-language:EN-US"><o:p>&nbsp;</o:p></span></p>
        <div style="border:none;border-left:solid blue 1.5pt;padding:0cm
          0cm 0cm 4.0pt">
          <div>
            <div style="border:none;border-top:solid #E1E1E1
              1.0pt;padding:3.0pt 0cm 0cm 0cm">
              <p class="MsoNormal"><b><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext"
                    lang="EN-US">From:</span></b><span
style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:windowtext"
                  lang="EN-US">
                  <a class="moz-txt-link-abbreviated" href="mailto:koha-devel-bounces@lists.koha-community.org">koha-devel-bounces@lists.koha-community.org</a>
                  [<a class="moz-txt-link-freetext" href="mailto:koha-devel-bounces@lists.koha-community.org">mailto:koha-devel-bounces@lists.koha-community.org</a>] <b>On
                    Behalf Of </b>Francois Charbonnier<br>
                  <b>Sent:</b> Wednesday, 27 August 2014 2:09 AM<br>
                  <b>To:</b> <a class="moz-txt-link-abbreviated" href="mailto:koha-devel@lists.koha-community.org">koha-devel@lists.koha-community.org</a><br>
                  <b>Subject:</b> [Koha-devel] Stemming and zebra<o:p></o:p></span></p>
            </div>
          </div>
          <p class="MsoNormal"><o:p>&nbsp;</o:p></p>
          <p class="MsoNormal">Hello,<br>
            <br>
            I have tested the QueryStemming system preference on Koha
            3.14 (my local installation) and I'm wondering: does Zebra
            just right-truncate the words, or is there an algorithm to
            find the stems?<br>
            <br>
            I use ICU and I have enabled "QueryWeightFields". I don't
            have automatic truncation or fuzzy search on. I use these
            words for my tests:<o:p></o:p></p>
          <ul type="disc">
            <li class="MsoNormal"
              style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;mso-list:l0
              level1 lfo1">ski, skiing, skills<o:p></o:p></li>
            <li class="MsoNormal"
              style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;mso-list:l0
              level1 lfo1">fish, fished, fishing, fisher, fishxsdfe<o:p></o:p></li>
          </ul>
          <p>Each time, with QueryStemming on, skills and fishxsdfe come
            out in the search results. Is that what I should expect?
            "Skills", maybe, but "fishxsdfe"?<o:p></o:p></p>
          <p>Do you know how it works, or have a good example that would
            help me to understand?<o:p></o:p></p>
          <p>Thanks!<o:p></o:p></p>
          <div>
            <p class="MsoNormal">-- <o:p></o:p></p>
            <div>
              <div style="border-top:solid #DDDDDD
                1.5pt;border-left:none;border-bottom:solid #BFD13D
                1.5pt;border-right:none;padding:0cm 0cm 0cm 0cm">
                <p class="MsoNormal" style="background:#F6F6F6"><span
                    class="nom1"><span
                      style="font-size:11.0pt;font-family:&quot;Trebuchet MS&quot;,&quot;sans-serif&quot;">François
                      Charbonnier,</span></span><span
                    style="font-size:11.0pt;font-family:&quot;Trebuchet MS&quot;,&quot;sans-serif&quot;;color:#888888"><br>
                  </span><span class="tagline1"><span
                      style="font-size:11.0pt;font-family:&quot;Trebuchet MS&quot;,&quot;sans-serif&quot;">Bibl. prof. /
                      Chef de produits</span></span><span
                    style="font-size:11.0pt;font-family:&quot;Trebuchet MS&quot;,&quot;sans-serif&quot;;color:#888888"><o:p></o:p></span></p>
                <p class="desc" style="background:#F6F6F6"><span
                    style="font-size:11.0pt;font-family:&quot;Trebuchet MS&quot;,&quot;sans-serif&quot;;color:#888888">Tél.&nbsp; :
                    (888) 604-2627<br>
                    <a moz-do-not-send="true"
                      href="mailto:francois.charbonnier@inLibro.com"><span
                        style="color:#005B85;text-decoration:none">francois.charbonnier@inLibro.com</span></a>
                    <o:p></o:p></span></p>
              </div>
              <div>
                <p class="MsoNormal" style="background:#EEEFEA"><span
                    class="in1"><span
                      style="font-size:13.0pt;font-family:&quot;Trebuchet MS&quot;,&quot;sans-serif&quot;">in</span></span><span
                    class="libro1"><span
                      style="font-size:13.0pt;font-family:&quot;Trebuchet MS&quot;,&quot;sans-serif&quot;">Libro</span></span><span
                    style="font-size:11.0pt;font-family:&quot;Trebuchet MS&quot;,&quot;sans-serif&quot;;color:#888888"> </span><span
                    class="tagline1"><span
                      style="font-size:11.0pt;font-family:&quot;Trebuchet MS&quot;,&quot;sans-serif&quot;">| pour esprit
                      libre |</span></span><span
                    style="font-size:11.0pt;font-family:&quot;Trebuchet MS&quot;,&quot;sans-serif&quot;;color:#888888"> <a
                      moz-do-not-send="true"
                      href="http://www.inLibro.com"><span
                        style="font-size:9.0pt;color:#005B85;text-decoration:none">www.inLibro.com</span></a>
                    <o:p></o:p></span></p>
              </div>
            </div>
          </div>
        </div>
      </div>
      <br>
      <fieldset class="mimeAttachmentHeader"></fieldset>
      <br>
      <pre wrap="">_______________________________________________
Koha-devel mailing list
<a class="moz-txt-link-abbreviated" href="mailto:Koha-devel@lists.koha-community.org">Koha-devel@lists.koha-community.org</a>
<a class="moz-txt-link-freetext" href="http://lists.koha-community.org/cgi-bin/mailman/listinfo/koha-devel">http://lists.koha-community.org/cgi-bin/mailman/listinfo/koha-devel</a>
website : <a class="moz-txt-link-freetext" href="http://www.koha-community.org/">http://www.koha-community.org/</a>
git : <a class="moz-txt-link-freetext" href="http://git.koha-community.org/">http://git.koha-community.org/</a>
bugs : <a class="moz-txt-link-freetext" href="http://bugs.koha-community.org/">http://bugs.koha-community.org/</a></pre>
    </blockquote>
    <br>
  </body>
</html>