<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="Generator" content="Microsoft Word 15 (filtered medium)"><title>[Koha-devel] Stemming and zebra</title><style><!--
/* Font Definitions */
/* Four @font-face rules, one per family referenced by this message
   (Wingdings, Cambria Math, Calibri, Trebuchet MS). Each gives only a
   family name and a panose-1 value; none supplies a src descriptor, so
   these rules do not load any font file themselves.
   NOTE(review): presumably emitted by the Word export named in the
   Generator meta tag as metadata for Office; confirm before relying on it. */
@font-face
        {font-family:Wingdings;
        panose-1:5 0 0 0 0 0 0 0 0 0;}
@font-face
        {font-family:"Cambria Math";
        panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
        {font-family:"Trebuchet MS";
        panose-1:2 11 6 3 2 2 2 2 2 4;}
/* Style Definitions */
/* Base text style for paragraphs, list items and divs carrying the
   MsoNormal class. The generic fallback family is the bare keyword
   serif: written inside quotes it would be parsed as a font literally
   named "serif" and would never act as the generic fallback. */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0cm;
        margin-bottom:.0001pt;
        font-size:12.0pt;
        font-family:"Times New Roman",serif;
        color:black;}
/* Unvisited hyperlinks: blue and underlined. */
a:link, span.MsoHyperlink
        {mso-style-priority:99;
        color:blue;
        text-decoration:underline;}
/* Visited hyperlinks: purple and underlined. */
a:visited, span.MsoHyperlinkFollowed
        {mso-style-priority:99;
        color:purple;
        text-decoration:underline;}
/* Plain paragraphs without a class: no horizontal margin, 12pt serif.
   The mso-margin-*-alt declarations are Office-specific properties. */
p
        {mso-style-priority:99;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        font-size:12.0pt;
        font-family:"Times New Roman",serif;
        color:black;}
/* Signature wrapper class: 12pt serif text in white. Generic families
   in this group are bare keywords (serif, sans-serif); a quoted generic
   name is treated as an ordinary font name and gives no fallback. */
p.moz-signature, li.moz-signature, div.moz-signature
        {mso-style-name:moz-signature;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        font-size:12.0pt;
        font-family:"Times New Roman",serif;
        color:white;}
/* Signature text block: 11pt Trebuchet MS in mid grey. */
p.siginlibro, li.siginlibro, div.siginlibro
        {mso-style-name:sig_inlibro;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        font-size:11.0pt;
        font-family:"Trebuchet MS",sans-serif;
        color:#888888;}
/* Signature content panel: light grey background, no border or padding. */
p.sigcontent, li.sigcontent, div.sigcontent
        {mso-style-name:sig_content;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        background:#F6F6F6;
        border:none;
        padding:0cm;
        font-size:12.0pt;
        font-family:"Times New Roman",serif;
        color:black;}
/* Signature name line: bold, dark blue. The generic fallback in this
   group is the bare keyword serif; quoting it would turn it into an
   ordinary font name with no fallback effect. */
p.nom, li.nom, div.nom
        {mso-style-name:nom;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        font-size:12.0pt;
        font-family:"Times New Roman",serif;
        color:#005B85;
        font-weight:bold;}
/* Brand text at body size, in green. */
p.inlibro, li.inlibro, div.inlibro
        {mso-style-name:inlibro;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        font-size:12.0pt;
        font-family:"Times New Roman",serif;
        color:#BFD13D;}
/* Larger (14.5pt) green text. */
p.in, li.in, div.in
        {mso-style-name:in;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        font-size:14.5pt;
        font-family:"Times New Roman",serif;
        color:#BFD13D;}
/* Larger (14.5pt) dark blue text. */
p.libro, li.libro, div.libro
        {mso-style-name:libro;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        font-size:14.5pt;
        font-family:"Times New Roman",serif;
        color:#005B85;}
/* Description line in the signature: 12pt black serif text with an
   effectively zero bottom margin. margin-bottom used to be declared
   twice (0cm, then .0001pt); only the later value ever applied, so the
   overridden declaration is dropped. The generic fallback is the bare
   keyword serif, since a quoted generic name is read as a font name. */
p.desc, li.desc, div.desc
        {mso-style-name:desc;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        margin-left:0cm;
        margin-bottom:.0001pt;
        font-size:12.0pt;
        font-family:"Times New Roman",serif;
        color:black;}
/* Small print: 9.5pt black text. The generic fallback in this group is
   the bare keyword serif; a quoted generic name would be read as an
   ordinary font name and provide no fallback. */
p.small, li.small, div.small
        {mso-style-name:small;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        font-size:9.5pt;
        font-family:"Times New Roman",serif;
        color:black;}
/* Tagline text in cyan. */
p.tagline, li.tagline, div.tagline
        {mso-style-name:tagline;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        font-size:12.0pt;
        font-family:"Times New Roman",serif;
        color:#00BCE4;}
/* Signature footer strip with a pale grey-green background. */
p.sigfooter, li.sigfooter, div.sigfooter
        {mso-style-name:sig_footer;
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        background:#EEEFEA;
        font-size:12.0pt;
        font-family:"Times New Roman",serif;
        color:black;}
/* Inline (span) counterparts of the signature colours: they set only
   colour and weight, leaving size and family to the enclosing element. */
span.nom1
        {mso-style-name:nom1;
        color:#005B85;
        font-weight:bold;}
span.tagline1
        {mso-style-name:tagline1;
        color:#00BCE4;}
span.in1
        {mso-style-name:in1;
        color:#BFD13D;}
span.libro1
        {mso-style-name:libro1;
        color:#005B85;}
/* Reply text style. The generic fallback is the bare keyword
   sans-serif: in quotes it would be parsed as a font literally named
   "sans-serif", so a missing Calibri would fall through to the
   browser default font instead of a sans-serif face. */
span.EmailStyle33
        {mso-style-type:personal-reply;
        font-family:"Calibri",sans-serif;
        color:windowtext;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-size:10.0pt;}
/* Page setup for the single document section: 612pt x 792pt page with
   72pt margins on every side. */
@page WordSection1
        {size:612.0pt 792.0pt;
        margin:72.0pt 72.0pt 72.0pt 72.0pt;}
div.WordSection1
        {page:WordSection1;}
/* List Definitions */
/* Office list template l0: nine bullet levels, each indented a further
   36pt with an 18pt hanging indent. The bullet glyph for each level is
   given by mso-level-text and drawn in that level's font-family.
   The glyphs for levels 1 and 3 to 9 are written as CSS escapes so they
   survive any re-encoding of the file.
   NOTE(review): the code points (F0B7 for Symbol, F0A7 for Wingdings)
   are the bullets Word normally emits for this template; the value was
   empty in the reviewed copy, so confirm against the original message. */
@list l0
        {mso-list-id:1335691936;
        mso-list-template-ids:-1684797560;}
@list l0:level1
        {mso-level-number-format:bullet;
        mso-level-text:\F0B7;
        mso-level-tab-stop:36.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l0:level2
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:72.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:"Courier New";
        mso-bidi-font-family:"Times New Roman";}
@list l0:level3
        {mso-level-number-format:bullet;
        mso-level-text:\F0A7;
        mso-level-tab-stop:108.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
@list l0:level4
        {mso-level-number-format:bullet;
        mso-level-text:\F0A7;
        mso-level-tab-stop:144.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
@list l0:level5
        {mso-level-number-format:bullet;
        mso-level-text:\F0A7;
        mso-level-tab-stop:180.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
@list l0:level6
        {mso-level-number-format:bullet;
        mso-level-text:\F0A7;
        mso-level-tab-stop:216.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
@list l0:level7
        {mso-level-number-format:bullet;
        mso-level-text:\F0A7;
        mso-level-tab-stop:252.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
@list l0:level8
        {mso-level-number-format:bullet;
        mso-level-text:\F0A7;
        mso-level-tab-stop:288.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
@list l0:level9
        {mso-level-number-format:bullet;
        mso-level-text:\F0A7;
        mso-level-tab-stop:324.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
/* Ordered and unordered lists carry no bottom margin. */
ol
        {margin-bottom:0cm;}
ul
        {margin-bottom:0cm;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]--></head><body bgcolor=white lang=EN-AU link=blue vlink=purple><div class=WordSection1><p class=MsoNormal><a name="_MailEndCompose"><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext;mso-fareast-language:EN-US'>Hi Francois:<o:p></o:p></span></a></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext;mso-fareast-language:EN-US'><o:p> </o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext;mso-fareast-language:EN-US'>I wrote an email earlier on my tablet, but not 100% sure if it got sent. In any case, I’m writing again now!<br><br>You’ll want to look at C4::Search::_build_stemmed_operand().<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext;mso-fareast-language:EN-US'><o:p> </o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext;mso-fareast-language:EN-US'>Zebra doesn’t actually do any stemming itself. If you read through the Zebra docs (if you’re masochistic), you’ll notice that they say explicitly that Zebra doesn’t do any stemming, but that you can do stemming (using a stemmer like Snowball) while building a query. That’s exactly what we do in Koha.<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext;mso-fareast-language:EN-US'><o:p> </o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext;mso-fareast-language:EN-US'>The Perl module that does the stemming is Lingua::Stem::Snowball. 
<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext;mso-fareast-language:EN-US'><o:p> </o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext;mso-fareast-language:EN-US'>However, you might notice that your query’s operands aren’t always stemmed properly. I haven’t looked in a while, but I think it’s because we don’t build our queries very well at all (when not using QueryParser). <o:p></o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext;mso-fareast-language:EN-US'><o:p> </o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext;mso-fareast-language:EN-US'>If you want to understand why you’re getting “skills” and “fishxsdfe” in your results, I would suggest running some tests ( using “Data::Dumper” and “warn” ) so that you can see your query as it is built.<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext;mso-fareast-language:EN-US'><o:p> </o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext;mso-fareast-language:EN-US'>I have a lot of work I want to do on C4::Search::buildQuery, but just don’t have the time :/.<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext;mso-fareast-language:EN-US'><o:p> </o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext;mso-fareast-language:EN-US'>Unfortunately, at the moment, there is no stemming when using the QueryParser. However, fortunately, using Lingua::Stem::Snowball with QueryParser would be really really easy. 
I think that I’ve written a note on how to do that somewhere on Bugzilla or maybe on Trello…<br><br>I hope that helps! Feel free to send me an email or shout at me on IRC if you want any clarification. I know I probably didn’t make it any clearer but hopefully this might help you on your path to understanding.<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext;mso-fareast-language:EN-US'><o:p> </o:p></span></p><div><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext'>David Cook<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext'>Systems Librarian<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext'>Prosentient Systems<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext'>72/330 Wattle St, Ultimo, NSW 2007<o:p></o:p></span></p></div><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext;mso-fareast-language:EN-US'><o:p> </o:p></span></p><div style='border:none;border-left:solid blue 1.5pt;padding:0cm 0cm 0cm 4.0pt'><div><div style='border:none;border-top:solid #E1E1E1 1.0pt;padding:3.0pt 0cm 0cm 0cm'><p class=MsoNormal><b><span lang=EN-US style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext'>From:</span></b><span lang=EN-US style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:windowtext'> koha-devel-bounces@lists.koha-community.org [mailto:koha-devel-bounces@lists.koha-community.org] <b>On Behalf Of </b>Francois Charbonnier<br><b>Sent:</b> Wednesday, 27 August 2014 2:09 AM<br><b>To:</b> koha-devel@lists.koha-community.org<br><b>Subject:</b> [Koha-devel] Stemming and zebra<o:p></o:p></span></p></div></div><p class=MsoNormal><o:p> </o:p></p><p 
class=MsoNormal>Hello,<br><br>I have tested the QueryStemming system preference on Koha 3.14 (my local installation) and I'm wondering, does zebra just right truncate the words or is there an algorithm to find the stems?<br><br>I use ICU and I have enabled "QueryWeightFields". I don't have automatic truncation or fuzzy search on. I use these words for my tests:<o:p></o:p></p><ul type=disc><li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;mso-list:l0 level1 lfo1'>ski, skiing, skills<o:p></o:p></li><li class=MsoNormal style='mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;mso-list:l0 level1 lfo1'>fish, fished, fishing, fisher, fishxsdfe<o:p></o:p></li></ul><p>Each time, with QueryStemming on, skills and fishxsdfe come out in the search results. Is it what I should expect? "Skills", maybe but "fishxsdfe"?<o:p></o:p></p><p>Do you know how it works? or have a good example that would help me to understand?<o:p></o:p></p><p>Thanks!<o:p></o:p></p><div><p class=MsoNormal>-- <o:p></o:p></p><div><div style='border-top:solid #DDDDDD 1.5pt;border-left:none;border-bottom:solid #BFD13D 1.5pt;border-right:none;padding:0cm 0cm 0cm 0cm'><p class=MsoNormal style='background:#F6F6F6'><span class=nom1><span style='font-size:11.0pt;font-family:"Trebuchet MS","sans-serif"'>François Charbonnier,</span></span><span style='font-size:11.0pt;font-family:"Trebuchet MS","sans-serif";color:#888888'><br></span><span class=tagline1><span style='font-size:11.0pt;font-family:"Trebuchet MS","sans-serif"'>Bibl. prof. / Chef de produits</span></span><span style='font-size:11.0pt;font-family:"Trebuchet MS","sans-serif";color:#888888'><o:p></o:p></span></p><p class=desc style='background:#F6F6F6'><span style='font-size:11.0pt;font-family:"Trebuchet MS","sans-serif";color:#888888'>Tél.  
: (888) 604-2627<br><a href="mailto:francois.charbonnier@inLibro.com"><span style='color:#005B85;text-decoration:none'>francois.charbonnier@inLibro.com</span></a> <o:p></o:p></span></p></div><div><p class=MsoNormal style='background:#EEEFEA'><span class=in1><span style='font-size:13.0pt;font-family:"Trebuchet MS","sans-serif"'>in</span></span><span class=libro1><span style='font-size:13.0pt;font-family:"Trebuchet MS","sans-serif"'>Libro</span></span><span style='font-size:11.0pt;font-family:"Trebuchet MS","sans-serif";color:#888888'> </span><span class=tagline1><span style='font-size:11.0pt;font-family:"Trebuchet MS","sans-serif"'>| pour esprit libre |</span></span><span style='font-size:11.0pt;font-family:"Trebuchet MS","sans-serif";color:#888888'> <a href="http://www.inLibro.com"><span style='font-size:9.0pt;color:#005B85;text-decoration:none'>www.inLibro.com</span></a> <o:p></o:p></span></p></div></div></div></div></div></body></html>