Feature List
// Features recorded for a single candidate phrase; `probability` is the output value.
// NOTE(review): the boolean flags below are not described in this listing; their
// names suggest they record where the phrase occurs or how it is written — confirm.
boolean inEntry;
boolean allWordsCapitalized;
boolean inLink;
boolean inMetaSection;
boolean inTitle;
boolean inURL;
// queryFreq is currently unused.
// NOTE(review): termFreq and docFreq are presumably term frequency and document
// frequency; their exact definitions are not given here — confirm.
double termFreq, docFreq, queryFreq;
// docLength is a word count; phraseLength is a character count.
double docLength, phraseLength;
// posInDoc is the ratio (word position / document word count).
double posInDoc;
// The number of phrases that are a (non-proper) subset of this phrase.
double numberOfSubPhrases;
// Probability that this phrase is a keyphrase (output).
double probability;
Back to top