|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object org.knowceans.dirichlet.lda.LdaQueryClient
public class LdaQueryClient
LdaQueryClient is the central class for querying lda parameter sets. The supported query types are full-text queries as well as document and term similarities.
TODO: handle reset of lda model before / after sampling
TODO: handle parametrisation
TODO: if the index is split, which largely improves scalability, handle low-frequency terms: if a term is not in the lda index and, for instance, has a document frequency of 1, its topics could be defined to be the topics of that document. For a mindf>2 and a low-frequency term with df>1, a document could be sampled. This would, however, need an inverted index.
Field Summary | |
---|---|
private TermCorpus |
corpus
|
private java.lang.String |
corpusbase
|
private java.lang.String |
ldabase
|
private ExtLdaConfiguration |
ldac
|
private LdaGibbsQuerySampler |
ldaq
|
private LdaTopicSimilarities |
ldat
|
private double |
maxdistance
|
private int |
maxresults
|
private LdaMarkovState |
mcmc
|
private double |
minlikelihood
|
private boolean |
usePredLikelihood
|
Constructor Summary | |
---|---|
LdaQueryClient(java.lang.String corpusbase,
java.lang.String ldabase)
|
Method Summary | |
---|---|
void |
describeCorpus()
Show the topic distribution in the corpus |
void |
describeDoc(int doc)
|
void |
describeTerm(int term)
|
void |
describeTopics(double[] pzx)
|
private void |
docQueries(java.lang.String file,
java.lang.String[] queries)
Find documents for the queries array. |
java.util.List<org.knowceans.map.IndexRanking.IndexEntry> |
getDocResults(double[] topics)
Get a list of documents that matches the topics. |
java.util.List<org.knowceans.map.IndexRanking.IndexEntry> |
getDocResults(int[] words)
Get a list of documents that matches the query. |
java.util.List<org.knowceans.map.IndexRanking.IndexEntry> |
getDocResults(java.lang.String query)
Get a list of documents that matches the query. |
java.util.List<org.knowceans.map.IndexRanking.IndexEntry> |
getDocSimilarities(int doc)
Get similar documents to this one |
java.util.List<org.knowceans.map.IndexRanking.IndexEntry> |
getDocTermSimilarities(int doc)
Get similar terms to arg document |
java.util.List<org.knowceans.map.IndexRanking.IndexEntry> |
getTermDocSimilarities(int term)
Get similar documents to arg term |
java.util.List<org.knowceans.map.IndexRanking.IndexEntry> |
getTermResults(double[] topics)
Get a list of terms that matches the topics. |
java.util.List<org.knowceans.map.IndexRanking.IndexEntry> |
getTermResults(int[] words)
Get a list of terms that matches the query. |
java.util.List<org.knowceans.map.IndexRanking.IndexEntry> |
getTermResults(java.lang.String query)
Get a list of terms that matches the query. |
java.util.List<org.knowceans.map.IndexRanking.IndexEntry> |
getTermSimilarities(int term)
Get similar terms to this one |
double[] |
getTopics(int[] query)
Get the topics for the query sampled from the model. |
int[] |
getWords(java.lang.String query)
Get the query as indices of terms. |
private void |
init()
Initialise the query client. |
private static java.lang.String[] |
load(java.lang.String file)
Load a file with queries. |
static void |
main(java.lang.String[] args)
|
private void |
test(java.lang.String[] queries)
|
private void |
test2()
Test document and term similarities |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
private java.lang.String ldabase
private java.lang.String corpusbase
private LdaMarkovState mcmc
private ExtLdaConfiguration ldac
private LdaGibbsQuerySampler ldaq
private TermCorpus corpus
private LdaTopicSimilarities ldat
private int maxresults
private double maxdistance
private double minlikelihood
private boolean usePredLikelihood
Constructor Detail |
---|
public LdaQueryClient(java.lang.String corpusbase, java.lang.String ldabase)
Method Detail |
---|
public static void main(java.lang.String[] args)
private void init()
public int[] getWords(java.lang.String query)
query
-
public double[] getTopics(int[] query)
query
-
public java.util.List<org.knowceans.map.IndexRanking.IndexEntry> getTermResults(java.lang.String query)
query
-
public java.util.List<org.knowceans.map.IndexRanking.IndexEntry> getTermResults(int[] words)
words
-
public java.util.List<org.knowceans.map.IndexRanking.IndexEntry> getTermResults(double[] topics)
topics
-
public java.util.List<org.knowceans.map.IndexRanking.IndexEntry> getDocResults(java.lang.String query)
query
-
public java.util.List<org.knowceans.map.IndexRanking.IndexEntry> getDocResults(int[] words)
words
-
public java.util.List<org.knowceans.map.IndexRanking.IndexEntry> getDocResults(double[] topics)
topics
-
public java.util.List<org.knowceans.map.IndexRanking.IndexEntry> getDocSimilarities(int doc)
doc
-
public java.util.List<org.knowceans.map.IndexRanking.IndexEntry> getTermSimilarities(int term)
term
-
public java.util.List<org.knowceans.map.IndexRanking.IndexEntry> getDocTermSimilarities(int doc)
doc
-
public java.util.List<org.knowceans.map.IndexRanking.IndexEntry> getTermDocSimilarities(int term)
term
-
public void describeCorpus()
public void describeDoc(int doc)
public void describeTerm(int term)
public void describeTopics(double[] pzx)
private void test(java.lang.String[] queries)
private void test2()
private static java.lang.String[] load(java.lang.String file)
file
-
private void docQueries(java.lang.String file, java.lang.String[] queries)
file
- queries
-
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |