|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object org.knowceans.dirichlet.atm.AtmQueryClient
public class AtmQueryClient
AtmQueryClient is the central class for querying lda parameter sets. The types of supported queries are full-text, document and term similarities.
TODO: handle reset of lda model before / after sampling
TODO: handle parametrisation
TODO: if the index is split, which largely improves scalability, handle low-frequency terms: if a term is not in the lda index and, for instance, has a document frequency of 1, its topics could be defined to be the topics of that document. For mindf>2 and low-frequency terms with df>1, a document could be sampled. This would, however, need an inverted index.
Field Summary | |
---|---|
private java.lang.String |
atmbase
|
private AtmGibbsQuerySampler |
atmq
|
private AtmTopicSimilarities |
atmt
|
private AmqCorpus |
corpus
|
private java.lang.String |
corpusbase
|
private ExtLdaConfiguration |
ldac
|
private double |
maxdistance
|
private int |
maxresults
|
private AtmMarkovState |
mcmc
|
private double |
minlikelihood
|
private boolean |
usePredLikelihood
|
Constructor Summary | |
---|---|
AtmQueryClient(java.lang.String corpusbase,
java.lang.String ldabase)
|
Method Summary | |
---|---|
private void |
authorQueries(java.lang.String file,
java.lang.String[] queries)
Find authors for the queries array. |
void |
describeAuthor(int author)
|
void |
describeCorpus()
Show the topic distribution in the corpus |
void |
describeTerm(int term)
|
void |
describeTopics(double[] pzx)
|
java.util.List<org.knowceans.map.IndexRanking.IndexEntry> |
getAuthorResults(double[] topics)
Get a list of authors that match the topics. |
java.util.List<org.knowceans.map.IndexRanking.IndexEntry> |
getAuthorResults(int[] words)
Get a list of authors that match the query given as term indices. |
java.util.List<org.knowceans.map.IndexRanking.IndexEntry> |
getAuthorResults(java.lang.String query)
Get a list of authors that match the query string. |
java.util.List<org.knowceans.map.IndexRanking.IndexEntry> |
getAuthorSimilarities(int author)
Get authors similar to this one |
java.util.List<org.knowceans.map.IndexRanking.IndexEntry> |
getAuthorTermSimilarities(int doc)
Get terms similar to the given author |
java.util.List<org.knowceans.map.IndexRanking.IndexEntry> |
getTermAuthorSimilarities(int term)
Get authors similar to the given term |
java.util.List<org.knowceans.map.IndexRanking.IndexEntry> |
getTermResults(double[] topics)
Get a list of terms that match the topics. |
java.util.List<org.knowceans.map.IndexRanking.IndexEntry> |
getTermResults(int[] words)
Get a list of terms that matches the query. |
java.util.List<org.knowceans.map.IndexRanking.IndexEntry> |
getTermResults(java.lang.String query)
Get a list of terms that matches the query. |
java.util.List<org.knowceans.map.IndexRanking.IndexEntry> |
getTermSimilarities(int term)
Get similar terms to this one |
double[] |
getTopics(int[] query)
Get the topics for the query sampled from the model. |
int[] |
getWords(java.lang.String query)
Get the query as indices of terms. |
private void |
init()
Initialise the query client. |
private static java.lang.String[] |
load(java.lang.String file)
Load a file with queries. |
static void |
main(java.lang.String[] args)
|
private void |
test(java.lang.String[] queries)
|
private void |
test2()
Test document and term similarities |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
private java.lang.String atmbase
private java.lang.String corpusbase
private AtmMarkovState mcmc
private ExtLdaConfiguration ldac
private AtmGibbsQuerySampler atmq
private AmqCorpus corpus
private AtmTopicSimilarities atmt
private int maxresults
private double maxdistance
private double minlikelihood
private boolean usePredLikelihood
Constructor Detail |
---|
public AtmQueryClient(java.lang.String corpusbase, java.lang.String ldabase)
Method Detail |
---|
public static void main(java.lang.String[] args)
private void init()
public int[] getWords(java.lang.String query)
query
-
public double[] getTopics(int[] query)
query
-
public java.util.List<org.knowceans.map.IndexRanking.IndexEntry> getTermResults(java.lang.String query)
query
-
public java.util.List<org.knowceans.map.IndexRanking.IndexEntry> getTermResults(int[] words)
words
-
public java.util.List<org.knowceans.map.IndexRanking.IndexEntry> getTermResults(double[] topics)
topics
-
public java.util.List<org.knowceans.map.IndexRanking.IndexEntry> getAuthorResults(java.lang.String query)
query
-
public java.util.List<org.knowceans.map.IndexRanking.IndexEntry> getAuthorResults(int[] words)
words
-
public java.util.List<org.knowceans.map.IndexRanking.IndexEntry> getAuthorResults(double[] topics)
topics
-
public java.util.List<org.knowceans.map.IndexRanking.IndexEntry> getAuthorSimilarities(int author)
author
-
public java.util.List<org.knowceans.map.IndexRanking.IndexEntry> getTermSimilarities(int term)
term
-
public java.util.List<org.knowceans.map.IndexRanking.IndexEntry> getAuthorTermSimilarities(int doc)
doc
-
public java.util.List<org.knowceans.map.IndexRanking.IndexEntry> getTermAuthorSimilarities(int term)
term
-
public void describeCorpus()
public void describeAuthor(int author)
public void describeTerm(int term)
public void describeTopics(double[] pzx)
private void test(java.lang.String[] queries)
private void test2()
private static java.lang.String[] load(java.lang.String file)
file
-
private void authorQueries(java.lang.String file, java.lang.String[] queries)
file
-
queries
-
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |