|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object org.knowceans.corpus.xpert.XptDb2Corpus
public final class XptDb2Corpus
Field Summary | |
---|---|
private static java.util.HashMap<java.lang.Integer,XptAuthor> |
allAuthors
|
private static java.util.Vector<XptDocument> |
allDocs
|
private static IgdCategories |
cat
|
private java.sql.Connection |
con
|
private static XptDocument |
curDoc
|
private static boolean |
inSentence
|
private static java.io.Writer |
out
|
private static java.lang.String |
prevWord
|
private static Stemmer |
stem
|
private static StopWordFilter |
stop
|
boolean |
useBigrams
|
boolean |
useStemming
|
Constructor Summary | |
---|---|
XptDb2Corpus(java.lang.String modelFile)
Gets the properties for indexing and creates the database connection. |
Method Summary | |
---|---|
void |
configure(boolean useStemming,
boolean useBigrams)
configure the db reader. |
java.lang.String |
convertToEntities(java.lang.String in)
|
java.lang.String |
convertToUnicode(java.lang.String in)
|
XptAuthor |
getAuthorData(int id)
returns the author record for the given id. |
java.util.Vector<java.lang.Integer> |
getAuthorIds()
gets all valid author ids from the database. |
XptDocument |
getProjectData(int pid)
returns a single ProjectRec for the id. |
java.util.Vector<java.lang.Integer> |
getProjectIds()
gets all valid project ids from the database. |
static void |
main(java.lang.String[] args)
|
private void |
parseText(java.lang.String s,
java.util.Vector<java.lang.String> words)
Parse the given text and add terms to the model. |
private void |
read()
|
private java.lang.String |
removePunct(java.lang.String s)
Remove all punctuation |
private AuthorTermCorpus |
toCorpus()
|
private void |
write(AuthorTermCorpus f,
java.lang.String corpusname)
|
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
private java.sql.Connection con
private static java.io.Writer out
private static XptDocument curDoc
private static boolean inSentence
private static StopWordFilter stop
public boolean useStemming
public boolean useBigrams
private static java.lang.String prevWord
private static IgdCategories cat
private static Stemmer stem
private static java.util.Vector<XptDocument> allDocs
private static java.util.HashMap<java.lang.Integer,XptAuthor> allAuthors
Constructor Detail |
---|
public XptDb2Corpus(java.lang.String modelFile)
Method Detail |
---|
public static void main(java.lang.String[] args)
public void configure(boolean useStemming, boolean useBigrams)
useStemming
- use stemming
useBigrams
- use bigrams
private void read()
private void write(AuthorTermCorpus f, java.lang.String corpusname)
private AuthorTermCorpus toCorpus()
public java.util.Vector<java.lang.Integer> getProjectIds()
public java.util.Vector<java.lang.Integer> getAuthorIds()
public XptDocument getProjectData(int pid)
pid
-
java.sql.SQLException
public XptAuthor getAuthorData(int id)
id
-
java.sql.SQLException
private void parseText(java.lang.String s, java.util.Vector<java.lang.String> words)
s
-
private java.lang.String removePunct(java.lang.String s)
s
-
public java.lang.String convertToEntities(java.lang.String in)
public java.lang.String convertToUnicode(java.lang.String in)
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |