Code with Finding: |
class DocFieldProcessorPerThread {
  public DocumentsWriter.DocWriter processDocument() throws IOException {

    consumer.startDocument();
    fieldsWriter.startDocument();

    final Document doc = docState.doc;

    assert docFieldProcessor.docWriter.writer.testPoint("DocumentsWriter.ThreadState.init start");

    fieldCount = 0;
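
    // Stamp this document with a new field generation; comparing it against
    // fp.lastGen below tells us whether a field name is being seen for the
    // first time within this document.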
    final int thisFieldGen = fieldGen++;

    final List docFields = doc.getFields();
    final int numDocFields = docFields.size();

    // Absorb any new fields first seen in this document.
    // Also absorb any changes to fields we had already
    // seen before (eg suddenly turning on norms or
    // vectors, etc.):
    for(int i=0;i<numDocFields;i++) {
      Fieldable field = (Fieldable) docFields.get(i);
      final String fieldName = field.name();

      // Make sure we have a PerField allocated
      final int hashPos = fieldName.hashCode() & hashMask;
      DocFieldProcessorPerField fp = fieldHash[hashPos];
      while(fp != null && !fp.fieldInfo.name.equals(fieldName))
        fp = fp.next;

      if (fp == null) {
        // TODO FI: we need to genericize the "flags" that a
        // field holds, and, how these flags are merged; it
        // needs to be more "pluggable" such that if I want
        // to have a new "thing" my Fields can do, I can
        // easily add it
        FieldInfo fi = fieldInfos.add(fieldName, field.isIndexed(), field.isTermVectorStored(),
                                      field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
                                      field.getOmitNorms(), false, field.getOmitTf());

        fp = new DocFieldProcessorPerField(this, fi);
        fp.next = fieldHash[hashPos];
        fieldHash[hashPos] = fp;
        totalFieldCount++;

        if (totalFieldCount >= fieldHash.length/2)
          rehash();
      } else
        fp.fieldInfo.update(field.isIndexed(), field.isTermVectorStored(),
                            field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
                            field.getOmitNorms(), false, field.getOmitTf());

      if (thisFieldGen != fp.lastGen) {
        // First time we're seeing this field for this doc
        fp.fieldCount = 0;

        if (fieldCount == fields.length) {
          final int newSize = fields.length*2;
          DocFieldProcessorPerField newArray[] = new DocFieldProcessorPerField[newSize];
          System.arraycopy(fields, 0, newArray, 0, fieldCount);
          fields = newArray;
        }

        fields[fieldCount++] = fp;
        fp.lastGen = thisFieldGen;
      }
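
      // Grow this field's value array when the document carries more values
      // for the same field name than the array currently holds (multi-valued
      // field).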
      if (fp.fieldCount == fp.fields.length) {
        Fieldable[] newArray = new Fieldable[fp.fields.length*2];
        System.arraycopy(fp.fields, 0, newArray, 0, fp.fieldCount);
        fp.fields = newArray;
      }

      fp.fields[fp.fieldCount++] = field;
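
      // Stored field values are handed to the stored fields writer right away;
      // indexed content is processed below via the consumer chain.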
      if (field.isStored()) {
        fieldsWriter.addField(field, fp.fieldInfo);
      }
    }

    // If we are writing vectors then we must visit
    // fields in sorted order so they are written in
    // sorted order. TODO: we actually only need to
    // sort the subset of fields that have vectors
    // enabled; we could save [small amount of] CPU
    // here.
    quickSort(fields, 0, fieldCount-1);

    for(int i=0;i<fieldCount;i++)
      fields[i].consumer.processFields(fields[i].fields, fields[i].fieldCount);

    if (docState.maxTermPrefix != null && docState.infoStream != null)
      docState.infoStream.println("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
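
    // Collect the per-document output from the stored fields writer and from
    // the indexing chain; when both produced output, wrap them in a single
    // PerDoc so they are flushed together.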
    final DocumentsWriter.DocWriter one = fieldsWriter.finishDocument();
    final DocumentsWriter.DocWriter two = consumer.finishDocument();
    if (one == null) {
      return two;
    } else if (two == null) {
      return one;
    } else {
      PerDoc both = getPerDoc();
      both.docID = docState.docID;
      assert one.docID == docState.docID;
      assert two.docID == docState.docID;
      both.one = one;
      both.two = two;
      return both;
    }
  }
}