Code with Finding: |
class DocFieldProcessorPerThread {

  public DocumentsWriter.DocWriter processDocument() throws IOException {

    consumer.startDocument();
    fieldsWriter.startDocument();

    final Document doc = docState.doc;

    assert docFieldProcessor.docWriter.writer.testPoint("DocumentsWriter.ThreadState.init start");
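    // fieldGen increments once per document; each per-field lastGen is
    // compared against it below to detect the first occurrence of a
    // field name within this document.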
    fieldCount = 0;
    final int thisFieldGen = fieldGen++;

    final List docFields = doc.getFields();
    final int numDocFields = docFields.size();

    // Absorb any new fields first seen in this document.
    // Also absorb any changes to fields we had already
    // seen before (eg suddenly turning on norms or
    // vectors, etc.):

    for(int i=0;i<numDocFields;i++) {
      Fieldable field = (Fieldable) docFields.get(i);
      final String fieldName = field.name();

      // Make sure we have a PerField allocated
      final int hashPos = fieldName.hashCode() & hashMask;
      DocFieldProcessorPerField fp = fieldHash[hashPos];
      while(fp != null && !fp.fieldInfo.name.equals(fieldName))
        fp = fp.next;
      if (fp == null) {
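        // fp == null means this field name has no per-field processor yet:
        // add it to fieldInfos and link a new DocFieldProcessorPerField
        // into the hash chain at hashPos.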
        // TODO FI: we need to genericize the "flags" that a
        // field holds, and, how these flags are merged; it
        // needs to be more "pluggable" such that if I want
        // to have a new "thing" my Fields can do, I can
        // easily add it
        FieldInfo fi = fieldInfos.add(fieldName, field.isIndexed(), field.isTermVectorStored(),
                                      field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
                                      field.getOmitNorms(), false, field.getOmitTf());

        fp = new DocFieldProcessorPerField(this, fi);
        fp.next = fieldHash[hashPos];
        fieldHash[hashPos] = fp;
        totalFieldCount++;
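        // Grow the hash table once it reaches 50% load; rehash() is
        // assumed to double fieldHash and recompute hashMask accordingly.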
        if (totalFieldCount >= fieldHash.length/2)
          rehash();
      } else
        fp.fieldInfo.update(field.isIndexed(), field.isTermVectorStored(),
                            field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
                            field.getOmitNorms(), false, field.getOmitTf());
      if (thisFieldGen != fp.lastGen) {

        // First time we're seeing this field for this doc
        fp.fieldCount = 0;

        if (fieldCount == fields.length) {
          final int newSize = fields.length*2;
          DocFieldProcessorPerField newArray[] = new DocFieldProcessorPerField[newSize];
          System.arraycopy(fields, 0, newArray, 0, fieldCount);
          fields = newArray;
        }

        fields[fieldCount++] = fp;
        fp.lastGen = thisFieldGen;
      }
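      // A document may carry the same field name several times; grow this
      // field's per-document value array when it fills up.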
      if (fp.fieldCount == fp.fields.length) {
        Fieldable[] newArray = new Fieldable[fp.fields.length*2];
        System.arraycopy(fp.fields, 0, newArray, 0, fp.fieldCount);
        fp.fields = newArray;
      }

      fp.fields[fp.fieldCount++] = field;
      if (field.isStored()) {
        fieldsWriter.addField(field, fp.fieldInfo);
      }
    }
    // If we are writing vectors then we must visit
    // fields in sorted order so they are written in
    // sorted order.  TODO: we actually only need to
    // sort the subset of fields that have vectors
    // enabled; we could save [small amount of] CPU
    // here.
    quickSort(fields, 0, fieldCount-1);

    for(int i=0;i<fieldCount;i++)
      fields[i].consumer.processFields(fields[i].fields, fields[i].fieldCount);
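    // docState.maxTermPrefix is presumably set during indexing when a term
    // exceeds DocumentsWriter.MAX_TERM_LENGTH; report it as a warning
    // instead of failing the document.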
    if (docState.maxTermPrefix != null && docState.infoStream != null)
      docState.infoStream.println("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
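    // fieldsWriter and consumer each return an optional DocWriter; when
    // both are non-null, bundle them into a single PerDoc so one object
    // is returned for this docID.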
    final DocumentsWriter.DocWriter one = fieldsWriter.finishDocument();
    final DocumentsWriter.DocWriter two = consumer.finishDocument();
    if (one == null) {
      return two;
    } else if (two == null) {
      return one;
    } else {
      PerDoc both = getPerDoc();
      both.docID = docState.docID;
      assert one.docID == docState.docID;
      assert two.docID == docState.docID;
      both.one = one;
      both.two = two;
      return both;
    }
  }
}