| 1 | <?php |
|---|
| 2 | |
|---|
/**
 * Feeds searchable messages from the database into a Solr index.
 *
 * For every requested search type the bridge reads message rows, converts them
 * into Apache_Solr_Document objects, sends them to Solr and records the indexed
 * (typeName, messageID) pairs in the wcf{N}_solr_index table.
 */
class SolrBridge {

    /**
     * Documents collected by addDocument() that have not been sent to Solr yet.
     *
     * @var array<Apache_Solr_Document>
     */
    protected $documents = array();

    /**
     * Solr client built from SOLR_URL.
     *
     * @var SolrService
     */
    protected $solr = null;

    /**
     * Creates the Solr client from the SOLR_URL constant.
     *
     * @throws SystemException if SOLR_URL cannot be parsed or contains no host
     */
    public function __construct() {

        // load search type objects
        // NOTE(review): presumably this call populates SearchEngine::$searchTypeObjects — confirm.
        SearchEngine::getSearchTypes();

        $path = parse_url(SOLR_URL);
        if (!is_array($path) || empty($path['host'])) {
            throw new SystemException('invalid solr url '.SOLR_URL);
        }

        $this->solr = new SolrService(
            $path['host'],
            isset($path['port']) ? $path['port'] : 80,
            isset($path['path']) ? $path['path'] : '/'
        );
    }

    /**
     * Sends all collected documents to Solr and marks them as indexed.
     *
     * Does nothing when no documents are pending, so neither an empty Solr
     * update nor an INSERT without rows is issued.
     */
    protected function commit() {
        if (empty($this->documents)) {
            return;
        }

        $this->solr->addDocuments($this->documents);
        $this->solr->commit();

        // mark items as done: one (typeName, messageID) tuple per document
        $values = array();
        foreach ($this->documents as $doc) {
            $values[] = "('".WCF::getDB()->escapeString($doc->messageType)."', ".intval($doc->messageID).")";
        }
        $sql = "INSERT IGNORE INTO
                wcf".WCF_N."_solr_index
                (typeName, messageID)
            VALUES ".implode(",\n", $values);
        WCF::getDB()->sendQuery($sql);

        // reset array
        $this->documents = array();

        $this->solr->optimize();
    }

    /**
     * Returns the registered search type object for the given type name.
     *
     * @param string $type search type name
     * @return object entry of SearchEngine::$searchTypeObjects
     * @throws SystemException if the type is not registered
     */
    protected function getSearchTypeObject($type) {
        if (!isset(SearchEngine::$searchTypeObjects[$type])) {
            throw new SystemException('unknown search type '.$type, 101001);
        }

        return SearchEngine::$searchTypeObjects[$type];
    }

    /**
     * Reads the highest message ID of every accessible search type.
     *
     * @param array $types search type names
     * @return array map of type name => highest message ID (0 for an empty table)
     * @throws SystemException if a type is not registered
     */
    protected function getTotals(array $types) {
        $queries = array();
        foreach ($types as $type) {

            // get search type object
            $doc = $this->getSearchTypeObject($type);
            if (!$doc->isAccessible()) continue;

            // get field names
            $messageIDFieldName = $doc->getIDFieldName();
            if (strpos($messageIDFieldName, '.') === false) {
                $messageIDFieldName = "messageTable.".$messageIDFieldName;
            }

            // no GROUP BY: each sub query must yield exactly one row holding the maximum
            $queries[] = "(
                SELECT MAX(".$messageIDFieldName.") AS messageID,
                    '".WCF::getDB()->escapeString($type)."' AS messageType
                FROM ".$doc->getTableName()." messageTable
                ".$doc->getJoins()."
            )";
        }

        $totals = array();
        if (empty($queries)) {
            // every type was skipped; an empty statement must not be sent
            return $totals;
        }

        $result = WCF::getDB()->sendQuery(implode("\nUNION ALL\n", $queries));
        while ($row = WCF::getDB()->fetchArray($result)) {
            // MAX() yields NULL for an empty table; intval() turns that into 0
            $totals[$row['messageType']] = intval($row['messageID']);
        }

        return $totals;
    }

    /**
     * Loads the messages of one search type whose ID lies in [$min, $max]
     * and queues them as Solr documents.
     *
     * Returns without loading anything if the search type is not accessible.
     *
     * @param string $type search type name
     * @param integer $min lowest message ID (inclusive)
     * @param integer $max highest message ID (inclusive)
     * @throws SystemException if the type is not registered
     */
    public function loadDocuments($type, $min, $max) {
        // get search type object
        $doc = $this->getSearchTypeObject($type);
        if (!$doc->isAccessible()) return;

        // get field names
        $messageIDFieldName = $doc->getIDFieldName();
        if (strpos($messageIDFieldName, '.') === false) {
            $messageIDFieldName = "messageTable.".$messageIDFieldName;
        }
        $subjectFieldNames = $doc->getSubjectFieldNames();
        $messageFieldNames = $doc->getMessageFieldNames();
        $userIDFieldName = $doc->getUserIDFieldName();
        $usernameFieldName = $doc->getUsernameFieldName();
        $timeFieldName = $doc->getTimeFieldName();
        $charset = WCF::getDB()->getCharset();

        // only the first subject/message field is indexed
        $sql = "SELECT
                '".WCF::getDB()->escapeString($type)."' AS messageType,
                ".$messageIDFieldName." AS messageID,
                CAST(messageTable.".reset($subjectFieldNames)." AS CHAR CHARACTER SET ".$charset.") AS subject,
                CAST(messageTable.".reset($messageFieldNames)." AS CHAR CHARACTER SET ".$charset.") AS message,
                ".$userIDFieldName." AS userID,
                CAST(".$usernameFieldName." AS CHAR CHARACTER SET ".$charset.") AS username,
                ".$timeFieldName." AS time
            FROM ".$doc->getTableName()." messageTable
            ".$doc->getJoins()."
            WHERE ".$messageIDFieldName." BETWEEN ".intval($min)." AND ".intval($max)."
            GROUP BY messageID";

        $result = WCF::getDB()->sendQuery($sql);
        while ($row = WCF::getDB()->fetchArray($result)) {
            $this->addDocument($row);
        }
    }

    /**
     * Converts a field map into a Solr document and queues it.
     *
     * Array values are added as multi-valued fields, everything else as a
     * single-valued field.
     *
     * @param array $fields map of field name => value or list of values
     */
    protected function addDocument($fields) {
        $part = new Apache_Solr_Document();
        foreach ($fields as $key => $value) {
            if (is_array($value)) {
                foreach ($value as $deepValue) {
                    $part->setMultiValue($key, $deepValue);
                }
            }
            else {
                $part->$key = $value;
            }
        }

        $this->documents[] = $part;
    }

    /**
     * Indexes the next window of message IDs for every given search type.
     *
     * @param array|null $types search type names; anything but an array means all types
     * @param integer $limit size of the message ID window per type (at least 1)
     * @throws SystemException if a type is not registered
     */
    public function doCrawl($types, $limit) {
        $limit = max(1, intval($limit));

        foreach ($this->getIndexStatus($types) as $typeName => $status) {

            // nothing to do?
            if ($status['current'] >= $status['total']) {
                continue;
            }

            // BETWEEN is inclusive, so the window covers exactly $limit IDs
            // NOTE(review): if no row exists in this ID window nothing is recorded
            // in the index table and the same window is tried again on the next run.
            $min = $status['current'] + 1;
            $max = min($status['total'], $status['current'] + $limit);
            $this->loadDocuments($typeName, $min, $max);

            // write to solr
            $this->commit();
        }
    }

    /**
     * Reports the indexing progress per search type.
     *
     * @param array|null $types search type names; anything but an array means all types
     * @return array map of type name => array('current' => highest indexed ID, 'total' => highest existing ID)
     * @throws SystemException if a type is not registered
     */
    public function getIndexStatus($types = null) {

        // read available types
        $status = array();

        // get types
        // NOTE(review): assumes getSearchTypes() returns a list of type names — confirm.
        $types = is_array($types) ? $types : SearchEngine::getSearchTypes();

        // set counters to zero
        foreach ($types as $type) {
            $status[$type] = array(
                'current' => 0,
                'total' => 0,
            );
        }

        // read current status: highest message ID recorded per type by commit()
        $sql = "SELECT typeName, MAX(messageID) AS messageID
            FROM wcf".WCF_N."_solr_index
            GROUP BY typeName";
        $result = WCF::getDB()->sendQuery($sql);
        while ($row = WCF::getDB()->fetchArray($result)) {
            $typeName = $row['typeName'];
            // ignore index rows of types that were not requested
            if (isset($status[$typeName])) {
                $status[$typeName]['current'] = intval($row['messageID']);
            }
        }

        // read totals
        foreach ($this->getTotals($types) as $typeName => $total) {
            if (isset($status[$typeName])) {
                $status[$typeName]['total'] = $total;
            }
        }

        return $status;
    }
}
|---|
| 204 | ?> |
|---|