| 1 | <?php |
|---|
| 2 | // wcf imports |
|---|
| 3 | require_once(WCF_DIR.'lib/data/message/search/SearchEngine.class.php'); |
|---|
| 4 | require_once(WCF_DIR.'lib/data/solr/SolrService.php'); |
|---|
| 5 | |
|---|
| 6 | |
|---|
/**
 * Bridges the WCF search types and the Solr index bookkeeping.
 *
 * The bridge reads messages of the registered search types from the database,
 * wraps them in Apache_Solr_Document objects and records which messages have
 * been processed in the wcf{N}_solr_index table.
 *
 * NOTE: the calls that would actually push documents to Solr are currently
 * commented out in commit(); only the bookkeeping table is written.
 */
class SolrBridge {

	/**
	 * Documents queued by addDocument() that commit() has not flushed yet.
	 *
	 * @var	array<Apache_Solr_Document>
	 */
	protected $documents = array();

	/**
	 * Solr service handle, created in the constructor.
	 *
	 * @var	SolrService
	 */
	protected $solr = null;

	/**
	 * Lazily loaded map of search type name => typeID (see getTypeID()).
	 * Static, so it is shared by all instances.
	 *
	 * @var	array<integer>
	 */
	protected static $typeids = null;

	/**
	 * Loads the search type objects and creates the Solr service handle.
	 */
	public function __construct() {
		// load search type objects (the return value is not needed here)
		SearchEngine::getSearchTypes();

		$this->solr = new SolrService();
	}

	/**
	 * Marks all queued documents as indexed and empties the queue.
	 *
	 * One (typeID, messageID) row per queued document is written to
	 * wcf{N}_solr_index with INSERT IGNORE. Does nothing when the queue is
	 * empty.
	 */
	protected function commit() {
		// an INSERT without any VALUES tuple would be invalid SQL
		if (empty($this->documents)) {
			return;
		}

		#$this->solr->addDocuments( $this->documents );
		#$this->solr->commit();

		// mark items as done
		$values = array();
		foreach ($this->documents as $doc) {
			// cast both ids so that nothing but integers reaches the statement
			$values[] = "(".intval($this->getTypeID($doc->messageType)).",".intval($doc->messageID).")";
		}
		$sql = "INSERT IGNORE INTO
				wcf".WCF_N."_solr_index
				(typeID, messageID)
			VALUES	".implode(',', $values);
		WCF::getDB()->sendQuery($sql);

		// reset array
		$this->documents = array();

		// optimize solr index
		#$this->solr->optimize();
	}

	/**
	 * Applies an SQL aggregate to the message id column of each given type.
	 *
	 * Types whose search type object is not accessible are skipped and do not
	 * appear in the result.
	 *
	 * NOTE(review): $func is inserted verbatim into the SQL statement; pass
	 * only trusted aggregate names such as 'COUNT' or 'MAX'.
	 *
	 * @param	array		$types		search type names
	 * @param	string		$func		SQL aggregate function name
	 * @return	array				type name => aggregate value
	 * @throws	SystemException			if a type is not registered
	 */
	protected function getTotals(array $types, $func) {
		$queries = array();
		foreach ($types as $type) {
			// get search type object
			$doc = $this->getSearchTypeObject($type);
			if (!$doc->isAccessible()) continue;

			$messageIDFieldName = $this->getQualifiedIDFieldName($doc);

			$queries[] = "(
				SELECT	".$func."(".$messageIDFieldName.") AS messageID,
					'".$type."' AS messageType
				FROM	".$doc->getTableName()." messageTable
				".$doc->getJoins()."
			)";
		}

		// no accessible type: there is nothing to query
		if (empty($queries)) {
			return array();
		}

		// one UNION query returns a row per type
		$totals = array();
		$result = WCF::getDB()->sendQuery(implode("\nUNION\n", $queries));
		while ($row = WCF::getDB()->fetchArray($result)) {
			$totals[$row['messageType']] = $row['messageID'];
		}

		return $totals;
	}

	/**
	 * Converts a stored message into plain text.
	 *
	 * HTML is stripped first, then the message is run through the
	 * MessageParser with output type 'text/plain'.
	 *
	 * @param	string		$message
	 * @return	string
	 */
	private function cleanText($message) {
		require_once(WCF_DIR.'lib/data/message/bbcode/MessageParser.class.php');

		// add cache resources
		WCF::getCache()->addResource('bbcodes', WCF_DIR.'cache/cache.bbcodes.php', WCF_DIR.'lib/system/cache/CacheBuilderBBCodes.class.php');
		WCF::getCache()->addResource('smileys', WCF_DIR.'cache/cache.smileys.php', WCF_DIR.'lib/system/cache/CacheBuilderSmileys.class.php');

		$parser = MessageParser::getInstance();
		$parser->setOutputType('text/plain');

		// flags are passed through unchanged; see MessageParser::parse() for their meaning
		return $parser->parse(StringUtil::stripHTML($message), false, false, true, false);
	}

	/**
	 * Queues the messages of one type whose id lies between $min and $max.
	 *
	 * Every row is turned into a document via addDocument(); the message
	 * text is converted to plain text first. Nothing is queued when the type
	 * is not accessible.
	 *
	 * @param	string		$type		search type name
	 * @param	integer		$min		lowest message id (inclusive)
	 * @param	integer		$max		highest message id (inclusive)
	 * @param	integer		$limit		passed to sendQuery() as its limit argument
	 * @return	integer				number of added documents
	 * @throws	SystemException			if the type is not registered
	 */
	public function loadDocuments($type, $min, $max, $limit) {
		// get search type object
		$doc = $this->getSearchTypeObject($type);
		// this method is not a loop body, so "continue" is not allowed here
		if (!$doc->isAccessible()) {
			return 0;
		}

		// get field names
		$messageIDFieldName = $this->getQualifiedIDFieldName($doc);
		$subjectFieldNames = $doc->getSubjectFieldNames();
		$messageFieldNames = $doc->getMessageFieldNames();
		$userIDFieldName = $doc->getUserIDFieldName();
		$usernameFieldName = $doc->getUsernameFieldName();
		$timeFieldName = $doc->getTimeFieldName();
		$charset = WCF::getDB()->getCharset();

		// the id bounds are cast to integers; a NULL bound would otherwise
		// leave a hole in the BETWEEN clause
		$sql = "SELECT
				'".$type."' AS messageType,
				".$messageIDFieldName." AS messageID,
				CAST(messageTable.".reset($subjectFieldNames)." AS CHAR CHARACTER SET ".$charset.") AS subject,
				CAST(messageTable.".reset($messageFieldNames)." AS CHAR CHARACTER SET ".$charset.") AS message,
				".$userIDFieldName." AS userID,
				CAST(".$usernameFieldName." AS CHAR CHARACTER SET ".$charset.") AS username,
				".$timeFieldName." AS time
			FROM	".$doc->getTableName()." messageTable
			".$doc->getJoins()."
			WHERE	".$messageIDFieldName." BETWEEN ".intval($min)." AND ".intval($max)."
			GROUP BY messageID
			ORDER BY messageID ASC";

		$result = WCF::getDB()->sendQuery($sql, $limit);
		$count = 0;
		while ($row = WCF::getDB()->fetchArray($result)) {
			$row['message'] = $this->cleanText($row['message']);
			$this->addDocument($row);
			$count++;
		}

		return $count;
	}

	/**
	 * Builds a Solr document from a field map and appends it to the queue.
	 *
	 * Array values are added value by value through setMultiValue(); all
	 * other values are assigned as document properties.
	 *
	 * @param	array		$fields		field name => value or list of values
	 */
	protected function addDocument($fields) {
		$part = new Apache_Solr_Document();
		foreach ($fields as $key => $value) {
			if (is_array($value)) {
				foreach ($value as $singleValue) {
					$part->setMultiValue($key, $singleValue);
				}
			}
			else {
				$part->$key = $value;
			}
		}

		$this->documents[] = $part;
	}

	/**
	 * Loads and commits the messages that are not marked as indexed yet.
	 *
	 * For every type the highest recorded message id is compared with the
	 * highest existing message id; the messages in between are loaded and
	 * committed. $limit applies to each type separately.
	 *
	 * @param	array		$types		search type names; all accessible types when not an array
	 * @param	integer		$limit		passed on to loadDocuments()
	 * @return	integer				number of documents loaded over all types
	 * @throws	SystemException			if a type is not registered
	 */
	public function doCrawl($types = null, $limit = null) {
		// get types
		$types = is_array($types) ? $types : $this->getSearchTypes();

		$loaded = 0;
		foreach ($this->getIndexStatus($types, 'MAX') as $type => $status) {
			// nothing to do?
			if ($status['total'] == $status['current']) {
				continue;
			}

			// validate the type before its object is used
			$doc = $this->getSearchTypeObject($type);
			if (!$doc->isAccessible()) continue;

			$added = $this->loadDocuments($type, $status['current'] + 1, $status['total'], $limit);
			if ($added) {
				$loaded += $added;

				// write to solr
				$this->commit();
			}
		}

		return $loaded;
	}

	/**
	 * Returns the typeID stored for a search type name.
	 *
	 * The name => id map is read once from wcf{N}_searchable_message_type
	 * and cached in self::$typeids.
	 *
	 * @param	string		$type		search type name
	 * @return	integer
	 * @throws	SystemException			if the table has no row for the type
	 */
	private function getTypeID($type) {
		if (self::$typeids === null) {
			self::$typeids = array();

			$sql = 'SELECT	*
				FROM	wcf'.WCF_N.'_searchable_message_type';
			$result = WCF::getDB()->sendQuery($sql);
			while ($row = WCF::getDB()->fetchArray($result)) {
				self::$typeids[$row['typeName']] = $row['typeID'];
			}
		}

		// fail loudly instead of leaking an undefined value into SQL
		if (!isset(self::$typeids[$type])) {
			throw new SystemException('unknown search type '.$type, 101001);
		}

		return self::$typeids[$type];
	}

	/**
	 * Returns the names of all search types that are accessible.
	 *
	 * @return	array<string>
	 */
	private function getSearchTypes() {
		$accessible = array();
		foreach (SearchEngine::getSearchTypes() as $type) {
			if ($this->getSearchTypeObject($type)->isAccessible()) {
				$accessible[] = $type;
			}
		}

		return $accessible;
	}

	/**
	 * Returns the search type object registered for the given type name.
	 *
	 * @param	string		$type		search type name
	 * @return	object				entry of SearchEngine::$searchTypeObjects
	 * @throws	SystemException			if the type is not registered
	 */
	private function getSearchTypeObject($type) {
		if (!isset(SearchEngine::$searchTypeObjects[$type])) {
			throw new SystemException('unknown search type '.$type, 101001);
		}

		return SearchEngine::$searchTypeObjects[$type];
	}

	/**
	 * Returns the id field name of a search type, qualified with a table alias.
	 *
	 * A name that already contains a dot is returned unchanged; any other
	 * name is prefixed with the "messageTable." alias used by the queries of
	 * this class.
	 *
	 * @param	object		$doc		search type object
	 * @return	string
	 */
	private function getQualifiedIDFieldName($doc) {
		$fieldName = $doc->getIDFieldName();
		if (strpos($fieldName, '.') !== false) {
			return $fieldName;
		}

		return "messageTable.".$fieldName;
	}

	/**
	 * Compares the recorded index state with the message tables.
	 *
	 * For every type the result holds
	 *  - 'current': $func(messageID) over the rows in wcf{N}_solr_index,
	 *  - 'total':   $func over the id column of the type's message table,
	 *  - 'percent': 100 / total * current, or 0 when total is empty.
	 * Values that could not be determined stay 0.
	 *
	 * NOTE(review): $func is inserted verbatim into the SQL statements; pass
	 * only trusted aggregate names such as 'COUNT' or 'MAX'.
	 *
	 * @param	array		$types		search type names; all accessible types when not an array
	 * @param	string		$func		SQL aggregate function name
	 * @return	array				type name => array('current', 'total', 'percent')
	 * @throws	SystemException			if a type is not registered
	 */
	public function getIndexStatus($types = null, $func = 'COUNT') {
		$status = array();

		// get types
		$types = is_array($types) ? $types : $this->getSearchTypes();
		if (empty($types)) {
			return $status;
		}

		// set counters to zero
		foreach ($types as $type) {
			// only registered type names may reach the SQL statement below
			$this->getSearchTypeObject($type);

			$status[$type] = array(
				'current' => 0,
				'total' => 0,
				'percent' => 0,
			);
		}

		// read current status
		$sql = 'SELECT	typeName,
				c
			FROM	(
					SELECT		typeID,
							'.$func.'(messageID) AS c
					FROM		wcf'.WCF_N.'_solr_index
					GROUP BY	typeID
				) x
			INNER JOIN wcf'.WCF_N.'_searchable_message_type USING(typeID)
			WHERE	typeName IN ("'.implode('","', $types).'")';
		$result = WCF::getDB()->sendQuery($sql);
		while ($row = WCF::getDB()->fetchArray($result)) {
			$status[$row['typeName']]['current'] = $row['c'];
		}

		// read totals
		foreach ($this->getTotals($types, $func) as $typeName => $count) {
			$status[$typeName]['total'] = $count;
			$status[$typeName]['percent'] = $count ? 100 / $count * $status[$typeName]['current'] : 0;
		}

		return $status;
	}
}
|---|
| 280 | ?> |
|---|