Skip to content

Commit 4dfb9e7

Browse files
authored
Merge pull request #10241 from Recherche-Data-Gouv/10161-Http2SolrClient
Migration HttpSolrClient to Http2SolrClient and ConcurrentUpdateHttp2SolrClient
2 parents 5c2cf5b + 7cae923 commit 4dfb9e7

File tree

12 files changed

+275
-142
lines changed

12 files changed

+275
-142
lines changed
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
[HttpSolrClient](https://solr.apache.org/docs/9_4_1/solrj/org/apache/solr/client/solrj/impl/HttpSolrClient.html) is deprecated as of Solr 9 and will be removed in a future major release of Solr. It is recommended to use [Http2SolrClient](https://solr.apache.org/docs/9_4_1/solrj/org/apache/solr/client/solrj/impl/Http2SolrClient.html) instead.
2+
3+
The [Solr documentation](https://solr.apache.org/guide/solr/latest/deployment-guide/solrj.html#types-of-solrclients) describes it as an _async, non-blocking and general-purpose client that leverage HTTP/2 using the Jetty Http library_.
4+
5+
In Solr 9.4.1, Http2SolrClient is marked as experimental. Since Solr 9.6, however, it is no longer labelled as such.
6+
7+
ConcurrentUpdateHttp2SolrClient, which is supposed to be more efficient for indexing, is now also used in some cases.
8+
9+
For more information, see issue [#10161](https://github.com/IQSS/dataverse/issues/10161) and pull request [#10241](https://github.com/IQSS/dataverse/pull/10241)

doc/sphinx-guides/source/installation/config.rst

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3356,9 +3356,6 @@ please find all known feature flags below. Any of these flags can be activated u
33563356
* - reduce-solr-deletes
33573357
- Avoids deleting and recreating solr documents for dataset files when reindexing.
33583358
- ``Off``
3359-
* - reduce-solr-deletes
3360-
- Avoids deleting and recreating solr documents for dataset files when reindexing.
3361-
- ``Off``
33623359
* - disable-return-to-author-reason
33633360
- Removes the reason field in the `Publish/Return To Author` dialog that was added as a required field in v6.2 and makes the reason an optional parameter in the :ref:`return-a-dataset` API call.
33643361
- ``Off``

src/main/java/edu/harvard/iq/dataverse/DatasetPage.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@
163163
import edu.harvard.iq.dataverse.util.FileMetadataUtil;
164164
import java.util.Comparator;
165165
import org.apache.solr.client.solrj.SolrQuery;
166-
import org.apache.solr.client.solrj.impl.HttpSolrClient;
166+
import org.apache.solr.client.solrj.impl.BaseHttpSolrClient.RemoteSolrException;
167167
import org.apache.solr.client.solrj.response.FacetField;
168168
import org.apache.solr.client.solrj.response.QueryResponse;
169169
import org.apache.solr.common.SolrDocument;
@@ -1041,7 +1041,7 @@ public Set<Long> getFileIdsInVersionFromSolr(Long datasetVersionId, String patte
10411041

10421042
try {
10431043
queryResponse = solrClientService.getSolrClient().query(solrQuery);
1044-
} catch (HttpSolrClient.RemoteSolrException ex) {
1044+
} catch (RemoteSolrException ex) {
10451045
logger.fine("Remote Solr Exception: " + ex.getLocalizedMessage());
10461046
String msg = ex.getLocalizedMessage();
10471047
if (msg.contains(SearchFields.FILE_DELETED)) {
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
package edu.harvard.iq.dataverse.search;
2+
3+
import java.io.IOException;
4+
import java.util.logging.Logger;
5+
6+
import org.apache.solr.client.solrj.SolrClient;
7+
8+
import edu.harvard.iq.dataverse.settings.JvmSettings;
9+
import edu.harvard.iq.dataverse.util.SystemConfig;
10+
import jakarta.ejb.EJB;
11+
12+
/**
13+
* Generics methods for Solr clients implementations
14+
*
15+
* @author jeromeroucou
16+
*/
17+
public abstract class AbstractSolrClientService {
18+
private static final Logger logger = Logger.getLogger(AbstractSolrClientService.class.getCanonicalName());
19+
20+
@EJB
21+
SystemConfig systemConfig;
22+
23+
public abstract void init();
24+
public abstract void close();
25+
public abstract SolrClient getSolrClient();
26+
public abstract void setSolrClient(SolrClient solrClient);
27+
28+
public void close(SolrClient solrClient) {
29+
if (solrClient != null) {
30+
try {
31+
solrClient.close();
32+
} catch (IOException e) {
33+
logger.warning("Solr closing error: " + e);
34+
}
35+
solrClient = null;
36+
}
37+
}
38+
39+
public void reInitialize() {
40+
close();
41+
init();
42+
}
43+
44+
public String getSolrUrl() {
45+
// Get from MPCONFIG. Might be configured by a sysadmin or simply return the
46+
// default shipped with resources/META-INF/microprofile-config.properties.
47+
final String protocol = JvmSettings.SOLR_PROT.lookup();
48+
final String path = JvmSettings.SOLR_PATH.lookup();
49+
return protocol + "://" + this.systemConfig.getSolrHostColonPort() + path;
50+
}
51+
}

src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java

Lines changed: 39 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,34 @@
11
package edu.harvard.iq.dataverse.search;
22

3-
import edu.harvard.iq.dataverse.*;
3+
import edu.harvard.iq.dataverse.ControlledVocabularyValue;
4+
import edu.harvard.iq.dataverse.DataFile;
5+
import edu.harvard.iq.dataverse.DataFileServiceBean;
6+
import edu.harvard.iq.dataverse.DataFileTag;
7+
import edu.harvard.iq.dataverse.Dataset;
8+
import edu.harvard.iq.dataverse.DatasetField;
9+
import edu.harvard.iq.dataverse.DatasetFieldCompoundValue;
10+
import edu.harvard.iq.dataverse.DatasetFieldConstant;
11+
import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
12+
import edu.harvard.iq.dataverse.DatasetFieldType;
13+
import edu.harvard.iq.dataverse.DatasetFieldValue;
14+
import edu.harvard.iq.dataverse.DatasetFieldValueValidator;
15+
import edu.harvard.iq.dataverse.DatasetLinkingServiceBean;
16+
import edu.harvard.iq.dataverse.DatasetServiceBean;
17+
import edu.harvard.iq.dataverse.DatasetVersion;
418
import edu.harvard.iq.dataverse.DatasetVersion.VersionState;
19+
import edu.harvard.iq.dataverse.DatasetVersionFilesServiceBean;
20+
import edu.harvard.iq.dataverse.DatasetVersionServiceBean;
21+
import edu.harvard.iq.dataverse.Dataverse;
22+
import edu.harvard.iq.dataverse.DataverseLinkingServiceBean;
23+
import edu.harvard.iq.dataverse.DataverseServiceBean;
24+
import edu.harvard.iq.dataverse.DvObject;
525
import edu.harvard.iq.dataverse.DvObject.DType;
26+
import edu.harvard.iq.dataverse.DvObjectServiceBean;
27+
import edu.harvard.iq.dataverse.Embargo;
28+
import edu.harvard.iq.dataverse.FileMetadata;
29+
import edu.harvard.iq.dataverse.GlobalId;
30+
import edu.harvard.iq.dataverse.PermissionServiceBean;
31+
import edu.harvard.iq.dataverse.Retention;
632
import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean;
733
import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUserServiceBean;
834
import edu.harvard.iq.dataverse.batch.util.LoggingUtil;
@@ -48,8 +74,6 @@
4874
import java.util.logging.Logger;
4975
import java.util.regex.Pattern;
5076
import java.util.stream.Collectors;
51-
import jakarta.annotation.PostConstruct;
52-
import jakarta.annotation.PreDestroy;
5377
import jakarta.ejb.AsyncResult;
5478
import jakarta.ejb.Asynchronous;
5579
import jakarta.ejb.EJB;
@@ -66,11 +90,9 @@
6690

6791
import org.apache.commons.io.IOUtils;
6892
import org.apache.commons.lang3.StringUtils;
69-
import org.apache.solr.client.solrj.SolrClient;
7093
import org.apache.solr.client.solrj.SolrQuery;
7194
import org.apache.solr.client.solrj.SolrQuery.SortClause;
7295
import org.apache.solr.client.solrj.SolrServerException;
73-
import org.apache.solr.client.solrj.impl.HttpSolrClient;
7496
import org.apache.solr.client.solrj.response.QueryResponse;
7597
import org.apache.solr.client.solrj.response.UpdateResponse;
7698
import org.apache.solr.common.SolrDocument;
@@ -125,16 +147,15 @@ public class IndexServiceBean {
125147
@EJB
126148
SettingsServiceBean settingsService;
127149
@EJB
128-
SolrClientService solrClientService;
150+
SolrClientService solrClientService; // only for query index on Solr
151+
@EJB
152+
SolrClientIndexService solrClientIndexService; // only for add, update, or remove index on Solr
129153
@EJB
130154
DataFileServiceBean dataFileService;
131155

132156
@EJB
133157
VariableServiceBean variableService;
134-
135-
@EJB
136-
IndexBatchServiceBean indexBatchService;
137-
158+
138159
@EJB
139160
DatasetFieldServiceBean datasetFieldService;
140161

@@ -157,37 +178,10 @@ public class IndexServiceBean {
157178
private static final String IN_REVIEW_STRING = "In Review";
158179
private static final String DEACCESSIONED_STRING = "Deaccessioned";
159180
public static final String HARVESTED = "Harvested";
160-
private String rootDataverseName;
161181
private Dataverse rootDataverseCached;
162-
SolrClient solrServer;
163182

164183
private VariableMetadataUtil variableMetadataUtil;
165184

166-
@PostConstruct
167-
public void init() {
168-
// Get from MPCONFIG. Might be configured by a sysadmin or simply return the default shipped with
169-
// resources/META-INF/microprofile-config.properties.
170-
String protocol = JvmSettings.SOLR_PROT.lookup();
171-
String path = JvmSettings.SOLR_PATH.lookup();
172-
173-
String urlString = protocol + "://" + systemConfig.getSolrHostColonPort() + path;
174-
solrServer = new HttpSolrClient.Builder(urlString).build();
175-
176-
rootDataverseName = findRootDataverseCached().getName();
177-
}
178-
179-
@PreDestroy
180-
public void close() {
181-
if (solrServer != null) {
182-
try {
183-
solrServer.close();
184-
} catch (IOException e) {
185-
logger.warning("Solr closing error: " + e);
186-
}
187-
solrServer = null;
188-
}
189-
}
190-
191185
@TransactionAttribute(REQUIRES_NEW)
192186
public Future<String> indexDataverseInNewTransaction(Dataverse dataverse) throws SolrServerException, IOException{
193187
return indexDataverse(dataverse, false);
@@ -326,7 +320,7 @@ public Future<String> indexDataverse(Dataverse dataverse, boolean processPaths)
326320
String status;
327321
try {
328322
if (dataverse.getId() != null) {
329-
solrClientService.getSolrClient().add(docs);
323+
solrClientIndexService.getSolrClient().add(docs);
330324
} else {
331325
logger.info("WARNING: indexing of a dataverse with no id attempted");
332326
}
@@ -1745,7 +1739,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
17451739
final SolrInputDocuments docs = toSolrDocs(indexableDataset, datafilesInDraftVersion);
17461740

17471741
try {
1748-
solrClientService.getSolrClient().add(docs.getDocuments());
1742+
solrClientIndexService.getSolrClient().add(docs.getDocuments());
17491743
} catch (SolrServerException | IOException ex) {
17501744
if (ex.getCause() instanceof SolrServerException) {
17511745
throw new SolrServerException(ex);
@@ -2007,7 +2001,7 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc
20072001

20082002
sid.removeField(SearchFields.SUBTREE);
20092003
sid.addField(SearchFields.SUBTREE, paths);
2010-
UpdateResponse addResponse = solrClientService.getSolrClient().add(sid);
2004+
UpdateResponse addResponse = solrClientIndexService.getSolrClient().add(sid);
20112005
if (object.isInstanceofDataset()) {
20122006
for (DataFile df : dataset.getFiles()) {
20132007
solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, df.getId().toString()));
@@ -2020,7 +2014,7 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc
20202014
}
20212015
sid.removeField(SearchFields.SUBTREE);
20222016
sid.addField(SearchFields.SUBTREE, paths);
2023-
addResponse = solrClientService.getSolrClient().add(sid);
2017+
addResponse = solrClientIndexService.getSolrClient().add(sid);
20242018
}
20252019
}
20262020
}
@@ -2062,7 +2056,7 @@ public String delete(Dataverse doomed) {
20622056
logger.fine("deleting Solr document for dataverse " + doomed.getId());
20632057
UpdateResponse updateResponse;
20642058
try {
2065-
updateResponse = solrClientService.getSolrClient().deleteById(solrDocIdentifierDataverse + doomed.getId());
2059+
updateResponse = solrClientIndexService.getSolrClient().deleteById(solrDocIdentifierDataverse + doomed.getId());
20662060
} catch (SolrServerException | IOException ex) {
20672061
return ex.toString();
20682062
}
@@ -2082,7 +2076,7 @@ public String removeSolrDocFromIndex(String doomed) {
20822076
logger.fine("deleting Solr document: " + doomed);
20832077
UpdateResponse updateResponse;
20842078
try {
2085-
updateResponse = solrClientService.getSolrClient().deleteById(doomed);
2079+
updateResponse = solrClientIndexService.getSolrClient().deleteById(doomed);
20862080
} catch (SolrServerException | IOException ex) {
20872081
return ex.toString();
20882082
}
@@ -2285,7 +2279,7 @@ public List<String> findPermissionsInSolrOnly() throws SearchException {
22852279
boolean done = false;
22862280
while (!done) {
22872281
q.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
2288-
QueryResponse rsp = solrServer.query(q);
2282+
QueryResponse rsp = solrClientService.getSolrClient().query(q);
22892283
String nextCursorMark = rsp.getNextCursorMark();
22902284
logger.fine("Next cursor mark (1K entries): " + nextCursorMark);
22912285
SolrDocumentList list = rsp.getResults();
@@ -2367,7 +2361,7 @@ private List<String> findDvObjectInSolrOnly(String type) throws SearchException
23672361
solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
23682362
QueryResponse rsp = null;
23692363
try {
2370-
rsp = solrServer.query(solrQuery);
2364+
rsp = solrClientService.getSolrClient().query(solrQuery);
23712365
} catch (SolrServerException | IOException ex) {
23722366
throw new SearchException("Error searching Solr type: " + type, ex);
23732367

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package edu.harvard.iq.dataverse.search;
2+
3+
import java.util.logging.Logger;
4+
5+
import org.apache.solr.client.solrj.SolrClient;
6+
import org.apache.solr.client.solrj.impl.ConcurrentUpdateHttp2SolrClient;
7+
import org.apache.solr.client.solrj.impl.Http2SolrClient;
8+
9+
import jakarta.annotation.PostConstruct;
10+
import jakarta.annotation.PreDestroy;
11+
import jakarta.ejb.Singleton;
12+
import jakarta.inject.Named;
13+
14+
/**
15+
* Solr client to provide insert/update/delete operations.
16+
* Don't use this service with queries to Solr, use {@link SolrClientService} instead.
17+
*/
18+
@Named
19+
@Singleton
20+
public class SolrClientIndexService extends AbstractSolrClientService {
21+
22+
private static final Logger logger = Logger.getLogger(SolrClientIndexService.class.getCanonicalName());
23+
24+
private SolrClient solrClient;
25+
26+
@PostConstruct
27+
public void init() {
28+
solrClient = new ConcurrentUpdateHttp2SolrClient.Builder(
29+
getSolrUrl(), new Http2SolrClient.Builder().build()).build();
30+
}
31+
32+
@PreDestroy
33+
public void close() {
34+
close(solrClient);
35+
}
36+
37+
public SolrClient getSolrClient() {
38+
// Should never happen - but?
39+
if (solrClient == null) {
40+
init();
41+
}
42+
return solrClient;
43+
}
44+
45+
public void setSolrClient(SolrClient solrClient) {
46+
this.solrClient = solrClient;
47+
}
48+
49+
}

0 commit comments

Comments
 (0)