Skip to content

Commit 4e1e063

Browse files
Fixed the ChEBI converter (recent chebi.obo releases differ from older ones in, e.g., how SMILES and InChI values are defined).
Added/updated JSON-LD converter tests (paxtools-jsonld is back on Jena v3 after v4/v5, which generated quite different output/errors). Migrated BasicController from the awkward jakarta.json.Json* API to Jackson.
1 parent 290e622 commit 4e1e063

File tree

10 files changed

+618
-61
lines changed

10 files changed

+618
-61
lines changed

src/main/java/cpath/converter/ChebiOboConverter.java

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,9 @@ class ChebiOboConverter extends BaseConverter
4242
private final String _NAME = "name: ";
4343
private final String _DEF = "def: ";
4444
private final String _SYNONYM = "synonym: ";
45+
private final String _PROPERTY_VALUE = "property_value: "; //new fields in recent chebi.obo
4546

46-
//to extract a text value between quotation marks from 'def:' and 'synonym:' lines:
47+
//to extract a text value between quotation marks in the 'def:', 'synonym:', 'property_value:' lines:
4748
private final Pattern namePattern = Pattern.compile("\"(.+?)\"");
4849
//to extract ID, DB values from 'xref:' lines:
4950
//(since ChEBI OBO format has been slightly changed in 2017, pattern was updated)
@@ -95,6 +96,9 @@ else if (line.startsWith(_ALT_ID)) {
9596
else if (line.startsWith(_SYNONYM)) {
9697
updateMapEntry(chebiEntryMap, _SYNONYM, line);
9798
}
99+
else if (line.startsWith(_PROPERTY_VALUE)) {
100+
updateMapEntry(chebiEntryMap, _PROPERTY_VALUE, line);
101+
}
98102
else if (line.startsWith(_XREF)) {
99103
updateMapEntry(chebiEntryMap, _XREF, line);
100104
}
@@ -168,32 +172,68 @@ private void buildSmallMoleculeReference(Model model, Map<String, String> chebiE
168172
String name = matcher.group(1);
169173
if (sy.contains("IUPAC_NAME")) {
170174
smr.setStandardName(name);
171-
} else if (sy.contains("InChIKey")) {
175+
} else if (sy.contains("InChIKey")) { //not the case with new chebi.obo - moved to _PROPERTY_VALUE, e.g.: property_value: http://purl.obolibrary.org/obo/chebi/inchikey "CRPUJAZIXJMDBK-DTWKUNHWSA-N" xsd:string
172176
if (name.startsWith("InChIKey=")) {
173177
//exclude the prefix
174178
name = name.substring(9);
175179
}
176180
//add RX because a InChIKey can map to several CHEBI IDs
177181
RelationshipXref rx = CPathUtils
178-
.findOrCreateRelationshipXref(RelTypeVocab.IDENTITY, "InChIKey", name, model);
182+
.findOrCreateRelationshipXref(RelTypeVocab.IDENTITY, "InChIKey", name, model);
179183
smr.addXref(rx);
180184
} else if (sy.contains("InChI=")) {
181185
String structureUri = Normalizer
182-
.uri(xmlBase, null, name, ChemicalStructure.class);
186+
.uri(xmlBase, null, name, ChemicalStructure.class);
183187
ChemicalStructure structure = (ChemicalStructure) model.getByID(structureUri);
184188
if (structure == null) {
185189
structure = model.addNew(ChemicalStructure.class, structureUri);
186190
structure.setStructureFormat(StructureFormatType.InChI);
187191
structure.setStructureData(name); //contains "InChI=" prefix
188192
}
189193
smr.setStructure(structure);
190-
} else if (sy.contains("FORMULA")) {
194+
} else if (sy.contains("FORMULA")) {//in new chebi.obo, this also moved from synonym:... to e.g. property_value: http://purl.obolibrary.org/obo/chebi/formula "C10H16" xsd:string
191195
smr.setChemicalFormula(name);
192196
smr.addName(name); //helps to map/search by name
193-
} else if (sy.contains("MASS")) {
197+
} else if (sy.contains("MASS")) {//in new chebi.obo, this also moved from synonym:... to e.g. property_value: http://purl.obolibrary.org/obo/chebi/mass "136.23404" xsd:string
194198
smr.setMolecularWeight(Float.parseFloat(name));
195199
} else {
196-
smr.addName(name); //incl. for SMILES
200+
smr.addName(name); //incl. for SMILES (in older chebi.obo data)
201+
}
202+
}
203+
}
204+
205+
//use property_value data to add names, structure, formula, InChIKey rel.xref, if the field is present
206+
final String propvals = chebiEntryMap.get(_PROPERTY_VALUE);
207+
if(propvals != null && !propvals.isEmpty()) {
208+
String[] entries = propvals.split("\t");
209+
for (String sy : entries) {
210+
Matcher matcher = namePattern.matcher(sy);
211+
if (!matcher.find()) {
212+
throw new IllegalStateException("Pattern failed to find a quoted text within: " + sy);
213+
}
214+
String name = matcher.group(1);
215+
if (sy.contains("purl.obolibrary.org/obo/chebi/inchikey")) {//e.g.: property_value: http://purl.obolibrary.org/obo/chebi/inchikey "CRPUJAZIXJMDBK-DTWKUNHWSA-N" xsd:string
216+
if (name.startsWith("InChIKey=")) {
217+
name = name.substring(9);
218+
}
219+
RelationshipXref rx = CPathUtils.findOrCreateRelationshipXref(RelTypeVocab.IDENTITY, "InChIKey", name, model);
220+
smr.addXref(rx);
221+
} else if (sy.contains("InChI=")) { //e.g. http://purl.obolibrary.org/obo/chebi/inchi "InChI=1S/C10H16/c1-7-8-4-5-9(6-8)10(7,2)3/h8-9H,1,4-6H2,2-3H3/t8-,9+/m0/s1" xsd:string
222+
String structureUri = Normalizer.uri(xmlBase, null, name, ChemicalStructure.class);
223+
ChemicalStructure structure = (ChemicalStructure) model.getByID(structureUri);
224+
if (structure == null) {
225+
structure = model.addNew(ChemicalStructure.class, structureUri);
226+
structure.setStructureFormat(StructureFormatType.InChI);
227+
structure.setStructureData(name); //keep "InChI=" prefix
228+
}
229+
smr.setStructure(structure);
230+
} else if (sy.contains("purl.obolibrary.org/obo/chebi/formula")) {//e.g. property_value: http://purl.obolibrary.org/obo/chebi/formula "C10H16" xsd:string
231+
smr.setChemicalFormula(name);
232+
smr.addName(name); //helps to map/search by name
233+
} else if (sy.contains("purl.obolibrary.org/obo/chebi/mass")) {//e.g. property_value: http://purl.obolibrary.org/obo/chebi/mass "136.23404" xsd:string
234+
smr.setMolecularWeight(Float.parseFloat(name));
235+
} else if (sy.contains("purl.obolibrary.org/obo/chebi/smiles")) {//e.g. property_value: http://purl.obolibrary.org/obo/chebi/smiles "CC1(C)[C@@H]2CC[C@@H](C2)C1=C" xsd:string
236+
smr.addName(name);
197237
}
198238
}
199239
}

src/main/java/cpath/service/BiopaxConverter.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import org.biopax.paxtools.io.*;
1919
import org.biopax.paxtools.model.*;
2020
import org.biopax.paxtools.model.level3.Provenance;
21+
import org.biopax.paxtools.normalizer.ConfigurableIDFetcher;
2122
import org.biopax.paxtools.pattern.miner.*;
2223
import org.biopax.paxtools.pattern.util.Blacklist;
2324
import org.slf4j.Logger;

src/main/java/cpath/web/BasicController.java

Lines changed: 10 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
import java.util.Arrays;
1010
import java.util.Set;
1111

12-
import jakarta.json.Json;
13-
import jakarta.json.JsonObjectBuilder;
12+
import com.fasterxml.jackson.databind.ObjectMapper;
13+
import com.fasterxml.jackson.databind.node.ObjectNode;
1414
import jakarta.servlet.http.HttpServletRequest;
1515
import jakarta.servlet.http.HttpServletResponse;
1616

@@ -45,6 +45,8 @@ public abstract class BasicController
4545

4646
protected Service service;
4747

48+
protected ObjectMapper jsonObjectMapper = new ObjectMapper();
49+
4850
@Autowired
4951
public void setService(Service service) {
5052
this.service = service;
@@ -186,55 +188,24 @@ final BufferedImage scaleImage(BufferedImage img, int width, int height)
186188
return newImage;
187189
}
188190

189-
190-
/*
191-
* Extracts the client's IP from the request headers.
192-
*/
193-
// private static String clientIpAddress(HttpServletRequest request)
194-
// {
195-
// String ip = request.getHeader("X-Forwarded-For");
196-
// if (ip == null || ip.isEmpty() || "unknown".equalsIgnoreCase(ip)) {
197-
// ip = request.getHeader("Proxy-Client-IP");
198-
// }
199-
// if (ip == null || ip.isEmpty() || "unknown".equalsIgnoreCase(ip)) {
200-
// ip = request.getHeader("WL-Proxy-Client-IP");
201-
// }
202-
// if (ip == null || ip.isEmpty() || "unknown".equalsIgnoreCase(ip)) {
203-
// ip = request.getHeader("HTTP_CLIENT_IP");
204-
// }
205-
// if (ip == null || ip.isEmpty() || "unknown".equalsIgnoreCase(ip)) {
206-
// ip = request.getHeader("HTTP_X_FORWARDED_FOR");
207-
// }
208-
// if (ip == null || ip.isEmpty() || "unknown".equalsIgnoreCase(ip)) {
209-
// ip = request.getRemoteAddr();
210-
// }
211-
// return ip;
212-
// }
213-
214191
void audit(HttpServletRequest request, ServiceQuery command, Set<String> providers, ErrorResponse err)
215192
{
216-
JsonObjectBuilder jb = Json.createObjectBuilder();
217-
218-
//get user-agent, IP, status, etc. from nginx/apache logs instead of here in the app...
219-
// jb.add("ip", clientIpAddress(request));
193+
ObjectNode root = jsonObjectMapper.createObjectNode();
220194

221195
if (err != null) {
222-
jb.add("error", err.toString());
196+
root.put("error", err.toString());
223197
}
224198

225199
if(command != null) {
226-
// TODO: change if there is any use (now we just add truncated string, not json object here
227-
//(can be very large if many URIs or SIF patterns are submitted in the request)
228-
jb.add("query", StringUtils.truncate(command.toString(),128));
200+
//truncate as it can be very large when many URIs or SIF patterns were submitted with the request
201+
root.put("query", StringUtils.truncate(command.toString(),128));
229202
}
230203

231204
if (!CollectionUtils.isEmpty(providers)) {
232-
jb.add("pro", Json.createArrayBuilder(providers));
205+
root.put("pro", jsonObjectMapper.valueToTree(providers));
233206
}
234207

235-
// jb.add("accept", request.getHeader("Accept"));
236-
237-
log.info(jb.build().toString());
208+
log.info(root.toString());
238209
}
239210

240211
}

src/main/resources/metadata.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"datasources": [
55
{
66
"dataUrl": "classpath:test_uniprot_data.dat.zip",
7-
"identifier": "TEST_UNIPROT",
7+
"identifier": "TESTUNIPROT",
88
"homepageUrl": "http://www.uniprot.org",
99
"name": [
1010
"UniProt"
@@ -18,13 +18,13 @@
1818
},
1919
{
2020
"dataUrl": "classpath:chebi.obo.zip",
21-
"identifier": "TEST_CHEBI",
21+
"identifier": "TESTCHEBI",
2222
"homepageUrl": "https://www.ebi.ac.uk/chebi/",
2323
"name": [
2424
"ChEBI"
2525
],
2626
"converterClass": "cpath.converter.ChebiOboConverter",
27-
"description": "Test ChEBI OBO",
27+
"description": "Test ChEBI OBO (extracted from chebi.obo release v231 and modified for integration/merge tests!)",
2828
"iconUrl": "https://pathwaycommons.github.io/cpath2/logos/chebi.png",
2929
"availability": "free",
3030
"type": "WAREHOUSE",

src/test/java/cpath/converter/ChebiConvertersTest.java

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ public void convertObo() throws IOException {
6262

6363
// check some props
6464
assertEquals("(S)-lactic acid", smallMoleculeReference.getDisplayName());
65-
assertEquals(13, smallMoleculeReference.getName().size()); //now includes Wikipedia, SMILES(CHEBI) names
65+
assertEquals(13, smallMoleculeReference.getName().size()); //includes Wikipedia, SMILES names
6666
assertEquals("C3H6O3", smallMoleculeReference.getChemicalFormula());
6767
int relationshipXrefCount = 0;
6868
int unificationXrefCount = 0;
@@ -78,27 +78,29 @@ public void convertObo() throws IOException {
7878

7979
// following checks work in this test only (using in-memory model); with DAO - use getObject...
8080
assertTrue(model.containsID("http://bioregistry.io/chebi:20"));
81-
EntityReference er20 = (EntityReference) model.getByID("http://bioregistry.io/chebi:20");
81+
SmallMoleculeReference er20 = (SmallMoleculeReference) model.getByID("http://bioregistry.io/chebi:20");
8282
assertTrue(model.containsID("http://bioregistry.io/chebi:28"));
83-
// EntityReference er28 = (EntityReference) model.getByID("http://bioregistry.io/chebi:28");
8483
assertTrue(model.containsID("http://bioregistry.io/chebi:422"));
8584
EntityReference er422 = (EntityReference) model.getByID("http://bioregistry.io/chebi:422");
8685

8786
assertTrue(er20.getMemberEntityReferenceOf().isEmpty());
87+
assertNotNull(er20.getStructure());
88+
assertTrue(StringUtils.isNotBlank(er20.getChemicalFormula()));
89+
8890
assertTrue(er422.getMemberEntityReferenceOf().isEmpty());
89-
assertTrue(model.containsID("RX_chebi_CHEBI_422_multiple_parent_reference"));
9091

91-
// check new elements (created by the OBO converter) exist in the model;
92-
// (particularly, these assertions are important to test within the persistent model (DAO) session)
92+
assertTrue(model.containsID("RX_chebi_CHEBI_422_multiple_parent_reference"));
9393
assertTrue(model.containsID("RX_chebi_CHEBI_20_see-also"));
9494
assertTrue(model.containsID("RX_chebi_CHEBI_422_see-also"));
9595

96-
9796
//after refactoring, make sure the id of every chebi-related xref contains the "CHEBI:" prefix (i.e., the ids are not unprefixed)
9897
model.getObjects(Xref.class).stream().filter(x -> StringUtils.contains(x.getUri(),"chebi"))
9998
.forEach(x -> assertTrue(StringUtils.containsIgnoreCase(x.getId(),"CHEBI:")));
100-
10199
model.getObjects(Xref.class).stream().filter(x -> StringUtils.contains(x.getUri(),"chebi"))
102100
.forEach(x -> assertEquals("chebi", x.getDb()));
101+
102+
SmallMoleculeReference smr28 = (SmallMoleculeReference) model.getByID("http://bioregistry.io/chebi:28");
103+
assertNotNull(smr28);
104+
assertEquals(8, smr28.getXref().size());
103105
}
104106
}

src/test/java/cpath/service/BiopaxConverterTest.java

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,23 @@
55
import cpath.service.api.OutputFormat;
66
import cpath.service.jaxb.DataResponse;
77
import cpath.service.jaxb.ServiceResponse;
8+
import org.biopax.paxtools.io.SimpleIOHandler;
89
import org.biopax.paxtools.model.BioPAXLevel;
910
import org.biopax.paxtools.model.Model;
1011
import org.biopax.paxtools.model.level3.Pathway;
12+
import org.junit.jupiter.api.Assertions;
1113
import org.junit.jupiter.api.Test;
1214

15+
import java.io.IOException;
16+
import java.net.URI;
17+
import java.nio.file.Files;
18+
import java.nio.file.Paths;
19+
1320

1421
public class BiopaxConverterTest {
1522

1623
@Test
17-
public final void testToJsonLd() {
24+
public final void testToJsonld() {
1825
Model m = BioPAXLevel.L3.getDefaultFactory().createModel();
1926
m.setXmlBase("http://pathwaycommons.org/pc2/"); //Jena fails if not set!
2027
Pathway bpe = m.addNew(Pathway.class, "http://pathwaycommons.org/pc2/Pathway_test-URI");
@@ -25,4 +32,25 @@ public final void testToJsonLd() {
2532
assertFalse(sr.isEmpty());
2633
}
2734

35+
@Test
36+
public final void testDemo1ToJsonld() throws IOException {
37+
Model m = new SimpleIOHandler().convertFromOWL(getClass().getResourceAsStream("/demo-pathway.owl"));
38+
ServiceResponse sr = new BiopaxConverter(null).convert(m, OutputFormat.JSONLD, null);
39+
40+
assertTrue(sr instanceof DataResponse && !sr.isEmpty());
41+
42+
String resf = ((DataResponse)sr).getData().toString(); //must be a temp. file name
43+
String res = Files.readString(Paths.get(resf));
44+
45+
Assertions.assertAll(
46+
() -> Assertions.assertThrows(IllegalArgumentException.class, () -> URI.create("http://")), //bad URI
47+
() -> Assertions.assertTrue(res.contains("@id\" : \"http://bioregistry.io/chebi:20")),
48+
() -> Assertions.assertTrue(res.contains("@id\" : \"http://bioregistry.io/mi:0361")),//as long as it has 'http://' (valid abs. uri w/o schema would fail here due Jena bug)
49+
() -> Assertions.assertTrue(res.contains("@id\" : \"chebi:20")), //CURIE of a standard/normalized SMR's UnificationXref
50+
() -> Assertions.assertTrue(res.contains("\"@id\" : \"http://www.biopax.org/release/biopax-level3.owl#displayName\"")),
51+
() -> Assertions.assertTrue(res.contains("\"displayName\" : \"(+)-camphene\"")), //chebi:20
52+
() -> Assertions.assertTrue(res.contains("\"id\" : \"CHEBI:20\"")) //with jena v3 (e.g. 3.2.0 or 3.17.0)
53+
);
54+
}
55+
2856
}

src/test/java/cpath/service/CPathUtilsTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ public void readWriteMetadata() {
7474
Metadata metadata = CPathUtils.readMetadata(url);
7575
List<Datasource> datasources = metadata.getDatasources();
7676
final Datasource datasource = datasources.stream()
77-
.filter(m -> m.getIdentifier().equals("TEST_UNIPROT"))
77+
.filter(m -> m.getIdentifier().equals("TESTUNIPROT"))
7878
.findFirst().orElse(null);
7979
assertAll(
8080
() -> assertEquals(3, datasources.size()),

src/test/java/cpath/service/ConsoleApplicationIT.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,9 +182,9 @@ public void premergeAndMerge() throws IOException {
182182
assertTrue(service.settings().getOrganismsAsTaxonomyToNameMap().containsKey("9606"));
183183
assertEquals("Homo sapiens", service.settings().getOrganismsAsTaxonomyToNameMap().get("9606"));
184184

185-
Datasource ds = service.metadata().findByIdentifier("TEST_UNIPROT");
185+
Datasource ds = service.metadata().findByIdentifier("TESTUNIPROT");
186186
assertNotNull(ds);
187-
ds = service.metadata().findByIdentifier("TEST_CHEBI");
187+
ds = service.metadata().findByIdentifier("TESTCHEBI");
188188
assertNotNull(ds);
189189
ds = service.metadata().findByIdentifier("TEST_MAPPING");
190190
assertNotNull(ds);

src/test/resources/chebi.obo.zip

696 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)