Skip to content

Commit

Permalink
Merge pull request #157 from ate47/fix_multisection
Browse files Browse the repository at this point in the history
Fix #104 "java.lang.NegativeArraySizeException parsing Wikidata HDT file" and fix some warnings
  • Loading branch information
D063520 authored Apr 8, 2022
2 parents 8ddb098 + a2db323 commit a89c71b
Show file tree
Hide file tree
Showing 17 changed files with 1,163 additions and 404 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import java.nio.ByteBuffer;

import org.rdfhdt.hdt.util.Mutable;
import org.rdfhdt.hdt.util.io.BigByteBuffer;
import org.rdfhdt.hdt.util.io.BigMappedByteBuffer;

/**
Expand Down Expand Up @@ -142,7 +143,7 @@ public static int encode(byte[] data, int offset, int value) {

return i;
}

public static int decode(byte[] data, int offset, Mutable<Long> value) {
long out = 0;
int i=0;
Expand All @@ -157,6 +158,21 @@ public static int decode(byte[] data, int offset, Mutable<Long> value) {
value.setValue(out);
return i;
}

/**
 * Decodes one variable-byte encoded value stored in a {@link BigByteBuffer}.
 *
 * <p>Bytes are read starting at {@code offset}. Every byte whose high bit
 * ({@code 0x80}) is clear contributes its 7 low bits and is followed by
 * another byte; the first byte whose high bit is set contributes its 7 low
 * bits and ends the value. Groups are stored least-significant first.
 *
 * @param data   buffer holding the encoded bytes
 * @param offset absolute position of the first byte of the value
 * @param value  holder that receives the decoded number
 * @return the number of bytes consumed from {@code data}
 */
public static int decode(BigByteBuffer data, long offset, Mutable<Long> value) {
    long out = 0;
    int i = 0;
    int shift = 0;
    while ((0x80 & data.get(offset + i)) == 0) {
        // Widen to long BEFORE shifting: an int shift only uses the low 5 bits
        // of the distance, so groups past the 4th byte would be misplaced.
        out |= ((long) (data.get(offset + i) & 127)) << shift;
        i++;
        shift += 7;
    }
    // Terminating byte (high bit set) carries the most significant group.
    out |= ((long) (data.get(offset + i) & 127)) << shift;
    i++;
    value.setValue(out);
    return i;
}

public static void show(byte[] data, int len) {
for(int i=0;i<len;i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
import org.rdfhdt.hdt.dictionary.impl.utilCat.CatMappingBack;
import org.rdfhdt.hdt.listener.ProgressListener;

import java.io.IOException;
import java.util.HashMap;

public interface DictionaryCat {
void cat(Dictionary dictionary1, Dictionary dictionary2, ProgressListener listener);
void cat(Dictionary dictionary1, Dictionary dictionary2, ProgressListener listener) throws IOException;
CatMappingBack getMappingS();
long getNumShared();
HashMap<String, CatMapping> getAllMappings();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,20 +36,18 @@

public class FourSectionDictionaryCat implements DictionaryCat {

private String location;
private int DEFAULT_BLOCK_SIZE = 16;
private int BLOCK_PER_BUFFER = 1000000;
private final HashMap<String,CatMapping> allMappings = new HashMap<>();
private final String location;
private long numShared;

private HashMap<String,CatMapping> allMappings = new HashMap<>();

private CatMappingBack mappingS;

/**
 * Creates a dictionary-cat helper bound to the given location.
 *
 * @param location string prefix handed to every {@code CatMapping} created by
 *                 {@code cat} and prepended to the {@code "section" + j} file
 *                 names that {@code cat} reads back
 */
public FourSectionDictionaryCat(String location) {
this.location = location;
}

public void cat(Dictionary dictionary1, Dictionary dictionary2, ProgressListener listener){
public void cat(Dictionary dictionary1, Dictionary dictionary2, ProgressListener listener) throws IOException {
allMappings.put("P1",new CatMapping(location,"P1",dictionary1.getPredicates().getNumberOfElements()));
allMappings.put("P2",new CatMapping(location,"P2",dictionary2.getPredicates().getNumberOfElements()));
allMappings.put("S1",new CatMapping(location,"S1",dictionary1.getSubjects().getNumberOfElements()));
Expand All @@ -64,7 +62,6 @@ public void cat(Dictionary dictionary1, Dictionary dictionary2, ProgressListener

int numCommonPredicates = 0;
CatIntersection commonP1P2 = new CatIntersection(new CatWrapper(dictionary1.getPredicates().getSortedEntries(),"P1"),new CatWrapper(dictionary2.getPredicates().getSortedEntries(),"P2"));
long maxPredicates = dictionary1.getPredicates().getNumberOfElements()+dictionary2.getPredicates().getNumberOfElements();
while (commonP1P2.hasNext()){
commonP1P2.next();
numCommonPredicates++;
Expand All @@ -76,8 +73,6 @@ public void cat(Dictionary dictionary1, Dictionary dictionary2, ProgressListener
addPredicatesList.add(new CatWrapper(dictionary1.getPredicates().getSortedEntries(),"P1"));
addPredicatesList.add(new CatWrapper(dictionary2.getPredicates().getSortedEntries(),"P2"));
CatUnion itAddPredicates = new CatUnion(addPredicatesList);
// while (itAddPredicates.hasNext())
// System.out.println(itAddPredicates.next().entity);
SectionUtil.createSection(location,numPredicates, 4,itAddPredicates, new CatUnion(new ArrayList<>()),allMappings,0, listener);
System.out.println("SUBJECTS-------------------");
ArrayList<Iterator<CatElement>> skipSubjectList = new ArrayList<>();
Expand Down Expand Up @@ -160,7 +155,6 @@ public void cat(Dictionary dictionary1, Dictionary dictionary2, ProgressListener
i2.next();
numCommonS1O2++;
}
Iterator<? extends CharSequence> it = dictionary2.getSubjects().getSortedEntries();
i2 = new CatIntersection(new CatWrapper(dictionary1.getObjects().getSortedEntries(),"O1"), new CatWrapper(dictionary2.getSubjects().getSortedEntries(),"S2"));
int numCommonO1S2=0;
while (i2.hasNext()){
Expand Down Expand Up @@ -210,7 +204,7 @@ public void cat(Dictionary dictionary1, Dictionary dictionary2, ProgressListener
}
try {
InputStream in = new FileInputStream(location + "section" + j);
int b = 0;
int b;
while ((b = in.read(buf)) >= 0) {
outFinal.write(buf, 0, b);
outFinal.flush();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,28 @@
import org.rdfhdt.hdt.compact.bitmap.ModifiableBitmap;
import org.rdfhdt.hdt.dictionary.Dictionary;
import org.rdfhdt.hdt.dictionary.DictionaryDiff;
import org.rdfhdt.hdt.dictionary.impl.utilCat.*;
import org.rdfhdt.hdt.dictionary.impl.utilCat.CatElement;
import org.rdfhdt.hdt.dictionary.impl.utilCat.CatIntersection;
import org.rdfhdt.hdt.dictionary.impl.utilCat.CatMapping;
import org.rdfhdt.hdt.dictionary.impl.utilCat.CatUnion;
import org.rdfhdt.hdt.dictionary.impl.utilCat.SectionUtil;
import org.rdfhdt.hdt.dictionary.impl.utilDiff.DiffWrapper;
import org.rdfhdt.hdt.hdt.HDTVocabulary;
import org.rdfhdt.hdt.listener.ProgressListener;
import org.rdfhdt.hdt.options.ControlInfo;
import org.rdfhdt.hdt.options.ControlInformation;

import java.io.*;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

public class FourSectionDictionaryDiff implements DictionaryDiff {

Expand All @@ -29,7 +40,7 @@ public FourSectionDictionaryDiff(String location) {
}

@Override
public void diff(Dictionary dictionary, Map<String, ModifiableBitmap> bitmaps, ProgressListener listener) {
public void diff(Dictionary dictionary, Map<String, ModifiableBitmap> bitmaps, ProgressListener listener) throws IOException {
allMappings.put("predicate", new CatMapping(location, "predicate", dictionary.getPredicates().getNumberOfElements()));
allMappings.put("subject", new CatMapping(location, "subject", dictionary.getSubjects().getNumberOfElements()));
allMappings.put("object", new CatMapping(location, "object", dictionary.getObjects().getNumberOfElements()));
Expand Down Expand Up @@ -60,11 +71,7 @@ public void diff(Dictionary dictionary, Map<String, ModifiableBitmap> bitmaps, P
listSkipSubj.add(itSkipSubs);

SharedWrapper sharedWrapper = new SharedWrapper(0, bitmaps.get("SH_S"), bitmaps.get("SH_O"), dictionary.getShared().getSortedEntries());
long numNewSubj = 0;
while (sharedWrapper.hasNext()) {
sharedWrapper.next();
numNewSubj++;
}
long numNewSubj = sharedWrapper.count();
sharedWrapper = new SharedWrapper(0, bitmaps.get("SH_S"), bitmaps.get("SH_O"), dictionary.getShared().getSortedEntries());
listSkipSubj.add(sharedWrapper);

Expand All @@ -84,11 +91,7 @@ public void diff(Dictionary dictionary, Map<String, ModifiableBitmap> bitmaps, P

// flag = 1 for objects
sharedWrapper = new SharedWrapper(1, bitmaps.get("SH_S"), bitmaps.get("SH_O"), dictionary.getShared().getSortedEntries());
long numNewObj = 0;
while (sharedWrapper.hasNext()) {
numNewObj++;
sharedWrapper.next();
}
long numNewObj = sharedWrapper.count();
sharedWrapper = new SharedWrapper(1, bitmaps.get("SH_S"), bitmaps.get("SH_O"), dictionary.getShared().getSortedEntries());
listSkipObjs.add(sharedWrapper);

Expand Down Expand Up @@ -215,6 +218,15 @@ public boolean hasNext() {
/**
 * Returns the element currently held in the {@code next} field.
 *
 * <p>This method does not advance anything itself; presumably the field is
 * populated by a preceding {@code hasNext()} call — confirm against the part
 * of the class not shown here.
 */
public CatElement next() {
return next;
}

/**
 * Counts the elements still available from this wrapper by draining it.
 *
 * <p>Only {@code hasNext()} is invoked: {@code next()} in this class merely
 * returns the cached element, so it is not needed to move forward (this
 * relies on {@code hasNext()} advancing the underlying cursor — confirm).
 * The wrapper is exhausted afterwards; create a fresh one to iterate again.
 *
 * @return the number of remaining elements
 * @throws ArithmeticException if there are more than
 *         {@link Integer#MAX_VALUE} elements, instead of silently wrapping
 *         to a negative count
 */
public int count() {
    int total = 0;
    while (hasNext()) {
        // incrementExact traps overflow so a huge section cannot yield a bogus size
        total = Math.incrementExact(total);
    }
    return total;
}
}

@Override
Expand Down
Loading

0 comments on commit a89c71b

Please sign in to comment.