Skip to content
4 changes: 1 addition & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
*.swp
*.swo
*.swn
*.sw?
*.ttl
*.bz2
67 changes: 62 additions & 5 deletions src/spatialjoin/SpatialJoinMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

using sj::ParseBatch;
using sj::Sweeper;
using util::geo::DE9IMFilter;
using util::geo::DLine;
using util::geo::DPoint;
using util::geo::I32Line;
Expand Down Expand Up @@ -107,12 +108,14 @@ void printHelp(int argc, char** argv) {
<< " --cache-max-size (default: " + std::to_string(DEFAULT_CACHE_SIZE) +
")"
<< "maximum approx. size in bytes of cache per type and\n"
<< std::setw(42) << " " << "thread, 0 = unlimited\n"
<< std::setw(42) << " "
<< "thread, 0 = unlimited\n"
<< std::setw(42)
<< " --cache-max-elements (default: " +
std::to_string(DEFAULT_CACHE_NUM_ELEMENTS) + ")"
<< "maximum number of elements per cache, type and thread,\n"
<< std::setw(42) << " " << "0 = unlimited\n"
<< std::setw(42) << " "
<< "0 = unlimited\n"
<< std::setw(42) << " --no-geometry-checks"
<< "do not compute geometric relations, only report number of\n"
<< std::setw(42) << " "
Expand All @@ -126,9 +129,6 @@ void printHelp(int argc, char** argv) {

// _____________________________________________________________________________
int main(int argc, char** argv) {
// disable output buffering for standard output
setbuf(stdout, NULL);

// initialize randomness
srand(time(NULL) + rand()); // NOLINT

Expand Down Expand Up @@ -163,6 +163,7 @@ int main(int argc, char** argv) {
size_t numCaches = NUM_THREADS;
size_t geomCacheMaxSizeBytes = DEFAULT_CACHE_SIZE;
size_t geomCacheMaxNumElements = DEFAULT_CACHE_NUM_ELEMENTS;
DE9IMFilter de9imFilter;

std::vector<std::string> inputFiles;

Expand Down Expand Up @@ -206,6 +207,8 @@ int main(int argc, char** argv) {
state = 15;
} else if (cur == "--cache-max-elements") {
state = 16;
} else if (cur == "--de9im-filter") {
state = 17;
} else if (cur == "--de9im") {
computeDE9IM = true;
} else if (cur == "--no-box-ids") {
Expand Down Expand Up @@ -294,7 +297,54 @@ int main(int argc, char** argv) {
std::stringstream(cur) >> geomCacheMaxNumElements;
state = 0;
break;
case 17:
if (cur.size() < 9) cur.insert(cur.size(), 9 - cur.size(), '*');
std::cout << cur << std::endl;
de9imFilter = cur.c_str();
state = 0;
break;
}
}

if (de9imFilter.maxExteriorDim() < 2) {
if (verbose) {
LOGTO(INFO, std::cerr) << "Skipping all comparisons because of DE-9IM "
"filter which will not match any pairs...";
LOGTO(INFO, std::cerr)
<< " (max exterior dim=" << (int)de9imFilter.maxExteriorDim() << ")";
}
return 0;
}

if (de9imFilter.minLeftBoundaryDim() > 1) {
if (verbose) {
LOGTO(INFO, std::cerr) << "Skipping all comparisons because of DE-9IM "
"filter which will not match any pairs...";
LOGTO(INFO, std::cerr)
<< " (min left boundary dim=" << (int)de9imFilter.minLeftBoundaryDim()
<< ")";
}
return 0;
}

if (de9imFilter.minRightBoundaryDim() > 1) {
if (verbose) {
LOGTO(INFO, std::cerr) << "Skipping all comparisons because of DE-9IM "
"filter which will not match any pairs...";
LOGTO(INFO, std::cerr) << " (min right boundary dim="
<< (int)de9imFilter.minRightBoundaryDim() << ")";
}
return 0;
}

if (de9imFilter.maxInteriorDim() < 0) {
if (verbose) {
LOGTO(INFO, std::cerr) << "Skipping all comparisons because of DE-9IM "
"filter which will not match any pairs...";
LOGTO(INFO, std::cerr)
<< " (max interior dim=" << (int)de9imFilter.maxInteriorDim() << ")";
}
return 0;
}

const static size_t CACHE_SIZE = 1024 * 1024;
Expand Down Expand Up @@ -333,6 +383,7 @@ int main(int argc, char** argv) {
noGeometryChecks,
withinDist,
computeDE9IM,
de9imFilter,
writeRelCb,
{},
{},
Expand Down Expand Up @@ -361,6 +412,10 @@ int main(int argc, char** argv) {
<< std::endl;
exit(1);
}

// already set number of sides to 2, if we have two files
sweeper.setNumSides(inputFiles.size());

for (size_t i = 0; i < inputFiles.size(); i++) {
if (util::endsWith(inputFiles[i], ".bz2")) {
#ifndef SPATIALJOIN_NO_BZIP2
Expand Down Expand Up @@ -448,4 +503,6 @@ int main(int argc, char** argv) {
sweeper.log("done (" + std::to_string(TOOK(ts) / 1000000000.0) + "s).");

delete[] buf;

return 0;
}
75 changes: 0 additions & 75 deletions src/spatialjoin/Stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,6 @@ struct Stats {

size_t totalComps = 0;

uint64_t timeSums[7] = {0, 0, 0, 0, 0, 0, 0};

double areaSizeSum = 0;
size_t areaCmps = 0;

Expand All @@ -57,7 +55,6 @@ struct Stats {
size_t anchorSum = 0;

std::string toString();
void timeHisto(size_t numPoints, uint64_t time);
};

inline std::string Stats::toString() {
Expand Down Expand Up @@ -172,44 +169,6 @@ inline std::string Stats::toString() {
ss << "time for output writing: " << t << " s (" << ((t / sum) * 100.0)
<< "%)\n";

double histoSum = ((timeSums[0] + timeSums[1] + timeSums[2] + timeSums[3] +
timeSums[4] + timeSums[5] + timeSums[6]) *
1.0) /
1000000000.0;

t = (timeSums[6] * 1.0) / 1000000000.0;
ss << "\n";
ss << "comparisons inv. > 1000000 points on one side: " << t << " s ("
<< ((t / histoSum) * 100.0) << "%)\n";

t = (timeSums[5] * 1.0) / 1000000000.0;
ss << "comparisons inv. > 100000 points on one side: " << t << " s ("
<< ((t / histoSum) * 100.0) << "%)\n";

t = (timeSums[4] * 1.0) / 1000000000.0;
ss << "comparisons inv. > 10000 points on one side: " << t << " s ("
<< ((t / histoSum) * 100.0) << "%)\n";

t = (timeSums[3] * 1.0) / 1000000000.0;
ss << "comparisons inv. > 1000 points on one side: " << t << " s ("
<< ((t / histoSum) * 100.0) << "%)\n";

t = (timeSums[2] * 1.0) / 1000000000.0;
ss << "comparisons inv. > 100 points on one side: " << t << " s ("
<< ((t / histoSum) * 100.0) << "%)\n";

t = (timeSums[1] * 1.0) / 1000000000.0;
ss << "comparisons inv. > 10 points on one side: " << t << " s ("
<< ((t / histoSum) * 100.0) << "%)\n";

t = (timeSums[0] * 1.0) / 1000000000.0;
ss << "comparisons inv. > 1 points on one side: " << t << " s ("
<< ((t / histoSum) * 100.0) << "%)\n";

t = (timeSums[0] * 1.0) / 1000000000.0;
ss << "comparisons inv. > 1 points on one side: " << t << " s ("
<< ((t / histoSum) * 100.0) << "%)\n";

ss << "\n";

ss << " Avg. max surface area between cmps: " << std::fixed
Expand All @@ -225,36 +184,6 @@ inline std::string Stats::toString() {
return ss.str();
}

// Adds `time` to the timeSums bucket selected by `numPoints`.
//
// Bucket i (for i = 1..6) collects calls with numPoints > 10^i, where the
// largest matching power of ten wins; everything with numPoints <= 10 lands
// in bucket 0. toString() divides these sums by 1e9 before printing them as
// seconds, so `time` is presumably given in nanoseconds.
inline void Stats::timeHisto(size_t numPoints, uint64_t time) {
  const size_t lastBucket = 6;

  // climb one bucket for every power of ten that numPoints strictly exceeds,
  // stopping at the last bucket (> 1000000 points)
  size_t bucket = 0;
  size_t threshold = 10;
  while (bucket < lastBucket && numPoints > threshold) {
    ++bucket;
    threshold *= 10;
  }

  timeSums[bucket] += time;
}

inline Stats operator+(const Stats& a, const Stats& b) {
return Stats{
a.timeGeoCacheRetrievalArea + b.timeGeoCacheRetrievalArea,
Expand Down Expand Up @@ -288,10 +217,6 @@ inline Stats operator+(const Stats& a, const Stats& b) {
a.innerOuterChecksAreaLine + b.innerOuterChecksAreaLine,
a.innerOuterChecksAreaPoint + b.innerOuterChecksAreaPoint,
a.totalComps + b.totalComps,
{a.timeSums[0] + b.timeSums[0], a.timeSums[1] + b.timeSums[1],
a.timeSums[2] + b.timeSums[2], a.timeSums[3] + b.timeSums[3],
a.timeSums[4] + b.timeSums[4], a.timeSums[5] + b.timeSums[5],
a.timeSums[6] + b.timeSums[6]},
a.areaSizeSum + b.areaSizeSum,
a.areaCmps + b.areaCmps,
a.lineLenSum + b.lineLenSum,
Expand Down
Loading
Loading